sanitize 2.1.1 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sanitize might be problematic. Click here for more details.

@@ -0,0 +1,545 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Sanitize::Transformers::CleanElement' do
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ strings = { # Shared fixtures: each entry maps an :html input to the expected output under the :default, :restricted, :basic and :relaxed keys.
9
+ :basic => {
10
+ :html => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <style>.foo { color: #fff; }</style> <script>alert("hello world");</script>',
11
+ :default => 'Lorem ipsum dolor sit amet ',
12
+ :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet ',
13
+ :basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
14
+ :relaxed => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> '
15
+ },
16
+
17
+ :malformed => { # input has a stray closing b tag, an unterminated title quote and mis-nested a/strong end tags
18
+ :html => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
19
+ :default => 'Lorem dolor sit amet ',
20
+ :restricted => 'Lorem <strong>dolor</strong> sit amet ',
21
+ :basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
22
+ :relaxed => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet ',
23
+ },
24
+
25
+ :unclosed => { # the blockquote in the input is never closed
26
+ :html => '<p>a</p><blockquote>b',
27
+ :default => ' a b ',
28
+ :restricted => ' a b ',
29
+ :basic => '<p>a</p><blockquote>b</blockquote>',
30
+ :relaxed => '<p>a</p><blockquote>b</blockquote>'
31
+ },
32
+
33
+ :malicious => { # javascript: href plus a script tag disguised as <<foo>script>
34
+ :html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
35
+ :default => 'Lorem ipsum dolor sit amet &lt;script&gt;alert("hello world");',
36
+ :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert("hello world");',
37
+ :basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");',
38
+ :relaxed => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");'
39
+ }
40
+ }
41
+
42
+ protocols = { # Protocol-filtering fixtures keyed by description; same :html / :default / :restricted / :basic / :relaxed shape as strings.
43
+ 'protocol-based JS injection: simple, no spaces' => {
44
+ :html => '<a href="javascript:alert(\'XSS\');">foo</a>',
45
+ :default => 'foo',
46
+ :restricted => 'foo',
47
+ :basic => '<a rel="nofollow">foo</a>',
48
+ :relaxed => '<a>foo</a>'
49
+ },
50
+
51
+ 'protocol-based JS injection: simple, spaces before' => {
52
+ :html => '<a href="javascript :alert(\'XSS\');">foo</a>',
53
+ :default => 'foo',
54
+ :restricted => 'foo',
55
+ :basic => '<a rel="nofollow">foo</a>',
56
+ :relaxed => '<a>foo</a>'
57
+ },
58
+
59
+ 'protocol-based JS injection: simple, spaces after' => {
60
+ :html => '<a href="javascript: alert(\'XSS\');">foo</a>',
61
+ :default => 'foo',
62
+ :restricted => 'foo',
63
+ :basic => '<a rel="nofollow">foo</a>',
64
+ :relaxed => '<a>foo</a>'
65
+ },
66
+
67
+ 'protocol-based JS injection: simple, spaces before and after' => {
68
+ :html => '<a href="javascript : alert(\'XSS\');">foo</a>',
69
+ :default => 'foo',
70
+ :restricted => 'foo',
71
+ :basic => '<a rel="nofollow">foo</a>',
72
+ :relaxed => '<a>foo</a>'
73
+ },
74
+
75
+ 'protocol-based JS injection: preceding colon' => {
76
+ :html => '<a href=":javascript:alert(\'XSS\');">foo</a>',
77
+ :default => 'foo',
78
+ :restricted => 'foo',
79
+ :basic => '<a rel="nofollow">foo</a>',
80
+ :relaxed => '<a>foo</a>'
81
+ },
82
+
83
+ 'protocol-based JS injection: UTF-8 encoding' => { # the colon is written as a decimal character reference
84
+ :html => '<a href="javascript&#58;">foo</a>',
85
+ :default => 'foo',
86
+ :restricted => 'foo',
87
+ :basic => '<a rel="nofollow">foo</a>',
88
+ :relaxed => '<a>foo</a>'
89
+ },
90
+
91
+ 'protocol-based JS injection: long UTF-8 encoding' => {
92
+ :html => '<a href="javascript&#0058;">foo</a>',
93
+ :default => 'foo',
94
+ :restricted => 'foo',
95
+ :basic => '<a rel="nofollow">foo</a>',
96
+ :relaxed => '<a>foo</a>'
97
+ },
98
+
99
+ 'protocol-based JS injection: long UTF-8 encoding without semicolons' => {
100
+ :html => '<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>',
101
+ :default => 'foo',
102
+ :restricted => 'foo',
103
+ :basic => '<a rel="nofollow">foo</a>',
104
+ :relaxed => '<a>foo</a>'
105
+ },
106
+
107
+ 'protocol-based JS injection: hex encoding' => { # the colon is written as a hexadecimal character reference
108
+ :html => '<a href="javascript&#x3A;">foo</a>',
109
+ :default => 'foo',
110
+ :restricted => 'foo',
111
+ :basic => '<a rel="nofollow">foo</a>',
112
+ :relaxed => '<a>foo</a>'
113
+ },
114
+
115
+ 'protocol-based JS injection: long hex encoding' => {
116
+ :html => '<a href="javascript&#x003A;">foo</a>',
117
+ :default => 'foo',
118
+ :restricted => 'foo',
119
+ :basic => '<a rel="nofollow">foo</a>',
120
+ :relaxed => '<a>foo</a>'
121
+ },
122
+
123
+ 'protocol-based JS injection: hex encoding without semicolons' => {
124
+ :html => '<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>',
125
+ :default => 'foo',
126
+ :restricted => 'foo',
127
+ :basic => '<a rel="nofollow">foo</a>',
128
+ :relaxed => '<a>foo</a>'
129
+ },
130
+
131
+ 'protocol-based JS injection: null char' => { # :html is double-quoted here so that \0 becomes an actual NUL character
132
+ :html => "<img src=java\0script:alert(\"XSS\")>",
133
+ :default => '',
134
+ :restricted => '',
135
+ :basic => '',
136
+ :relaxed => '<img>'
137
+ },
138
+
139
+ 'protocol-based JS injection: invalid URL char' => { # :html is single-quoted, so the backslash stays a literal backslash
140
+ :html => '<img src=java\script:alert("XSS")>',
141
+ :default => '',
142
+ :restricted => '',
143
+ :basic => '',
144
+ :relaxed => '<img>'
145
+ },
146
+
147
+ 'protocol-based JS injection: spaces and entities' => {
148
+ :html => '<img src=" &#14; javascript:alert(\'XSS\');">',
149
+ :default => '',
150
+ :restricted => '',
151
+ :basic => '',
152
+ :relaxed => '<img>'
153
+ },
154
+
155
+ 'protocol whitespace' => { # leading space before an allowed protocol; expected outputs show the href kept with the space removed
156
+ :html => '<a href=" http://example.com/"></a>',
157
+ :default => '',
158
+ :restricted => '',
159
+ :basic => '<a href="http://example.com/" rel="nofollow"></a>',
160
+ :relaxed => '<a href="http://example.com/"></a>'
161
+ }
162
+ }
163
+
164
+ describe 'Default config' do # Every spec in this group calls Sanitize.fragment without a config argument.
165
+ it 'should remove non-allowlisted elements, leaving safe contents behind' do
166
+ Sanitize.fragment('foo <b>bar</b> <strong><a href="#a">baz</a></strong> quux')
167
+ .must_equal 'foo bar baz quux'
168
+
169
+ Sanitize.fragment('<script>alert("<xss>");</script>')
170
+ .must_equal ''
171
+
172
+ Sanitize.fragment('<<script>script>alert("<xss>");</<script>>')
173
+ .must_equal '&lt;'
174
+
175
+ Sanitize.fragment('< script <>> alert("<xss>");</script>')
176
+ .must_equal '&lt; script &lt;&gt;&gt; alert("");'
177
+ end
178
+
179
+ it 'should surround the contents of :whitespace_elements with space characters when removing the element' do
180
+ Sanitize.fragment('foo<div>bar</div>baz')
181
+ .must_equal 'foo bar baz'
182
+
183
+ Sanitize.fragment('foo<br>bar<br>baz')
184
+ .must_equal 'foo bar baz'
185
+
186
+ Sanitize.fragment('foo<hr>bar<hr>baz')
187
+ .must_equal 'foo bar baz'
188
+ end
189
+
190
+ it 'should not choke on several instances of the same element in a row' do
191
+ Sanitize.fragment('<img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif">')
192
+ .must_equal ''
193
+ end
194
+
195
+ it 'should not preserve the content of removed `iframe` elements' do # this and the following specs each wrap text plus a script tag in one element and expect an empty result
196
+ Sanitize.fragment('<iframe>hello! <script>alert(0)</script></iframe>')
197
+ .must_equal ''
198
+ end
199
+
200
+ it 'should not preserve the content of removed `math` elements' do
201
+ Sanitize.fragment('<math>hello! <script>alert(0)</script></math>')
202
+ .must_equal ''
203
+ end
204
+
205
+ it 'should not preserve the content of removed `noembed` elements' do
206
+ Sanitize.fragment('<noembed>hello! <script>alert(0)</script></noembed>')
207
+ .must_equal ''
208
+ end
209
+
210
+ it 'should not preserve the content of removed `noframes` elements' do
211
+ Sanitize.fragment('<noframes>hello! <script>alert(0)</script></noframes>')
212
+ .must_equal ''
213
+ end
214
+
215
+ it 'should not preserve the content of removed `noscript` elements' do
216
+ Sanitize.fragment('<noscript>hello! <script>alert(0)</script></noscript>')
217
+ .must_equal ''
218
+ end
219
+
220
+ it 'should not preserve the content of removed `plaintext` elements' do # unlike its siblings, this input has no closing tag for the element
221
+ Sanitize.fragment('<plaintext>hello! <script>alert(0)</script>')
222
+ .must_equal ''
223
+ end
224
+
225
+ it 'should not preserve the content of removed `script` elements' do
226
+ Sanitize.fragment('<script>hello! <script>alert(0)</script></script>')
227
+ .must_equal ''
228
+ end
229
+
230
+ it 'should not preserve the content of removed `style` elements' do
231
+ Sanitize.fragment('<style>hello! <script>alert(0)</script></style>')
232
+ .must_equal ''
233
+ end
234
+
235
+ it 'should not preserve the content of removed `svg` elements' do
236
+ Sanitize.fragment('<svg>hello! <script>alert(0)</script></svg>')
237
+ .must_equal ''
238
+ end
239
+
240
+ it 'should not preserve the content of removed `xmp` elements' do
241
+ Sanitize.fragment('<xmp>hello! <script>alert(0)</script></xmp>')
242
+ .must_equal ''
243
+ end
244
+
245
+ strings.each do |name, data| # defines one spec per strings fixture, compared against its :default value
246
+ it "should clean #{name} HTML" do
247
+ Sanitize.fragment(data[:html]).must_equal(data[:default])
248
+ end
249
+ end
250
+
251
+ protocols.each do |name, data| # defines one spec per protocols fixture, compared against its :default value
252
+ it "should not allow #{name}" do
253
+ Sanitize.fragment(data[:html]).must_equal(data[:default])
254
+ end
255
+ end
256
+ end
257
+
258
+ describe 'Restricted config' do # Runs both fixture tables through a sanitizer built from Sanitize::Config::RESTRICTED.
259
+ before do
260
+ @s = Sanitize.new(Sanitize::Config::RESTRICTED)
261
+ end
262
+
263
+ strings.each do |name, data| # one spec per strings fixture, compared against its :restricted value
264
+ it "should clean #{name} HTML" do
265
+ @s.fragment(data[:html]).must_equal(data[:restricted])
266
+ end
267
+ end
268
+
269
+ protocols.each do |name, data| # one spec per protocols fixture, compared against its :restricted value
270
+ it "should not allow #{name}" do
271
+ @s.fragment(data[:html]).must_equal(data[:restricted])
272
+ end
273
+ end
274
+ end
275
+
276
+ describe 'Basic config' do # Specs for a sanitizer built from Sanitize::Config::BASIC.
277
+ before do
278
+ @s = Sanitize.new(Sanitize::Config::BASIC)
279
+ end
280
+
281
+ it 'should not choke on valueless attributes' do # a bare href is expected to come back with an empty value
282
+ @s.fragment('foo <a href>foo</a> bar')
283
+ .must_equal 'foo <a href="" rel="nofollow">foo</a> bar'
284
+ end
285
+
286
+ it 'should downcase attribute names' do # upper-case HREF must still be subject to the protocol check and get removed
287
+ @s.fragment('<a HREF="javascript:alert(\'foo\')">bar</a>')
288
+ .must_equal '<a rel="nofollow">bar</a>'
289
+ end
290
+
291
+ strings.each do |name, data| # one spec per strings fixture, compared against its :basic value
292
+ it "should clean #{name} HTML" do
293
+ @s.fragment(data[:html]).must_equal(data[:basic])
294
+ end
295
+ end
296
+
297
+ protocols.each do |name, data| # one spec per protocols fixture, compared against its :basic value
298
+ it "should not allow #{name}" do
299
+ @s.fragment(data[:html]).must_equal(data[:basic])
300
+ end
301
+ end
302
+ end
303
+
304
+ describe 'Relaxed config' do # Specs for a sanitizer built from Sanitize::Config::RELAXED.
305
+ before do
306
+ @s = Sanitize.new(Sanitize::Config::RELAXED)
307
+ end
308
+
309
+ it 'should encode special chars in attribute values' do # expected output decodes the eacute entity and escapes the bare ampersand
310
+ @s.fragment('<a href="http://example.com" title="<b>&eacute;xamples</b> & things">foo</a>')
311
+ .must_equal '<a href="http://example.com" title="<b>éxamples</b> &amp; things">foo</a>'
312
+ end
313
+
314
+ strings.each do |name, data| # one spec per strings fixture, compared against its :relaxed value
315
+ it "should clean #{name} HTML" do
316
+ @s.fragment(data[:html]).must_equal(data[:relaxed])
317
+ end
318
+ end
319
+
320
+ protocols.each do |name, data| # one spec per protocols fixture, compared against its :relaxed value
321
+ it "should not allow #{name}" do
322
+ @s.fragment(data[:html]).must_equal(data[:relaxed])
323
+ end
324
+ end
325
+ end
326
+
327
+ describe 'Custom configs' do # Each spec passes an ad-hoc config to Sanitize.fragment, Sanitize.new or Sanitize.document.
328
+ it 'should allow attributes on all elements if allowlisted under :all' do
329
+ input = '<p class="foo">bar</p>'
330
+
331
+ Sanitize.fragment(input).must_equal ' bar '
332
+
333
+ Sanitize.fragment(input, {
334
+ :elements => ['p'],
335
+ :attributes => {:all => ['class']}
336
+ }).must_equal input
337
+
338
+ Sanitize.fragment(input, {
339
+ :elements => ['p'],
340
+ :attributes => {'div' => ['class']} # class is allowlisted for div only, so the p is expected to lose it
341
+ }).must_equal '<p>bar</p>'
342
+
343
+ Sanitize.fragment(input, {
344
+ :elements => ['p'],
345
+ :attributes => {'p' => ['title'], :all => ['class']} # the :all entry still applies when p has its own list
346
+ }).must_equal input
347
+ end
348
+
349
+ it "should not allow relative URLs when relative URLs aren't allowlisted" do
350
+ input = '<a href="/foo/bar">Link</a>'
351
+
352
+ Sanitize.fragment(input,
353
+ :elements => ['a'],
354
+ :attributes => {'a' => ['href']},
355
+ :protocols => {'a' => {'href' => ['http']}}
356
+ ).must_equal '<a>Link</a>'
357
+ end
358
+
359
+ it 'should allow relative URLs containing colons when the colon is not in the first path segment' do
360
+ input = '<a href="/wiki/Special:Random">Random Page</a>'
361
+
362
+ Sanitize.fragment(input, {
363
+ :elements => ['a'],
364
+ :attributes => {'a' => ['href']},
365
+ :protocols => {'a' => {'href' => [:relative]}}
366
+ }).must_equal input
367
+ end
368
+
369
+ it 'should allow relative URLs containing colons when the colon is part of an anchor' do
370
+ input = '<a href="#fn:1">Footnote 1</a>'
371
+
372
+ Sanitize.fragment(input, {
373
+ :elements => ['a'],
374
+ :attributes => {'a' => ['href']},
375
+ :protocols => {'a' => {'href' => [:relative]}}
376
+ }).must_equal input
377
+
378
+ input = '<a href="somepage#fn:1">Footnote 1</a>'
379
+
380
+ Sanitize.fragment(input, {
381
+ :elements => ['a'],
382
+ :attributes => {'a' => ['href']},
383
+ :protocols => {'a' => {'href' => [:relative]}}
384
+ }).must_equal input
385
+ end
386
+
387
+ it 'should remove the contents of filtered nodes when :remove_contents is true' do
388
+ Sanitize.fragment('foo bar <div>baz<span>quux</span></div>',
389
+ :remove_contents => true
390
+ ).must_equal 'foo bar '
391
+ end
392
+
393
+ it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as strings' do
394
+ Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
395
+ :remove_contents => ['script', 'span']
396
+ ).must_equal 'foo bar baz hi '
397
+
398
+ Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
399
+ :remove_contents => Set.new(['script', 'span'])
400
+ ).must_equal 'foo bar baz hi '
401
+ end
402
+
403
+ it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as symbols' do
404
+ Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
405
+ :remove_contents => [:script, :span]
406
+ ).must_equal 'foo bar baz hi '
407
+
408
+ Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
409
+ :remove_contents => Set.new([:script, :span])
410
+ ).must_equal 'foo bar baz hi '
411
+ end
412
+
413
+ it 'should remove the contents of allowlisted iframes' do # the iframe element itself is kept, only its children are dropped
414
+ Sanitize.fragment('<iframe>hi <script>hello</script></iframe>',
415
+ :elements => ['iframe']
416
+ ).must_equal '<iframe></iframe>'
417
+ end
418
+
419
+ it 'should not allow arbitrary HTML5 data attributes by default' do
420
+ Sanitize.fragment('<b data-foo="bar"></b>',
421
+ :elements => ['b']
422
+ ).must_equal '<b></b>'
423
+
424
+ Sanitize.fragment('<b class="foo" data-foo="bar"></b>',
425
+ :attributes => {'b' => ['class']},
426
+ :elements => ['b']
427
+ ).must_equal '<b class="foo"></b>'
428
+ end
429
+
430
+ it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do
431
+ s = Sanitize.new(
432
+ :attributes => {'b' => [:data]},
433
+ :elements => ['b']
434
+ )
435
+
436
+ s.fragment('<b data-foo="valid" data-bar="valid"></b>')
437
+ .must_equal '<b data-foo="valid" data-bar="valid"></b>'
438
+
439
+ s.fragment('<b data-="invalid"></b>')
440
+ .must_equal '<b></b>'
441
+
442
+ s.fragment('<b data-="invalid"></b>') # NOTE(review): identical to the assertion just above; possibly meant to cover a different input - confirm
443
+ .must_equal '<b></b>'
444
+
445
+ s.fragment('<b data-xml="invalid"></b>')
446
+ .must_equal '<b></b>'
447
+
448
+ s.fragment('<b data-xmlfoo="invalid"></b>')
449
+ .must_equal '<b></b>'
450
+
451
+ s.fragment('<b data-f:oo="valid"></b>') # NOTE(review): the value says valid, yet the expected output drops the attribute - confirm intent
452
+ .must_equal '<b></b>'
453
+
454
+ s.fragment('<b data-f/oo="partial"></b>')
455
+ .must_equal '<b data-f=""></b>' # Nokogiri quirk; not ideal, but harmless
456
+
457
+ s.fragment('<b data-éfoo="valid"></b>')
458
+ .must_equal '<b></b>' # Another annoying Nokogiri quirk.
459
+ end
460
+
461
+ it 'should replace whitespace_elements with configured :before and :after values' do
462
+ s = Sanitize.new(
463
+ :whitespace_elements => {
464
+ 'p' => { :before => "\n", :after => "\n" },
465
+ 'div' => { :before => "\n", :after => "\n" },
466
+ 'br' => { :before => "\n", :after => "\n" },
467
+ }
468
+ )
469
+
470
+ s.fragment('<p>foo</p>').must_equal "\nfoo\n"
471
+ s.fragment('<p>foo</p><p>bar</p>').must_equal "\nfoo\n\nbar\n"
472
+ s.fragment('foo<div>bar</div>baz').must_equal "foo\nbar\nbaz"
473
+ s.fragment('foo<br>bar<br>baz').must_equal "foo\nbar\nbaz" # br yields a single newline here even though both :before and :after are set
474
+ end
475
+
476
+ it 'should handle protocols correctly regardless of case' do
477
+ input = '<a href="hTTpS://foo.com/">Text</a>'
478
+
479
+ Sanitize.fragment(input, {
480
+ :elements => ['a'],
481
+ :attributes => {'a' => ['href']},
482
+ :protocols => {'a' => {'href' => ['https']}}
483
+ }).must_equal input
484
+
485
+ input = '<a href="mailto:someone@example.com?Subject=Hello">Text</a>'
486
+
487
+ Sanitize.fragment(input, {
488
+ :elements => ['a'],
489
+ :attributes => {'a' => ['href']},
490
+ :protocols => {'a' => {'href' => ['https']}}
491
+ }).must_equal "<a>Text</a>"
492
+ end
493
+
494
+ it 'should sanitize protocols in data attributes even if data attributes are generically allowed' do
495
+ input = '<a data-url="mailto:someone@example.com">Text</a>'
496
+
497
+ Sanitize.fragment(input, {
498
+ :elements => ['a'],
499
+ :attributes => {'a' => [:data]},
500
+ :protocols => {'a' => {'data-url' => ['https']}}
501
+ }).must_equal "<a>Text</a>"
502
+
503
+ Sanitize.fragment(input, {
504
+ :elements => ['a'],
505
+ :attributes => {'a' => [:data]},
506
+ :protocols => {'a' => {'data-url' => ['mailto']}}
507
+ }).must_equal input
508
+ end
509
+
510
+ it 'should prevent `<meta>` tags from being used to set a non-UTF-8 charset' do # uses Sanitize.document rather than Sanitize.fragment
511
+ Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
512
+ :elements => %w[html head meta body],
513
+ :attributes => {'meta' => ['charset']}
514
+ ).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>"
515
+
516
+ Sanitize.document('<html><meta charset="utf-8">Howdy!</html>',
517
+ :elements => %w[html meta],
518
+ :attributes => {'meta' => ['charset']}
519
+ ).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
520
+
521
+ Sanitize.document('<html><meta charset="us-ascii">Howdy!</html>', # a non-UTF-8 charset is expected to be rewritten to utf-8
522
+ :elements => %w[html meta],
523
+ :attributes => {'meta' => ['charset']}
524
+ ).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
525
+
526
+ Sanitize.document('<html><meta http-equiv="content-type" content=" text/html; charset=us-ascii">Howdy!</html>',
527
+ :elements => %w[html meta],
528
+ :attributes => {'meta' => %w[content http-equiv]}
529
+ ).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>"
530
+
531
+ Sanitize.document('<html><meta http-equiv="Content-Type" content="text/plain;charset = us-ascii">Howdy!</html>',
532
+ :elements => %w[html meta],
533
+ :attributes => {'meta' => %w[content http-equiv]}
534
+ ).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>"
535
+ end
536
+
537
+ it 'should not modify `<meta>` tags that already set a UTF-8 charset' do
538
+ Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
539
+ :elements => %w[html head meta body],
540
+ :attributes => {'meta' => %w[content http-equiv]}
541
+ ).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>"
542
+ end
543
+
544
+ end
545
+ end
@@ -0,0 +1,65 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Config' do # Specs for the built-in Sanitize::Config constants and for Sanitize::Config.freeze_config / .merge.
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ def verify_deeply_frozen(config) # asserts config is frozen, then recurses into Hash values and Set/Array elements (Hash keys are not checked)
9
+ config.must_be :frozen?
10
+
11
+ if Hash === config
12
+ config.each_value {|v| verify_deeply_frozen(v) }
13
+ elsif Set === config || Array === config
14
+ config.each {|v| verify_deeply_frozen(v) }
15
+ end
16
+ end
17
+
18
+ it 'built-in configs should be deeply frozen' do
19
+ verify_deeply_frozen Sanitize::Config::DEFAULT
20
+ verify_deeply_frozen Sanitize::Config::BASIC
21
+ verify_deeply_frozen Sanitize::Config::RELAXED
22
+ verify_deeply_frozen Sanitize::Config::RESTRICTED
23
+ end
24
+
25
+ describe '.freeze_config' do
26
+ it 'should deeply freeze and return a configuration Hash' do
27
+ a = {:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}}
28
+ b = Sanitize::Config.freeze_config(a)
29
+
30
+ b.must_be_same_as a # freeze_config is expected to return the very same object, not a copy
31
+ verify_deeply_frozen a
32
+ end
33
+ end
34
+
35
+ describe '.merge' do
36
+ it 'should deeply merge a configuration Hash' do
37
+ # Freeze to ensure that we get an error if either Hash is modified.
38
+ a = Sanitize::Config.freeze_config({:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}})
39
+ b = Sanitize::Config.freeze_config({:one => {:one_two => true, :one_three => 3}, :two => 2})
40
+
41
+ c = Sanitize::Config.merge(a, b)
42
+
43
+ c.wont_be_same_as a # the result must be a new Hash rather than either input
44
+ c.wont_be_same_as b
45
+
46
+ c.must_equal(
47
+ :one => {
48
+ :one_one => [0, '1', :a],
49
+ :one_two => true,
50
+ :one_three => 3 # the value from b replaces the Set from a
51
+ },
52
+
53
+ :two => 2
54
+ )
55
+
56
+ c[:one].wont_be_same_as a[:one] # nested containers are expected to be copies, not shared with a
57
+ c[:one][:one_one].wont_be_same_as a[:one][:one_one]
58
+ end
59
+
60
+ it 'should raise an ArgumentError if either argument is not a Hash' do
61
+ proc { Sanitize::Config.merge('foo', {}) }.must_raise ArgumentError
62
+ proc { Sanitize::Config.merge({}, 'foo') }.must_raise ArgumentError
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ # Miscellaneous attempts to sneak maliciously crafted CSS past Sanitize. Some of
5
+ # these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
6
+ # Sheet.
7
+ #
8
+ # https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
9
+
10
+ describe 'Malicious CSS' do # Feeds hostile CSS to Sanitize::CSS#properties and expects an empty string back each time.
11
+ make_my_diffs_pretty!
12
+ parallelize_me!
13
+
14
+ before do # @s is a Sanitize::CSS instance configured with Sanitize::Config::RELAXED
15
+ @s = Sanitize::CSS.new(Sanitize::Config::RELAXED)
16
+ end
17
+
18
+ it 'should not be possible to inject an expression by munging it with a comment' do
19
+ @s.properties(%[width:expr/*XSS*/ession(alert('XSS'))]).
20
+ must_equal ''
21
+
22
+ @s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))]).
23
+ must_equal ''
24
+ end
25
+
26
+ it 'should not be possible to inject an expression by munging it with a newline' do # the percent literal processes escapes, so \n is a real newline
27
+ @s.properties(%[width:\nexpression(alert('XSS'));]).
28
+ must_equal ''
29
+ end
30
+
31
+ it 'should not allow the javascript protocol' do # second assertion goes through Sanitize.fragment with a style attribute instead of @s
32
+ @s.properties(%[background-image:url("javascript:alert('XSS')");]).
33
+ must_equal ''
34
+
35
+ Sanitize.fragment(%[<div style="background-image: url(&#1;javascript:alert('XSS'))">],
36
+ Sanitize::Config::RELAXED).must_equal '<div></div>'
37
+ end
38
+
39
+ it 'should not allow behaviors' do
40
+ @s.properties(%[behavior: url(xss.htc);]).must_equal ''
41
+ end
42
+ end