sanitize 4.6.6 → 5.2.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +147 -16
- data/README.md +61 -41
- data/lib/sanitize.rb +37 -61
- data/lib/sanitize/config/default.rb +10 -4
- data/lib/sanitize/css.rb +2 -2
- data/lib/sanitize/transformers/clean_comment.rb +1 -1
- data/lib/sanitize/transformers/clean_css.rb +3 -3
- data/lib/sanitize/transformers/clean_doctype.rb +1 -1
- data/lib/sanitize/transformers/clean_element.rb +54 -13
- data/lib/sanitize/version.rb +1 -1
- data/test/common.rb +0 -31
- data/test/test_clean_comment.rb +1 -5
- data/test/test_clean_css.rb +1 -1
- data/test/test_clean_doctype.rb +8 -8
- data/test/test_clean_element.rb +121 -26
- data/test/test_malicious_html.rb +50 -7
- data/test/test_parser.rb +3 -32
- data/test/test_sanitize.rb +103 -18
- data/test/test_sanitize_css.rb +43 -16
- data/test/test_transformers.rb +29 -23
- metadata +16 -18
- data/test/test_unicode.rb +0 -95
data/test/test_clean_css.rb
CHANGED
@@ -13,7 +13,7 @@ describe 'Sanitize::Transformers::CSS::CleanAttribute' do
|
|
13
13
|
@s.fragment(%[
|
14
14
|
<div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
|
15
15
|
].strip).must_equal %[
|
16
|
-
<div style="color: #fff; /*
|
16
|
+
<div style="color: #fff; /* <-- evil! */"></div>
|
17
17
|
].strip
|
18
18
|
end
|
19
19
|
|
data/test/test_clean_doctype.rb
CHANGED
@@ -11,7 +11,7 @@ describe 'Sanitize::Transformers::CleanDoctype' do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'should remove doctype declarations' do
|
14
|
-
@s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html
|
14
|
+
@s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html>"
|
15
15
|
@s.fragment('<!DOCTYPE html>foo').must_equal 'foo'
|
16
16
|
end
|
17
17
|
|
@@ -34,27 +34,27 @@ describe 'Sanitize::Transformers::CleanDoctype' do
|
|
34
34
|
|
35
35
|
it 'should allow doctype declarations in documents' do
|
36
36
|
@s.document('<!DOCTYPE html><html>foo</html>')
|
37
|
-
.must_equal "<!DOCTYPE html
|
37
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
38
38
|
|
39
39
|
@s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
40
|
-
.must_equal "<!DOCTYPE html
|
40
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
41
41
|
|
42
42
|
@s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
43
|
-
.must_equal "<!DOCTYPE html
|
43
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
44
44
|
end
|
45
45
|
|
46
46
|
it 'should not allow obviously invalid doctype declarations in documents' do
|
47
47
|
@s.document('<!DOCTYPE blah blah blah><html>foo</html>')
|
48
|
-
.must_equal "<!DOCTYPE html
|
48
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
49
49
|
|
50
50
|
@s.document('<!DOCTYPE blah><html>foo</html>')
|
51
|
-
.must_equal "<!DOCTYPE html
|
51
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
52
52
|
|
53
53
|
@s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
54
|
-
.must_equal "<!DOCTYPE html
|
54
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
55
55
|
|
56
56
|
@s.document('<!whatever><html>foo</html>')
|
57
|
-
.must_equal "<html>foo</html
|
57
|
+
.must_equal "<html>foo</html>"
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'should not allow doctype definitions in fragments' do
|
data/test/test_clean_element.rb
CHANGED
@@ -8,25 +8,22 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
8
8
|
strings = {
|
9
9
|
:basic => {
|
10
10
|
:html => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <style>.foo { color: #fff; }</style> <script>alert("hello world");</script>',
|
11
|
-
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:relaxed => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> alert("hello world");'
|
11
|
+
:default => 'Lorem ipsum dolor sit amet ',
|
12
|
+
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet ',
|
13
|
+
:basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
|
14
|
+
:relaxed => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> '
|
16
15
|
},
|
17
16
|
|
18
17
|
:malformed => {
|
19
18
|
:html => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
|
20
|
-
|
21
|
-
:
|
22
|
-
:
|
23
|
-
:
|
24
|
-
:relaxed => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet alert("hello world");',
|
19
|
+
:default => 'Lorem dolor sit amet ',
|
20
|
+
:restricted => 'Lorem <strong>dolor</strong> sit amet ',
|
21
|
+
:basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
|
22
|
+
:relaxed => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet ',
|
25
23
|
},
|
26
24
|
|
27
25
|
:unclosed => {
|
28
26
|
:html => '<p>a</p><blockquote>b',
|
29
|
-
|
30
27
|
:default => ' a b ',
|
31
28
|
:restricted => ' a b ',
|
32
29
|
:basic => '<p>a</p><blockquote>b</blockquote>',
|
@@ -35,7 +32,6 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
35
32
|
|
36
33
|
:malicious => {
|
37
34
|
:html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
|
38
|
-
|
39
35
|
:default => 'Lorem ipsum dolor sit amet &lt;script&gt;alert("hello world");',
|
40
36
|
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert("hello world");',
|
41
37
|
:basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");',
|
@@ -166,15 +162,15 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
166
162
|
}
|
167
163
|
|
168
164
|
describe 'Default config' do
|
169
|
-
it 'should remove non-whitelisted elements, leaving safe contents behind' do
|
165
|
+
it 'should remove non-allowlisted elements, leaving safe contents behind' do
|
170
166
|
Sanitize.fragment('foo <b>bar</b> <strong><a href="#a">baz</a></strong> quux')
|
171
167
|
.must_equal 'foo bar baz quux'
|
172
168
|
|
173
169
|
Sanitize.fragment('<script>alert("<xss>");</script>')
|
174
|
-
.must_equal '
|
170
|
+
.must_equal ''
|
175
171
|
|
176
172
|
Sanitize.fragment('<<script>script>alert("<xss>");</<script>>')
|
177
|
-
.must_equal '<
|
173
|
+
.must_equal '&lt;'
|
178
174
|
|
179
175
|
Sanitize.fragment('< script <>> alert("<xss>");</script>')
|
180
176
|
.must_equal '&lt; script &lt;&gt;&gt; alert("");'
|
@@ -196,6 +192,56 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
196
192
|
.must_equal ''
|
197
193
|
end
|
198
194
|
|
195
|
+
it 'should not preserve the content of removed `iframe` elements' do
|
196
|
+
Sanitize.fragment('<iframe>hello! <script>alert(0)</script></iframe>')
|
197
|
+
.must_equal ''
|
198
|
+
end
|
199
|
+
|
200
|
+
it 'should not preserve the content of removed `math` elements' do
|
201
|
+
Sanitize.fragment('<math>hello! <script>alert(0)</script></math>')
|
202
|
+
.must_equal ''
|
203
|
+
end
|
204
|
+
|
205
|
+
it 'should not preserve the content of removed `noembed` elements' do
|
206
|
+
Sanitize.fragment('<noembed>hello! <script>alert(0)</script></noembed>')
|
207
|
+
.must_equal ''
|
208
|
+
end
|
209
|
+
|
210
|
+
it 'should not preserve the content of removed `noframes` elements' do
|
211
|
+
Sanitize.fragment('<noframes>hello! <script>alert(0)</script></noframes>')
|
212
|
+
.must_equal ''
|
213
|
+
end
|
214
|
+
|
215
|
+
it 'should not preserve the content of removed `noscript` elements' do
|
216
|
+
Sanitize.fragment('<noscript>hello! <script>alert(0)</script></noscript>')
|
217
|
+
.must_equal ''
|
218
|
+
end
|
219
|
+
|
220
|
+
it 'should not preserve the content of removed `plaintext` elements' do
|
221
|
+
Sanitize.fragment('<plaintext>hello! <script>alert(0)</script>')
|
222
|
+
.must_equal ''
|
223
|
+
end
|
224
|
+
|
225
|
+
it 'should not preserve the content of removed `script` elements' do
|
226
|
+
Sanitize.fragment('<script>hello! <script>alert(0)</script></script>')
|
227
|
+
.must_equal ''
|
228
|
+
end
|
229
|
+
|
230
|
+
it 'should not preserve the content of removed `style` elements' do
|
231
|
+
Sanitize.fragment('<style>hello! <script>alert(0)</script></style>')
|
232
|
+
.must_equal ''
|
233
|
+
end
|
234
|
+
|
235
|
+
it 'should not preserve the content of removed `svg` elements' do
|
236
|
+
Sanitize.fragment('<svg>hello! <script>alert(0)</script></svg>')
|
237
|
+
.must_equal ''
|
238
|
+
end
|
239
|
+
|
240
|
+
it 'should not preserve the content of removed `xmp` elements' do
|
241
|
+
Sanitize.fragment('<xmp>hello! <script>alert(0)</script></xmp>')
|
242
|
+
.must_equal ''
|
243
|
+
end
|
244
|
+
|
199
245
|
strings.each do |name, data|
|
200
246
|
it "should clean #{name} HTML" do
|
201
247
|
Sanitize.fragment(data[:html]).must_equal(data[:default])
|
@@ -234,7 +280,7 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
234
280
|
|
235
281
|
it 'should not choke on valueless attributes' do
|
236
282
|
@s.fragment('foo <a href>foo</a> bar')
|
237
|
-
.must_equal 'foo <a href rel="nofollow">foo</a> bar'
|
283
|
+
.must_equal 'foo <a href="" rel="nofollow">foo</a> bar'
|
238
284
|
end
|
239
285
|
|
240
286
|
it 'should downcase attribute names' do
|
@@ -262,7 +308,7 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
262
308
|
|
263
309
|
it 'should encode special chars in attribute values' do
|
264
310
|
@s.fragment('<a href="http://example.com" title="<b>éxamples</b> & things">foo</a>')
|
265
|
-
.must_equal '<a href="http://example.com" title="
|
311
|
+
.must_equal '<a href="http://example.com" title="<b>éxamples</b> &amp; things">foo</a>'
|
266
312
|
end
|
267
313
|
|
268
314
|
strings.each do |name, data|
|
@@ -279,7 +325,7 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
279
325
|
end
|
280
326
|
|
281
327
|
describe 'Custom configs' do
|
282
|
-
it 'should allow attributes on all elements if whitelisted under :all' do
|
328
|
+
it 'should allow attributes on all elements if allowlisted under :all' do
|
283
329
|
input = '<p class="foo">bar</p>'
|
284
330
|
|
285
331
|
Sanitize.fragment(input).must_equal ' bar '
|
@@ -300,7 +346,7 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
300
346
|
}).must_equal input
|
301
347
|
end
|
302
348
|
|
303
|
-
it "should not allow relative URLs when relative URLs aren't
|
349
|
+
it "should not allow relative URLs when relative URLs aren't allowlisted" do
|
304
350
|
input = '<a href="/foo/bar">Link</a>'
|
305
351
|
|
306
352
|
Sanitize.fragment(input,
|
@@ -344,16 +390,30 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
344
390
|
).must_equal 'foo bar '
|
345
391
|
end
|
346
392
|
|
347
|
-
it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as strings' do
|
348
|
-
Sanitize.fragment('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>',
|
393
|
+
it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as strings' do
|
394
|
+
Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
349
395
|
:remove_contents => ['script', 'span']
|
350
|
-
).must_equal 'foo bar baz '
|
396
|
+
).must_equal 'foo bar baz hi '
|
397
|
+
|
398
|
+
Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
399
|
+
:remove_contents => Set.new(['script', 'span'])
|
400
|
+
).must_equal 'foo bar baz hi '
|
351
401
|
end
|
352
402
|
|
353
|
-
it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as symbols' do
|
354
|
-
Sanitize.fragment('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>',
|
403
|
+
it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as symbols' do
|
404
|
+
Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
355
405
|
:remove_contents => [:script, :span]
|
356
|
-
).must_equal 'foo bar baz '
|
406
|
+
).must_equal 'foo bar baz hi '
|
407
|
+
|
408
|
+
Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
409
|
+
:remove_contents => Set.new([:script, :span])
|
410
|
+
).must_equal 'foo bar baz hi '
|
411
|
+
end
|
412
|
+
|
413
|
+
it 'should remove the contents of allowlisted iframes' do
|
414
|
+
Sanitize.fragment('<iframe>hi <script>hello</script></iframe>',
|
415
|
+
:elements => ['iframe']
|
416
|
+
).must_equal '<iframe></iframe>'
|
357
417
|
end
|
358
418
|
|
359
419
|
it 'should not allow arbitrary HTML5 data attributes by default' do
|
@@ -413,7 +473,7 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
413
473
|
s.fragment('foo<br>bar<br>baz').must_equal "foo\nbar\nbaz"
|
414
474
|
end
|
415
475
|
|
416
|
-
it '
|
476
|
+
it 'should handle protocols correctly regardless of case' do
|
417
477
|
input = '<a href="hTTpS://foo.com/">Text</a>'
|
418
478
|
|
419
479
|
Sanitize.fragment(input, {
|
@@ -430,5 +490,40 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
430
490
|
:protocols => {'a' => {'href' => ['https']}}
|
431
491
|
}).must_equal "<a>Text</a>"
|
432
492
|
end
|
493
|
+
|
494
|
+
it 'should prevent `<meta>` tags from being used to set a non-UTF-8 charset' do
|
495
|
+
Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
|
496
|
+
:elements => %w[html head meta body],
|
497
|
+
:attributes => {'meta' => ['charset']}
|
498
|
+
).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>"
|
499
|
+
|
500
|
+
Sanitize.document('<html><meta charset="utf-8">Howdy!</html>',
|
501
|
+
:elements => %w[html meta],
|
502
|
+
:attributes => {'meta' => ['charset']}
|
503
|
+
).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
504
|
+
|
505
|
+
Sanitize.document('<html><meta charset="us-ascii">Howdy!</html>',
|
506
|
+
:elements => %w[html meta],
|
507
|
+
:attributes => {'meta' => ['charset']}
|
508
|
+
).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
509
|
+
|
510
|
+
Sanitize.document('<html><meta http-equiv="content-type" content=" text/html; charset=us-ascii">Howdy!</html>',
|
511
|
+
:elements => %w[html meta],
|
512
|
+
:attributes => {'meta' => %w[content http-equiv]}
|
513
|
+
).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>"
|
514
|
+
|
515
|
+
Sanitize.document('<html><meta http-equiv="Content-Type" content="text/plain;charset = us-ascii">Howdy!</html>',
|
516
|
+
:elements => %w[html meta],
|
517
|
+
:attributes => {'meta' => %w[content http-equiv]}
|
518
|
+
).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>"
|
519
|
+
end
|
520
|
+
|
521
|
+
it 'should not modify `<meta>` tags that already set a UTF-8 charset' do
|
522
|
+
Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
|
523
|
+
:elements => %w[html head meta body],
|
524
|
+
:attributes => {'meta' => %w[content http-equiv]}
|
525
|
+
).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>"
|
526
|
+
end
|
527
|
+
|
433
528
|
end
|
434
529
|
end
|
data/test/test_malicious_html.rb
CHANGED
@@ -43,7 +43,7 @@ describe 'Malicious HTML' do
|
|
43
43
|
describe '<body>' do
|
44
44
|
it 'should not be possible to inject JS via a malformed event attribute' do
|
45
45
|
@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>').
|
46
|
-
must_equal "<html><head></head><body></body></html
|
46
|
+
must_equal "<html><head></head><body></body></html>"
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
@@ -65,7 +65,7 @@ describe 'Malicious HTML' do
|
|
65
65
|
|
66
66
|
it 'should not be possible to inject <script> via a malformed <img> tag' do
|
67
67
|
@s.fragment('<img """><script>alert("XSS")</script>">').
|
68
|
-
must_equal '<img>
|
68
|
+
must_equal '<img>"&gt;'
|
69
69
|
end
|
70
70
|
|
71
71
|
it 'should not be possible to inject protocol-based JS' do
|
@@ -117,24 +117,26 @@ describe 'Malicious HTML' do
|
|
117
117
|
describe '<script>' do
|
118
118
|
it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do
|
119
119
|
@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]).
|
120
|
-
must_equal '
|
120
|
+
must_equal ''
|
121
121
|
end
|
122
122
|
|
123
123
|
it 'should not be possible to inject <script> via extraneous open brackets' do
|
124
124
|
@s.fragment(%[<<script>alert("XSS");//<</script>]).
|
125
|
-
must_equal '<
|
125
|
+
must_equal '&lt;'
|
126
126
|
end
|
127
127
|
end
|
128
128
|
|
129
129
|
# libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
|
130
130
|
# attempt to preserve server-side includes. This can result in XSS since an
|
131
|
-
# unescaped double quote can allow an attacker to inject a non-whitelisted
|
131
|
+
# unescaped double quote can allow an attacker to inject a non-allowlisted
|
132
132
|
# attribute. Sanitize works around this by implementing its own escaping for
|
133
133
|
# affected attributes.
|
134
134
|
#
|
135
135
|
# The relevant libxml2 code is here:
|
136
136
|
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
137
137
|
describe 'unsafe libxml2 server-side includes in attributes' do
|
138
|
+
using_unpatched_libxml2 = Nokogiri::VersionInfo.instance.libxml2_using_system?
|
139
|
+
|
138
140
|
tag_configs = [
|
139
141
|
{
|
140
142
|
tag_name: 'a',
|
@@ -166,7 +168,21 @@ describe 'Malicious HTML' do
|
|
166
168
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
167
169
|
|
168
170
|
it 'should escape unsafe characters in attributes' do
|
169
|
-
|
171
|
+
skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
|
172
|
+
|
173
|
+
# This uses Nokogumbo's HTML-compliant serializer rather than
|
174
|
+
# libxml2's.
|
175
|
+
@s.fragment(input).
|
176
|
+
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
177
|
+
|
178
|
+
# This uses the not-quite-standards-compliant libxml2 serializer via
|
179
|
+
# Nokogiri, so the output may be a little different as of Nokogiri
|
180
|
+
# 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
|
181
|
+
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
182
|
+
fragment = Nokogiri::HTML.fragment(input)
|
183
|
+
@s.node!(fragment)
|
184
|
+
fragment.to_html.
|
185
|
+
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
170
186
|
end
|
171
187
|
|
172
188
|
it 'should round-trip to the same output' do
|
@@ -179,7 +195,21 @@ describe 'Malicious HTML' do
|
|
179
195
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
180
196
|
|
181
197
|
it 'should not escape characters unnecessarily' do
|
182
|
-
|
198
|
+
skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
|
199
|
+
|
200
|
+
# This uses Nokogumbo's HTML-compliant serializer rather than
|
201
|
+
# libxml2's.
|
202
|
+
@s.fragment(input).
|
203
|
+
must_equal(%[<#{tag_name} #{attr_name}="examp<!--" onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
204
|
+
|
205
|
+
# This uses the not-quite-standards-compliant libxml2 serializer via
|
206
|
+
# Nokogiri, so the output may be a little different as of Nokogiri
|
207
|
+
# 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
|
208
|
+
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
209
|
+
fragment = Nokogiri::HTML.fragment(input)
|
210
|
+
@s.node!(fragment)
|
211
|
+
fragment.to_html.
|
212
|
+
must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
|
183
213
|
end
|
184
214
|
|
185
215
|
it 'should round-trip to the same output' do
|
@@ -189,4 +219,17 @@ describe 'Malicious HTML' do
|
|
189
219
|
end
|
190
220
|
end
|
191
221
|
end
|
222
|
+
|
223
|
+
# https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
|
224
|
+
describe 'foreign content bypass in relaxed config' do
|
225
|
+
it 'prevents a sanitization bypass via carefully crafted foreign content' do
|
226
|
+
%w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
|
227
|
+
@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]).
|
228
|
+
must_equal ''
|
229
|
+
|
230
|
+
@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]).
|
231
|
+
must_equal ''
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
192
235
|
end
|
data/test/test_parser.rb
CHANGED
@@ -19,8 +19,8 @@ describe 'Parser' do
|
|
19
19
|
end
|
20
20
|
|
21
21
|
it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do
|
22
|
-
Sanitize.fragment('foo <script>bar').must_equal 'foo
|
23
|
-
Sanitize.fragment('foo <style>bar').must_equal 'foo
|
22
|
+
Sanitize.fragment('foo <script>bar').must_equal 'foo '
|
23
|
+
Sanitize.fragment('foo <style>bar').must_equal 'foo '
|
24
24
|
end
|
25
25
|
|
26
26
|
it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
|
@@ -28,35 +28,6 @@ describe 'Parser' do
|
|
28
28
|
Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D').must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
|
29
29
|
end
|
30
30
|
|
31
|
-
# https://github.com/sparklemotion/nokogiri/issues/1008
|
32
|
-
it 'should work around the libxml2 content-type meta tag bug' do
|
33
|
-
Sanitize.document('<html><head></head><body>Howdy!</body></html>',
|
34
|
-
:elements => %w[html head body]
|
35
|
-
).must_equal "<html><head></head><body>Howdy!</body></html>\n"
|
36
|
-
|
37
|
-
Sanitize.document('<html><head></head><body>Howdy!</body></html>',
|
38
|
-
:elements => %w[html head meta body]
|
39
|
-
).must_equal "<html><head></head><body>Howdy!</body></html>\n"
|
40
|
-
|
41
|
-
Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
|
42
|
-
:elements => %w[html head meta body],
|
43
|
-
:attributes => {'meta' => ['charset']}
|
44
|
-
).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>\n"
|
45
|
-
|
46
|
-
Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
|
47
|
-
:elements => %w[html head meta body],
|
48
|
-
:attributes => {'meta' => %w[charset content http-equiv]}
|
49
|
-
).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>\n"
|
50
|
-
|
51
|
-
# Edge case: an existing content-type meta tag with a non-UTF-8 content type
|
52
|
-
# will be converted to UTF-8, since that's the only output encoding we
|
53
|
-
# support.
|
54
|
-
Sanitize.document('<html><head><meta http-equiv="content-type" content="text/html;charset=us-ascii"></head><body>Howdy!</body></html>',
|
55
|
-
:elements => %w[html head meta body],
|
56
|
-
:attributes => {'meta' => %w[charset content http-equiv]}
|
57
|
-
).must_equal "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"></head><body>Howdy!</body></html>\n"
|
58
|
-
end
|
59
|
-
|
60
31
|
describe 'when siblings are added after a node during traversal' do
|
61
32
|
it 'the added siblings should be traversed' do
|
62
33
|
html = %[
|
@@ -84,7 +55,7 @@ describe 'Parser' do
|
|
84
55
|
siblings << env[:node][:id]
|
85
56
|
end
|
86
57
|
|
87
|
-
return {:node_whitelist => [env[:node]]}
|
58
|
+
return {:node_allowlist => [env[:node]]}
|
88
59
|
})
|
89
60
|
|
90
61
|
# All siblings should be traversed, and in the order added.
|