sanitize 4.6.6 → 5.2.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sanitize might be problematic. Click here for more details.

@@ -13,7 +13,7 @@ describe 'Sanitize::Transformers::CSS::CleanAttribute' do
13
13
  @s.fragment(%[
14
14
  <div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
15
15
  ].strip).must_equal %[
16
- <div style="color: #fff; /* &lt;-- evil! */"></div>
16
+ <div style="color: #fff; /* <-- evil! */"></div>
17
17
  ].strip
18
18
  end
19
19
 
@@ -11,7 +11,7 @@ describe 'Sanitize::Transformers::CleanDoctype' do
11
11
  end
12
12
 
13
13
  it 'should remove doctype declarations' do
14
- @s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html>\n"
14
+ @s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html>"
15
15
  @s.fragment('<!DOCTYPE html>foo').must_equal 'foo'
16
16
  end
17
17
 
@@ -34,27 +34,27 @@ describe 'Sanitize::Transformers::CleanDoctype' do
34
34
 
35
35
  it 'should allow doctype declarations in documents' do
36
36
  @s.document('<!DOCTYPE html><html>foo</html>')
37
- .must_equal "<!DOCTYPE html>\n<html>foo</html>\n"
37
+ .must_equal "<!DOCTYPE html><html>foo</html>"
38
38
 
39
39
  @s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
40
- .must_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n<html>foo</html>\n"
40
+ .must_equal "<!DOCTYPE html><html>foo</html>"
41
41
 
42
42
  @s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
43
- .must_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html>foo</html>\n"
43
+ .must_equal "<!DOCTYPE html><html>foo</html>"
44
44
  end
45
45
 
46
46
  it 'should not allow obviously invalid doctype declarations in documents' do
47
47
  @s.document('<!DOCTYPE blah blah blah><html>foo</html>')
48
- .must_equal "<!DOCTYPE html>\n<html>foo</html>\n"
48
+ .must_equal "<!DOCTYPE html><html>foo</html>"
49
49
 
50
50
  @s.document('<!DOCTYPE blah><html>foo</html>')
51
- .must_equal "<!DOCTYPE html>\n<html>foo</html>\n"
51
+ .must_equal "<!DOCTYPE html><html>foo</html>"
52
52
 
53
53
  @s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
54
- .must_equal "<!DOCTYPE html>\n<html>foo</html>\n"
54
+ .must_equal "<!DOCTYPE html><html>foo</html>"
55
55
 
56
56
  @s.document('<!whatever><html>foo</html>')
57
- .must_equal "<html>foo</html>\n"
57
+ .must_equal "<html>foo</html>"
58
58
  end
59
59
 
60
60
  it 'should not allow doctype definitions in fragments' do
@@ -8,25 +8,22 @@ describe 'Sanitize::Transformers::CleanElement' do
8
8
  strings = {
9
9
  :basic => {
10
10
  :html => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <style>.foo { color: #fff; }</style> <script>alert("hello world");</script>',
11
-
12
- :default => 'Lorem ipsum dolor sit amet .foo { color: #fff; } alert("hello world");',
13
- :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet .foo { color: #fff; } alert("hello world");',
14
- :basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet .foo { color: #fff; } alert("hello world");',
15
- :relaxed => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> alert("hello world");'
11
+ :default => 'Lorem ipsum dolor sit amet ',
12
+ :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet ',
13
+ :basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
14
+ :relaxed => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> '
16
15
  },
17
16
 
18
17
  :malformed => {
19
18
  :html => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
20
-
21
- :default => 'Lorem dolor sit amet alert("hello world");',
22
- :restricted => 'Lorem <strong>dolor</strong> sit amet alert("hello world");',
23
- :basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet alert("hello world");',
24
- :relaxed => 'Lorem <a href="pants" title="foo&gt;ipsum &lt;a href="><strong>dolor</strong></a> sit<br>amet alert("hello world");',
19
+ :default => 'Lorem dolor sit amet ',
20
+ :restricted => 'Lorem <strong>dolor</strong> sit amet ',
21
+ :basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
22
+ :relaxed => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet ',
25
23
  },
26
24
 
27
25
  :unclosed => {
28
26
  :html => '<p>a</p><blockquote>b',
29
-
30
27
  :default => ' a b ',
31
28
  :restricted => ' a b ',
32
29
  :basic => '<p>a</p><blockquote>b</blockquote>',
@@ -35,7 +32,6 @@ describe 'Sanitize::Transformers::CleanElement' do
35
32
 
36
33
  :malicious => {
37
34
  :html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
38
-
39
35
  :default => 'Lorem ipsum dolor sit amet &lt;script&gt;alert("hello world");',
40
36
  :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert("hello world");',
41
37
  :basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");',
@@ -166,15 +162,15 @@ describe 'Sanitize::Transformers::CleanElement' do
166
162
  }
167
163
 
168
164
  describe 'Default config' do
169
- it 'should remove non-whitelisted elements, leaving safe contents behind' do
165
+ it 'should remove non-allowlisted elements, leaving safe contents behind' do
170
166
  Sanitize.fragment('foo <b>bar</b> <strong><a href="#a">baz</a></strong> quux')
171
167
  .must_equal 'foo bar baz quux'
172
168
 
173
169
  Sanitize.fragment('<script>alert("<xss>");</script>')
174
- .must_equal 'alert("&lt;xss&gt;");'
170
+ .must_equal ''
175
171
 
176
172
  Sanitize.fragment('<<script>script>alert("<xss>");</<script>>')
177
- .must_equal '&lt;script&gt;alert("&lt;xss&gt;");&lt;/&lt;script&gt;&gt;'
173
+ .must_equal '&lt;'
178
174
 
179
175
  Sanitize.fragment('< script <>> alert("<xss>");</script>')
180
176
  .must_equal '&lt; script &lt;&gt;&gt; alert("");'
@@ -196,6 +192,56 @@ describe 'Sanitize::Transformers::CleanElement' do
196
192
  .must_equal ''
197
193
  end
198
194
 
195
+ it 'should not preserve the content of removed `iframe` elements' do
196
+ Sanitize.fragment('<iframe>hello! <script>alert(0)</script></iframe>')
197
+ .must_equal ''
198
+ end
199
+
200
+ it 'should not preserve the content of removed `math` elements' do
201
+ Sanitize.fragment('<math>hello! <script>alert(0)</script></math>')
202
+ .must_equal ''
203
+ end
204
+
205
+ it 'should not preserve the content of removed `noembed` elements' do
206
+ Sanitize.fragment('<noembed>hello! <script>alert(0)</script></noembed>')
207
+ .must_equal ''
208
+ end
209
+
210
+ it 'should not preserve the content of removed `noframes` elements' do
211
+ Sanitize.fragment('<noframes>hello! <script>alert(0)</script></noframes>')
212
+ .must_equal ''
213
+ end
214
+
215
+ it 'should not preserve the content of removed `noscript` elements' do
216
+ Sanitize.fragment('<noscript>hello! <script>alert(0)</script></noscript>')
217
+ .must_equal ''
218
+ end
219
+
220
+ it 'should not preserve the content of removed `plaintext` elements' do
221
+ Sanitize.fragment('<plaintext>hello! <script>alert(0)</script>')
222
+ .must_equal ''
223
+ end
224
+
225
+ it 'should not preserve the content of removed `script` elements' do
226
+ Sanitize.fragment('<script>hello! <script>alert(0)</script></script>')
227
+ .must_equal ''
228
+ end
229
+
230
+ it 'should not preserve the content of removed `style` elements' do
231
+ Sanitize.fragment('<style>hello! <script>alert(0)</script></style>')
232
+ .must_equal ''
233
+ end
234
+
235
+ it 'should not preserve the content of removed `svg` elements' do
236
+ Sanitize.fragment('<svg>hello! <script>alert(0)</script></svg>')
237
+ .must_equal ''
238
+ end
239
+
240
+ it 'should not preserve the content of removed `xmp` elements' do
241
+ Sanitize.fragment('<xmp>hello! <script>alert(0)</script></xmp>')
242
+ .must_equal ''
243
+ end
244
+
199
245
  strings.each do |name, data|
200
246
  it "should clean #{name} HTML" do
201
247
  Sanitize.fragment(data[:html]).must_equal(data[:default])
@@ -234,7 +280,7 @@ describe 'Sanitize::Transformers::CleanElement' do
234
280
 
235
281
  it 'should not choke on valueless attributes' do
236
282
  @s.fragment('foo <a href>foo</a> bar')
237
- .must_equal 'foo <a href rel="nofollow">foo</a> bar'
283
+ .must_equal 'foo <a href="" rel="nofollow">foo</a> bar'
238
284
  end
239
285
 
240
286
  it 'should downcase attribute names' do
@@ -262,7 +308,7 @@ describe 'Sanitize::Transformers::CleanElement' do
262
308
 
263
309
  it 'should encode special chars in attribute values' do
264
310
  @s.fragment('<a href="http://example.com" title="<b>&eacute;xamples</b> & things">foo</a>')
265
- .must_equal '<a href="http://example.com" title="&lt;b&gt;éxamples&lt;/b&gt; &amp; things">foo</a>'
311
+ .must_equal '<a href="http://example.com" title="<b>éxamples</b> &amp; things">foo</a>'
266
312
  end
267
313
 
268
314
  strings.each do |name, data|
@@ -279,7 +325,7 @@ describe 'Sanitize::Transformers::CleanElement' do
279
325
  end
280
326
 
281
327
  describe 'Custom configs' do
282
- it 'should allow attributes on all elements if whitelisted under :all' do
328
+ it 'should allow attributes on all elements if allowlisted under :all' do
283
329
  input = '<p class="foo">bar</p>'
284
330
 
285
331
  Sanitize.fragment(input).must_equal ' bar '
@@ -300,7 +346,7 @@ describe 'Sanitize::Transformers::CleanElement' do
300
346
  }).must_equal input
301
347
  end
302
348
 
303
- it "should not allow relative URLs when relative URLs aren't whitelisted" do
349
+ it "should not allow relative URLs when relative URLs aren't allowlisted" do
304
350
  input = '<a href="/foo/bar">Link</a>'
305
351
 
306
352
  Sanitize.fragment(input,
@@ -344,16 +390,30 @@ describe 'Sanitize::Transformers::CleanElement' do
344
390
  ).must_equal 'foo bar '
345
391
  end
346
392
 
347
- it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as strings' do
348
- Sanitize.fragment('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>',
393
+ it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as strings' do
394
+ Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
349
395
  :remove_contents => ['script', 'span']
350
- ).must_equal 'foo bar baz '
396
+ ).must_equal 'foo bar baz hi '
397
+
398
+ Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
399
+ :remove_contents => Set.new(['script', 'span'])
400
+ ).must_equal 'foo bar baz hi '
351
401
  end
352
402
 
353
- it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as symbols' do
354
- Sanitize.fragment('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>',
403
+ it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as symbols' do
404
+ Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
355
405
  :remove_contents => [:script, :span]
356
- ).must_equal 'foo bar baz '
406
+ ).must_equal 'foo bar baz hi '
407
+
408
+ Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
409
+ :remove_contents => Set.new([:script, :span])
410
+ ).must_equal 'foo bar baz hi '
411
+ end
412
+
413
+ it 'should remove the contents of allowlisted iframes' do
414
+ Sanitize.fragment('<iframe>hi <script>hello</script></iframe>',
415
+ :elements => ['iframe']
416
+ ).must_equal '<iframe></iframe>'
357
417
  end
358
418
 
359
419
  it 'should not allow arbitrary HTML5 data attributes by default' do
@@ -413,7 +473,7 @@ describe 'Sanitize::Transformers::CleanElement' do
413
473
  s.fragment('foo<br>bar<br>baz').must_equal "foo\nbar\nbaz"
414
474
  end
415
475
 
416
- it 'handles protocols correctly regardless of case' do
476
+ it 'should handle protocols correctly regardless of case' do
417
477
  input = '<a href="hTTpS://foo.com/">Text</a>'
418
478
 
419
479
  Sanitize.fragment(input, {
@@ -430,5 +490,40 @@ describe 'Sanitize::Transformers::CleanElement' do
430
490
  :protocols => {'a' => {'href' => ['https']}}
431
491
  }).must_equal "<a>Text</a>"
432
492
  end
493
+
494
+ it 'should prevent `<meta>` tags from being used to set a non-UTF-8 charset' do
495
+ Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
496
+ :elements => %w[html head meta body],
497
+ :attributes => {'meta' => ['charset']}
498
+ ).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>"
499
+
500
+ Sanitize.document('<html><meta charset="utf-8">Howdy!</html>',
501
+ :elements => %w[html meta],
502
+ :attributes => {'meta' => ['charset']}
503
+ ).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
504
+
505
+ Sanitize.document('<html><meta charset="us-ascii">Howdy!</html>',
506
+ :elements => %w[html meta],
507
+ :attributes => {'meta' => ['charset']}
508
+ ).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
509
+
510
+ Sanitize.document('<html><meta http-equiv="content-type" content=" text/html; charset=us-ascii">Howdy!</html>',
511
+ :elements => %w[html meta],
512
+ :attributes => {'meta' => %w[content http-equiv]}
513
+ ).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>"
514
+
515
+ Sanitize.document('<html><meta http-equiv="Content-Type" content="text/plain;charset = us-ascii">Howdy!</html>',
516
+ :elements => %w[html meta],
517
+ :attributes => {'meta' => %w[content http-equiv]}
518
+ ).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>"
519
+ end
520
+
521
+ it 'should not modify `<meta>` tags that already set a UTF-8 charset' do
522
+ Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
523
+ :elements => %w[html head meta body],
524
+ :attributes => {'meta' => %w[content http-equiv]}
525
+ ).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>"
526
+ end
527
+
433
528
  end
434
529
  end
@@ -43,7 +43,7 @@ describe 'Malicious HTML' do
43
43
  describe '<body>' do
44
44
  it 'should not be possible to inject JS via a malformed event attribute' do
45
45
  @s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>').
46
- must_equal "<html><head></head><body></body></html>\n"
46
+ must_equal "<html><head></head><body></body></html>"
47
47
  end
48
48
  end
49
49
 
@@ -65,7 +65,7 @@ describe 'Malicious HTML' do
65
65
 
66
66
  it 'should not be possible to inject <script> via a malformed <img> tag' do
67
67
  @s.fragment('<img """><script>alert("XSS")</script>">').
68
- must_equal '<img>alert("XSS")"&gt;'
68
+ must_equal '<img>"&gt;'
69
69
  end
70
70
 
71
71
  it 'should not be possible to inject protocol-based JS' do
@@ -117,24 +117,26 @@ describe 'Malicious HTML' do
117
117
  describe '<script>' do
118
118
  it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do
119
119
  @s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]).
120
- must_equal 'alert(1)'
120
+ must_equal ''
121
121
  end
122
122
 
123
123
  it 'should not be possible to inject <script> via extraneous open brackets' do
124
124
  @s.fragment(%[<<script>alert("XSS");//<</script>]).
125
- must_equal '&lt;alert("XSS");//&lt;'
125
+ must_equal '&lt;'
126
126
  end
127
127
  end
128
128
 
129
129
  # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
130
130
  # attempt to preserve server-side includes. This can result in XSS since an
131
- # unescaped double quote can allow an attacker to inject a non-whitelisted
131
+ # unescaped double quote can allow an attacker to inject a non-allowlisted
132
132
  # attribute. Sanitize works around this by implementing its own escaping for
133
133
  # affected attributes.
134
134
  #
135
135
  # The relevant libxml2 code is here:
136
136
  # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
137
137
  describe 'unsafe libxml2 server-side includes in attributes' do
138
+ using_unpatched_libxml2 = Nokogiri::VersionInfo.instance.libxml2_using_system?
139
+
138
140
  tag_configs = [
139
141
  {
140
142
  tag_name: 'a',
@@ -166,7 +168,21 @@ describe 'Malicious HTML' do
166
168
  input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
167
169
 
168
170
  it 'should escape unsafe characters in attributes' do
169
- @s.fragment(input).must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
171
+ skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
172
+
173
+ # This uses Nokogumbo's HTML-compliant serializer rather than
174
+ # libxml2's.
175
+ @s.fragment(input).
176
+ must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
177
+
178
+ # This uses the not-quite-standards-compliant libxml2 serializer via
179
+ # Nokogiri, so the output may be a little different as of Nokogiri
180
+ # 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
181
+ # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
182
+ fragment = Nokogiri::HTML.fragment(input)
183
+ @s.node!(fragment)
184
+ fragment.to_html.
185
+ must_equal(%[<#{tag_name} #{attr_name}="examp&lt;!--%22%20onmouseover=alert(1)&gt;--&gt;le.com">foo</#{tag_name}>])
170
186
  end
171
187
 
172
188
  it 'should round-trip to the same output' do
@@ -179,7 +195,21 @@ describe 'Malicious HTML' do
179
195
  input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
180
196
 
181
197
  it 'should not escape characters unnecessarily' do
182
- @s.fragment(input).must_equal(input)
198
+ skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
199
+
200
+ # This uses Nokogumbo's HTML-compliant serializer rather than
201
+ # libxml2's.
202
+ @s.fragment(input).
203
+ must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
204
+
205
+ # This uses the not-quite-standards-compliant libxml2 serializer via
206
+ # Nokogiri, so the output may be a little different as of Nokogiri
207
+ # 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
208
+ # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
209
+ fragment = Nokogiri::HTML.fragment(input)
210
+ @s.node!(fragment)
211
+ fragment.to_html.
212
+ must_equal(%[<#{tag_name} #{attr_name}='examp&lt;!--" onmouseover=alert(1)&gt;--&gt;le.com'>foo</#{tag_name}>])
183
213
  end
184
214
 
185
215
  it 'should round-trip to the same output' do
@@ -189,4 +219,17 @@ describe 'Malicious HTML' do
189
219
  end
190
220
  end
191
221
  end
222
+
223
+ # https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
224
+ describe 'foreign content bypass in relaxed config' do
225
+ it 'prevents a sanitization bypass via carefully crafted foreign content' do
226
+ %w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
227
+ @s.fragment(%[<math><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/]).
228
+ must_equal ''
229
+
230
+ @s.fragment(%[<svg><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/]).
231
+ must_equal ''
232
+ end
233
+ end
234
+ end
192
235
  end
data/test/test_parser.rb CHANGED
@@ -19,8 +19,8 @@ describe 'Parser' do
19
19
  end
20
20
 
21
21
  it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do
22
- Sanitize.fragment('foo <script>bar').must_equal 'foo bar'
23
- Sanitize.fragment('foo <style>bar').must_equal 'foo bar'
22
+ Sanitize.fragment('foo <script>bar').must_equal 'foo '
23
+ Sanitize.fragment('foo <style>bar').must_equal 'foo '
24
24
  end
25
25
 
26
26
  it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
@@ -28,35 +28,6 @@ describe 'Parser' do
28
28
  Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D').must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
29
29
  end
30
30
 
31
- # https://github.com/sparklemotion/nokogiri/issues/1008
32
- it 'should work around the libxml2 content-type meta tag bug' do
33
- Sanitize.document('<html><head></head><body>Howdy!</body></html>',
34
- :elements => %w[html head body]
35
- ).must_equal "<html><head></head><body>Howdy!</body></html>\n"
36
-
37
- Sanitize.document('<html><head></head><body>Howdy!</body></html>',
38
- :elements => %w[html head meta body]
39
- ).must_equal "<html><head></head><body>Howdy!</body></html>\n"
40
-
41
- Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
42
- :elements => %w[html head meta body],
43
- :attributes => {'meta' => ['charset']}
44
- ).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>\n"
45
-
46
- Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
47
- :elements => %w[html head meta body],
48
- :attributes => {'meta' => %w[charset content http-equiv]}
49
- ).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>\n"
50
-
51
- # Edge case: an existing content-type meta tag with a non-UTF-8 content type
52
- # will be converted to UTF-8, since that's the only output encoding we
53
- # support.
54
- Sanitize.document('<html><head><meta http-equiv="content-type" content="text/html;charset=us-ascii"></head><body>Howdy!</body></html>',
55
- :elements => %w[html head meta body],
56
- :attributes => {'meta' => %w[charset content http-equiv]}
57
- ).must_equal "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"></head><body>Howdy!</body></html>\n"
58
- end
59
-
60
31
  describe 'when siblings are added after a node during traversal' do
61
32
  it 'the added siblings should be traversed' do
62
33
  html = %[
@@ -84,7 +55,7 @@ describe 'Parser' do
84
55
  siblings << env[:node][:id]
85
56
  end
86
57
 
87
- return {:node_whitelist => [env[:node]]}
58
+ return {:node_allowlist => [env[:node]]}
88
59
  })
89
60
 
90
61
  # All siblings should be traversed, and in the order added.