sanitize 6.0.0 → 6.0.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +52 -0
- data/README.md +25 -19
- data/lib/sanitize/config/default.rb +5 -0
- data/lib/sanitize/transformers/clean_element.rb +45 -0
- data/lib/sanitize/version.rb +1 -1
- data/test/test_clean_comment.rb +16 -16
- data/test/test_clean_css.rb +5 -5
- data/test/test_clean_doctype.rb +15 -15
- data/test/test_clean_element.rb +99 -92
- data/test/test_config.rb +9 -9
- data/test/test_malicious_css.rb +7 -7
- data/test/test_malicious_html.rb +135 -31
- data/test/test_parser.rb +8 -8
- data/test/test_sanitize.rb +24 -24
- data/test/test_sanitize_css.rb +53 -53
- data/test/test_transformers.rb +37 -37
- metadata +3 -3
data/test/test_malicious_html.rb
CHANGED
@@ -17,111 +17,111 @@ describe 'Malicious HTML' do
|
|
17
17
|
|
18
18
|
describe 'comments' do
|
19
19
|
it 'should not allow script injection via conditional comments' do
|
20
|
-
@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]).
|
20
|
+
_(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->])).
|
21
21
|
must_equal ''
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
25
|
describe 'interpolation (ERB, PHP, etc.)' do
|
26
26
|
it 'should escape ERB-style tags' do
|
27
|
-
@s.fragment('<% naughty_ruby_code %>').
|
27
|
+
_(@s.fragment('<% naughty_ruby_code %>')).
|
28
28
|
must_equal '&lt;% naughty_ruby_code %&gt;'
|
29
29
|
|
30
|
-
@s.fragment('<%= naughty_ruby_code %>').
|
30
|
+
_(@s.fragment('<%= naughty_ruby_code %>')).
|
31
31
|
must_equal '&lt;%= naughty_ruby_code %&gt;'
|
32
32
|
end
|
33
33
|
|
34
34
|
it 'should remove PHP-style tags' do
|
35
|
-
@s.fragment('<? naughtyPHPCode(); ?>').
|
35
|
+
_(@s.fragment('<? naughtyPHPCode(); ?>')).
|
36
36
|
must_equal ''
|
37
37
|
|
38
|
-
@s.fragment('<?= naughtyPHPCode(); ?>').
|
38
|
+
_(@s.fragment('<?= naughtyPHPCode(); ?>')).
|
39
39
|
must_equal ''
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
43
|
describe '<body>' do
|
44
44
|
it 'should not be possible to inject JS via a malformed event attribute' do
|
45
|
-
@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>').
|
45
|
+
_(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>')).
|
46
46
|
must_equal "<html><head></head><body></body></html>"
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
50
|
describe '<iframe>' do
|
51
51
|
it 'should not be possible to inject an iframe using an improperly closed tag' do
|
52
|
-
@s.fragment(%[<iframe src=http://ha.ckers.org/scriptlet.html <]).
|
52
|
+
_(@s.fragment(%[<iframe src=http://ha.ckers.org/scriptlet.html <])).
|
53
53
|
must_equal ''
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
57
57
|
describe '<img>' do
|
58
58
|
it 'should not be possible to inject JS via an unquoted <img> src attribute' do
|
59
|
-
@s.fragment("<img src=javascript:alert('XSS')>").must_equal '<img>'
|
59
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal '<img>'
|
60
60
|
end
|
61
61
|
|
62
62
|
it 'should not be possible to inject JS using grave accents as <img> src delimiters' do
|
63
|
-
@s.fragment("<img src=`javascript:alert('XSS')`>").must_equal '<img>'
|
63
|
+
_(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal '<img>'
|
64
64
|
end
|
65
65
|
|
66
66
|
it 'should not be possible to inject <script> via a malformed <img> tag' do
|
67
|
-
@s.fragment('<img """><script>alert("XSS")</script>">').
|
67
|
+
_(@s.fragment('<img """><script>alert("XSS")</script>">')).
|
68
68
|
must_equal '<img>"&gt;'
|
69
69
|
end
|
70
70
|
|
71
71
|
it 'should not be possible to inject protocol-based JS' do
|
72
|
-
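@s.fragment('<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>').
|
72
|
+
_(@s.fragment('<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>')).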
|
73
73
|
must_equal '<img>'
|
74
74
|
|
75
|
-
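@s.fragment('<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>').
|
75
|
+
_(@s.fragment('<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>')).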
|
76
76
|
must_equal '<img>'
|
77
77
|
|
78
|
-
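@s.fragment('<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>').
|
78
|
+
_(@s.fragment('<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>')).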
|
79
79
|
must_equal '<img>'
|
80
80
|
|
81
81
|
# Encoded tab character.
|
82
|
-
@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">]).
|
82
|
+
_(@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">])).
|
83
83
|
must_equal '<img>'
|
84
84
|
|
85
85
|
# Encoded newline.
|
86
|
-
@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">]).
|
86
|
+
_(@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">])).
|
87
87
|
must_equal '<img>'
|
88
88
|
|
89
89
|
# Encoded carriage return.
|
90
|
-
@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">]).
|
90
|
+
_(@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">])).
|
91
91
|
must_equal '<img>'
|
92
92
|
|
93
93
|
# Null byte.
|
94
|
-
@s.fragment(%[<img src=java\0script:alert("XSS")>]).
|
94
|
+
_(@s.fragment(%[<img src=java\0script:alert("XSS")>])).
|
95
95
|
must_equal '<img>'
|
96
96
|
|
97
97
|
# Spaces plus meta char.
|
98
|
-
@s.fragment(%[<img src=" &#14;  javascript:alert('XSS');">]).
|
98
|
+
_(@s.fragment(%[<img src=" &#14;  javascript:alert('XSS');">])).
|
99
99
|
must_equal '<img>'
|
100
100
|
|
101
101
|
# Mixed spaces and tabs.
|
102
|
-
@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]).
|
102
|
+
_(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">])).
|
103
103
|
must_equal '<img>'
|
104
104
|
end
|
105
105
|
|
106
106
|
it 'should not be possible to inject protocol-based JS via whitespace' do
|
107
|
-
@s.fragment(%[<img src="jav\tascript:alert('XSS');">]).
|
107
|
+
_(@s.fragment(%[<img src="jav\tascript:alert('XSS');">])).
|
108
108
|
must_equal '<img>'
|
109
109
|
end
|
110
110
|
|
111
111
|
it 'should not be possible to inject JS using a half-open <img> tag' do
|
112
|
-
@s.fragment(%[<img src="javascript:alert('XSS')"]).
|
112
|
+
_(@s.fragment(%[<img src="javascript:alert('XSS')"])).
|
113
113
|
must_equal ''
|
114
114
|
end
|
115
115
|
end
|
116
116
|
|
117
117
|
describe '<script>' do
|
118
118
|
it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do
|
119
|
-
@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]).
|
119
|
+
_(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>])).
|
120
120
|
must_equal ''
|
121
121
|
end
|
122
122
|
|
123
123
|
it 'should not be possible to inject <script> via extraneous open brackets' do
|
124
|
-
@s.fragment(%[<<script>alert("XSS");//<</script>]).
|
124
|
+
_(@s.fragment(%[<<script>alert("XSS");//<</script>])).
|
125
125
|
must_equal '&lt;'
|
126
126
|
end
|
127
127
|
end
|
@@ -172,7 +172,7 @@ describe 'Malicious HTML' do
|
|
172
172
|
|
173
173
|
# This uses Nokogumbo's HTML-compliant serializer rather than
|
174
174
|
# libxml2's.
|
175
|
-
@s.fragment(input).
|
175
|
+
_(@s.fragment(input)).
|
176
176
|
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
177
177
|
|
178
178
|
# This uses the not-quite-standards-compliant libxml2 serializer via
|
@@ -181,13 +181,13 @@ describe 'Malicious HTML' do
|
|
181
181
|
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
182
182
|
fragment = Nokogiri::HTML.fragment(input)
|
183
183
|
@s.node!(fragment)
|
184
|
-
fragment.to_html.
|
184
|
+
_(fragment.to_html).
|
185
185
|
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
186
186
|
end
|
187
187
|
|
188
188
|
it 'should round-trip to the same output' do
|
189
189
|
output = @s.fragment(input)
|
190
|
-
@s.fragment(output).must_equal(output)
|
190
|
+
_(@s.fragment(output)).must_equal(output)
|
191
191
|
end
|
192
192
|
end
|
193
193
|
|
@@ -199,7 +199,7 @@ describe 'Malicious HTML' do
|
|
199
199
|
|
200
200
|
# This uses Nokogumbo's HTML-compliant serializer rather than
|
201
201
|
# libxml2's.
|
202
|
-
@s.fragment(input).
|
202
|
+
_(@s.fragment(input)).
|
203
203
|
must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
204
204
|
|
205
205
|
# This uses the not-quite-standards-compliant libxml2 serializer via
|
@@ -208,13 +208,13 @@ describe 'Malicious HTML' do
|
|
208
208
|
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
209
209
|
fragment = Nokogiri::HTML.fragment(input)
|
210
210
|
@s.node!(fragment)
|
211
|
-
fragment.to_html.
|
211
|
+
_(fragment.to_html).
|
212
212
|
must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
|
213
213
|
end
|
214
214
|
|
215
215
|
it 'should round-trip to the same output' do
|
216
216
|
output = @s.fragment(input)
|
217
|
-
@s.fragment(output).must_equal(output)
|
217
|
+
_(@s.fragment(output)).must_equal(output)
|
218
218
|
end
|
219
219
|
end
|
220
220
|
end
|
@@ -224,12 +224,116 @@ describe 'Malicious HTML' do
|
|
224
224
|
describe 'foreign content bypass in relaxed config' do
|
225
225
|
it 'prevents a sanitization bypass via carefully crafted foreign content' do
|
226
226
|
%w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
|
227
|
-
@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]).
|
227
|
+
_(@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/])).
|
228
228
|
must_equal ''
|
229
229
|
|
230
|
-
@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]).
|
230
|
+
_(@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/])).
|
231
231
|
must_equal ''
|
232
232
|
end
|
233
233
|
end
|
234
234
|
end
|
235
|
+
|
236
|
+
# These tests cover an unsupported and unsafe custom config that allows MathML
|
237
|
+
# and SVG elements, which Sanitize's docs specifically say multiple times in
|
238
|
+
# big prominent warnings that you SHOULD NOT DO because Sanitize doesn't
|
239
|
+
# support MathML or SVG.
|
240
|
+
#
|
241
|
+
# Do not use the custom configs you see in these tests! If you do, you may be
|
242
|
+
# creating XSS vulnerabilities in your application.
|
243
|
+
describe 'foreign content bypass in unsafe custom config that allows MathML or SVG' do
|
244
|
+
unescaped_content_elements = %w[
|
245
|
+
noembed
|
246
|
+
noframes
|
247
|
+
plaintext
|
248
|
+
script
|
249
|
+
xmp
|
250
|
+
]
|
251
|
+
|
252
|
+
removed_content_elements = %w[
|
253
|
+
iframe
|
254
|
+
]
|
255
|
+
|
256
|
+
removed_elements = %w[
|
257
|
+
noscript
|
258
|
+
style
|
259
|
+
]
|
260
|
+
|
261
|
+
before do
|
262
|
+
@s = Sanitize.new(
|
263
|
+
Sanitize::Config.merge(
|
264
|
+
Sanitize::Config::RELAXED,
|
265
|
+
elements: Sanitize::Config::RELAXED[:elements] +
|
266
|
+
unescaped_content_elements +
|
267
|
+
removed_content_elements +
|
268
|
+
%w[math svg]
|
269
|
+
)
|
270
|
+
)
|
271
|
+
end
|
272
|
+
|
273
|
+
unescaped_content_elements.each do |name|
|
274
|
+
it "forcibly escapes text content inside `<#{name}>` in a MathML namespace" do
|
275
|
+
assert_equal(
|
276
|
+
"<math><#{name}>&lt;img src=x onerror=alert(1)&gt;</#{name}></math>",
|
277
|
+
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
278
|
+
)
|
279
|
+
end
|
280
|
+
|
281
|
+
it "forcibly escapes text content inside `<#{name}>` in an SVG namespace" do
|
282
|
+
assert_equal(
|
283
|
+
"<svg><#{name}>&lt;img src=x onerror=alert(1)&gt;</#{name}></svg>",
|
284
|
+
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
285
|
+
)
|
286
|
+
end
|
287
|
+
end
|
288
|
+
|
289
|
+
removed_content_elements.each do |name|
|
290
|
+
it "removes text content inside `<#{name}>` in a MathML namespace" do
|
291
|
+
assert_equal(
|
292
|
+
"<math><#{name}></#{name}></math>",
|
293
|
+
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
294
|
+
)
|
295
|
+
end
|
296
|
+
|
297
|
+
it "removes text content inside `<#{name}>` in an SVG namespace" do
|
298
|
+
assert_equal(
|
299
|
+
"<svg><#{name}></#{name}></svg>",
|
300
|
+
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
301
|
+
)
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
305
|
+
removed_elements.each do |name|
|
306
|
+
it "removes `<#{name}>` elements in a MathML namespace" do
|
307
|
+
assert_equal(
|
308
|
+
'<math></math>',
|
309
|
+
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
310
|
+
)
|
311
|
+
end
|
312
|
+
|
313
|
+
it "removes `<#{name}>` elements in an SVG namespace" do
|
314
|
+
assert_equal(
|
315
|
+
'<svg></svg>',
|
316
|
+
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
317
|
+
)
|
318
|
+
end
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
describe 'sanitization bypass by exploiting scripting-disabled <noscript> behavior' do
|
323
|
+
before do
|
324
|
+
@s = Sanitize.new(
|
325
|
+
Sanitize::Config.merge(
|
326
|
+
Sanitize::Config::RELAXED,
|
327
|
+
elements: Sanitize::Config::RELAXED[:elements] + ['noscript']
|
328
|
+
)
|
329
|
+
)
|
330
|
+
end
|
331
|
+
|
332
|
+
it 'is prevented by removing `<noscript>` elements regardless of the allowlist' do
|
333
|
+
assert_equal(
|
334
|
+
'',
|
335
|
+
@s.fragment(%[<noscript><div id='</noscript>&lt;img src=x onerror=alert(1)&gt; '>])
|
336
|
+
)
|
337
|
+
end
|
338
|
+
end
|
235
339
|
end
|
data/test/test_parser.rb
CHANGED
@@ -6,26 +6,26 @@ describe 'Parser' do
|
|
6
6
|
parallelize_me!
|
7
7
|
|
8
8
|
it 'should translate valid entities into characters' do
|
9
|
-
Sanitize.fragment("&apos;&eacute;&amp;").must_equal("'é&amp;")
|
9
|
+
_(Sanitize.fragment("&apos;&eacute;&amp;")).must_equal("'é&amp;")
|
10
10
|
end
|
11
11
|
|
12
12
|
it 'should translate orphaned ampersands into entities' do
|
13
|
-
Sanitize.fragment('at&t').must_equal('at&amp;t')
|
13
|
+
_(Sanitize.fragment('at&t')).must_equal('at&amp;t')
|
14
14
|
end
|
15
15
|
|
16
16
|
it 'should not add newlines after tags when serializing a fragment' do
|
17
|
-
Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :elements => ['div', 'p'])
|
17
|
+
_(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :elements => ['div', 'p']))
|
18
18
|
.must_equal "<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>"
|
19
19
|
end
|
20
20
|
|
21
21
|
it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do
|
22
|
-
Sanitize.fragment('foo <script>bar').must_equal 'foo '
|
23
|
-
Sanitize.fragment('foo <style>bar').must_equal 'foo '
|
22
|
+
_(Sanitize.fragment('foo <script>bar')).must_equal 'foo '
|
23
|
+
_(Sanitize.fragment('foo <style>bar')).must_equal 'foo '
|
24
24
|
end
|
25
25
|
|
26
26
|
it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
|
27
|
-
Sanitize.fragment('1 > 2 and 2 < 1').must_equal '1 &gt; 2 and 2 &lt; 1'
|
28
|
-
Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D').must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
|
27
|
+
_(Sanitize.fragment('1 > 2 and 2 < 1')).must_equal '1 &gt; 2 and 2 &lt; 1'
|
28
|
+
_(Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D')).must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
|
29
29
|
end
|
30
30
|
|
31
31
|
describe 'when siblings are added after a node during traversal' do
|
@@ -59,7 +59,7 @@ describe 'Parser' do
|
|
59
59
|
})
|
60
60
|
|
61
61
|
# All siblings should be traversed, and in the order added.
|
62
|
-
siblings.must_equal [
|
62
|
+
_(siblings).must_equal [
|
63
63
|
"added_one_one_one",
|
64
64
|
"added_one_one",
|
65
65
|
"added_one_two",
|
data/test/test_sanitize.rb
CHANGED
@@ -9,7 +9,7 @@ describe 'Sanitize' do
|
|
9
9
|
]
|
10
10
|
|
11
11
|
Sanitize.new({ :transformers => transformers })
|
12
|
-
transformers.length.must_equal(1)
|
12
|
+
_(transformers.length).must_equal(1)
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
@@ -24,33 +24,33 @@ describe 'Sanitize' do
|
|
24
24
|
end
|
25
25
|
|
26
26
|
it 'should sanitize an HTML document' do
|
27
|
-
@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>')
|
27
|
+
_(@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'))
|
28
28
|
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
29
29
|
end
|
30
30
|
|
31
31
|
it 'should not modify the input string' do
|
32
32
|
input = '<!DOCTYPE html><b>foo</b>'
|
33
33
|
@s.document(input)
|
34
|
-
input.must_equal('<!DOCTYPE html><b>foo</b>')
|
34
|
+
_(input).must_equal('<!DOCTYPE html><b>foo</b>')
|
35
35
|
end
|
36
36
|
|
37
37
|
it 'should not choke on frozen documents' do
|
38
|
-
@s.document('<!doctype html><html><b>foo</b>'.freeze).must_equal "<html>foo</html>"
|
38
|
+
_(@s.document('<!doctype html><html><b>foo</b>'.freeze)).must_equal "<html>foo</html>"
|
39
39
|
end
|
40
40
|
|
41
41
|
it 'should normalize newlines' do
|
42
|
-
@s.document("a\r\n\n\r\r\r\nz").must_equal "<html>a\n\n\n\n\nz</html>"
|
42
|
+
_(@s.document("a\r\n\n\r\r\r\nz")).must_equal "<html>a\n\n\n\n\nz</html>"
|
43
43
|
end
|
44
44
|
|
45
45
|
it 'should strip control characters (except ASCII whitespace)' do
|
46
46
|
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
47
47
|
whitespace = "\t\n\f\u0020"
|
48
|
-
@s.document("a#{sample_control_chars}#{whitespace}z").must_equal "<html>a#{whitespace}z</html>"
|
48
|
+
_(@s.document("a#{sample_control_chars}#{whitespace}z")).must_equal "<html>a#{whitespace}z</html>"
|
49
49
|
end
|
50
50
|
|
51
51
|
it 'should strip non-characters' do
|
52
52
|
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
53
|
-
@s.document("a#{sample_non_chars}z").must_equal "<html>az</html>"
|
53
|
+
_(@s.document("a#{sample_non_chars}z")).must_equal "<html>az</html>"
|
54
54
|
end
|
55
55
|
|
56
56
|
describe 'when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH' do
|
@@ -71,7 +71,7 @@ describe 'Sanitize' do
|
|
71
71
|
end
|
72
72
|
|
73
73
|
it 'does not raise an ArgumentError exception' do
|
74
|
-
@s.document(content).must_equal '<html>foo</html>'
|
74
|
+
_(@s.document(content)).must_equal '<html>foo</html>'
|
75
75
|
end
|
76
76
|
end
|
77
77
|
end
|
@@ -79,40 +79,40 @@ describe 'Sanitize' do
|
|
79
79
|
|
80
80
|
describe '#fragment' do
|
81
81
|
it 'should sanitize an HTML fragment' do
|
82
|
-
@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
82
|
+
_(@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'))
|
83
83
|
.must_equal 'Lorem ipsum dolor sit amet '
|
84
84
|
end
|
85
85
|
|
86
86
|
it 'should not modify the input string' do
|
87
87
|
input = '<b>foo</b>'
|
88
88
|
@s.fragment(input)
|
89
|
-
input.must_equal '<b>foo</b>'
|
89
|
+
_(input).must_equal '<b>foo</b>'
|
90
90
|
end
|
91
91
|
|
92
92
|
it 'should not choke on fragments containing <html> or <body>' do
|
93
|
-
@s.fragment('<html><b>foo</b></html>').must_equal 'foo'
|
94
|
-
@s.fragment('<body><b>foo</b></body>').must_equal 'foo'
|
95
|
-
@s.fragment('<html><body><b>foo</b></body></html>').must_equal 'foo'
|
96
|
-
@s.fragment('<!DOCTYPE html><html><body><b>foo</b></body></html>').must_equal 'foo'
|
93
|
+
_(@s.fragment('<html><b>foo</b></html>')).must_equal 'foo'
|
94
|
+
_(@s.fragment('<body><b>foo</b></body>')).must_equal 'foo'
|
95
|
+
_(@s.fragment('<html><body><b>foo</b></body></html>')).must_equal 'foo'
|
96
|
+
_(@s.fragment('<!DOCTYPE html><html><body><b>foo</b></body></html>')).must_equal 'foo'
|
97
97
|
end
|
98
98
|
|
99
99
|
it 'should not choke on frozen fragments' do
|
100
|
-
@s.fragment('<b>foo</b>'.freeze).must_equal 'foo'
|
100
|
+
_(@s.fragment('<b>foo</b>'.freeze)).must_equal 'foo'
|
101
101
|
end
|
102
102
|
|
103
103
|
it 'should normalize newlines' do
|
104
|
-
@s.fragment("a\r\n\n\r\r\r\nz").must_equal "a\n\n\n\n\nz"
|
104
|
+
_(@s.fragment("a\r\n\n\r\r\r\nz")).must_equal "a\n\n\n\n\nz"
|
105
105
|
end
|
106
106
|
|
107
107
|
it 'should strip control characters (except ASCII whitespace)' do
|
108
108
|
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
109
109
|
whitespace = "\t\n\f\u0020"
|
110
|
-
@s.fragment("a#{sample_control_chars}#{whitespace}z").must_equal "a#{whitespace}z"
|
110
|
+
_(@s.fragment("a#{sample_control_chars}#{whitespace}z")).must_equal "a#{whitespace}z"
|
111
111
|
end
|
112
112
|
|
113
113
|
it 'should strip non-characters' do
|
114
114
|
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
115
|
-
@s.fragment("a#{sample_non_chars}z").must_equal "az"
|
115
|
+
_(@s.fragment("a#{sample_non_chars}z")).must_equal "az"
|
116
116
|
end
|
117
117
|
|
118
118
|
describe 'when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH' do
|
@@ -133,7 +133,7 @@ describe 'Sanitize' do
|
|
133
133
|
end
|
134
134
|
|
135
135
|
it 'does not raise an ArgumentError exception' do
|
136
|
-
@s.fragment(content).must_equal 'foo'
|
136
|
+
_(@s.fragment(content)).must_equal 'foo'
|
137
137
|
end
|
138
138
|
end
|
139
139
|
end
|
@@ -147,13 +147,13 @@ describe 'Sanitize' do
|
|
147
147
|
doc.xpath('/html/body/node()').each {|node| frag << node }
|
148
148
|
|
149
149
|
@s.node!(frag)
|
150
|
-
frag.to_html.must_equal 'Lorem ipsum dolor sit amet '
|
150
|
+
_(frag.to_html).must_equal 'Lorem ipsum dolor sit amet '
|
151
151
|
end
|
152
152
|
|
153
153
|
describe "when the given node is a document and <html> isn't allowlisted" do
|
154
154
|
it 'should raise a Sanitize::Error' do
|
155
155
|
doc = Nokogiri::HTML5.parse('foo')
|
156
|
-
proc { @s.node!(doc) }.must_raise Sanitize::Error
|
156
|
+
_(proc { @s.node!(doc) }).must_raise Sanitize::Error
|
157
157
|
end
|
158
158
|
end
|
159
159
|
end
|
@@ -163,7 +163,7 @@ describe 'Sanitize' do
|
|
163
163
|
describe '.document' do
|
164
164
|
it 'should sanitize an HTML document with the given config' do
|
165
165
|
html = '<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'
|
166
|
-
Sanitize.document(html, :elements => ['html'])
|
166
|
+
_(Sanitize.document(html, :elements => ['html']))
|
167
167
|
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
168
168
|
end
|
169
169
|
end
|
@@ -171,7 +171,7 @@ describe 'Sanitize' do
|
|
171
171
|
describe '.fragment' do
|
172
172
|
it 'should sanitize an HTML fragment with the given config' do
|
173
173
|
html = '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'
|
174
|
-
Sanitize.fragment(html, :elements => ['strong'])
|
174
|
+
_(Sanitize.fragment(html, :elements => ['strong']))
|
175
175
|
.must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
|
176
176
|
end
|
177
177
|
end
|
@@ -184,7 +184,7 @@ describe 'Sanitize' do
|
|
184
184
|
doc.xpath('/html/body/node()').each {|node| frag << node }
|
185
185
|
|
186
186
|
Sanitize.node!(frag, :elements => ['strong'])
|
187
|
-
frag.to_html.must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
|
187
|
+
_(frag.to_html).must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
|
188
188
|
end
|
189
189
|
end
|
190
190
|
end
|