sanitize 5.1.0 → 6.0.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +155 -18
- data/LICENSE +1 -1
- data/README.md +67 -74
- data/lib/sanitize/config/default.rb +6 -1
- data/lib/sanitize/config/relaxed.rb +1 -1
- data/lib/sanitize/css.rb +2 -2
- data/lib/sanitize/transformers/clean_comment.rb +1 -1
- data/lib/sanitize/transformers/clean_css.rb +3 -3
- data/lib/sanitize/transformers/clean_doctype.rb +1 -1
- data/lib/sanitize/transformers/clean_element.rb +62 -20
- data/lib/sanitize/version.rb +1 -1
- data/lib/sanitize.rb +17 -13
- data/test/test_clean_comment.rb +16 -16
- data/test/test_clean_css.rb +5 -5
- data/test/test_clean_doctype.rb +15 -15
- data/test/test_clean_element.rb +130 -97
- data/test/test_config.rb +9 -9
- data/test/test_malicious_css.rb +7 -7
- data/test/test_malicious_html.rb +153 -30
- data/test/test_parser.rb +9 -9
- data/test/test_sanitize.rb +29 -29
- data/test/test_sanitize_css.rb +57 -57
- data/test/test_transformers.rb +48 -42
- metadata +17 -31
data/test/test_malicious_html.rb
CHANGED
@@ -17,124 +17,126 @@ describe 'Malicious HTML' do
|
|
17
17
|
|
18
18
|
describe 'comments' do
|
19
19
|
it 'should not allow script injection via conditional comments' do
|
20
|
-
@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]).
|
20
|
+
_(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->])).
|
21
21
|
must_equal ''
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
25
|
describe 'interpolation (ERB, PHP, etc.)' do
|
26
26
|
it 'should escape ERB-style tags' do
|
27
|
-
@s.fragment('<% naughty_ruby_code %>').
|
27
|
+
_(@s.fragment('<% naughty_ruby_code %>')).
|
28
28
|
must_equal '&lt;% naughty_ruby_code %&gt;'
|
29
29
|
|
30
|
-
@s.fragment('<%= naughty_ruby_code %>').
|
30
|
+
_(@s.fragment('<%= naughty_ruby_code %>')).
|
31
31
|
must_equal '&lt;%= naughty_ruby_code %&gt;'
|
32
32
|
end
|
33
33
|
|
34
34
|
it 'should remove PHP-style tags' do
|
35
|
-
@s.fragment('<? naughtyPHPCode(); ?>').
|
35
|
+
_(@s.fragment('<? naughtyPHPCode(); ?>')).
|
36
36
|
must_equal ''
|
37
37
|
|
38
|
-
@s.fragment('<?= naughtyPHPCode(); ?>').
|
38
|
+
_(@s.fragment('<?= naughtyPHPCode(); ?>')).
|
39
39
|
must_equal ''
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
43
|
describe '<body>' do
|
44
44
|
it 'should not be possible to inject JS via a malformed event attribute' do
|
45
|
-
@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>').
|
45
|
+
_(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>')).
|
46
46
|
must_equal "<html><head></head><body></body></html>"
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
50
|
describe '<iframe>' do
|
51
51
|
it 'should not be possible to inject an iframe using an improperly closed tag' do
|
52
|
-
@s.fragment(%[<iframe src=http://ha.ckers.org/scriptlet.html <]).
|
52
|
+
_(@s.fragment(%[<iframe src=http://ha.ckers.org/scriptlet.html <])).
|
53
53
|
must_equal ''
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
57
57
|
describe '<img>' do
|
58
58
|
it 'should not be possible to inject JS via an unquoted <img> src attribute' do
|
59
|
-
@s.fragment("<img src=javascript:alert('XSS')>").must_equal '<img>'
|
59
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal '<img>'
|
60
60
|
end
|
61
61
|
|
62
62
|
it 'should not be possible to inject JS using grave accents as <img> src delimiters' do
|
63
|
-
@s.fragment("<img src=`javascript:alert('XSS')`>").must_equal '<img>'
|
63
|
+
_(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal '<img>'
|
64
64
|
end
|
65
65
|
|
66
66
|
it 'should not be possible to inject <script> via a malformed <img> tag' do
|
67
|
-
@s.fragment('<img """><script>alert("XSS")</script>">').
|
67
|
+
_(@s.fragment('<img """><script>alert("XSS")</script>">')).
|
68
68
|
must_equal '<img>"&gt;'
|
69
69
|
end
|
70
70
|
|
71
71
|
it 'should not be possible to inject protocol-based JS' do
|
72
|
-
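@s.fragment('<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>').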
|
72
|
+
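_(@s.fragment('<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>')).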
|
73
73
|
must_equal '<img>'
|
74
74
|
|
75
|
-
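@s.fragment('<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>').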
|
75
|
+
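_(@s.fragment('<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>')).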
|
76
76
|
must_equal '<img>'
|
77
77
|
|
78
|
-
@s.fragment('<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>').
|
78
|
+
_(@s.fragment('<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>')).
|
79
79
|
must_equal '<img>'
|
80
80
|
|
81
81
|
# Encoded tab character.
|
82
|
-
@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">]).
|
82
|
+
_(@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">])).
|
83
83
|
must_equal '<img>'
|
84
84
|
|
85
85
|
# Encoded newline.
|
86
|
-
@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">]).
|
86
|
+
_(@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">])).
|
87
87
|
must_equal '<img>'
|
88
88
|
|
89
89
|
# Encoded carriage return.
|
90
|
-
@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">]).
|
90
|
+
_(@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">])).
|
91
91
|
must_equal '<img>'
|
92
92
|
|
93
93
|
# Null byte.
|
94
|
-
@s.fragment(%[<img src=java\0script:alert("XSS")>]).
|
94
|
+
_(@s.fragment(%[<img src=java\0script:alert("XSS")>])).
|
95
95
|
must_equal '<img>'
|
96
96
|
|
97
97
|
# Spaces plus meta char.
|
98
|
-
@s.fragment(%[<img src=" &#14;  javascript:alert('XSS');">]).
|
98
|
+
_(@s.fragment(%[<img src=" &#14;  javascript:alert('XSS');">])).
|
99
99
|
must_equal '<img>'
|
100
100
|
|
101
101
|
# Mixed spaces and tabs.
|
102
|
-
@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]).
|
102
|
+
_(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">])).
|
103
103
|
must_equal '<img>'
|
104
104
|
end
|
105
105
|
|
106
106
|
it 'should not be possible to inject protocol-based JS via whitespace' do
|
107
|
-
@s.fragment(%[<img src="jav\tascript:alert('XSS');">]).
|
107
|
+
_(@s.fragment(%[<img src="jav\tascript:alert('XSS');">])).
|
108
108
|
must_equal '<img>'
|
109
109
|
end
|
110
110
|
|
111
111
|
it 'should not be possible to inject JS using a half-open <img> tag' do
|
112
|
-
@s.fragment(%[<img src="javascript:alert('XSS')"]).
|
112
|
+
_(@s.fragment(%[<img src="javascript:alert('XSS')"])).
|
113
113
|
must_equal ''
|
114
114
|
end
|
115
115
|
end
|
116
116
|
|
117
117
|
describe '<script>' do
|
118
118
|
it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do
|
119
|
-
@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]).
|
119
|
+
_(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>])).
|
120
120
|
must_equal ''
|
121
121
|
end
|
122
122
|
|
123
123
|
it 'should not be possible to inject <script> via extraneous open brackets' do
|
124
|
-
@s.fragment(%[<<script>alert("XSS");//<</script>]).
|
124
|
+
_(@s.fragment(%[<<script>alert("XSS");//<</script>])).
|
125
125
|
must_equal '&lt;'
|
126
126
|
end
|
127
127
|
end
|
128
128
|
|
129
129
|
# libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
|
130
130
|
# attempt to preserve server-side includes. This can result in XSS since an
|
131
|
-
# unescaped double quote can allow an attacker to inject a non-whitelisted
|
131
|
+
# unescaped double quote can allow an attacker to inject a non-allowlisted
|
132
132
|
# attribute. Sanitize works around this by implementing its own escaping for
|
133
133
|
# affected attributes.
|
134
134
|
#
|
135
135
|
# The relevant libxml2 code is here:
|
136
136
|
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
137
137
|
describe 'unsafe libxml2 server-side includes in attributes' do
|
138
|
+
using_unpatched_libxml2 = Nokogiri::VersionInfo.instance.libxml2_using_system?
|
139
|
+
|
138
140
|
tag_configs = [
|
139
141
|
{
|
140
142
|
tag_name: 'a',
|
@@ -166,9 +168,11 @@ describe 'Malicious HTML' do
|
|
166
168
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
167
169
|
|
168
170
|
it 'should escape unsafe characters in attributes' do
|
171
|
+
skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
|
172
|
+
|
169
173
|
# This uses Nokogumbo's HTML-compliant serializer rather than
|
170
174
|
# libxml2's.
|
171
|
-
@s.fragment(input).
|
175
|
+
_(@s.fragment(input)).
|
172
176
|
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
173
177
|
|
174
178
|
# This uses the not-quite-standards-compliant libxml2 serializer via
|
@@ -177,13 +181,13 @@ describe 'Malicious HTML' do
|
|
177
181
|
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
178
182
|
fragment = Nokogiri::HTML.fragment(input)
|
179
183
|
@s.node!(fragment)
|
180
|
-
fragment.to_html.
|
184
|
+
_(fragment.to_html).
|
181
185
|
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
182
186
|
end
|
183
187
|
|
184
188
|
it 'should round-trip to the same output' do
|
185
189
|
output = @s.fragment(input)
|
186
|
-
@s.fragment(output).must_equal(output)
|
190
|
+
_(@s.fragment(output)).must_equal(output)
|
187
191
|
end
|
188
192
|
end
|
189
193
|
|
@@ -191,9 +195,11 @@ describe 'Malicious HTML' do
|
|
191
195
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
192
196
|
|
193
197
|
it 'should not escape characters unnecessarily' do
|
198
|
+
skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
|
199
|
+
|
194
200
|
# This uses Nokogumbo's HTML-compliant serializer rather than
|
195
201
|
# libxml2's.
|
196
|
-
@s.fragment(input).
|
202
|
+
_(@s.fragment(input)).
|
197
203
|
must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
198
204
|
|
199
205
|
# This uses the not-quite-standards-compliant libxml2 serializer via
|
@@ -202,15 +208,132 @@ describe 'Malicious HTML' do
|
|
202
208
|
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
203
209
|
fragment = Nokogiri::HTML.fragment(input)
|
204
210
|
@s.node!(fragment)
|
205
|
-
fragment.to_html.
|
211
|
+
_(fragment.to_html).
|
206
212
|
must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
|
207
213
|
end
|
208
214
|
|
209
215
|
it 'should round-trip to the same output' do
|
210
216
|
output = @s.fragment(input)
|
211
|
-
@s.fragment(output).must_equal(output)
|
217
|
+
_(@s.fragment(output)).must_equal(output)
|
212
218
|
end
|
213
219
|
end
|
214
220
|
end
|
215
221
|
end
|
222
|
+
|
223
|
+
# https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
|
224
|
+
describe 'foreign content bypass in relaxed config' do
|
225
|
+
it 'prevents a sanitization bypass via carefully crafted foreign content' do
|
226
|
+
%w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
|
227
|
+
_(@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/])).
|
228
|
+
must_equal ''
|
229
|
+
|
230
|
+
_(@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/])).
|
231
|
+
must_equal ''
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
# These tests cover an unsupported and unsafe custom config that allows MathML
|
237
|
+
# and SVG elements, which Sanitize's docs specifically say multiple times in
|
238
|
+
# big prominent warnings that you SHOULD NOT DO because Sanitize doesn't
|
239
|
+
# support MathML or SVG.
|
240
|
+
#
|
241
|
+
# Do not use the custom configs you see in these tests! If you do, you may be
|
242
|
+
# creating XSS vulnerabilities in your application.
|
243
|
+
describe 'foreign content bypass in unsafe custom config that allows MathML or SVG' do
|
244
|
+
unescaped_content_elements = %w[
|
245
|
+
noembed
|
246
|
+
noframes
|
247
|
+
plaintext
|
248
|
+
script
|
249
|
+
xmp
|
250
|
+
]
|
251
|
+
|
252
|
+
removed_content_elements = %w[
|
253
|
+
iframe
|
254
|
+
]
|
255
|
+
|
256
|
+
removed_elements = %w[
|
257
|
+
noscript
|
258
|
+
style
|
259
|
+
]
|
260
|
+
|
261
|
+
before do
|
262
|
+
@s = Sanitize.new(
|
263
|
+
Sanitize::Config.merge(
|
264
|
+
Sanitize::Config::RELAXED,
|
265
|
+
elements: Sanitize::Config::RELAXED[:elements] +
|
266
|
+
unescaped_content_elements +
|
267
|
+
removed_content_elements +
|
268
|
+
%w[math svg]
|
269
|
+
)
|
270
|
+
)
|
271
|
+
end
|
272
|
+
|
273
|
+
unescaped_content_elements.each do |name|
|
274
|
+
it "forcibly escapes text content inside `<#{name}>` in a MathML namespace" do
|
275
|
+
assert_equal(
|
276
|
+
"<math><#{name}>&lt;img src=x onerror=alert(1)&gt;</#{name}></math>",
|
277
|
+
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
278
|
+
)
|
279
|
+
end
|
280
|
+
|
281
|
+
it "forcibly escapes text content inside `<#{name}>` in an SVG namespace" do
|
282
|
+
assert_equal(
|
283
|
+
"<svg><#{name}>&lt;img src=x onerror=alert(1)&gt;</#{name}></svg>",
|
284
|
+
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
285
|
+
)
|
286
|
+
end
|
287
|
+
end
|
288
|
+
|
289
|
+
removed_content_elements.each do |name|
|
290
|
+
it "removes text content inside `<#{name}>` in a MathML namespace" do
|
291
|
+
assert_equal(
|
292
|
+
"<math><#{name}></#{name}></math>",
|
293
|
+
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
294
|
+
)
|
295
|
+
end
|
296
|
+
|
297
|
+
it "removes text content inside `<#{name}>` in an SVG namespace" do
|
298
|
+
assert_equal(
|
299
|
+
"<svg><#{name}></#{name}></svg>",
|
300
|
+
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
301
|
+
)
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
305
|
+
removed_elements.each do |name|
|
306
|
+
it "removes `<#{name}>` elements in a MathML namespace" do
|
307
|
+
assert_equal(
|
308
|
+
'<math></math>',
|
309
|
+
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
310
|
+
)
|
311
|
+
end
|
312
|
+
|
313
|
+
it "removes `<#{name}>` elements in an SVG namespace" do
|
314
|
+
assert_equal(
|
315
|
+
'<svg></svg>',
|
316
|
+
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
317
|
+
)
|
318
|
+
end
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
describe 'sanitization bypass by exploiting scripting-disabled <noscript> behavior' do
|
323
|
+
before do
|
324
|
+
@s = Sanitize.new(
|
325
|
+
Sanitize::Config.merge(
|
326
|
+
Sanitize::Config::RELAXED,
|
327
|
+
elements: Sanitize::Config::RELAXED[:elements] + ['noscript']
|
328
|
+
)
|
329
|
+
)
|
330
|
+
end
|
331
|
+
|
332
|
+
it 'is prevented by removing `<noscript>` elements regardless of the allowlist' do
|
333
|
+
assert_equal(
|
334
|
+
'',
|
335
|
+
@s.fragment(%[<noscript><div id='</noscript><img src=x onerror=alert(1)> '>])
|
336
|
+
)
|
337
|
+
end
|
338
|
+
end
|
216
339
|
end
|
data/test/test_parser.rb
CHANGED
@@ -6,26 +6,26 @@ describe 'Parser' do
|
|
6
6
|
parallelize_me!
|
7
7
|
|
8
8
|
it 'should translate valid entities into characters' do
|
9
|
-
Sanitize.fragment("&apos;&eacute;&amp;").must_equal("'é&amp;")
|
9
|
+
_(Sanitize.fragment("&apos;&eacute;&amp;")).must_equal("'é&amp;")
|
10
10
|
end
|
11
11
|
|
12
12
|
it 'should translate orphaned ampersands into entities' do
|
13
|
-
Sanitize.fragment('at&t').must_equal('at&amp;t')
|
13
|
+
_(Sanitize.fragment('at&t')).must_equal('at&amp;t')
|
14
14
|
end
|
15
15
|
|
16
16
|
it 'should not add newlines after tags when serializing a fragment' do
|
17
|
-
Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :elements => ['div', 'p'])
|
17
|
+
_(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :elements => ['div', 'p']))
|
18
18
|
.must_equal "<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>"
|
19
19
|
end
|
20
20
|
|
21
21
|
it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do
|
22
|
-
Sanitize.fragment('foo <script>bar').must_equal 'foo '
|
23
|
-
Sanitize.fragment('foo <style>bar').must_equal 'foo '
|
22
|
+
_(Sanitize.fragment('foo <script>bar')).must_equal 'foo '
|
23
|
+
_(Sanitize.fragment('foo <style>bar')).must_equal 'foo '
|
24
24
|
end
|
25
25
|
|
26
26
|
it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
|
27
|
-
Sanitize.fragment('1 > 2 and 2 < 1').must_equal '1 &gt; 2 and 2 &lt; 1'
|
28
|
-
Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D').must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
|
27
|
+
_(Sanitize.fragment('1 > 2 and 2 < 1')).must_equal '1 &gt; 2 and 2 &lt; 1'
|
28
|
+
_(Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D')).must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
|
29
29
|
end
|
30
30
|
|
31
31
|
describe 'when siblings are added after a node during traversal' do
|
@@ -55,11 +55,11 @@ describe 'Parser' do
|
|
55
55
|
siblings << env[:node][:id]
|
56
56
|
end
|
57
57
|
|
58
|
-
return {:node_whitelist => [env[:node]]}
|
58
|
+
return {:node_allowlist => [env[:node]]}
|
59
59
|
})
|
60
60
|
|
61
61
|
# All siblings should be traversed, and in the order added.
|
62
|
-
siblings.must_equal [
|
62
|
+
_(siblings).must_equal [
|
63
63
|
"added_one_one_one",
|
64
64
|
"added_one_one",
|
65
65
|
"added_one_two",
|
data/test/test_sanitize.rb
CHANGED
@@ -9,7 +9,7 @@ describe 'Sanitize' do
|
|
9
9
|
]
|
10
10
|
|
11
11
|
Sanitize.new({ :transformers => transformers })
|
12
|
-
transformers.length.must_equal(1)
|
12
|
+
_(transformers.length).must_equal(1)
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
@@ -24,38 +24,38 @@ describe 'Sanitize' do
|
|
24
24
|
end
|
25
25
|
|
26
26
|
it 'should sanitize an HTML document' do
|
27
|
-
@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>')
|
27
|
+
_(@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'))
|
28
28
|
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
29
29
|
end
|
30
30
|
|
31
31
|
it 'should not modify the input string' do
|
32
32
|
input = '<!DOCTYPE html><b>foo</b>'
|
33
33
|
@s.document(input)
|
34
|
-
input.must_equal('<!DOCTYPE html><b>foo</b>')
|
34
|
+
_(input).must_equal('<!DOCTYPE html><b>foo</b>')
|
35
35
|
end
|
36
36
|
|
37
37
|
it 'should not choke on frozen documents' do
|
38
|
-
@s.document('<!doctype html><html><b>foo</b>'.freeze).must_equal "<html>foo</html>"
|
38
|
+
_(@s.document('<!doctype html><html><b>foo</b>'.freeze)).must_equal "<html>foo</html>"
|
39
39
|
end
|
40
40
|
|
41
41
|
it 'should normalize newlines' do
|
42
|
-
@s.document("a\r\n\n\r\r\r\nz").must_equal "<html>a\n\n\n\n\nz</html>"
|
42
|
+
_(@s.document("a\r\n\n\r\r\r\nz")).must_equal "<html>a\n\n\n\n\nz</html>"
|
43
43
|
end
|
44
44
|
|
45
45
|
it 'should strip control characters (except ASCII whitespace)' do
|
46
46
|
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
47
47
|
whitespace = "\t\n\f\u0020"
|
48
|
-
@s.document("a#{sample_control_chars}#{whitespace}z").must_equal "<html>a#{whitespace}z</html>"
|
48
|
+
_(@s.document("a#{sample_control_chars}#{whitespace}z")).must_equal "<html>a#{whitespace}z</html>"
|
49
49
|
end
|
50
50
|
|
51
51
|
it 'should strip non-characters' do
|
52
52
|
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
53
|
-
@s.document("a#{sample_non_chars}z").must_equal "<html>az</html>"
|
53
|
+
_(@s.document("a#{sample_non_chars}z")).must_equal "<html>az</html>"
|
54
54
|
end
|
55
55
|
|
56
|
-
describe 'when html body exceeds Nokogumbo::DEFAULT_MAX_TREE_DEPTH' do
|
56
|
+
describe 'when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH' do
|
57
57
|
let(:content) do
|
58
|
-
content = nest_html_content('<b>foo</b>', Nokogumbo::DEFAULT_MAX_TREE_DEPTH)
|
58
|
+
content = nest_html_content('<b>foo</b>', Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH)
|
59
59
|
"<html>#{content}</html>"
|
60
60
|
end
|
61
61
|
|
@@ -71,7 +71,7 @@ describe 'Sanitize' do
|
|
71
71
|
end
|
72
72
|
|
73
73
|
it 'does not raise an ArgumentError exception' do
|
74
|
-
@s.document(content).must_equal '<html>foo</html>'
|
74
|
+
_(@s.document(content)).must_equal '<html>foo</html>'
|
75
75
|
end
|
76
76
|
end
|
77
77
|
end
|
@@ -79,45 +79,45 @@ describe 'Sanitize' do
|
|
79
79
|
|
80
80
|
describe '#fragment' do
|
81
81
|
it 'should sanitize an HTML fragment' do
|
82
|
-
@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
82
|
+
_(@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'))
|
83
83
|
.must_equal 'Lorem ipsum dolor sit amet '
|
84
84
|
end
|
85
85
|
|
86
86
|
it 'should not modify the input string' do
|
87
87
|
input = '<b>foo</b>'
|
88
88
|
@s.fragment(input)
|
89
|
-
input.must_equal '<b>foo</b>'
|
89
|
+
_(input).must_equal '<b>foo</b>'
|
90
90
|
end
|
91
91
|
|
92
92
|
it 'should not choke on fragments containing <html> or <body>' do
|
93
|
-
@s.fragment('<html><b>foo</b></html>').must_equal 'foo'
|
94
|
-
@s.fragment('<body><b>foo</b></body>').must_equal 'foo'
|
95
|
-
@s.fragment('<html><body><b>foo</b></body></html>').must_equal 'foo'
|
96
|
-
@s.fragment('<!DOCTYPE html><html><body><b>foo</b></body></html>').must_equal 'foo'
|
93
|
+
_(@s.fragment('<html><b>foo</b></html>')).must_equal 'foo'
|
94
|
+
_(@s.fragment('<body><b>foo</b></body>')).must_equal 'foo'
|
95
|
+
_(@s.fragment('<html><body><b>foo</b></body></html>')).must_equal 'foo'
|
96
|
+
_(@s.fragment('<!DOCTYPE html><html><body><b>foo</b></body></html>')).must_equal 'foo'
|
97
97
|
end
|
98
98
|
|
99
99
|
it 'should not choke on frozen fragments' do
|
100
|
-
@s.fragment('<b>foo</b>'.freeze).must_equal 'foo'
|
100
|
+
_(@s.fragment('<b>foo</b>'.freeze)).must_equal 'foo'
|
101
101
|
end
|
102
102
|
|
103
103
|
it 'should normalize newlines' do
|
104
|
-
@s.fragment("a\r\n\n\r\r\r\nz").must_equal "a\n\n\n\n\nz"
|
104
|
+
_(@s.fragment("a\r\n\n\r\r\r\nz")).must_equal "a\n\n\n\n\nz"
|
105
105
|
end
|
106
106
|
|
107
107
|
it 'should strip control characters (except ASCII whitespace)' do
|
108
108
|
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
109
109
|
whitespace = "\t\n\f\u0020"
|
110
|
-
@s.fragment("a#{sample_control_chars}#{whitespace}z").must_equal "a#{whitespace}z"
|
110
|
+
_(@s.fragment("a#{sample_control_chars}#{whitespace}z")).must_equal "a#{whitespace}z"
|
111
111
|
end
|
112
112
|
|
113
113
|
it 'should strip non-characters' do
|
114
114
|
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
115
|
-
@s.fragment("a#{sample_non_chars}z").must_equal "az"
|
115
|
+
_(@s.fragment("a#{sample_non_chars}z")).must_equal "az"
|
116
116
|
end
|
117
117
|
|
118
|
-
describe 'when html body exceeds Nokogumbo::DEFAULT_MAX_TREE_DEPTH' do
|
118
|
+
describe 'when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH' do
|
119
119
|
let(:content) do
|
120
|
-
content = nest_html_content('<b>foo</b>', Nokogumbo::DEFAULT_MAX_TREE_DEPTH)
|
120
|
+
content = nest_html_content('<b>foo</b>', Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH)
|
121
121
|
"<body>#{content}</body>"
|
122
122
|
end
|
123
123
|
|
@@ -133,7 +133,7 @@ describe 'Sanitize' do
|
|
133
133
|
end
|
134
134
|
|
135
135
|
it 'does not raise an ArgumentError exception' do
|
136
|
-
@s.fragment(content).must_equal 'foo'
|
136
|
+
_(@s.fragment(content)).must_equal 'foo'
|
137
137
|
end
|
138
138
|
end
|
139
139
|
end
|
@@ -147,13 +147,13 @@ describe 'Sanitize' do
|
|
147
147
|
doc.xpath('/html/body/node()').each {|node| frag << node }
|
148
148
|
|
149
149
|
@s.node!(frag)
|
150
|
-
frag.to_html.must_equal 'Lorem ipsum dolor sit amet '
|
150
|
+
_(frag.to_html).must_equal 'Lorem ipsum dolor sit amet '
|
151
151
|
end
|
152
152
|
|
153
|
-
describe "when the given node is a document and <html> isn't whitelisted" do
|
153
|
+
describe "when the given node is a document and <html> isn't allowlisted" do
|
154
154
|
it 'should raise a Sanitize::Error' do
|
155
155
|
doc = Nokogiri::HTML5.parse('foo')
|
156
|
-
proc { @s.node!(doc) }.must_raise Sanitize::Error
|
156
|
+
_(proc { @s.node!(doc) }).must_raise Sanitize::Error
|
157
157
|
end
|
158
158
|
end
|
159
159
|
end
|
@@ -163,7 +163,7 @@ describe 'Sanitize' do
|
|
163
163
|
describe '.document' do
|
164
164
|
it 'should sanitize an HTML document with the given config' do
|
165
165
|
html = '<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'
|
166
|
-
Sanitize.document(html, :elements => ['html'])
|
166
|
+
_(Sanitize.document(html, :elements => ['html']))
|
167
167
|
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
168
168
|
end
|
169
169
|
end
|
@@ -171,7 +171,7 @@ describe 'Sanitize' do
|
|
171
171
|
describe '.fragment' do
|
172
172
|
it 'should sanitize an HTML fragment with the given config' do
|
173
173
|
html = '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'
|
174
|
-
Sanitize.fragment(html, :elements => ['strong'])
|
174
|
+
_(Sanitize.fragment(html, :elements => ['strong']))
|
175
175
|
.must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
|
176
176
|
end
|
177
177
|
end
|
@@ -184,7 +184,7 @@ describe 'Sanitize' do
|
|
184
184
|
doc.xpath('/html/body/node()').each {|node| frag << node }
|
185
185
|
|
186
186
|
Sanitize.node!(frag, :elements => ['strong'])
|
187
|
-
frag.to_html.must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
|
187
|
+
_(frag.to_html).must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
|
188
188
|
end
|
189
189
|
end
|
190
190
|
end
|