sanitize 4.6.4 → 6.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.md +259 -16
- data/LICENSE +1 -1
- data/README.md +89 -76
- data/lib/sanitize/config/default.rb +15 -4
- data/lib/sanitize/config/relaxed.rb +1 -1
- data/lib/sanitize/css.rb +2 -2
- data/lib/sanitize/transformers/clean_comment.rb +1 -1
- data/lib/sanitize/transformers/clean_css.rb +4 -3
- data/lib/sanitize/transformers/clean_doctype.rb +1 -1
- data/lib/sanitize/transformers/clean_element.rb +105 -22
- data/lib/sanitize/version.rb +1 -3
- data/lib/sanitize.rb +56 -72
- data/test/common.rb +0 -31
- data/test/test_clean_comment.rb +16 -20
- data/test/test_clean_css.rb +6 -6
- data/test/test_clean_doctype.rb +22 -22
- data/test/test_clean_element.rb +200 -82
- data/test/test_config.rb +9 -9
- data/test/test_malicious_css.rb +20 -7
- data/test/test_malicious_html.rb +179 -32
- data/test/test_parser.rb +9 -38
- data/test/test_sanitize.rb +114 -29
- data/test/test_sanitize_css.rb +88 -61
- data/test/test_transformers.rb +52 -46
- metadata +17 -33
- data/test/test_unicode.rb +0 -95
data/test/test_malicious_html.rb
CHANGED
@@ -17,124 +17,126 @@ describe 'Malicious HTML' do
|
|
17
17
|
|
18
18
|
describe 'comments' do
|
19
19
|
it 'should not allow script injection via conditional comments' do
|
20
|
-
@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]).
|
20
|
+
_(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->])).
|
21
21
|
must_equal ''
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
25
|
describe 'interpolation (ERB, PHP, etc.)' do
|
26
26
|
it 'should escape ERB-style tags' do
|
27
|
-
@s.fragment('<% naughty_ruby_code %>').
|
27
|
+
_(@s.fragment('<% naughty_ruby_code %>')).
|
28
28
|
must_equal '&lt;% naughty_ruby_code %&gt;'
|
29
29
|
|
30
|
-
@s.fragment('<%= naughty_ruby_code %>').
|
30
|
+
_(@s.fragment('<%= naughty_ruby_code %>')).
|
31
31
|
must_equal '&lt;%= naughty_ruby_code %&gt;'
|
32
32
|
end
|
33
33
|
|
34
34
|
it 'should remove PHP-style tags' do
|
35
|
-
@s.fragment('<? naughtyPHPCode(); ?>').
|
35
|
+
_(@s.fragment('<? naughtyPHPCode(); ?>')).
|
36
36
|
must_equal ''
|
37
37
|
|
38
|
-
@s.fragment('<?= naughtyPHPCode(); ?>').
|
38
|
+
_(@s.fragment('<?= naughtyPHPCode(); ?>')).
|
39
39
|
must_equal ''
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
43
|
describe '<body>' do
|
44
44
|
it 'should not be possible to inject JS via a malformed event attribute' do
|
45
|
-
@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>').
|
46
|
-
must_equal "<html><head></head><body></body></html
|
45
|
+
_(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>')).
|
46
|
+
must_equal "<html><head></head><body></body></html>"
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
50
|
describe '<iframe>' do
|
51
51
|
it 'should not be possible to inject an iframe using an improperly closed tag' do
|
52
|
-
@s.fragment(%[<iframe src=http://ha.ckers.org/scriptlet.html <]).
|
52
|
+
_(@s.fragment(%[<iframe src=http://ha.ckers.org/scriptlet.html <])).
|
53
53
|
must_equal ''
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
57
57
|
describe '<img>' do
|
58
58
|
it 'should not be possible to inject JS via an unquoted <img> src attribute' do
|
59
|
-
@s.fragment("<img src=javascript:alert('XSS')>").must_equal '<img>'
|
59
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal '<img>'
|
60
60
|
end
|
61
61
|
|
62
62
|
it 'should not be possible to inject JS using grave accents as <img> src delimiters' do
|
63
|
-
@s.fragment("<img src=`javascript:alert('XSS')`>").must_equal '<img>'
|
63
|
+
_(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal '<img>'
|
64
64
|
end
|
65
65
|
|
66
66
|
it 'should not be possible to inject <script> via a malformed <img> tag' do
|
67
|
-
@s.fragment('<img """><script>alert("XSS")</script>">').
|
68
|
-
must_equal '<img>
|
67
|
+
_(@s.fragment('<img """><script>alert("XSS")</script>">')).
|
68
|
+
must_equal '<img>"&gt;'
|
69
69
|
end
|
70
70
|
|
71
71
|
it 'should not be possible to inject protocol-based JS' do
|
72
|
-
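@s.fragment('<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>').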
|
72
|
+
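_(@s.fragment('<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>')).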
|
73
73
|
must_equal '<img>'
|
74
74
|
|
75
|
-
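@s.fragment('<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>').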
|
75
|
+
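_(@s.fragment('<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>')).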
|
76
76
|
must_equal '<img>'
|
77
77
|
|
78
|
-
@s.fragment('<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>').
|
78
|
+
_(@s.fragment('<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>')).
|
79
79
|
must_equal '<img>'
|
80
80
|
|
81
81
|
# Encoded tab character.
|
82
|
-
@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">]).
|
82
|
+
_(@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">])).
|
83
83
|
must_equal '<img>'
|
84
84
|
|
85
85
|
# Encoded newline.
|
86
|
-
@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">]).
|
86
|
+
_(@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">])).
|
87
87
|
must_equal '<img>'
|
88
88
|
|
89
89
|
# Encoded carriage return.
|
90
|
-
@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">]).
|
90
|
+
_(@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">])).
|
91
91
|
must_equal '<img>'
|
92
92
|
|
93
93
|
# Null byte.
|
94
|
-
@s.fragment(%[<img src=java\0script:alert("XSS")>]).
|
94
|
+
_(@s.fragment(%[<img src=java\0script:alert("XSS")>])).
|
95
95
|
must_equal '<img>'
|
96
96
|
|
97
97
|
# Spaces plus meta char.
|
98
|
-
@s.fragment(%[<img src="  javascript:alert('XSS');">]).
|
98
|
+
_(@s.fragment(%[<img src="  javascript:alert('XSS');">])).
|
99
99
|
must_equal '<img>'
|
100
100
|
|
101
101
|
# Mixed spaces and tabs.
|
102
|
-
@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]).
|
102
|
+
_(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">])).
|
103
103
|
must_equal '<img>'
|
104
104
|
end
|
105
105
|
|
106
106
|
it 'should not be possible to inject protocol-based JS via whitespace' do
|
107
|
-
@s.fragment(%[<img src="jav\tascript:alert('XSS');">]).
|
107
|
+
_(@s.fragment(%[<img src="jav\tascript:alert('XSS');">])).
|
108
108
|
must_equal '<img>'
|
109
109
|
end
|
110
110
|
|
111
111
|
it 'should not be possible to inject JS using a half-open <img> tag' do
|
112
|
-
@s.fragment(%[<img src="javascript:alert('XSS')"]).
|
112
|
+
_(@s.fragment(%[<img src="javascript:alert('XSS')"])).
|
113
113
|
must_equal ''
|
114
114
|
end
|
115
115
|
end
|
116
116
|
|
117
117
|
describe '<script>' do
|
118
118
|
it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do
|
119
|
-
@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]).
|
120
|
-
must_equal '
|
119
|
+
_(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>])).
|
120
|
+
must_equal ''
|
121
121
|
end
|
122
122
|
|
123
123
|
it 'should not be possible to inject <script> via extraneous open brackets' do
|
124
|
-
@s.fragment(%[<<script>alert("XSS");//<</script>]).
|
125
|
-
must_equal '<
|
124
|
+
_(@s.fragment(%[<<script>alert("XSS");//<</script>])).
|
125
|
+
must_equal '&lt;'
|
126
126
|
end
|
127
127
|
end
|
128
128
|
|
129
129
|
# libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
|
130
130
|
# attempt to preserve server-side includes. This can result in XSS since an
|
131
|
-
# unescaped double quote can allow an attacker to inject a non-whitelisted
|
131
|
+
# unescaped double quote can allow an attacker to inject a non-allowlisted
|
132
132
|
# attribute. Sanitize works around this by implementing its own escaping for
|
133
133
|
# affected attributes.
|
134
134
|
#
|
135
135
|
# The relevant libxml2 code is here:
|
136
136
|
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
137
137
|
describe 'unsafe libxml2 server-side includes in attributes' do
|
138
|
+
using_unpatched_libxml2 = Nokogiri::VersionInfo.instance.libxml2_using_system?
|
139
|
+
|
138
140
|
tag_configs = [
|
139
141
|
{
|
140
142
|
tag_name: 'a',
|
@@ -166,12 +168,26 @@ describe 'Malicious HTML' do
|
|
166
168
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
167
169
|
|
168
170
|
it 'should escape unsafe characters in attributes' do
|
169
|
-
|
171
|
+
skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
|
172
|
+
|
173
|
+
# This uses Nokogumbo's HTML-compliant serializer rather than
|
174
|
+
# libxml2's.
|
175
|
+
_(@s.fragment(input)).
|
176
|
+
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
177
|
+
|
178
|
+
# This uses the not-quite-standards-compliant libxml2 serializer via
|
179
|
+
# Nokogiri, so the output may be a little different as of Nokogiri
|
180
|
+
# 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
|
181
|
+
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
182
|
+
fragment = Nokogiri::HTML.fragment(input)
|
183
|
+
@s.node!(fragment)
|
184
|
+
_(fragment.to_html).
|
185
|
+
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
170
186
|
end
|
171
187
|
|
172
188
|
it 'should round-trip to the same output' do
|
173
189
|
output = @s.fragment(input)
|
174
|
-
@s.fragment(output).must_equal(output)
|
190
|
+
_(@s.fragment(output)).must_equal(output)
|
175
191
|
end
|
176
192
|
end
|
177
193
|
|
@@ -179,14 +195,145 @@ describe 'Malicious HTML' do
|
|
179
195
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
180
196
|
|
181
197
|
it 'should not escape characters unnecessarily' do
|
182
|
-
|
198
|
+
skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
|
199
|
+
|
200
|
+
# This uses Nokogumbo's HTML-compliant serializer rather than
|
201
|
+
# libxml2's.
|
202
|
+
_(@s.fragment(input)).
|
203
|
+
must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
204
|
+
|
205
|
+
# This uses the not-quite-standards-compliant libxml2 serializer via
|
206
|
+
# Nokogiri, so the output may be a little different as of Nokogiri
|
207
|
+
# 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
|
208
|
+
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
209
|
+
fragment = Nokogiri::HTML.fragment(input)
|
210
|
+
@s.node!(fragment)
|
211
|
+
_(fragment.to_html).
|
212
|
+
must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
|
183
213
|
end
|
184
214
|
|
185
215
|
it 'should round-trip to the same output' do
|
186
216
|
output = @s.fragment(input)
|
187
|
-
@s.fragment(output).must_equal(output)
|
217
|
+
_(@s.fragment(output)).must_equal(output)
|
188
218
|
end
|
189
219
|
end
|
190
220
|
end
|
191
221
|
end
|
222
|
+
|
223
|
+
# https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
|
224
|
+
describe 'foreign content bypass in relaxed config' do
|
225
|
+
it 'prevents a sanitization bypass via carefully crafted foreign content' do
|
226
|
+
%w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
|
227
|
+
_(@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/])).
|
228
|
+
must_equal ''
|
229
|
+
|
230
|
+
_(@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/])).
|
231
|
+
must_equal ''
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
# These tests cover an unsupported and unsafe custom config that allows MathML
|
237
|
+
# and SVG elements, which Sanitize's docs specifically say multiple times in
|
238
|
+
# big prominent warnings that you SHOULD NOT DO because Sanitize doesn't
|
239
|
+
# support MathML or SVG.
|
240
|
+
#
|
241
|
+
# Do not use the custom configs you see in these tests! If you do, you may be
|
242
|
+
# creating XSS vulnerabilities in your application.
|
243
|
+
describe 'foreign content bypass in unsafe custom config that allows MathML or SVG' do
|
244
|
+
unescaped_content_elements = %w[
|
245
|
+
noembed
|
246
|
+
noframes
|
247
|
+
plaintext
|
248
|
+
script
|
249
|
+
xmp
|
250
|
+
]
|
251
|
+
|
252
|
+
removed_content_elements = %w[
|
253
|
+
iframe
|
254
|
+
]
|
255
|
+
|
256
|
+
removed_elements = %w[
|
257
|
+
noscript
|
258
|
+
style
|
259
|
+
]
|
260
|
+
|
261
|
+
before do
|
262
|
+
@s = Sanitize.new(
|
263
|
+
Sanitize::Config.merge(
|
264
|
+
Sanitize::Config::RELAXED,
|
265
|
+
elements: Sanitize::Config::RELAXED[:elements] +
|
266
|
+
unescaped_content_elements +
|
267
|
+
removed_content_elements +
|
268
|
+
%w[math svg]
|
269
|
+
)
|
270
|
+
)
|
271
|
+
end
|
272
|
+
|
273
|
+
unescaped_content_elements.each do |name|
|
274
|
+
it "forcibly escapes text content inside `<#{name}>` in a MathML namespace" do
|
275
|
+
assert_equal(
|
276
|
+
"<math><#{name}>&lt;img src=x onerror=alert(1)&gt;</#{name}></math>",
|
277
|
+
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
278
|
+
)
|
279
|
+
end
|
280
|
+
|
281
|
+
it "forcibly escapes text content inside `<#{name}>` in an SVG namespace" do
|
282
|
+
assert_equal(
|
283
|
+
"<svg><#{name}>&lt;img src=x onerror=alert(1)&gt;</#{name}></svg>",
|
284
|
+
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
285
|
+
)
|
286
|
+
end
|
287
|
+
end
|
288
|
+
|
289
|
+
removed_content_elements.each do |name|
|
290
|
+
it "removes text content inside `<#{name}>` in a MathML namespace" do
|
291
|
+
assert_equal(
|
292
|
+
"<math><#{name}></#{name}></math>",
|
293
|
+
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
294
|
+
)
|
295
|
+
end
|
296
|
+
|
297
|
+
it "removes text content inside `<#{name}>` in an SVG namespace" do
|
298
|
+
assert_equal(
|
299
|
+
"<svg><#{name}></#{name}></svg>",
|
300
|
+
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
301
|
+
)
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
305
|
+
removed_elements.each do |name|
|
306
|
+
it "removes `<#{name}>` elements in a MathML namespace" do
|
307
|
+
assert_equal(
|
308
|
+
'<math></math>',
|
309
|
+
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
310
|
+
)
|
311
|
+
end
|
312
|
+
|
313
|
+
it "removes `<#{name}>` elements in an SVG namespace" do
|
314
|
+
assert_equal(
|
315
|
+
'<svg></svg>',
|
316
|
+
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
317
|
+
)
|
318
|
+
end
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
describe 'sanitization bypass by exploiting scripting-disabled <noscript> behavior' do
|
323
|
+
before do
|
324
|
+
@s = Sanitize.new(
|
325
|
+
Sanitize::Config.merge(
|
326
|
+
Sanitize::Config::RELAXED,
|
327
|
+
elements: Sanitize::Config::RELAXED[:elements] + ['noscript']
|
328
|
+
)
|
329
|
+
)
|
330
|
+
end
|
331
|
+
|
332
|
+
it 'is prevented by removing `<noscript>` elements regardless of the allowlist' do
|
333
|
+
assert_equal(
|
334
|
+
'',
|
335
|
+
@s.fragment(%[<noscript><div id='</noscript><img src=x onerror=alert(1)> '>])
|
336
|
+
)
|
337
|
+
end
|
338
|
+
end
|
192
339
|
end
|
data/test/test_parser.rb
CHANGED
@@ -6,55 +6,26 @@ describe 'Parser' do
|
|
6
6
|
parallelize_me!
|
7
7
|
|
8
8
|
it 'should translate valid entities into characters' do
|
9
|
-
Sanitize.fragment("&apos;&eacute;&amp;").must_equal("'é&amp;")
|
9
|
+
_(Sanitize.fragment("&apos;&eacute;&amp;")).must_equal("'é&amp;")
|
10
10
|
end
|
11
11
|
|
12
12
|
it 'should translate orphaned ampersands into entities' do
|
13
|
-
Sanitize.fragment('at&t').must_equal('at&amp;t')
|
13
|
+
_(Sanitize.fragment('at&t')).must_equal('at&amp;t')
|
14
14
|
end
|
15
15
|
|
16
16
|
it 'should not add newlines after tags when serializing a fragment' do
|
17
|
-
Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :elements => ['div', 'p'])
|
17
|
+
_(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :elements => ['div', 'p']))
|
18
18
|
.must_equal "<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>"
|
19
19
|
end
|
20
20
|
|
21
21
|
it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do
|
22
|
-
Sanitize.fragment('foo <script>bar').must_equal 'foo
|
23
|
-
Sanitize.fragment('foo <style>bar').must_equal 'foo
|
22
|
+
_(Sanitize.fragment('foo <script>bar')).must_equal 'foo '
|
23
|
+
_(Sanitize.fragment('foo <style>bar')).must_equal 'foo '
|
24
24
|
end
|
25
25
|
|
26
26
|
it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
|
27
|
-
Sanitize.fragment('1 > 2 and 2 < 1').must_equal '1 &gt; 2 and 2 &lt; 1'
|
28
|
-
Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D').must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
|
29
|
-
end
|
30
|
-
|
31
|
-
# https://github.com/sparklemotion/nokogiri/issues/1008
|
32
|
-
it 'should work around the libxml2 content-type meta tag bug' do
|
33
|
-
Sanitize.document('<html><head></head><body>Howdy!</body></html>',
|
34
|
-
:elements => %w[html head body]
|
35
|
-
).must_equal "<html><head></head><body>Howdy!</body></html>\n"
|
36
|
-
|
37
|
-
Sanitize.document('<html><head></head><body>Howdy!</body></html>',
|
38
|
-
:elements => %w[html head meta body]
|
39
|
-
).must_equal "<html><head></head><body>Howdy!</body></html>\n"
|
40
|
-
|
41
|
-
Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
|
42
|
-
:elements => %w[html head meta body],
|
43
|
-
:attributes => {'meta' => ['charset']}
|
44
|
-
).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>\n"
|
45
|
-
|
46
|
-
Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
|
47
|
-
:elements => %w[html head meta body],
|
48
|
-
:attributes => {'meta' => %w[charset content http-equiv]}
|
49
|
-
).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>\n"
|
50
|
-
|
51
|
-
# Edge case: an existing content-type meta tag with a non-UTF-8 content type
|
52
|
-
# will be converted to UTF-8, since that's the only output encoding we
|
53
|
-
# support.
|
54
|
-
Sanitize.document('<html><head><meta http-equiv="content-type" content="text/html;charset=us-ascii"></head><body>Howdy!</body></html>',
|
55
|
-
:elements => %w[html head meta body],
|
56
|
-
:attributes => {'meta' => %w[charset content http-equiv]}
|
57
|
-
).must_equal "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"></head><body>Howdy!</body></html>\n"
|
27
|
+
_(Sanitize.fragment('1 > 2 and 2 < 1')).must_equal '1 &gt; 2 and 2 &lt; 1'
|
28
|
+
_(Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D')).must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
|
58
29
|
end
|
59
30
|
|
60
31
|
describe 'when siblings are added after a node during traversal' do
|
@@ -84,11 +55,11 @@ describe 'Parser' do
|
|
84
55
|
siblings << env[:node][:id]
|
85
56
|
end
|
86
57
|
|
87
|
-
return {:node_whitelist => [env[:node]]}
|
58
|
+
return {:node_allowlist => [env[:node]]}
|
88
59
|
})
|
89
60
|
|
90
61
|
# All siblings should be traversed, and in the order added.
|
91
|
-
siblings.must_equal [
|
62
|
+
_(siblings).must_equal [
|
92
63
|
"added_one_one_one",
|
93
64
|
"added_one_one",
|
94
65
|
"added_one_two",
|
data/test/test_sanitize.rb
CHANGED
@@ -9,7 +9,7 @@ describe 'Sanitize' do
|
|
9
9
|
]
|
10
10
|
|
11
11
|
Sanitize.new({ :transformers => transformers })
|
12
|
-
transformers.length.must_equal(1)
|
12
|
+
_(transformers.length).must_equal(1)
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
@@ -24,42 +24,118 @@ describe 'Sanitize' do
|
|
24
24
|
end
|
25
25
|
|
26
26
|
it 'should sanitize an HTML document' do
|
27
|
-
@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>')
|
28
|
-
.must_equal "<html>Lorem ipsum dolor sit amet
|
27
|
+
_(@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'))
|
28
|
+
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
29
29
|
end
|
30
30
|
|
31
31
|
it 'should not modify the input string' do
|
32
32
|
input = '<!DOCTYPE html><b>foo</b>'
|
33
33
|
@s.document(input)
|
34
|
-
input.must_equal('<!DOCTYPE html><b>foo</b>')
|
34
|
+
_(input).must_equal('<!DOCTYPE html><b>foo</b>')
|
35
35
|
end
|
36
36
|
|
37
37
|
it 'should not choke on frozen documents' do
|
38
|
-
@s.document('<!doctype html><html><b>foo</b>'.freeze).must_equal "<html>foo</html
|
38
|
+
_(@s.document('<!doctype html><html><b>foo</b>'.freeze)).must_equal "<html>foo</html>"
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'should normalize newlines' do
|
42
|
+
_(@s.document("a\r\n\n\r\r\r\nz")).must_equal "<html>a\n\n\n\n\nz</html>"
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'should strip control characters (except ASCII whitespace)' do
|
46
|
+
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
47
|
+
whitespace = "\t\n\f\u0020"
|
48
|
+
_(@s.document("a#{sample_control_chars}#{whitespace}z")).must_equal "<html>a#{whitespace}z</html>"
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'should strip non-characters' do
|
52
|
+
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
53
|
+
_(@s.document("a#{sample_non_chars}z")).must_equal "<html>az</html>"
|
54
|
+
end
|
55
|
+
|
56
|
+
describe 'when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH' do
|
57
|
+
let(:content) do
|
58
|
+
content = nest_html_content('<b>foo</b>', Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH)
|
59
|
+
"<html>#{content}</html>"
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'raises an ArgumentError exception' do
|
63
|
+
assert_raises ArgumentError do
|
64
|
+
@s.document(content)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
describe 'and :max_tree_depth of -1 is supplied in :parser_options' do
|
69
|
+
before do
|
70
|
+
@s = Sanitize.new(elements: ['html'], parser_options: { max_tree_depth: -1 })
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'does not raise an ArgumentError exception' do
|
74
|
+
_(@s.document(content)).must_equal '<html>foo</html>'
|
75
|
+
end
|
76
|
+
end
|
39
77
|
end
|
40
78
|
end
|
41
79
|
|
42
80
|
describe '#fragment' do
|
43
81
|
it 'should sanitize an HTML fragment' do
|
44
|
-
@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
45
|
-
.must_equal 'Lorem ipsum dolor sit amet
|
82
|
+
_(@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'))
|
83
|
+
.must_equal 'Lorem ipsum dolor sit amet '
|
46
84
|
end
|
47
85
|
|
48
86
|
it 'should not modify the input string' do
|
49
87
|
input = '<b>foo</b>'
|
50
88
|
@s.fragment(input)
|
51
|
-
input.must_equal '<b>foo</b>'
|
89
|
+
_(input).must_equal '<b>foo</b>'
|
52
90
|
end
|
53
91
|
|
54
92
|
it 'should not choke on fragments containing <html> or <body>' do
|
55
|
-
@s.fragment('<html><b>foo</b></html>').must_equal 'foo'
|
56
|
-
@s.fragment('<body><b>foo</b></body>').must_equal 'foo'
|
57
|
-
@s.fragment('<html><body><b>foo</b></body></html>').must_equal 'foo'
|
58
|
-
@s.fragment('<!DOCTYPE html><html><body><b>foo</b></body></html>').must_equal 'foo'
|
93
|
+
_(@s.fragment('<html><b>foo</b></html>')).must_equal 'foo'
|
94
|
+
_(@s.fragment('<body><b>foo</b></body>')).must_equal 'foo'
|
95
|
+
_(@s.fragment('<html><body><b>foo</b></body></html>')).must_equal 'foo'
|
96
|
+
_(@s.fragment('<!DOCTYPE html><html><body><b>foo</b></body></html>')).must_equal 'foo'
|
59
97
|
end
|
60
98
|
|
61
99
|
it 'should not choke on frozen fragments' do
|
62
|
-
@s.fragment('<b>foo</b>'.freeze).must_equal 'foo'
|
100
|
+
_(@s.fragment('<b>foo</b>'.freeze)).must_equal 'foo'
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'should normalize newlines' do
|
104
|
+
_(@s.fragment("a\r\n\n\r\r\r\nz")).must_equal "a\n\n\n\n\nz"
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'should strip control characters (except ASCII whitespace)' do
|
108
|
+
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
109
|
+
whitespace = "\t\n\f\u0020"
|
110
|
+
_(@s.fragment("a#{sample_control_chars}#{whitespace}z")).must_equal "a#{whitespace}z"
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'should strip non-characters' do
|
114
|
+
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
115
|
+
_(@s.fragment("a#{sample_non_chars}z")).must_equal "az"
|
116
|
+
end
|
117
|
+
|
118
|
+
describe 'when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH' do
|
119
|
+
let(:content) do
|
120
|
+
content = nest_html_content('<b>foo</b>', Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH)
|
121
|
+
"<body>#{content}</body>"
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'raises an ArgumentError exception' do
|
125
|
+
assert_raises ArgumentError do
|
126
|
+
@s.fragment(content)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
describe 'and :max_tree_depth of -1 is supplied in :parser_options' do
|
131
|
+
before do
|
132
|
+
@s = Sanitize.new(parser_options: { max_tree_depth: -1 })
|
133
|
+
end
|
134
|
+
|
135
|
+
it 'does not raise an ArgumentError exception' do
|
136
|
+
_(@s.fragment(content)).must_equal 'foo'
|
137
|
+
end
|
138
|
+
end
|
63
139
|
end
|
64
140
|
end
|
65
141
|
|
@@ -71,13 +147,13 @@ describe 'Sanitize' do
|
|
71
147
|
doc.xpath('/html/body/node()').each {|node| frag << node }
|
72
148
|
|
73
149
|
@s.node!(frag)
|
74
|
-
frag.to_html.must_equal 'Lorem ipsum dolor sit amet
|
150
|
+
_(frag.to_html).must_equal 'Lorem ipsum dolor sit amet '
|
75
151
|
end
|
76
152
|
|
77
|
-
describe "when the given node is a document and <html> isn't whitelisted" do
|
153
|
+
describe "when the given node is a document and <html> isn't allowlisted" do
|
78
154
|
it 'should raise a Sanitize::Error' do
|
79
155
|
doc = Nokogiri::HTML5.parse('foo')
|
80
|
-
proc { @s.node!(doc) }.must_raise Sanitize::Error
|
156
|
+
_(proc { @s.node!(doc) }).must_raise Sanitize::Error
|
81
157
|
end
|
82
158
|
end
|
83
159
|
end
|
@@ -85,28 +161,37 @@ describe 'Sanitize' do
|
|
85
161
|
|
86
162
|
describe 'class methods' do
|
87
163
|
describe '.document' do
|
88
|
-
it 'should
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
end
|
164
|
+
it 'should sanitize an HTML document with the given config' do
|
165
|
+
html = '<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'
|
166
|
+
_(Sanitize.document(html, :elements => ['html']))
|
167
|
+
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
93
168
|
end
|
94
169
|
end
|
95
170
|
|
96
171
|
describe '.fragment' do
|
97
|
-
it 'should
|
98
|
-
|
99
|
-
|
100
|
-
|
172
|
+
it 'should sanitize an HTML fragment with the given config' do
|
173
|
+
html = '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'
|
174
|
+
_(Sanitize.fragment(html, :elements => ['strong']))
|
175
|
+
.must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
|
101
176
|
end
|
102
177
|
end
|
103
178
|
|
104
179
|
describe '.node!' do
|
105
|
-
it 'should
|
106
|
-
|
107
|
-
|
108
|
-
|
180
|
+
it 'should sanitize a Nokogiri::XML::Node with the given config' do
|
181
|
+
doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
182
|
+
frag = doc.fragment
|
183
|
+
|
184
|
+
doc.xpath('/html/body/node()').each {|node| frag << node }
|
185
|
+
|
186
|
+
Sanitize.node!(frag, :elements => ['strong'])
|
187
|
+
_(frag.to_html).must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
|
109
188
|
end
|
110
189
|
end
|
111
190
|
end
|
191
|
+
|
192
|
+
private
|
193
|
+
|
194
|
+
def nest_html_content(html_content, depth)
|
195
|
+
"#{'<span>' * depth}#{html_content}#{'</span>' * depth}"
|
196
|
+
end
|
112
197
|
end
|