sanitize 6.1.3 → 7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +32 -14
- data/LICENSE +3 -1
- data/README.md +120 -238
- data/lib/sanitize/config/basic.rb +15 -15
- data/lib/sanitize/config/default.rb +45 -45
- data/lib/sanitize/config/relaxed.rb +136 -32
- data/lib/sanitize/config/restricted.rb +2 -2
- data/lib/sanitize/config.rb +12 -14
- data/lib/sanitize/css.rb +308 -308
- data/lib/sanitize/transformers/clean_cdata.rb +9 -9
- data/lib/sanitize/transformers/clean_comment.rb +9 -9
- data/lib/sanitize/transformers/clean_css.rb +59 -55
- data/lib/sanitize/transformers/clean_doctype.rb +15 -15
- data/lib/sanitize/transformers/clean_element.rb +220 -237
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +38 -38
- data/test/common.rb +4 -3
- data/test/test_clean_comment.rb +26 -25
- data/test/test_clean_css.rb +14 -13
- data/test/test_clean_doctype.rb +21 -20
- data/test/test_clean_element.rb +258 -273
- data/test/test_config.rb +22 -21
- data/test/test_malicious_css.rb +20 -19
- data/test/test_malicious_html.rb +100 -99
- data/test/test_parser.rb +26 -25
- data/test/test_sanitize.rb +70 -69
- data/test/test_sanitize_css.rb +149 -114
- data/test/test_transformers.rb +81 -83
- metadata +14 -43
data/test/test_clean_element.rb
CHANGED
@@ -1,245 +1,246 @@
|
|
1
|
-
#
|
2
|
-
require_relative 'common'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require_relative "common"
|
4
|
+
|
5
|
+
describe "Sanitize::Transformers::CleanElement" do
|
5
6
|
make_my_diffs_pretty!
|
6
7
|
parallelize_me!
|
7
8
|
|
8
9
|
strings = {
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
10
|
+
basic: {
|
11
|
+
html: '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <style>.foo { color: #fff; }</style> <script>alert("hello world");</script>',
|
12
|
+
default: "Lorem ipsum dolor sit amet ",
|
13
|
+
restricted: "<b>Lorem</b> ipsum <strong>dolor</strong> sit amet ",
|
14
|
+
basic: '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
|
15
|
+
relaxed: '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> '
|
15
16
|
},
|
16
17
|
|
17
|
-
:
|
18
|
-
:
|
19
|
-
:
|
20
|
-
:
|
21
|
-
:
|
22
|
-
:
|
18
|
+
malformed: {
|
19
|
+
html: 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
|
20
|
+
default: "Lorem dolor sit amet ",
|
21
|
+
restricted: "Lorem <strong>dolor</strong> sit amet ",
|
22
|
+
basic: 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
|
23
|
+
relaxed: 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet '
|
23
24
|
},
|
24
25
|
|
25
|
-
:
|
26
|
-
:
|
27
|
-
:
|
28
|
-
:
|
29
|
-
:
|
30
|
-
:
|
26
|
+
unclosed: {
|
27
|
+
html: "<p>a</p><blockquote>b",
|
28
|
+
default: " a b ",
|
29
|
+
restricted: " a b ",
|
30
|
+
basic: "<p>a</p><blockquote>b</blockquote>",
|
31
|
+
relaxed: "<p>a</p><blockquote>b</blockquote>"
|
31
32
|
},
|
32
33
|
|
33
|
-
:
|
34
|
-
:
|
35
|
-
:
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
34
|
+
malicious: {
|
35
|
+
html: '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
|
36
|
+
default: 'Lorem ipsum dolor sit amet &lt;script&gt;alert("hello world");',
|
37
|
+
restricted: '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert("hello world");',
|
38
|
+
basic: '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");',
|
39
|
+
relaxed: '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");'
|
39
40
|
}
|
40
41
|
}
|
41
42
|
|
42
43
|
protocols = {
|
43
|
-
|
44
|
-
:
|
45
|
-
:
|
46
|
-
:
|
47
|
-
:
|
48
|
-
:
|
44
|
+
"protocol-based JS injection: simple, no spaces" => {
|
45
|
+
html: '<a href="javascript:alert(\'XSS\');">foo</a>',
|
46
|
+
default: "foo",
|
47
|
+
restricted: "foo",
|
48
|
+
basic: '<a rel="nofollow">foo</a>',
|
49
|
+
relaxed: "<a>foo</a>"
|
49
50
|
},
|
50
51
|
|
51
|
-
|
52
|
-
:
|
53
|
-
:
|
54
|
-
:
|
55
|
-
:
|
56
|
-
:
|
52
|
+
"protocol-based JS injection: simple, spaces before" => {
|
53
|
+
html: '<a href="javascript :alert(\'XSS\');">foo</a>',
|
54
|
+
default: "foo",
|
55
|
+
restricted: "foo",
|
56
|
+
basic: '<a rel="nofollow">foo</a>',
|
57
|
+
relaxed: "<a>foo</a>"
|
57
58
|
},
|
58
59
|
|
59
|
-
|
60
|
-
:
|
61
|
-
:
|
62
|
-
:
|
63
|
-
:
|
64
|
-
:
|
60
|
+
"protocol-based JS injection: simple, spaces after" => {
|
61
|
+
html: '<a href="javascript: alert(\'XSS\');">foo</a>',
|
62
|
+
default: "foo",
|
63
|
+
restricted: "foo",
|
64
|
+
basic: '<a rel="nofollow">foo</a>',
|
65
|
+
relaxed: "<a>foo</a>"
|
65
66
|
},
|
66
67
|
|
67
|
-
|
68
|
-
:
|
69
|
-
:
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
68
|
+
"protocol-based JS injection: simple, spaces before and after" => {
|
69
|
+
html: '<a href="javascript : alert(\'XSS\');">foo</a>',
|
70
|
+
default: "foo",
|
71
|
+
restricted: "foo",
|
72
|
+
basic: '<a rel="nofollow">foo</a>',
|
73
|
+
relaxed: "<a>foo</a>"
|
73
74
|
},
|
74
75
|
|
75
|
-
|
76
|
-
:
|
77
|
-
:
|
78
|
-
:
|
79
|
-
:
|
80
|
-
:
|
76
|
+
"protocol-based JS injection: preceding colon" => {
|
77
|
+
html: '<a href=":javascript:alert(\'XSS\');">foo</a>',
|
78
|
+
default: "foo",
|
79
|
+
restricted: "foo",
|
80
|
+
basic: '<a rel="nofollow">foo</a>',
|
81
|
+
relaxed: "<a>foo</a>"
|
81
82
|
},
|
82
83
|
|
83
|
-
|
84
|
-
:
|
85
|
-
:
|
86
|
-
:
|
87
|
-
:
|
88
|
-
:
|
84
|
+
"protocol-based JS injection: UTF-8 encoding" => {
|
85
|
+
html: '<a href="javascript&#58;">foo</a>',
|
86
|
+
default: "foo",
|
87
|
+
restricted: "foo",
|
88
|
+
basic: '<a rel="nofollow">foo</a>',
|
89
|
+
relaxed: "<a>foo</a>"
|
89
90
|
},
|
90
91
|
|
91
|
-
|
92
|
-
:
|
93
|
-
:
|
94
|
-
:
|
95
|
-
:
|
96
|
-
:
|
92
|
+
"protocol-based JS injection: long UTF-8 encoding" => {
|
93
|
+
html: '<a href="javascript&#0058;">foo</a>',
|
94
|
+
default: "foo",
|
95
|
+
restricted: "foo",
|
96
|
+
basic: '<a rel="nofollow">foo</a>',
|
97
|
+
relaxed: "<a>foo</a>"
|
97
98
|
},
|
98
99
|
|
99
|
-
|
100
|
-
:
|
101
|
-
:
|
102
|
-
:
|
103
|
-
:
|
104
|
-
:
|
100
|
+
"protocol-based JS injection: long UTF-8 encoding without semicolons" => {
|
101
|
+
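html: "<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>",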
|
102
|
+
default: "foo",
|
103
|
+
restricted: "foo",
|
104
|
+
basic: '<a rel="nofollow">foo</a>',
|
105
|
+
relaxed: "<a>foo</a>"
|
105
106
|
},
|
106
107
|
|
107
|
-
|
108
|
-
:
|
109
|
-
:
|
110
|
-
:
|
111
|
-
:
|
112
|
-
:
|
108
|
+
"protocol-based JS injection: hex encoding" => {
|
109
|
+
html: '<a href="javascript&#x3A;">foo</a>',
|
110
|
+
default: "foo",
|
111
|
+
restricted: "foo",
|
112
|
+
basic: '<a rel="nofollow">foo</a>',
|
113
|
+
relaxed: "<a>foo</a>"
|
113
114
|
},
|
114
115
|
|
115
|
-
|
116
|
-
:
|
117
|
-
:
|
118
|
-
:
|
119
|
-
:
|
120
|
-
:
|
116
|
+
"protocol-based JS injection: long hex encoding" => {
|
117
|
+
html: '<a href="javascript&#x003a;">foo</a>',
|
118
|
+
default: "foo",
|
119
|
+
restricted: "foo",
|
120
|
+
basic: '<a rel="nofollow">foo</a>',
|
121
|
+
relaxed: "<a>foo</a>"
|
121
122
|
},
|
122
123
|
|
123
|
-
|
124
|
-
:
|
125
|
-
:
|
126
|
-
:
|
127
|
-
:
|
128
|
-
:
|
124
|
+
"protocol-based JS injection: hex encoding without semicolons" => {
|
125
|
+
html: "<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>",
|
126
|
+
default: "foo",
|
127
|
+
restricted: "foo",
|
128
|
+
basic: '<a rel="nofollow">foo</a>',
|
129
|
+
relaxed: "<a>foo</a>"
|
129
130
|
},
|
130
131
|
|
131
|
-
|
132
|
-
:
|
133
|
-
:
|
134
|
-
:
|
135
|
-
:
|
136
|
-
:
|
132
|
+
"protocol-based JS injection: null char" => {
|
133
|
+
html: "<img src=java\0script:alert(\"XSS\")>",
|
134
|
+
default: "",
|
135
|
+
restricted: "",
|
136
|
+
basic: "",
|
137
|
+
relaxed: "<img>"
|
137
138
|
},
|
138
139
|
|
139
|
-
|
140
|
-
:
|
141
|
-
:
|
142
|
-
:
|
143
|
-
:
|
144
|
-
:
|
140
|
+
"protocol-based JS injection: invalid URL char" => {
|
141
|
+
html: '<img src=java\script:alert("XSS")>',
|
142
|
+
default: "",
|
143
|
+
restricted: "",
|
144
|
+
basic: "",
|
145
|
+
relaxed: "<img>"
|
145
146
|
},
|
146
147
|
|
147
|
-
|
148
|
-
:
|
149
|
-
:
|
150
|
-
:
|
151
|
-
:
|
152
|
-
:
|
148
|
+
"protocol-based JS injection: spaces and entities" => {
|
149
|
+
html: '<img src=" &#14;  javascript:alert(\'XSS\');">',
|
150
|
+
default: "",
|
151
|
+
restricted: "",
|
152
|
+
basic: "",
|
153
|
+
relaxed: "<img>"
|
153
154
|
},
|
154
155
|
|
155
|
-
|
156
|
-
:
|
157
|
-
:
|
158
|
-
:
|
159
|
-
:
|
160
|
-
:
|
156
|
+
"protocol whitespace" => {
|
157
|
+
html: '<a href=" http://example.com/"></a>',
|
158
|
+
default: "",
|
159
|
+
restricted: "",
|
160
|
+
basic: '<a href="http://example.com/" rel="nofollow"></a>',
|
161
|
+
relaxed: '<a href="http://example.com/"></a>'
|
161
162
|
}
|
162
163
|
}
|
163
164
|
|
164
|
-
describe
|
165
|
-
it
|
165
|
+
describe "Default config" do
|
166
|
+
it "should remove non-allowlisted elements, leaving safe contents behind" do
|
166
167
|
_(Sanitize.fragment('foo <b>bar</b> <strong><a href="#a">baz</a></strong> quux'))
|
167
|
-
.must_equal
|
168
|
+
.must_equal "foo bar baz quux"
|
168
169
|
|
169
170
|
_(Sanitize.fragment('<script>alert("<xss>");</script>'))
|
170
|
-
.must_equal
|
171
|
+
.must_equal ""
|
171
172
|
|
172
173
|
_(Sanitize.fragment('<<script>script>alert("<xss>");</<script>>'))
|
173
|
-
.must_equal
|
174
|
+
.must_equal "&lt;"
|
174
175
|
|
175
176
|
_(Sanitize.fragment('< script <>> alert("<xss>");</script>'))
|
176
177
|
.must_equal '&lt; script &lt;&gt;&gt; alert("");'
|
177
178
|
end
|
178
179
|
|
179
|
-
it
|
180
|
-
_(Sanitize.fragment(
|
181
|
-
.must_equal
|
180
|
+
it "should surround the contents of :whitespace_elements with space characters when removing the element" do
|
181
|
+
_(Sanitize.fragment("foo<div>bar</div>baz"))
|
182
|
+
.must_equal "foo bar baz"
|
182
183
|
|
183
|
-
_(Sanitize.fragment(
|
184
|
-
.must_equal
|
184
|
+
_(Sanitize.fragment("foo<br>bar<br>baz"))
|
185
|
+
.must_equal "foo bar baz"
|
185
186
|
|
186
|
-
_(Sanitize.fragment(
|
187
|
-
.must_equal
|
187
|
+
_(Sanitize.fragment("foo<hr>bar<hr>baz"))
|
188
|
+
.must_equal "foo bar baz"
|
188
189
|
end
|
189
190
|
|
190
|
-
it
|
191
|
+
it "should not choke on several instances of the same element in a row" do
|
191
192
|
_(Sanitize.fragment('<img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif">'))
|
192
|
-
.must_equal
|
193
|
+
.must_equal ""
|
193
194
|
end
|
194
195
|
|
195
|
-
it
|
196
|
-
_(Sanitize.fragment(
|
197
|
-
.must_equal
|
196
|
+
it "should not preserve the content of removed `iframe` elements" do
|
197
|
+
_(Sanitize.fragment("<iframe>hello! <script>alert(0)</script></iframe>"))
|
198
|
+
.must_equal ""
|
198
199
|
end
|
199
200
|
|
200
|
-
it
|
201
|
-
_(Sanitize.fragment(
|
202
|
-
.must_equal
|
201
|
+
it "should not preserve the content of removed `math` elements" do
|
202
|
+
_(Sanitize.fragment("<math>hello! <script>alert(0)</script></math>"))
|
203
|
+
.must_equal ""
|
203
204
|
end
|
204
205
|
|
205
|
-
it
|
206
|
-
_(Sanitize.fragment(
|
207
|
-
.must_equal
|
206
|
+
it "should not preserve the content of removed `noembed` elements" do
|
207
|
+
_(Sanitize.fragment("<noembed>hello! <script>alert(0)</script></noembed>"))
|
208
|
+
.must_equal ""
|
208
209
|
end
|
209
210
|
|
210
|
-
it
|
211
|
-
_(Sanitize.fragment(
|
212
|
-
.must_equal
|
211
|
+
it "should not preserve the content of removed `noframes` elements" do
|
212
|
+
_(Sanitize.fragment("<noframes>hello! <script>alert(0)</script></noframes>"))
|
213
|
+
.must_equal ""
|
213
214
|
end
|
214
215
|
|
215
|
-
it
|
216
|
-
_(Sanitize.fragment(
|
217
|
-
.must_equal
|
216
|
+
it "should not preserve the content of removed `noscript` elements" do
|
217
|
+
_(Sanitize.fragment("<noscript>hello! <script>alert(0)</script></noscript>"))
|
218
|
+
.must_equal ""
|
218
219
|
end
|
219
220
|
|
220
|
-
it
|
221
|
-
_(Sanitize.fragment(
|
222
|
-
.must_equal
|
221
|
+
it "should not preserve the content of removed `plaintext` elements" do
|
222
|
+
_(Sanitize.fragment("<plaintext>hello! <script>alert(0)</script>"))
|
223
|
+
.must_equal ""
|
223
224
|
end
|
224
225
|
|
225
|
-
it
|
226
|
-
_(Sanitize.fragment(
|
227
|
-
.must_equal
|
226
|
+
it "should not preserve the content of removed `script` elements" do
|
227
|
+
_(Sanitize.fragment("<script>hello! <script>alert(0)</script></script>"))
|
228
|
+
.must_equal ""
|
228
229
|
end
|
229
230
|
|
230
|
-
it
|
231
|
-
_(Sanitize.fragment(
|
232
|
-
.must_equal
|
231
|
+
it "should not preserve the content of removed `style` elements" do
|
232
|
+
_(Sanitize.fragment("<style>hello! <script>alert(0)</script></style>"))
|
233
|
+
.must_equal ""
|
233
234
|
end
|
234
235
|
|
235
|
-
it
|
236
|
-
_(Sanitize.fragment(
|
237
|
-
.must_equal
|
236
|
+
it "should not preserve the content of removed `svg` elements" do
|
237
|
+
_(Sanitize.fragment("<svg>hello! <script>alert(0)</script></svg>"))
|
238
|
+
.must_equal ""
|
238
239
|
end
|
239
240
|
|
240
|
-
it
|
241
|
-
_(Sanitize.fragment(
|
242
|
-
.must_equal
|
241
|
+
it "should not preserve the content of removed `xmp` elements" do
|
242
|
+
_(Sanitize.fragment("<xmp>hello! <script>alert(0)</script></xmp>"))
|
243
|
+
.must_equal ""
|
243
244
|
end
|
244
245
|
|
245
246
|
strings.each do |name, data|
|
@@ -255,7 +256,7 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
255
256
|
end
|
256
257
|
end
|
257
258
|
|
258
|
-
describe
|
259
|
+
describe "Restricted config" do
|
259
260
|
before do
|
260
261
|
@s = Sanitize.new(Sanitize::Config::RESTRICTED)
|
261
262
|
end
|
@@ -273,17 +274,17 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
273
274
|
end
|
274
275
|
end
|
275
276
|
|
276
|
-
describe
|
277
|
+
describe "Basic config" do
|
277
278
|
before do
|
278
279
|
@s = Sanitize.new(Sanitize::Config::BASIC)
|
279
280
|
end
|
280
281
|
|
281
|
-
it
|
282
|
-
_(@s.fragment(
|
282
|
+
it "should not choke on valueless attributes" do
|
283
|
+
_(@s.fragment("foo <a href>foo</a> bar"))
|
283
284
|
.must_equal 'foo <a href="" rel="nofollow">foo</a> bar'
|
284
285
|
end
|
285
286
|
|
286
|
-
it
|
287
|
+
it "should downcase attribute names" do
|
287
288
|
_(@s.fragment('<a HREF="javascript:alert(\'foo\')">bar</a>'))
|
288
289
|
.must_equal '<a rel="nofollow">bar</a>'
|
289
290
|
end
|
@@ -301,12 +302,12 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
301
302
|
end
|
302
303
|
end
|
303
304
|
|
304
|
-
describe
|
305
|
+
describe "Relaxed config" do
|
305
306
|
before do
|
306
307
|
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
307
308
|
end
|
308
309
|
|
309
|
-
it
|
310
|
+
it "should encode special chars in attribute values" do
|
310
311
|
_(@s.fragment('<a href="http://example.com" title="<b>éxamples</b> & things">foo</a>'))
|
311
312
|
.must_equal '<a href="http://example.com" title="<b>éxamples</b> &amp; things">foo</a>'
|
312
313
|
end
|
@@ -324,25 +325,25 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
324
325
|
end
|
325
326
|
end
|
326
327
|
|
327
|
-
describe
|
328
|
-
it
|
328
|
+
describe "Custom configs" do
|
329
|
+
it "should allow attributes on all elements if allowlisted under :all" do
|
329
330
|
input = '<p class="foo">bar</p>'
|
330
331
|
|
331
|
-
_(Sanitize.fragment(input)).must_equal
|
332
|
+
_(Sanitize.fragment(input)).must_equal " bar "
|
332
333
|
|
333
334
|
_(Sanitize.fragment(input, {
|
334
|
-
:
|
335
|
-
:
|
335
|
+
elements: ["p"],
|
336
|
+
attributes: {all: ["class"]}
|
336
337
|
})).must_equal input
|
337
338
|
|
338
339
|
_(Sanitize.fragment(input, {
|
339
|
-
:
|
340
|
-
:
|
341
|
-
})).must_equal
|
340
|
+
elements: ["p"],
|
341
|
+
attributes: {"div" => ["class"]}
|
342
|
+
})).must_equal "<p>bar</p>"
|
342
343
|
|
343
344
|
_(Sanitize.fragment(input, {
|
344
|
-
:
|
345
|
-
:
|
345
|
+
elements: ["p"],
|
346
|
+
attributes: {"p" => ["title"], :all => ["class"]}
|
346
347
|
})).must_equal input
|
347
348
|
end
|
348
349
|
|
@@ -350,203 +351,187 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
350
351
|
input = '<a href="/foo/bar">Link</a>'
|
351
352
|
|
352
353
|
_(Sanitize.fragment(input,
|
353
|
-
:
|
354
|
-
:
|
355
|
-
:
|
356
|
-
)).must_equal '<a>Link</a>'
|
354
|
+
elements: ["a"],
|
355
|
+
attributes: {"a" => ["href"]},
|
356
|
+
protocols: {"a" => {"href" => ["http"]}})).must_equal "<a>Link</a>"
|
357
357
|
end
|
358
358
|
|
359
|
-
it
|
359
|
+
it "should allow relative URLs containing colons when the colon is not in the first path segment" do
|
360
360
|
input = '<a href="/wiki/Special:Random">Random Page</a>'
|
361
361
|
|
362
362
|
_(Sanitize.fragment(input, {
|
363
|
-
:
|
364
|
-
:
|
365
|
-
:
|
363
|
+
elements: ["a"],
|
364
|
+
attributes: {"a" => ["href"]},
|
365
|
+
protocols: {"a" => {"href" => [:relative]}}
|
366
366
|
})).must_equal input
|
367
367
|
end
|
368
368
|
|
369
|
-
it
|
369
|
+
it "should allow relative URLs containing colons when the colon is part of an anchor" do
|
370
370
|
input = '<a href="#fn:1">Footnote 1</a>'
|
371
371
|
|
372
372
|
_(Sanitize.fragment(input, {
|
373
|
-
:
|
374
|
-
:
|
375
|
-
:
|
373
|
+
elements: ["a"],
|
374
|
+
attributes: {"a" => ["href"]},
|
375
|
+
protocols: {"a" => {"href" => [:relative]}}
|
376
376
|
})).must_equal input
|
377
377
|
|
378
378
|
input = '<a href="somepage#fn:1">Footnote 1</a>'
|
379
379
|
|
380
380
|
_(Sanitize.fragment(input, {
|
381
|
-
:
|
382
|
-
:
|
383
|
-
:
|
381
|
+
elements: ["a"],
|
382
|
+
attributes: {"a" => ["href"]},
|
383
|
+
protocols: {"a" => {"href" => [:relative]}}
|
384
384
|
})).must_equal input
|
385
385
|
end
|
386
386
|
|
387
|
-
it
|
388
|
-
_(Sanitize.fragment(
|
389
|
-
:
|
390
|
-
)).must_equal 'foo bar '
|
387
|
+
it "should remove the contents of filtered nodes when :remove_contents is true" do
|
388
|
+
_(Sanitize.fragment("foo bar <div>baz<span>quux</span></div>",
|
389
|
+
remove_contents: true)).must_equal "foo bar "
|
391
390
|
end
|
392
391
|
|
393
|
-
it
|
392
|
+
it "should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as strings" do
|
394
393
|
_(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
395
|
-
:
|
396
|
-
)).must_equal 'foo bar baz hi '
|
394
|
+
remove_contents: ["script", "span"])).must_equal "foo bar baz hi "
|
397
395
|
|
398
396
|
_(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
399
|
-
:
|
400
|
-
)).must_equal 'foo bar baz hi '
|
397
|
+
remove_contents: Set.new(["script", "span"]))).must_equal "foo bar baz hi "
|
401
398
|
end
|
402
399
|
|
403
|
-
it
|
400
|
+
it "should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as symbols" do
|
404
401
|
_(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
405
|
-
:
|
406
|
-
)).must_equal 'foo bar baz hi '
|
402
|
+
remove_contents: [:script, :span])).must_equal "foo bar baz hi "
|
407
403
|
|
408
404
|
_(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
409
|
-
:
|
410
|
-
)).must_equal 'foo bar baz hi '
|
405
|
+
remove_contents: Set.new([:script, :span]))).must_equal "foo bar baz hi "
|
411
406
|
end
|
412
407
|
|
413
|
-
it
|
414
|
-
_(Sanitize.fragment(
|
415
|
-
:
|
416
|
-
)).must_equal '<iframe></iframe>'
|
408
|
+
it "should remove the contents of allowlisted iframes" do
|
409
|
+
_(Sanitize.fragment("<iframe>hi <script>hello</script></iframe>",
|
410
|
+
elements: ["iframe"])).must_equal "<iframe></iframe>"
|
417
411
|
end
|
418
412
|
|
419
|
-
it
|
413
|
+
it "should not allow arbitrary HTML5 data attributes by default" do
|
420
414
|
_(Sanitize.fragment('<b data-foo="bar"></b>',
|
421
|
-
:
|
422
|
-
)).must_equal '<b></b>'
|
415
|
+
elements: ["b"])).must_equal "<b></b>"
|
423
416
|
|
424
417
|
_(Sanitize.fragment('<b class="foo" data-foo="bar"></b>',
|
425
|
-
:
|
426
|
-
:
|
427
|
-
)).must_equal '<b class="foo"></b>'
|
418
|
+
attributes: {"b" => ["class"]},
|
419
|
+
elements: ["b"])).must_equal '<b class="foo"></b>'
|
428
420
|
end
|
429
421
|
|
430
|
-
it
|
422
|
+
it "should allow arbitrary HTML5 data attributes when the :attributes config includes :data" do
|
431
423
|
s = Sanitize.new(
|
432
|
-
:
|
433
|
-
:
|
424
|
+
attributes: {"b" => [:data]},
|
425
|
+
elements: ["b"]
|
434
426
|
)
|
435
427
|
|
436
428
|
_(s.fragment('<b data-foo="valid" data-bar="valid"></b>'))
|
437
429
|
.must_equal '<b data-foo="valid" data-bar="valid"></b>'
|
438
430
|
|
439
431
|
_(s.fragment('<b data-="invalid"></b>'))
|
440
|
-
.must_equal
|
432
|
+
.must_equal "<b></b>"
|
441
433
|
|
442
434
|
_(s.fragment('<b data-="invalid"></b>'))
|
443
|
-
.must_equal
|
435
|
+
.must_equal "<b></b>"
|
444
436
|
|
445
437
|
_(s.fragment('<b data-xml="invalid"></b>'))
|
446
|
-
.must_equal
|
438
|
+
.must_equal "<b></b>"
|
447
439
|
|
448
440
|
_(s.fragment('<b data-xmlfoo="invalid"></b>'))
|
449
|
-
.must_equal
|
441
|
+
.must_equal "<b></b>"
|
450
442
|
|
451
443
|
_(s.fragment('<b data-f:oo="valid"></b>'))
|
452
|
-
.must_equal
|
444
|
+
.must_equal "<b></b>"
|
453
445
|
|
454
446
|
_(s.fragment('<b data-f/oo="partial"></b>'))
|
455
447
|
.must_equal '<b data-f=""></b>' # Nokogiri quirk; not ideal, but harmless
|
456
448
|
|
457
449
|
_(s.fragment('<b data-éfoo="valid"></b>'))
|
458
|
-
.must_equal
|
450
|
+
.must_equal "<b></b>" # Another annoying Nokogiri quirk.
|
459
451
|
end
|
460
452
|
|
461
|
-
it
|
453
|
+
it "should replace whitespace_elements with configured :before and :after values" do
|
462
454
|
s = Sanitize.new(
|
463
|
-
:
|
464
|
-
|
465
|
-
|
466
|
-
|
455
|
+
whitespace_elements: {
|
456
|
+
"p" => {before: "\n", after: "\n"},
|
457
|
+
"div" => {before: "\n", after: "\n"},
|
458
|
+
"br" => {before: "\n", after: "\n"}
|
467
459
|
}
|
468
460
|
)
|
469
461
|
|
470
|
-
_(s.fragment(
|
471
|
-
_(s.fragment(
|
472
|
-
_(s.fragment(
|
473
|
-
_(s.fragment(
|
462
|
+
_(s.fragment("<p>foo</p>")).must_equal "\nfoo\n"
|
463
|
+
_(s.fragment("<p>foo</p><p>bar</p>")).must_equal "\nfoo\n\nbar\n"
|
464
|
+
_(s.fragment("foo<div>bar</div>baz")).must_equal "foo\nbar\nbaz"
|
465
|
+
_(s.fragment("foo<br>bar<br>baz")).must_equal "foo\nbar\nbaz"
|
474
466
|
end
|
475
467
|
|
476
|
-
it
|
468
|
+
it "should handle protocols correctly regardless of case" do
|
477
469
|
input = '<a href="hTTpS://foo.com/">Text</a>'
|
478
470
|
|
479
471
|
_(Sanitize.fragment(input, {
|
480
|
-
:
|
481
|
-
:
|
482
|
-
:
|
472
|
+
elements: ["a"],
|
473
|
+
attributes: {"a" => ["href"]},
|
474
|
+
protocols: {"a" => {"href" => ["https"]}}
|
483
475
|
})).must_equal input
|
484
476
|
|
485
477
|
input = '<a href="mailto:someone@example.com?Subject=Hello">Text</a>'
|
486
478
|
|
487
479
|
_(Sanitize.fragment(input, {
|
488
|
-
:
|
489
|
-
:
|
490
|
-
:
|
480
|
+
elements: ["a"],
|
481
|
+
attributes: {"a" => ["href"]},
|
482
|
+
protocols: {"a" => {"href" => ["https"]}}
|
491
483
|
})).must_equal "<a>Text</a>"
|
492
484
|
end
|
493
485
|
|
494
|
-
it
|
486
|
+
it "should sanitize protocols in data attributes even if data attributes are generically allowed" do
|
495
487
|
input = '<a data-url="mailto:someone@example.com">Text</a>'
|
496
488
|
|
497
489
|
_(Sanitize.fragment(input, {
|
498
|
-
:
|
499
|
-
:
|
500
|
-
:
|
490
|
+
elements: ["a"],
|
491
|
+
attributes: {"a" => [:data]},
|
492
|
+
protocols: {"a" => {"data-url" => ["https"]}}
|
501
493
|
})).must_equal "<a>Text</a>"
|
502
494
|
|
503
495
|
_(Sanitize.fragment(input, {
|
504
|
-
:
|
505
|
-
:
|
506
|
-
:
|
496
|
+
elements: ["a"],
|
497
|
+
attributes: {"a" => [:data]},
|
498
|
+
protocols: {"a" => {"data-url" => ["mailto"]}}
|
507
499
|
})).must_equal input
|
508
500
|
end
|
509
501
|
|
510
|
-
it
|
502
|
+
it "should prevent `<meta>` tags from being used to set a non-UTF-8 charset" do
|
511
503
|
_(Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
|
512
|
-
:
|
513
|
-
:
|
514
|
-
)).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>"
|
504
|
+
elements: %w[html head meta body],
|
505
|
+
attributes: {"meta" => ["charset"]})).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>"
|
515
506
|
|
516
507
|
_(Sanitize.document('<html><meta charset="utf-8">Howdy!</html>',
|
517
|
-
:
|
518
|
-
:
|
519
|
-
)).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
508
|
+
elements: %w[html meta],
|
509
|
+
attributes: {"meta" => ["charset"]})).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
520
510
|
|
521
511
|
_(Sanitize.document('<html><meta charset="us-ascii">Howdy!</html>',
|
522
|
-
:
|
523
|
-
:
|
524
|
-
)).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
512
|
+
elements: %w[html meta],
|
513
|
+
attributes: {"meta" => ["charset"]})).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
525
514
|
|
526
515
|
_(Sanitize.document('<html><meta http-equiv="content-type" content=" text/html; charset=us-ascii">Howdy!</html>',
|
527
|
-
:
|
528
|
-
:
|
529
|
-
)).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>"
|
516
|
+
elements: %w[html meta],
|
517
|
+
attributes: {"meta" => %w[content http-equiv]})).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>"
|
530
518
|
|
531
519
|
_(Sanitize.document('<html><meta http-equiv="Content-Type" content="text/plain;charset = us-ascii">Howdy!</html>',
|
532
|
-
:
|
533
|
-
:
|
534
|
-
)).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>"
|
520
|
+
elements: %w[html meta],
|
521
|
+
attributes: {"meta" => %w[content http-equiv]})).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>"
|
535
522
|
end
|
536
523
|
|
537
|
-
it
|
524
|
+
it "should not modify `<meta>` tags that already set a UTF-8 charset" do
|
538
525
|
_(Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
|
539
|
-
:
|
540
|
-
:
|
541
|
-
)).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>"
|
526
|
+
elements: %w[html head meta body],
|
527
|
+
attributes: {"meta" => %w[content http-equiv]})).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>"
|
542
528
|
end
|
543
529
|
|
544
|
-
it
|
530
|
+
it "always removes `<noscript>` elements even if `noscript` is in the allowlist" do
|
545
531
|
assert_equal(
|
546
|
-
|
547
|
-
Sanitize.fragment(
|
532
|
+
"",
|
533
|
+
Sanitize.fragment("<noscript>foo</noscript>", elements: ["noscript"])
|
548
534
|
)
|
549
535
|
end
|
550
|
-
|
551
536
|
end
|
552
537
|
end
|