sanitize 6.1.3 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +32 -14
- data/LICENSE +3 -1
- data/README.md +120 -238
- data/lib/sanitize/config/basic.rb +15 -15
- data/lib/sanitize/config/default.rb +45 -45
- data/lib/sanitize/config/relaxed.rb +136 -32
- data/lib/sanitize/config/restricted.rb +2 -2
- data/lib/sanitize/config.rb +12 -14
- data/lib/sanitize/css.rb +308 -308
- data/lib/sanitize/transformers/clean_cdata.rb +9 -9
- data/lib/sanitize/transformers/clean_comment.rb +9 -9
- data/lib/sanitize/transformers/clean_css.rb +59 -55
- data/lib/sanitize/transformers/clean_doctype.rb +15 -15
- data/lib/sanitize/transformers/clean_element.rb +220 -237
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +38 -38
- data/test/common.rb +4 -3
- data/test/test_clean_comment.rb +26 -25
- data/test/test_clean_css.rb +14 -13
- data/test/test_clean_doctype.rb +21 -20
- data/test/test_clean_element.rb +258 -273
- data/test/test_config.rb +22 -21
- data/test/test_malicious_css.rb +20 -19
- data/test/test_malicious_html.rb +100 -99
- data/test/test_parser.rb +26 -25
- data/test/test_sanitize.rb +70 -69
- data/test/test_sanitize_css.rb +149 -114
- data/test/test_transformers.rb +81 -83
- metadata +14 -43
data/test/test_clean_element.rb
CHANGED
@@ -1,245 +1,246 @@
|
|
1
|
-
#
|
2
|
-
require_relative 'common'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require_relative "common"
|
4
|
+
|
5
|
+
describe "Sanitize::Transformers::CleanElement" do
|
5
6
|
make_my_diffs_pretty!
|
6
7
|
parallelize_me!
|
7
8
|
|
8
9
|
strings = {
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
10
|
+
basic: {
|
11
|
+
html: '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <style>.foo { color: #fff; }</style> <script>alert("hello world");</script>',
|
12
|
+
default: "Lorem ipsum dolor sit amet ",
|
13
|
+
restricted: "<b>Lorem</b> ipsum <strong>dolor</strong> sit amet ",
|
14
|
+
basic: '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
|
15
|
+
relaxed: '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> '
|
15
16
|
},
|
16
17
|
|
17
|
-
:
|
18
|
-
:
|
19
|
-
:
|
20
|
-
:
|
21
|
-
:
|
22
|
-
:
|
18
|
+
malformed: {
|
19
|
+
html: 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
|
20
|
+
default: "Lorem dolor sit amet ",
|
21
|
+
restricted: "Lorem <strong>dolor</strong> sit amet ",
|
22
|
+
basic: 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
|
23
|
+
relaxed: 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet '
|
23
24
|
},
|
24
25
|
|
25
|
-
:
|
26
|
-
:
|
27
|
-
:
|
28
|
-
:
|
29
|
-
:
|
30
|
-
:
|
26
|
+
unclosed: {
|
27
|
+
html: "<p>a</p><blockquote>b",
|
28
|
+
default: " a b ",
|
29
|
+
restricted: " a b ",
|
30
|
+
basic: "<p>a</p><blockquote>b</blockquote>",
|
31
|
+
relaxed: "<p>a</p><blockquote>b</blockquote>"
|
31
32
|
},
|
32
33
|
|
33
|
-
:
|
34
|
-
:
|
35
|
-
:
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
34
|
+
malicious: {
|
35
|
+
html: '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
|
36
|
+
default: 'Lorem ipsum dolor sit amet <script>alert("hello world");',
|
37
|
+
restricted: '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet <script>alert("hello world");',
|
38
|
+
basic: '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");',
|
39
|
+
relaxed: '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");'
|
39
40
|
}
|
40
41
|
}
|
41
42
|
|
42
43
|
protocols = {
|
43
|
-
|
44
|
-
:
|
45
|
-
:
|
46
|
-
:
|
47
|
-
:
|
48
|
-
:
|
44
|
+
"protocol-based JS injection: simple, no spaces" => {
|
45
|
+
html: '<a href="javascript:alert(\'XSS\');">foo</a>',
|
46
|
+
default: "foo",
|
47
|
+
restricted: "foo",
|
48
|
+
basic: '<a rel="nofollow">foo</a>',
|
49
|
+
relaxed: "<a>foo</a>"
|
49
50
|
},
|
50
51
|
|
51
|
-
|
52
|
-
:
|
53
|
-
:
|
54
|
-
:
|
55
|
-
:
|
56
|
-
:
|
52
|
+
"protocol-based JS injection: simple, spaces before" => {
|
53
|
+
html: '<a href="javascript :alert(\'XSS\');">foo</a>',
|
54
|
+
default: "foo",
|
55
|
+
restricted: "foo",
|
56
|
+
basic: '<a rel="nofollow">foo</a>',
|
57
|
+
relaxed: "<a>foo</a>"
|
57
58
|
},
|
58
59
|
|
59
|
-
|
60
|
-
:
|
61
|
-
:
|
62
|
-
:
|
63
|
-
:
|
64
|
-
:
|
60
|
+
"protocol-based JS injection: simple, spaces after" => {
|
61
|
+
html: '<a href="javascript: alert(\'XSS\');">foo</a>',
|
62
|
+
default: "foo",
|
63
|
+
restricted: "foo",
|
64
|
+
basic: '<a rel="nofollow">foo</a>',
|
65
|
+
relaxed: "<a>foo</a>"
|
65
66
|
},
|
66
67
|
|
67
|
-
|
68
|
-
:
|
69
|
-
:
|
70
|
-
:
|
71
|
-
:
|
72
|
-
:
|
68
|
+
"protocol-based JS injection: simple, spaces before and after" => {
|
69
|
+
html: '<a href="javascript : alert(\'XSS\');">foo</a>',
|
70
|
+
default: "foo",
|
71
|
+
restricted: "foo",
|
72
|
+
basic: '<a rel="nofollow">foo</a>',
|
73
|
+
relaxed: "<a>foo</a>"
|
73
74
|
},
|
74
75
|
|
75
|
-
|
76
|
-
:
|
77
|
-
:
|
78
|
-
:
|
79
|
-
:
|
80
|
-
:
|
76
|
+
"protocol-based JS injection: preceding colon" => {
|
77
|
+
html: '<a href=":javascript:alert(\'XSS\');">foo</a>',
|
78
|
+
default: "foo",
|
79
|
+
restricted: "foo",
|
80
|
+
basic: '<a rel="nofollow">foo</a>',
|
81
|
+
relaxed: "<a>foo</a>"
|
81
82
|
},
|
82
83
|
|
83
|
-
|
84
|
-
:
|
85
|
-
:
|
86
|
-
:
|
87
|
-
:
|
88
|
-
:
|
84
|
+
"protocol-based JS injection: UTF-8 encoding" => {
|
85
|
+
html: '<a href="javascript:">foo</a>',
|
86
|
+
default: "foo",
|
87
|
+
restricted: "foo",
|
88
|
+
basic: '<a rel="nofollow">foo</a>',
|
89
|
+
relaxed: "<a>foo</a>"
|
89
90
|
},
|
90
91
|
|
91
|
-
|
92
|
-
:
|
93
|
-
:
|
94
|
-
:
|
95
|
-
:
|
96
|
-
:
|
92
|
+
"protocol-based JS injection: long UTF-8 encoding" => {
|
93
|
+
html: '<a href="javascript:">foo</a>',
|
94
|
+
default: "foo",
|
95
|
+
restricted: "foo",
|
96
|
+
basic: '<a rel="nofollow">foo</a>',
|
97
|
+
relaxed: "<a>foo</a>"
|
97
98
|
},
|
98
99
|
|
99
|
-
|
100
|
-
:
|
101
|
-
:
|
102
|
-
:
|
103
|
-
:
|
104
|
-
:
|
100
|
+
"protocol-based JS injection: long UTF-8 encoding without semicolons" => {
|
101
|
+
html: "<a href=javascript:alert('XSS')>foo</a>",
|
102
|
+
default: "foo",
|
103
|
+
restricted: "foo",
|
104
|
+
basic: '<a rel="nofollow">foo</a>',
|
105
|
+
relaxed: "<a>foo</a>"
|
105
106
|
},
|
106
107
|
|
107
|
-
|
108
|
-
:
|
109
|
-
:
|
110
|
-
:
|
111
|
-
:
|
112
|
-
:
|
108
|
+
"protocol-based JS injection: hex encoding" => {
|
109
|
+
html: '<a href="javascript:">foo</a>',
|
110
|
+
default: "foo",
|
111
|
+
restricted: "foo",
|
112
|
+
basic: '<a rel="nofollow">foo</a>',
|
113
|
+
relaxed: "<a>foo</a>"
|
113
114
|
},
|
114
115
|
|
115
|
-
|
116
|
-
:
|
117
|
-
:
|
118
|
-
:
|
119
|
-
:
|
120
|
-
:
|
116
|
+
"protocol-based JS injection: long hex encoding" => {
|
117
|
+
html: '<a href="javascript:">foo</a>',
|
118
|
+
default: "foo",
|
119
|
+
restricted: "foo",
|
120
|
+
basic: '<a rel="nofollow">foo</a>',
|
121
|
+
relaxed: "<a>foo</a>"
|
121
122
|
},
|
122
123
|
|
123
|
-
|
124
|
-
:
|
125
|
-
:
|
126
|
-
:
|
127
|
-
:
|
128
|
-
:
|
124
|
+
"protocol-based JS injection: hex encoding without semicolons" => {
|
125
|
+
html: "<a href=javascript:alert('XSS')>foo</a>",
|
126
|
+
default: "foo",
|
127
|
+
restricted: "foo",
|
128
|
+
basic: '<a rel="nofollow">foo</a>',
|
129
|
+
relaxed: "<a>foo</a>"
|
129
130
|
},
|
130
131
|
|
131
|
-
|
132
|
-
:
|
133
|
-
:
|
134
|
-
:
|
135
|
-
:
|
136
|
-
:
|
132
|
+
"protocol-based JS injection: null char" => {
|
133
|
+
html: "<img src=java\0script:alert(\"XSS\")>",
|
134
|
+
default: "",
|
135
|
+
restricted: "",
|
136
|
+
basic: "",
|
137
|
+
relaxed: "<img>"
|
137
138
|
},
|
138
139
|
|
139
|
-
|
140
|
-
:
|
141
|
-
:
|
142
|
-
:
|
143
|
-
:
|
144
|
-
:
|
140
|
+
"protocol-based JS injection: invalid URL char" => {
|
141
|
+
html: '<img src=java\script:alert("XSS")>',
|
142
|
+
default: "",
|
143
|
+
restricted: "",
|
144
|
+
basic: "",
|
145
|
+
relaxed: "<img>"
|
145
146
|
},
|
146
147
|
|
147
|
-
|
148
|
-
:
|
149
|
-
:
|
150
|
-
:
|
151
|
-
:
|
152
|
-
:
|
148
|
+
"protocol-based JS injection: spaces and entities" => {
|
149
|
+
html: '<img src="  javascript:alert(\'XSS\');">',
|
150
|
+
default: "",
|
151
|
+
restricted: "",
|
152
|
+
basic: "",
|
153
|
+
relaxed: "<img>"
|
153
154
|
},
|
154
155
|
|
155
|
-
|
156
|
-
:
|
157
|
-
:
|
158
|
-
:
|
159
|
-
:
|
160
|
-
:
|
156
|
+
"protocol whitespace" => {
|
157
|
+
html: '<a href=" http://example.com/"></a>',
|
158
|
+
default: "",
|
159
|
+
restricted: "",
|
160
|
+
basic: '<a href="http://example.com/" rel="nofollow"></a>',
|
161
|
+
relaxed: '<a href="http://example.com/"></a>'
|
161
162
|
}
|
162
163
|
}
|
163
164
|
|
164
|
-
describe
|
165
|
-
it
|
165
|
+
describe "Default config" do
|
166
|
+
it "should remove non-allowlisted elements, leaving safe contents behind" do
|
166
167
|
_(Sanitize.fragment('foo <b>bar</b> <strong><a href="#a">baz</a></strong> quux'))
|
167
|
-
.must_equal
|
168
|
+
.must_equal "foo bar baz quux"
|
168
169
|
|
169
170
|
_(Sanitize.fragment('<script>alert("<xss>");</script>'))
|
170
|
-
.must_equal
|
171
|
+
.must_equal ""
|
171
172
|
|
172
173
|
_(Sanitize.fragment('<<script>script>alert("<xss>");</<script>>'))
|
173
|
-
.must_equal
|
174
|
+
.must_equal "<"
|
174
175
|
|
175
176
|
_(Sanitize.fragment('< script <>> alert("<xss>");</script>'))
|
176
177
|
.must_equal '< script <>> alert("");'
|
177
178
|
end
|
178
179
|
|
179
|
-
it
|
180
|
-
_(Sanitize.fragment(
|
181
|
-
.must_equal
|
180
|
+
it "should surround the contents of :whitespace_elements with space characters when removing the element" do
|
181
|
+
_(Sanitize.fragment("foo<div>bar</div>baz"))
|
182
|
+
.must_equal "foo bar baz"
|
182
183
|
|
183
|
-
_(Sanitize.fragment(
|
184
|
-
.must_equal
|
184
|
+
_(Sanitize.fragment("foo<br>bar<br>baz"))
|
185
|
+
.must_equal "foo bar baz"
|
185
186
|
|
186
|
-
_(Sanitize.fragment(
|
187
|
-
.must_equal
|
187
|
+
_(Sanitize.fragment("foo<hr>bar<hr>baz"))
|
188
|
+
.must_equal "foo bar baz"
|
188
189
|
end
|
189
190
|
|
190
|
-
it
|
191
|
+
it "should not choke on several instances of the same element in a row" do
|
191
192
|
_(Sanitize.fragment('<img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif">'))
|
192
|
-
.must_equal
|
193
|
+
.must_equal ""
|
193
194
|
end
|
194
195
|
|
195
|
-
it
|
196
|
-
_(Sanitize.fragment(
|
197
|
-
.must_equal
|
196
|
+
it "should not preserve the content of removed `iframe` elements" do
|
197
|
+
_(Sanitize.fragment("<iframe>hello! <script>alert(0)</script></iframe>"))
|
198
|
+
.must_equal ""
|
198
199
|
end
|
199
200
|
|
200
|
-
it
|
201
|
-
_(Sanitize.fragment(
|
202
|
-
.must_equal
|
201
|
+
it "should not preserve the content of removed `math` elements" do
|
202
|
+
_(Sanitize.fragment("<math>hello! <script>alert(0)</script></math>"))
|
203
|
+
.must_equal ""
|
203
204
|
end
|
204
205
|
|
205
|
-
it
|
206
|
-
_(Sanitize.fragment(
|
207
|
-
.must_equal
|
206
|
+
it "should not preserve the content of removed `noembed` elements" do
|
207
|
+
_(Sanitize.fragment("<noembed>hello! <script>alert(0)</script></noembed>"))
|
208
|
+
.must_equal ""
|
208
209
|
end
|
209
210
|
|
210
|
-
it
|
211
|
-
_(Sanitize.fragment(
|
212
|
-
.must_equal
|
211
|
+
it "should not preserve the content of removed `noframes` elements" do
|
212
|
+
_(Sanitize.fragment("<noframes>hello! <script>alert(0)</script></noframes>"))
|
213
|
+
.must_equal ""
|
213
214
|
end
|
214
215
|
|
215
|
-
it
|
216
|
-
_(Sanitize.fragment(
|
217
|
-
.must_equal
|
216
|
+
it "should not preserve the content of removed `noscript` elements" do
|
217
|
+
_(Sanitize.fragment("<noscript>hello! <script>alert(0)</script></noscript>"))
|
218
|
+
.must_equal ""
|
218
219
|
end
|
219
220
|
|
220
|
-
it
|
221
|
-
_(Sanitize.fragment(
|
222
|
-
.must_equal
|
221
|
+
it "should not preserve the content of removed `plaintext` elements" do
|
222
|
+
_(Sanitize.fragment("<plaintext>hello! <script>alert(0)</script>"))
|
223
|
+
.must_equal ""
|
223
224
|
end
|
224
225
|
|
225
|
-
it
|
226
|
-
_(Sanitize.fragment(
|
227
|
-
.must_equal
|
226
|
+
it "should not preserve the content of removed `script` elements" do
|
227
|
+
_(Sanitize.fragment("<script>hello! <script>alert(0)</script></script>"))
|
228
|
+
.must_equal ""
|
228
229
|
end
|
229
230
|
|
230
|
-
it
|
231
|
-
_(Sanitize.fragment(
|
232
|
-
.must_equal
|
231
|
+
it "should not preserve the content of removed `style` elements" do
|
232
|
+
_(Sanitize.fragment("<style>hello! <script>alert(0)</script></style>"))
|
233
|
+
.must_equal ""
|
233
234
|
end
|
234
235
|
|
235
|
-
it
|
236
|
-
_(Sanitize.fragment(
|
237
|
-
.must_equal
|
236
|
+
it "should not preserve the content of removed `svg` elements" do
|
237
|
+
_(Sanitize.fragment("<svg>hello! <script>alert(0)</script></svg>"))
|
238
|
+
.must_equal ""
|
238
239
|
end
|
239
240
|
|
240
|
-
it
|
241
|
-
_(Sanitize.fragment(
|
242
|
-
.must_equal
|
241
|
+
it "should not preserve the content of removed `xmp` elements" do
|
242
|
+
_(Sanitize.fragment("<xmp>hello! <script>alert(0)</script></xmp>"))
|
243
|
+
.must_equal ""
|
243
244
|
end
|
244
245
|
|
245
246
|
strings.each do |name, data|
|
@@ -255,7 +256,7 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
255
256
|
end
|
256
257
|
end
|
257
258
|
|
258
|
-
describe
|
259
|
+
describe "Restricted config" do
|
259
260
|
before do
|
260
261
|
@s = Sanitize.new(Sanitize::Config::RESTRICTED)
|
261
262
|
end
|
@@ -273,17 +274,17 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
273
274
|
end
|
274
275
|
end
|
275
276
|
|
276
|
-
describe
|
277
|
+
describe "Basic config" do
|
277
278
|
before do
|
278
279
|
@s = Sanitize.new(Sanitize::Config::BASIC)
|
279
280
|
end
|
280
281
|
|
281
|
-
it
|
282
|
-
_(@s.fragment(
|
282
|
+
it "should not choke on valueless attributes" do
|
283
|
+
_(@s.fragment("foo <a href>foo</a> bar"))
|
283
284
|
.must_equal 'foo <a href="" rel="nofollow">foo</a> bar'
|
284
285
|
end
|
285
286
|
|
286
|
-
it
|
287
|
+
it "should downcase attribute names" do
|
287
288
|
_(@s.fragment('<a HREF="javascript:alert(\'foo\')">bar</a>'))
|
288
289
|
.must_equal '<a rel="nofollow">bar</a>'
|
289
290
|
end
|
@@ -301,12 +302,12 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
301
302
|
end
|
302
303
|
end
|
303
304
|
|
304
|
-
describe
|
305
|
+
describe "Relaxed config" do
|
305
306
|
before do
|
306
307
|
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
307
308
|
end
|
308
309
|
|
309
|
-
it
|
310
|
+
it "should encode special chars in attribute values" do
|
310
311
|
_(@s.fragment('<a href="http://example.com" title="<b>éxamples</b> & things">foo</a>'))
|
311
312
|
.must_equal '<a href="http://example.com" title="<b>éxamples</b> & things">foo</a>'
|
312
313
|
end
|
@@ -324,25 +325,25 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
324
325
|
end
|
325
326
|
end
|
326
327
|
|
327
|
-
describe
|
328
|
-
it
|
328
|
+
describe "Custom configs" do
|
329
|
+
it "should allow attributes on all elements if allowlisted under :all" do
|
329
330
|
input = '<p class="foo">bar</p>'
|
330
331
|
|
331
|
-
_(Sanitize.fragment(input)).must_equal
|
332
|
+
_(Sanitize.fragment(input)).must_equal " bar "
|
332
333
|
|
333
334
|
_(Sanitize.fragment(input, {
|
334
|
-
:
|
335
|
-
:
|
335
|
+
elements: ["p"],
|
336
|
+
attributes: {all: ["class"]}
|
336
337
|
})).must_equal input
|
337
338
|
|
338
339
|
_(Sanitize.fragment(input, {
|
339
|
-
:
|
340
|
-
:
|
341
|
-
})).must_equal
|
340
|
+
elements: ["p"],
|
341
|
+
attributes: {"div" => ["class"]}
|
342
|
+
})).must_equal "<p>bar</p>"
|
342
343
|
|
343
344
|
_(Sanitize.fragment(input, {
|
344
|
-
:
|
345
|
-
:
|
345
|
+
elements: ["p"],
|
346
|
+
attributes: {"p" => ["title"], :all => ["class"]}
|
346
347
|
})).must_equal input
|
347
348
|
end
|
348
349
|
|
@@ -350,203 +351,187 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
350
351
|
input = '<a href="/foo/bar">Link</a>'
|
351
352
|
|
352
353
|
_(Sanitize.fragment(input,
|
353
|
-
:
|
354
|
-
:
|
355
|
-
:
|
356
|
-
)).must_equal '<a>Link</a>'
|
354
|
+
elements: ["a"],
|
355
|
+
attributes: {"a" => ["href"]},
|
356
|
+
protocols: {"a" => {"href" => ["http"]}})).must_equal "<a>Link</a>"
|
357
357
|
end
|
358
358
|
|
359
|
-
it
|
359
|
+
it "should allow relative URLs containing colons when the colon is not in the first path segment" do
|
360
360
|
input = '<a href="/wiki/Special:Random">Random Page</a>'
|
361
361
|
|
362
362
|
_(Sanitize.fragment(input, {
|
363
|
-
:
|
364
|
-
:
|
365
|
-
:
|
363
|
+
elements: ["a"],
|
364
|
+
attributes: {"a" => ["href"]},
|
365
|
+
protocols: {"a" => {"href" => [:relative]}}
|
366
366
|
})).must_equal input
|
367
367
|
end
|
368
368
|
|
369
|
-
it
|
369
|
+
it "should allow relative URLs containing colons when the colon is part of an anchor" do
|
370
370
|
input = '<a href="#fn:1">Footnote 1</a>'
|
371
371
|
|
372
372
|
_(Sanitize.fragment(input, {
|
373
|
-
:
|
374
|
-
:
|
375
|
-
:
|
373
|
+
elements: ["a"],
|
374
|
+
attributes: {"a" => ["href"]},
|
375
|
+
protocols: {"a" => {"href" => [:relative]}}
|
376
376
|
})).must_equal input
|
377
377
|
|
378
378
|
input = '<a href="somepage#fn:1">Footnote 1</a>'
|
379
379
|
|
380
380
|
_(Sanitize.fragment(input, {
|
381
|
-
:
|
382
|
-
:
|
383
|
-
:
|
381
|
+
elements: ["a"],
|
382
|
+
attributes: {"a" => ["href"]},
|
383
|
+
protocols: {"a" => {"href" => [:relative]}}
|
384
384
|
})).must_equal input
|
385
385
|
end
|
386
386
|
|
387
|
-
it
|
388
|
-
_(Sanitize.fragment(
|
389
|
-
:
|
390
|
-
)).must_equal 'foo bar '
|
387
|
+
it "should remove the contents of filtered nodes when :remove_contents is true" do
|
388
|
+
_(Sanitize.fragment("foo bar <div>baz<span>quux</span></div>",
|
389
|
+
remove_contents: true)).must_equal "foo bar "
|
391
390
|
end
|
392
391
|
|
393
|
-
it
|
392
|
+
it "should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as strings" do
|
394
393
|
_(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
395
|
-
:
|
396
|
-
)).must_equal 'foo bar baz hi '
|
394
|
+
remove_contents: ["script", "span"])).must_equal "foo bar baz hi "
|
397
395
|
|
398
396
|
_(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
399
|
-
:
|
400
|
-
)).must_equal 'foo bar baz hi '
|
397
|
+
remove_contents: Set.new(["script", "span"]))).must_equal "foo bar baz hi "
|
401
398
|
end
|
402
399
|
|
403
|
-
it
|
400
|
+
it "should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as symbols" do
|
404
401
|
_(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
405
|
-
:
|
406
|
-
)).must_equal 'foo bar baz hi '
|
402
|
+
remove_contents: [:script, :span])).must_equal "foo bar baz hi "
|
407
403
|
|
408
404
|
_(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
409
|
-
:
|
410
|
-
)).must_equal 'foo bar baz hi '
|
405
|
+
remove_contents: Set.new([:script, :span]))).must_equal "foo bar baz hi "
|
411
406
|
end
|
412
407
|
|
413
|
-
it
|
414
|
-
_(Sanitize.fragment(
|
415
|
-
:
|
416
|
-
)).must_equal '<iframe></iframe>'
|
408
|
+
it "should remove the contents of allowlisted iframes" do
|
409
|
+
_(Sanitize.fragment("<iframe>hi <script>hello</script></iframe>",
|
410
|
+
elements: ["iframe"])).must_equal "<iframe></iframe>"
|
417
411
|
end
|
418
412
|
|
419
|
-
it
|
413
|
+
it "should not allow arbitrary HTML5 data attributes by default" do
|
420
414
|
_(Sanitize.fragment('<b data-foo="bar"></b>',
|
421
|
-
:
|
422
|
-
)).must_equal '<b></b>'
|
415
|
+
elements: ["b"])).must_equal "<b></b>"
|
423
416
|
|
424
417
|
_(Sanitize.fragment('<b class="foo" data-foo="bar"></b>',
|
425
|
-
:
|
426
|
-
:
|
427
|
-
)).must_equal '<b class="foo"></b>'
|
418
|
+
attributes: {"b" => ["class"]},
|
419
|
+
elements: ["b"])).must_equal '<b class="foo"></b>'
|
428
420
|
end
|
429
421
|
|
430
|
-
it
|
422
|
+
it "should allow arbitrary HTML5 data attributes when the :attributes config includes :data" do
|
431
423
|
s = Sanitize.new(
|
432
|
-
:
|
433
|
-
:
|
424
|
+
attributes: {"b" => [:data]},
|
425
|
+
elements: ["b"]
|
434
426
|
)
|
435
427
|
|
436
428
|
_(s.fragment('<b data-foo="valid" data-bar="valid"></b>'))
|
437
429
|
.must_equal '<b data-foo="valid" data-bar="valid"></b>'
|
438
430
|
|
439
431
|
_(s.fragment('<b data-="invalid"></b>'))
|
440
|
-
.must_equal
|
432
|
+
.must_equal "<b></b>"
|
441
433
|
|
442
434
|
_(s.fragment('<b data-="invalid"></b>'))
|
443
|
-
.must_equal
|
435
|
+
.must_equal "<b></b>"
|
444
436
|
|
445
437
|
_(s.fragment('<b data-xml="invalid"></b>'))
|
446
|
-
.must_equal
|
438
|
+
.must_equal "<b></b>"
|
447
439
|
|
448
440
|
_(s.fragment('<b data-xmlfoo="invalid"></b>'))
|
449
|
-
.must_equal
|
441
|
+
.must_equal "<b></b>"
|
450
442
|
|
451
443
|
_(s.fragment('<b data-f:oo="valid"></b>'))
|
452
|
-
.must_equal
|
444
|
+
.must_equal "<b></b>"
|
453
445
|
|
454
446
|
_(s.fragment('<b data-f/oo="partial"></b>'))
|
455
447
|
.must_equal '<b data-f=""></b>' # Nokogiri quirk; not ideal, but harmless
|
456
448
|
|
457
449
|
_(s.fragment('<b data-éfoo="valid"></b>'))
|
458
|
-
.must_equal
|
450
|
+
.must_equal "<b></b>" # Another annoying Nokogiri quirk.
|
459
451
|
end
|
460
452
|
|
461
|
-
it
|
453
|
+
it "should replace whitespace_elements with configured :before and :after values" do
|
462
454
|
s = Sanitize.new(
|
463
|
-
:
|
464
|
-
|
465
|
-
|
466
|
-
|
455
|
+
whitespace_elements: {
|
456
|
+
"p" => {before: "\n", after: "\n"},
|
457
|
+
"div" => {before: "\n", after: "\n"},
|
458
|
+
"br" => {before: "\n", after: "\n"}
|
467
459
|
}
|
468
460
|
)
|
469
461
|
|
470
|
-
_(s.fragment(
|
471
|
-
_(s.fragment(
|
472
|
-
_(s.fragment(
|
473
|
-
_(s.fragment(
|
462
|
+
_(s.fragment("<p>foo</p>")).must_equal "\nfoo\n"
|
463
|
+
_(s.fragment("<p>foo</p><p>bar</p>")).must_equal "\nfoo\n\nbar\n"
|
464
|
+
_(s.fragment("foo<div>bar</div>baz")).must_equal "foo\nbar\nbaz"
|
465
|
+
_(s.fragment("foo<br>bar<br>baz")).must_equal "foo\nbar\nbaz"
|
474
466
|
end
|
475
467
|
|
476
|
-
it
|
468
|
+
it "should handle protocols correctly regardless of case" do
|
477
469
|
input = '<a href="hTTpS://foo.com/">Text</a>'
|
478
470
|
|
479
471
|
_(Sanitize.fragment(input, {
|
480
|
-
:
|
481
|
-
:
|
482
|
-
:
|
472
|
+
elements: ["a"],
|
473
|
+
attributes: {"a" => ["href"]},
|
474
|
+
protocols: {"a" => {"href" => ["https"]}}
|
483
475
|
})).must_equal input
|
484
476
|
|
485
477
|
input = '<a href="mailto:someone@example.com?Subject=Hello">Text</a>'
|
486
478
|
|
487
479
|
_(Sanitize.fragment(input, {
|
488
|
-
:
|
489
|
-
:
|
490
|
-
:
|
480
|
+
elements: ["a"],
|
481
|
+
attributes: {"a" => ["href"]},
|
482
|
+
protocols: {"a" => {"href" => ["https"]}}
|
491
483
|
})).must_equal "<a>Text</a>"
|
492
484
|
end
|
493
485
|
|
494
|
-
it
|
486
|
+
it "should sanitize protocols in data attributes even if data attributes are generically allowed" do
|
495
487
|
input = '<a data-url="mailto:someone@example.com">Text</a>'
|
496
488
|
|
497
489
|
_(Sanitize.fragment(input, {
|
498
|
-
:
|
499
|
-
:
|
500
|
-
:
|
490
|
+
elements: ["a"],
|
491
|
+
attributes: {"a" => [:data]},
|
492
|
+
protocols: {"a" => {"data-url" => ["https"]}}
|
501
493
|
})).must_equal "<a>Text</a>"
|
502
494
|
|
503
495
|
_(Sanitize.fragment(input, {
|
504
|
-
:
|
505
|
-
:
|
506
|
-
:
|
496
|
+
elements: ["a"],
|
497
|
+
attributes: {"a" => [:data]},
|
498
|
+
protocols: {"a" => {"data-url" => ["mailto"]}}
|
507
499
|
})).must_equal input
|
508
500
|
end
|
509
501
|
|
510
|
-
it
|
502
|
+
it "should prevent `<meta>` tags from being used to set a non-UTF-8 charset" do
|
511
503
|
_(Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
|
512
|
-
:
|
513
|
-
:
|
514
|
-
)).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>"
|
504
|
+
elements: %w[html head meta body],
|
505
|
+
attributes: {"meta" => ["charset"]})).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>"
|
515
506
|
|
516
507
|
_(Sanitize.document('<html><meta charset="utf-8">Howdy!</html>',
|
517
|
-
:
|
518
|
-
:
|
519
|
-
)).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
508
|
+
elements: %w[html meta],
|
509
|
+
attributes: {"meta" => ["charset"]})).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
520
510
|
|
521
511
|
_(Sanitize.document('<html><meta charset="us-ascii">Howdy!</html>',
|
522
|
-
:
|
523
|
-
:
|
524
|
-
)).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
512
|
+
elements: %w[html meta],
|
513
|
+
attributes: {"meta" => ["charset"]})).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
525
514
|
|
526
515
|
_(Sanitize.document('<html><meta http-equiv="content-type" content=" text/html; charset=us-ascii">Howdy!</html>',
|
527
|
-
:
|
528
|
-
:
|
529
|
-
)).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>"
|
516
|
+
elements: %w[html meta],
|
517
|
+
attributes: {"meta" => %w[content http-equiv]})).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>"
|
530
518
|
|
531
519
|
_(Sanitize.document('<html><meta http-equiv="Content-Type" content="text/plain;charset = us-ascii">Howdy!</html>',
|
532
|
-
:
|
533
|
-
:
|
534
|
-
)).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>"
|
520
|
+
elements: %w[html meta],
|
521
|
+
attributes: {"meta" => %w[content http-equiv]})).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>"
|
535
522
|
end
|
536
523
|
|
537
|
-
it
|
524
|
+
it "should not modify `<meta>` tags that already set a UTF-8 charset" do
|
538
525
|
_(Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
|
539
|
-
:
|
540
|
-
:
|
541
|
-
)).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>"
|
526
|
+
elements: %w[html head meta body],
|
527
|
+
attributes: {"meta" => %w[content http-equiv]})).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>"
|
542
528
|
end
|
543
529
|
|
544
|
-
it
|
530
|
+
it "always removes `<noscript>` elements even if `noscript` is in the allowlist" do
|
545
531
|
assert_equal(
|
546
|
-
|
547
|
-
Sanitize.fragment(
|
532
|
+
"",
|
533
|
+
Sanitize.fragment("<noscript>foo</noscript>", elements: ["noscript"])
|
548
534
|
)
|
549
535
|
end
|
550
|
-
|
551
536
|
end
|
552
537
|
end
|