sanitize 2.1.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/HISTORY.md +93 -14
- data/README.md +346 -134
- data/lib/sanitize.rb +177 -132
- data/lib/sanitize/config.rb +53 -79
- data/lib/sanitize/config/basic.rb +12 -32
- data/lib/sanitize/config/default.rb +103 -0
- data/lib/sanitize/config/relaxed.rb +517 -52
- data/lib/sanitize/config/restricted.rb +3 -23
- data/lib/sanitize/css.rb +218 -0
- data/lib/sanitize/transformers/clean_cdata.rb +3 -3
- data/lib/sanitize/transformers/clean_comment.rb +6 -3
- data/lib/sanitize/transformers/clean_css.rb +57 -0
- data/lib/sanitize/transformers/clean_doctype.rb +13 -0
- data/lib/sanitize/transformers/clean_element.rb +99 -129
- data/lib/sanitize/version.rb +3 -1
- data/test/common.rb +34 -0
- data/test/test_clean_comment.rb +51 -0
- data/test/test_clean_css.rb +66 -0
- data/test/test_clean_doctype.rb +71 -0
- data/test/test_clean_element.rb +399 -0
- data/test/test_config.rb +65 -0
- data/test/test_malicious_css.rb +42 -0
- data/test/test_malicious_html.rb +128 -0
- data/test/test_parser.rb +104 -0
- data/test/test_sanitize.rb +65 -693
- data/test/test_sanitize_css.rb +222 -0
- data/test/test_transformers.rb +144 -0
- data/test/test_unicode.rb +84 -0
- metadata +56 -8
data/lib/sanitize/version.rb
CHANGED
data/test/common.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
gem 'minitest'
|
3
|
+
require 'minitest/autorun'
|
4
|
+
|
5
|
+
require_relative '../lib/sanitize'
|
6
|
+
|
7
|
+
# Helper to stub an instance method. Shamelessly stolen from
|
8
|
+
# https://github.com/codeodor/minitest-stub_any_instance/
|
9
|
+
class Object
  # Temporarily replaces the instance method +name+ on the receiving class
  # while the given block runs, then puts the class back the way it was.
  #
  # name  - Symbol or String naming an existing instance method.
  # value - Object the stub returns. If it responds to +call+, it is invoked
  #         with the stubbed call's arguments and its result is returned.
  #
  # Returns whatever the block returns.
  # Raises NameError if the class has no instance method called +name+.
  def self.stub_instance(name, value, &block)
    old_method = "__stubbed_method_#{name}__"
    installed  = false

    # Remember whether the method lives on this class or is merely inherited.
    # An inherited method must not be re-created here on restore, otherwise a
    # private copy would permanently shadow the ancestor's definition.
    method_sym = name.to_sym
    owned      = instance_methods(false).include?(method_sym) ||
                 private_instance_methods(false).include?(method_sym)

    class_eval do
      alias_method old_method, name

      define_method(name) do |*args|
        if value.respond_to?(:call)
          value.call(*args)
        else
          value
        end
      end
    end

    installed = true

    yield

  ensure
    # Only undo what was actually done; if aliasing failed above, the original
    # NameError should propagate untouched.
    if installed
      class_eval do
        remove_method name
        alias_method name, old_method if owned
        remove_method old_method
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
# Specs for comment handling. Expected values are serialized HTML, so literal
# angle brackets that end up as text are written as entities.
describe 'Sanitize::Transformers::CleanComment' do
  make_my_diffs_pretty!
  parallelize_me!

  describe 'when :allow_comments is false' do
    before do
      @s = Sanitize.new(:allow_comments => false, :elements => ['div'])
    end

    it 'should remove comments' do
      # Only the comment goes away; the space on each side of it stays.
      @s.fragment('foo <!-- comment --> bar').must_equal 'foo  bar'
      @s.fragment('foo <!-- ').must_equal 'foo '
      @s.fragment('foo <!-- - -> bar').must_equal 'foo '
      @s.fragment("foo <!--\n\n\n\n-->bar").must_equal 'foo bar'
      @s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo --&gt; --&gt;bar'
      @s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>&gt;bar</div>'

      # Special case: the comment markup is inside a <script>, which makes it
      # text content and not an actual HTML comment.
      @s.fragment("<script><!-- comment --></script>").must_equal '&lt;!-- comment --&gt;'

      Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script'])
        .must_equal '<script><!-- comment --></script>'
    end
  end

  describe 'when :allow_comments is true' do
    before do
      @s = Sanitize.new(:allow_comments => true, :elements => ['div'])
    end

    it 'should allow comments' do
      @s.fragment('foo <!-- comment --> bar').must_equal 'foo <!-- comment --> bar'
      @s.fragment('foo <!-- ').must_equal 'foo <!-- -->'
      @s.fragment('foo <!-- - -> bar').must_equal 'foo <!-- - -> bar-->'
      @s.fragment("foo <!--\n\n\n\n-->bar").must_equal "foo <!--\n\n\n\n-->bar"
      # The comment ends at the first "-->"; what follows is ordinary text.
      @s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo <!-- <!-- <!-- --> --&gt; --&gt;bar'
      @s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>&gt;bar</div>'

      # Special case: the comment markup is inside a <script>, which makes it
      # text content and not an actual HTML comment.
      @s.fragment("<script><!-- comment --></script>").must_equal '&lt;!-- comment --&gt;'

      Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script'])
        .must_equal '<script><!-- comment --></script>'
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
# Specs for sanitizing CSS found in style="" attributes under the RELAXED
# config.
describe 'Sanitize::Transformers::CSS::CleanAttribute' do
  make_my_diffs_pretty!
  parallelize_me!

  before do
    @s = Sanitize.new(Sanitize::Config::RELAXED)
  end

  it 'should sanitize CSS properties in style attributes' do
    # Dropping the width declaration leaves the spaces that surrounded it, and
    # the "<" in the CSS comment is escaped in the serialized attribute value.
    @s.fragment(%[
      <div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
    ].strip).must_equal %[
      <div style="color: #fff;  /* &lt;-- evil! */"></div>
    ].strip
  end

  it 'should remove the style attribute if the sanitized CSS is empty' do
    @s.fragment('<div style="width: expression(alert(1))"></div>').
      must_equal '<div></div>'
  end
end
|
25
|
+
|
26
|
+
# Specs for sanitizing stylesheets inside <style> elements under the RELAXED
# config.
describe 'Sanitize::Transformers::CSS::CleanElement' do
  make_my_diffs_pretty!
  parallelize_me!

  before do
    @s = Sanitize.new(Sanitize::Config::RELAXED)
  end

  it 'should sanitize CSS stylesheets in <style> elements' do
    html = %[
      <style>@import url(evil.css);
      /* Yay CSS! */
      .foo { color: #fff; }
      #bar { background: url(yay.jpg); bogus: wtf; }
      .evil { width: expression(xss()); }

      @media screen (max-width:480px) {
        .foo { width: 400px; }
        #bar:not(.baz) { height: 100px; }
      }
      </style>
    ].strip

    # Rejected rules and declarations vanish but the whitespace around them is
    # kept, hence the bare first line and the two spaces inside ".evil {  }".
    @s.fragment(html).must_equal %[
      <style>
      /* Yay CSS! */
      .foo { color: #fff; }
      #bar { background: url(yay.jpg); }
      .evil {  }

      @media screen (max-width:480px) {
        .foo { width: 400px; }
        #bar:not(.baz) { height: 100px; }
      }
      </style>
    ].strip
  end

  it 'should remove the <style> element if the sanitized CSS is empty' do
    # NOTE(review): this spec previously had no body and passed vacuously. The
    # assertion below is derived from the spec's description; confirm the
    # expected output against the transformer.
    @s.fragment('<style></style>').must_equal ''
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
# Specs for doctype handling in documents and fragments, with :allow_doctype
# switched off and on.
describe 'Sanitize::Transformers::CleanDoctype' do
  make_my_diffs_pretty!
  parallelize_me!

  describe 'when :allow_doctype is false' do
    before { @s = Sanitize.new(:allow_doctype => false, :elements => ['html']) }

    it 'should remove doctype declarations' do
      cleaned_document = @s.document('<!DOCTYPE html><html>foo</html>')
      cleaned_document.must_equal "<html>foo</html>\n"

      cleaned_fragment = @s.fragment('<!DOCTYPE html>foo')
      cleaned_fragment.must_equal 'foo'
    end

    it 'should not allow doctype definitions in fragments' do
      [
        '<!DOCTYPE html><html>foo</html>',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>',
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"
      ].each do |markup|
        @s.fragment(markup).must_equal "foo"
      end
    end
  end

  describe 'when :allow_doctype is true' do
    before { @s = Sanitize.new(:allow_doctype => true, :elements => ['html']) }

    it 'should allow doctype declarations in documents' do
      # Pairs of [input document, expected sanitized document].
      [
        [
          '<!DOCTYPE html><html>foo</html>',
          "<!DOCTYPE html>\n<html>foo</html>\n"
        ],
        [
          '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>',
          "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n<html>foo</html>\n"
        ],
        [
          "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>",
          "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html>foo</html>\n"
        ]
      ].each do |markup, expected|
        @s.document(markup).must_equal expected
      end
    end

    it 'should not allow obviously invalid doctype declarations in documents' do
      # Malformed DOCTYPE declarations are normalized to the plain HTML one.
      [
        '<!DOCTYPE blah blah blah><html>foo</html>',
        '<!DOCTYPE blah><html>foo</html>',
        '<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'
      ].each do |markup|
        @s.document(markup).must_equal "<!DOCTYPE html>\n<html>foo</html>\n"
      end

      # Something that is not a DOCTYPE at all is dropped outright.
      @s.document('<!whatever><html>foo</html>').must_equal "<html>foo</html>\n"
    end

    it 'should not allow doctype definitions in fragments' do
      [
        '<!DOCTYPE html><html>foo</html>',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>',
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"
      ].each do |markup|
        @s.fragment(markup).must_equal "foo"
      end
    end
  end
end
|
@@ -0,0 +1,399 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
# Specs for element/attribute cleaning across the built-in configs and a set
# of custom configs. Expected values are serialized HTML: angle brackets and
# ampersands that end up as text or attribute content appear as entities.
describe 'Sanitize::Transformers::CleanElement' do
  make_my_diffs_pretty!
  parallelize_me!

  # General fixtures. Each entry pairs an :html input with the output expected
  # under the default, restricted, basic and relaxed configs.
  strings = {
    :basic => {
      :html       => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <style>.foo { color: #fff; }</style> <script>alert("hello world");</script>',

      :default    => 'Lorem ipsum dolor sit amet .foo { color: #fff; } alert("hello world");',
      :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet .foo { color: #fff; } alert("hello world");',
      :basic      => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet .foo { color: #fff; } alert("hello world");',
      :relaxed    => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> alert("hello world");'
    },

    :malformed => {
      :html       => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',

      :default    => 'Lorem dolor sit amet alert("hello world");',
      :restricted => 'Lorem <strong>dolor</strong> sit amet alert("hello world");',
      :basic      => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet alert("hello world");',
      # The unterminated title swallows the following tag; its angle brackets
      # are escaped inside the attribute value.
      :relaxed    => 'Lorem <a href="pants" title="foo&gt;ipsum &lt;a href="><strong>dolor</strong></a> sit<br>amet alert("hello world");',
    },

    :unclosed => {
      :html       => '<p>a</p><blockquote>b',

      # Each removed element is surrounded by its own pair of spaces, so two
      # spaces separate "a" and "b".
      :default    => ' a  b ',
      :restricted => ' a  b ',
      :basic      => '<p>a</p><blockquote>b</blockquote>',
      :relaxed    => '<p>a</p><blockquote>b</blockquote>'
    },

    :malicious => {
      :html       => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',

      :default    => 'Lorem ipsum dolor sit amet &lt;script&gt;alert("hello world");',
      :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert("hello world");',
      :basic      => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");',
      :relaxed    => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");'
    }
  }

  # Every <a href> injection attempt must lose its href and nothing else.
  anchor_results = {
    :default    => 'foo',
    :restricted => 'foo',
    :basic      => '<a rel="nofollow">foo</a>',
    :relaxed    => '<a>foo</a>'
  }

  # Every <img src> injection attempt must lose its src; only the relaxed
  # config keeps the (now attribute-less) element.
  image_results = {
    :default    => '',
    :restricted => '',
    :basic      => '',
    :relaxed    => '<img>'
  }

  # Spec name => hostile markup. The numeric character references below must
  # stay encoded in this file: the point of these cases is that the protocol
  # only becomes visible after the HTML parser decodes them.
  anchor_payloads = {
    'protocol-based JS injection: simple, no spaces' =>
      '<a href="javascript:alert(\'XSS\');">foo</a>',

    'protocol-based JS injection: simple, spaces before' =>
      '<a href="javascript :alert(\'XSS\');">foo</a>',

    'protocol-based JS injection: simple, spaces after' =>
      '<a href="javascript: alert(\'XSS\');">foo</a>',

    'protocol-based JS injection: simple, spaces before and after' =>
      '<a href="javascript : alert(\'XSS\');">foo</a>',

    'protocol-based JS injection: preceding colon' =>
      '<a href=":javascript:alert(\'XSS\');">foo</a>',

    'protocol-based JS injection: UTF-8 encoding' =>
      '<a href="&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;">foo</a>',

    'protocol-based JS injection: long UTF-8 encoding' =>
      '<a href="&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;">foo</a>',

    'protocol-based JS injection: long UTF-8 encoding without semicolons' =>
      '<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>',

    'protocol-based JS injection: hex encoding' =>
      '<a href="&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;">foo</a>',

    'protocol-based JS injection: long hex encoding' =>
      '<a href="&#x0000006A;&#x00000061;&#x00000076;&#x00000061;&#x00000073;&#x00000063;&#x00000072;&#x00000069;&#x00000070;&#x00000074;&#x0000003A;">foo</a>',

    'protocol-based JS injection: hex encoding without semicolons' =>
      '<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>'
  }

  image_payloads = {
    'protocol-based JS injection: null char' =>
      "<img src=java\0script:alert(\"XSS\")>",

    'protocol-based JS injection: invalid URL char' =>
      '<img src=java\script:alert("XSS")>',

    # NOTE(review): the control-character entity before the protocol is a
    # reconstruction of the "spaces and entities" payload — confirm.
    'protocol-based JS injection: spaces and entities' =>
      '<img src=" &#14;  javascript:alert(\'XSS\');">'
  }

  # Same shape as `strings`: name => {:html, :default, :restricted, :basic,
  # :relaxed}.
  protocols = {}

  anchor_payloads.each do |name, html|
    protocols[name] = anchor_results.merge(:html => html)
  end

  image_payloads.each do |name, html|
    protocols[name] = image_results.merge(:html => html)
  end

  describe 'Default config' do
    it 'should remove non-whitelisted elements, leaving safe contents behind' do
      Sanitize.fragment('foo <b>bar</b> <strong><a href="#a">baz</a></strong> quux')
        .must_equal 'foo bar baz quux'

      Sanitize.fragment('<script>alert("<xss>");</script>')
        .must_equal 'alert("&lt;xss&gt;");'

      Sanitize.fragment('<<script>script>alert("<xss>");</<script>>')
        .must_equal '&lt;script&gt;alert("&lt;xss&gt;");&lt;/&lt;script&gt;&gt;'

      Sanitize.fragment('< script <>> alert("<xss>");</script>')
        .must_equal '&lt; script &lt;&gt;&gt; alert("");'
    end

    it 'should surround the contents of :whitespace_elements with space characters when removing the element' do
      Sanitize.fragment('foo<div>bar</div>baz')
        .must_equal 'foo bar baz'

      Sanitize.fragment('foo<br>bar<br>baz')
        .must_equal 'foo bar baz'

      Sanitize.fragment('foo<hr>bar<hr>baz')
        .must_equal 'foo bar baz'
    end

    it 'should not choke on several instances of the same element in a row' do
      Sanitize.fragment('<img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif">')
        .must_equal ''
    end

    strings.each do |name, data|
      it "should clean #{name} HTML" do
        Sanitize.fragment(data[:html]).must_equal(data[:default])
      end
    end

    protocols.each do |name, data|
      it "should not allow #{name}" do
        Sanitize.fragment(data[:html]).must_equal(data[:default])
      end
    end
  end

  describe 'Restricted config' do
    before do
      @s = Sanitize.new(Sanitize::Config::RESTRICTED)
    end

    strings.each do |name, data|
      it "should clean #{name} HTML" do
        @s.fragment(data[:html]).must_equal(data[:restricted])
      end
    end

    protocols.each do |name, data|
      it "should not allow #{name}" do
        @s.fragment(data[:html]).must_equal(data[:restricted])
      end
    end
  end

  describe 'Basic config' do
    before do
      @s = Sanitize.new(Sanitize::Config::BASIC)
    end

    it 'should not choke on valueless attributes' do
      @s.fragment('foo <a href>foo</a> bar')
        .must_equal 'foo <a href="" rel="nofollow">foo</a> bar'
    end

    it 'should downcase attribute names' do
      @s.fragment('<a HREF="javascript:alert(\'foo\')">bar</a>')
        .must_equal '<a rel="nofollow">bar</a>'
    end

    strings.each do |name, data|
      it "should clean #{name} HTML" do
        @s.fragment(data[:html]).must_equal(data[:basic])
      end
    end

    protocols.each do |name, data|
      it "should not allow #{name}" do
        @s.fragment(data[:html]).must_equal(data[:basic])
      end
    end
  end

  describe 'Relaxed config' do
    before do
      @s = Sanitize.new(Sanitize::Config::RELAXED)
    end

    it 'should encode special chars in attribute values' do
      @s.fragment('<a href="http://example.com" title="<b>éxamples</b> & things">foo</a>')
        .must_equal '<a href="http://example.com" title="&lt;b&gt;éxamples&lt;/b&gt; &amp; things">foo</a>'
    end

    strings.each do |name, data|
      it "should clean #{name} HTML" do
        @s.fragment(data[:html]).must_equal(data[:relaxed])
      end
    end

    protocols.each do |name, data|
      it "should not allow #{name}" do
        @s.fragment(data[:html]).must_equal(data[:relaxed])
      end
    end
  end

  describe 'Custom configs' do
    it 'should allow attributes on all elements if whitelisted under :all' do
      input = '<p class="foo">bar</p>'

      Sanitize.fragment(input).must_equal ' bar '

      Sanitize.fragment(input, {
        :elements   => ['p'],
        :attributes => {:all => ['class']}
      }).must_equal input

      Sanitize.fragment(input, {
        :elements   => ['p'],
        :attributes => {'div' => ['class']}
      }).must_equal '<p>bar</p>'

      Sanitize.fragment(input, {
        :elements   => ['p'],
        :attributes => {'p' => ['title'], :all => ['class']}
      }).must_equal input
    end

    it 'should allow relative URLs containing colons when the colon is not in the first path segment' do
      input = '<a href="/wiki/Special:Random">Random Page</a>'

      Sanitize.fragment(input, {
        :elements   => ['a'],
        :attributes => {'a' => ['href']},
        :protocols  => {'a' => {'href' => [:relative]}}
      }).must_equal input
    end

    it 'should allow relative URLs containing colons when the colon is part of an anchor' do
      input = '<a href="#fn:1">Footnote 1</a>'

      Sanitize.fragment(input, {
        :elements   => ['a'],
        :attributes => {'a' => ['href']},
        :protocols  => {'a' => {'href' => [:relative]}}
      }).must_equal input

      input = '<a href="somepage#fn:1">Footnote 1</a>'

      Sanitize.fragment(input, {
        :elements   => ['a'],
        :attributes => {'a' => ['href']},
        :protocols  => {'a' => {'href' => [:relative]}}
      }).must_equal input
    end

    # NOTE(review): in the three :remove_contents specs below, the runs of
    # spaces in the expected strings were reconstructed (one space in the input
    # plus the whitespace that replaces the removed <div>) — confirm.
    it 'should remove the contents of filtered nodes when :remove_contents is true' do
      Sanitize.fragment('foo bar <div>baz<span>quux</span></div>',
        :remove_contents => true
      ).must_equal 'foo bar   '
    end

    it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as strings' do
      Sanitize.fragment('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>',
        :remove_contents => ['script', 'span']
      ).must_equal 'foo bar  baz '
    end

    it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as symbols' do
      Sanitize.fragment('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>',
        :remove_contents => [:script, :span]
      ).must_equal 'foo bar  baz '
    end

    it 'should not allow arbitrary HTML5 data attributes by default' do
      Sanitize.fragment('<b data-foo="bar"></b>',
        :elements => ['b']
      ).must_equal '<b></b>'

      Sanitize.fragment('<b class="foo" data-foo="bar"></b>',
        :attributes => {'b' => ['class']},
        :elements   => ['b']
      ).must_equal '<b class="foo"></b>'
    end

    it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do
      s = Sanitize.new(
        :attributes => {'b' => [:data]},
        :elements   => ['b']
      )

      s.fragment('<b data-foo="valid" data-bar="valid"></b>')
        .must_equal '<b data-foo="valid" data-bar="valid"></b>'

      s.fragment('<b data-="invalid"></b>')
        .must_equal '<b></b>'

      s.fragment('<b data-="invalid"></b>')
        .must_equal '<b></b>'

      s.fragment('<b data-xml="invalid"></b>')
        .must_equal '<b></b>'

      s.fragment('<b data-xmlfoo="invalid"></b>')
        .must_equal '<b></b>'

      s.fragment('<b data-f:oo="valid"></b>')
        .must_equal '<b></b>'

      s.fragment('<b data-f/oo="partial"></b>')
        .must_equal '<b data-f=""></b>' # Nokogiri quirk; not ideal, but harmless

      s.fragment('<b data-éfoo="valid"></b>')
        .must_equal '<b></b>' # Another annoying Nokogiri quirk.
    end

    it 'should replace whitespace_elements with configured :before and :after values' do
      s = Sanitize.new(
        :whitespace_elements => {
          'p'   => { :before => "\n", :after => "\n" },
          'div' => { :before => "\n", :after => "\n" },
          'br'  => { :before => "\n", :after => "\n" },
        }
      )

      s.fragment('<p>foo</p>').must_equal "\nfoo\n"
      s.fragment('<p>foo</p><p>bar</p>').must_equal "\nfoo\n\nbar\n"
      s.fragment('foo<div>bar</div>baz').must_equal "foo\nbar\nbaz"
      s.fragment('foo<br>bar<br>baz').must_equal "foo\nbar\nbaz"
    end
  end

end
|