sanitize 2.1.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sanitize might be problematic. Click here for more details.

@@ -0,0 +1,222 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Sanitize::CSS' do
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ describe 'instance methods' do
9
+ before do
10
+ @default = Sanitize::CSS.new
11
+ @relaxed = Sanitize::CSS.new(Sanitize::Config::RELAXED[:css])
12
+ @custom = Sanitize::CSS.new(:properties => %w[background color width])
13
+ end
14
+
15
+ describe '#properties' do
16
+ it 'should sanitize CSS properties' do
17
+ css = 'background: #fff; width: expression(alert("hi"));'
18
+
19
+ @default.properties(css).must_equal ' '
20
+ @relaxed.properties(css).must_equal 'background: #fff; '
21
+ @custom.properties(css).must_equal 'background: #fff; '
22
+ end
23
+
24
+ it 'should allow whitelisted URL protocols' do
25
+ [
26
+ "background: url(relative.jpg)",
27
+ "background: url('relative.jpg')",
28
+ "background: url(http://example.com/http.jpg)",
29
+ "background: url('ht\\tp://example.com/http.jpg')",
30
+ "background: url(https://example.com/https.jpg)",
31
+ "background: url('https://example.com/https.jpg')",
32
+ ].each do |css|
33
+ @default.properties(css).must_equal ''
34
+ @relaxed.properties(css).must_equal css
35
+ @custom.properties(css).must_equal ''
36
+ end
37
+ end
38
+
39
+ it 'should not allow non-whitelisted URL protocols' do
40
+ [
41
+ "background: url(javascript:alert(0))",
42
+ "background: url(ja\\56 ascript:alert(0))",
43
+ "background: url('javascript:foo')",
44
+ "background: url('ja\\56 ascript:alert(0)')",
45
+ "background: url('ja\\va\\script\\:alert(0)')",
46
+ "background: url('javas\\\ncript:alert(0)')",
47
+ "background: url('java\\0script:foo')"
48
+ ].each do |css|
49
+ @default.properties(css).must_equal ''
50
+ @relaxed.properties(css).must_equal ''
51
+ @custom.properties(css).must_equal ''
52
+ end
53
+ end
54
+
55
+ it 'should not allow -moz-binding' do
56
+ css = "-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')"
57
+
58
+ @default.properties(css).must_equal ''
59
+ @relaxed.properties(css).must_equal ''
60
+ @custom.properties(css).must_equal ''
61
+ end
62
+
63
+ it 'should not allow expressions' do
64
+ [
65
+ "width:expression(alert(1))",
66
+ "width: /**/expression(alert(1)",
67
+ "width:e\\78 pression(\n\nalert(\n1)",
68
+ "width:\nexpression(alert(1));",
69
+ "xss:expression(alert(1))",
70
+ "height: foo(expression(alert(1)));"
71
+ ].each do |css|
72
+ @default.properties(css).must_equal ''
73
+ @relaxed.properties(css).must_equal ''
74
+ @custom.properties(css).must_equal ''
75
+ end
76
+ end
77
+
78
+ it 'should not allow behaviors' do
79
+ css = "behavior: url(xss.htc);"
80
+
81
+ @default.properties(css).must_equal ''
82
+ @relaxed.properties(css).must_equal ''
83
+ @custom.properties(css).must_equal ''
84
+ end
85
+
86
+ describe 'when :allow_comments is true' do
87
+ it 'should preserve comments' do
88
+ @relaxed.properties('color: #fff; /* comment */ width: 100px;')
89
+ .must_equal 'color: #fff; /* comment */ width: 100px;'
90
+
91
+ @relaxed.properties("color: #fff; /* \n\ncomment */ width: 100px;")
92
+ .must_equal "color: #fff; /* \n\ncomment */ width: 100px;"
93
+ end
94
+ end
95
+
96
+ describe 'when :allow_comments is false' do
97
+ it 'should strip comments' do
98
+ @custom.properties('color: #fff; /* comment */ width: 100px;')
99
+ .must_equal 'color: #fff; width: 100px;'
100
+
101
+ @custom.properties("color: #fff; /* \n\ncomment */ width: 100px;")
102
+ .must_equal 'color: #fff; width: 100px;'
103
+ end
104
+ end
105
+
106
+ describe 'when :allow_hacks is true' do
107
+ it 'should allow common CSS hacks' do
108
+ @relaxed.properties('_border: 1px solid #fff; *width: 10px')
109
+ .must_equal '_border: 1px solid #fff; *width: 10px'
110
+ end
111
+ end
112
+
113
+ describe 'when :allow_hacks is false' do
114
+ it 'should not allow common CSS hacks' do
115
+ @custom.properties('_border: 1px solid #fff; *width: 10px')
116
+ .must_equal ' '
117
+ end
118
+ end
119
+ end
120
+
121
+ describe '#stylesheet' do
122
+ it 'should sanitize a CSS stylesheet' do
123
+ css = %[
124
+ /* Yay CSS! */
125
+ .foo { color: #fff; }
126
+ #bar { background: url(yay.jpg); }
127
+
128
+ @media screen (max-width:480px) {
129
+ .foo { width: 400px; }
130
+ #bar:not(.baz) { height: 100px; }
131
+ }
132
+ ].strip
133
+
134
+ @default.stylesheet(css).strip.must_equal %[
135
+ .foo { }
136
+ #bar { }
137
+ ].strip
138
+
139
+ @relaxed.stylesheet(css).must_equal css
140
+
141
+ @custom.stylesheet(css).strip.must_equal %[
142
+ .foo { color: #fff; }
143
+ #bar { }
144
+ ].strip
145
+ end
146
+
147
+ describe 'when :allow_comments is true' do
148
+ it 'should preserve comments' do
149
+ @relaxed.stylesheet('.foo { color: #fff; /* comment */ width: 100px; }')
150
+ .must_equal '.foo { color: #fff; /* comment */ width: 100px; }'
151
+
152
+ @relaxed.stylesheet(".foo { color: #fff; /* \n\ncomment */ width: 100px; }")
153
+ .must_equal ".foo { color: #fff; /* \n\ncomment */ width: 100px; }"
154
+ end
155
+ end
156
+
157
+ describe 'when :allow_comments is false' do
158
+ it 'should strip comments' do
159
+ @custom.stylesheet('.foo { color: #fff; /* comment */ width: 100px; }')
160
+ .must_equal '.foo { color: #fff; width: 100px; }'
161
+
162
+ @custom.stylesheet(".foo { color: #fff; /* \n\ncomment */ width: 100px; }")
163
+ .must_equal '.foo { color: #fff; width: 100px; }'
164
+ end
165
+ end
166
+
167
+ describe 'when :allow_hacks is true' do
168
+ it 'should allow common CSS hacks' do
169
+ @relaxed.stylesheet('.foo { _border: 1px solid #fff; *width: 10px }')
170
+ .must_equal '.foo { _border: 1px solid #fff; *width: 10px }'
171
+ end
172
+ end
173
+
174
+ describe 'when :allow_hacks is false' do
175
+ it 'should not allow common CSS hacks' do
176
+ @custom.stylesheet('.foo { _border: 1px solid #fff; *width: 10px }')
177
+ .must_equal '.foo { }'
178
+ end
179
+ end
180
+ end
181
+
182
+ describe '#tree!' do
183
+ it 'should sanitize a Crass CSS parse tree' do
184
+ tree = Crass.parse("@import url(foo.css);\n" <<
185
+ ".foo { background: #fff; font: 16pt 'Comic Sans MS'; }\n" <<
186
+ "#bar { top: 125px; background: green; }")
187
+
188
+ @custom.tree!(tree).must_be_same_as tree
189
+
190
+ Crass::Parser.stringify(tree).must_equal "\n" <<
191
+ ".foo { background: #fff; }\n" <<
192
+ "#bar { background: green; }"
193
+ end
194
+ end
195
+ end
196
+
197
+ describe 'class methods' do
198
+ describe '.properties' do
199
+ it 'should call #properties' do
200
+ Sanitize::CSS.stub_instance(:properties, proc {|css| css + 'bar' }) do
201
+ Sanitize::CSS.properties('foo').must_equal 'foobar'
202
+ end
203
+ end
204
+ end
205
+
206
+ describe '.stylesheet' do
207
+ it 'should call #stylesheet' do
208
+ Sanitize::CSS.stub_instance(:stylesheet, proc {|css| css + 'bar' }) do
209
+ Sanitize::CSS.stylesheet('foo').must_equal 'foobar'
210
+ end
211
+ end
212
+ end
213
+
214
+ describe '.tree!' do
215
+ it 'should call #tree!' do
216
+ Sanitize::CSS.stub_instance(:tree!, proc {|tree| tree + 'bar' }) do
217
+ Sanitize::CSS.tree!('foo').must_equal 'foobar'
218
+ end
219
+ end
220
+ end
221
+ end
222
+ end
@@ -0,0 +1,144 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Transformers' do
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ youtube_transformer = lambda do |env|
9
+ node = env[:node]
10
+ node_name = env[:node_name]
11
+
12
+ # Don't continue if this node is already whitelisted or is not an element.
13
+ return if env[:is_whitelisted] || !node.element?
14
+
15
+ # Don't continue unless the node is an iframe.
16
+ return unless node_name == 'iframe'
17
+
18
+ # Verify that the video URL is actually a valid YouTube video URL.
19
+ return unless node['src'] =~ %r|\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/|
20
+
21
+ # We're now certain that this is a YouTube embed, but we still need to run
22
+ # it through a special Sanitize step to ensure that no unwanted elements or
23
+ # attributes that don't belong in a YouTube embed can sneak in.
24
+ Sanitize.node!(node, {
25
+ :elements => %w[iframe],
26
+
27
+ :attributes => {
28
+ 'iframe' => %w[allowfullscreen frameborder height src width]
29
+ }
30
+ })
31
+
32
+ # Now that we're sure that this is a valid YouTube embed and that there are
33
+ # no unwanted elements or attributes hidden inside it, we can tell Sanitize
34
+ # to whitelist the current node.
35
+ {:node_whitelist => [node]}
36
+ end
37
+
38
+ it 'should receive a complete env Hash as input' do
39
+ Sanitize.fragment('<SPAN>foo</SPAN>',
40
+ :foo => :bar,
41
+ :transformers => lambda {|env|
42
+ return unless env[:node].element?
43
+
44
+ env[:config][:foo].must_equal :bar
45
+ env[:is_whitelisted].must_equal false
46
+ env[:node].must_be_kind_of Nokogiri::XML::Node
47
+ env[:node_name].must_equal 'span'
48
+ env[:node_whitelist].must_be_kind_of Set
49
+ env[:node_whitelist].must_be_empty
50
+ }
51
+ )
52
+ end
53
+
54
+ it 'should traverse all node types, including the fragment itself' do
55
+ nodes = []
56
+
57
+ Sanitize.fragment('<div>foo</div><!--bar--><script>cdata!</script>',
58
+ :transformers => proc {|env| nodes << env[:node_name] }
59
+ )
60
+
61
+ nodes.must_equal %w[
62
+ #document-fragment div text text text comment script text
63
+ ]
64
+ end
65
+
66
+ it 'should perform top-down traversal' do
67
+ nodes = []
68
+
69
+ Sanitize.fragment('<div><span><strong>foo</strong></span><b></b></div><p>bar</p>',
70
+ :transformers => proc {|env| nodes << env[:node_name] if env[:node].element? }
71
+ )
72
+
73
+ nodes.must_equal %w[div span strong b p]
74
+ end
75
+
76
+ it 'should whitelist nodes in the node whitelist' do
77
+ Sanitize.fragment('<div class="foo">foo</div><span>bar</span>',
78
+ :transformers => [
79
+ proc {|env|
80
+ {:node_whitelist => [env[:node]]} if env[:node_name] == 'div'
81
+ },
82
+
83
+ proc {|env|
84
+ env[:is_whitelisted].must_equal false unless env[:node_name] == 'div'
85
+ env[:is_whitelisted].must_equal true if env[:node_name] == 'div'
86
+ env[:node_whitelist].must_include env[:node] if env[:node_name] == 'div'
87
+ }
88
+ ]
89
+ ).must_equal '<div class="foo">foo</div>bar'
90
+ end
91
+
92
+ it 'should clear the node whitelist after each fragment' do
93
+ called = false
94
+
95
+ Sanitize.fragment('<div>foo</div>',
96
+ :transformers => proc {|env| {:node_whitelist => [env[:node]]}}
97
+ )
98
+
99
+ Sanitize.fragment('<div>foo</div>',
100
+ :transformers => proc {|env|
101
+ called = true
102
+ env[:is_whitelisted].must_equal false
103
+ env[:node_whitelist].must_be_empty
104
+ }
105
+ )
106
+
107
+ called.must_equal true
108
+ end
109
+
110
+ it 'should allow YouTube video embeds via the YouTube transformer' do
111
+ input = '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
112
+
113
+ Sanitize.fragment(input, :transformers => youtube_transformer)
114
+ .must_equal '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen="">&lt;script&gt;alert()&lt;/script&gt;</iframe>'
115
+ end
116
+
117
+ it 'should allow https YouTube video embeds via the YouTube transformer' do
118
+ input = '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
119
+
120
+ Sanitize.fragment(input, :transformers => youtube_transformer)
121
+ .must_equal '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen="">&lt;script&gt;alert()&lt;/script&gt;</iframe>'
122
+ end
123
+
124
+ it 'should allow protocol-relative YouTube video embeds via the YouTube transformer' do
125
+ input = '<iframe width="420" height="315" src="//www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
126
+
127
+ Sanitize.fragment(input, :transformers => youtube_transformer)
128
+ .must_equal '<iframe width="420" height="315" src="//www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen="">&lt;script&gt;alert()&lt;/script&gt;</iframe>'
129
+ end
130
+
131
+ it 'should allow privacy-enhanced YouTube video embeds via the YouTube transformer' do
132
+ input = '<iframe width="420" height="315" src="https://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
133
+
134
+ Sanitize.fragment(input, :transformers => youtube_transformer)
135
+ .must_equal '<iframe width="420" height="315" src="https://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen="">&lt;script&gt;alert()&lt;/script&gt;</iframe>'
136
+ end
137
+
138
+ it 'should not allow non-YouTube video embeds via the YouTube transformer' do
139
+ input = '<iframe width="420" height="315" src="http://www.fake-youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen></iframe>'
140
+
141
+ Sanitize.fragment(input, :transformers => youtube_transformer)
142
+ .must_equal('')
143
+ end
144
+ end
@@ -0,0 +1,84 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Unicode' do
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ # http://www.w3.org/TR/unicode-xml/#Charlist
9
+ describe 'Unsuitable characters' do
10
+ before do
11
+ @s = Sanitize.new(Sanitize::Config::RELAXED)
12
+ end
13
+
14
+ it 'should strip deprecated grave and acute clones' do
15
+ @s.document("a\u0340b\u0341c").must_equal "<html><head></head><body>abc</body></html>\n"
16
+ @s.fragment("a\u0340b\u0341c").must_equal 'abc'
17
+ end
18
+
19
+ it 'should strip deprecated Khmer characters' do
20
+ @s.document("a\u17a3b\u17d3c").must_equal "<html><head></head><body>abc</body></html>\n"
21
+ @s.fragment("a\u17a3b\u17d3c").must_equal 'abc'
22
+ end
23
+
24
+ it 'should strip line and paragraph separator punctuation' do
25
+ @s.document("a\u2028b\u2029c").must_equal "<html><head></head><body>abc</body></html>\n"
26
+ @s.fragment("a\u2028b\u2029c").must_equal 'abc'
27
+ end
28
+
29
+ it 'should strip bidi embedding control characters' do
30
+ @s.document("a\u202ab\u202bc\u202cd\u202de\u202e")
31
+ .must_equal "<html><head></head><body>abcde</body></html>\n"
32
+
33
+ @s.fragment("a\u202ab\u202bc\u202cd\u202de\u202e")
34
+ .must_equal 'abcde'
35
+ end
36
+
37
+ it 'should strip deprecated symmetric swapping characters' do
38
+ @s.document("a\u206ab\u206bc").must_equal "<html><head></head><body>abc</body></html>\n"
39
+ @s.fragment("a\u206ab\u206bc").must_equal 'abc'
40
+ end
41
+
42
+ it 'should strip deprecated Arabic form shaping characters' do
43
+ @s.document("a\u206cb\u206dc").must_equal "<html><head></head><body>abc</body></html>\n"
44
+ @s.fragment("a\u206cb\u206dc").must_equal 'abc'
45
+ end
46
+
47
+ it 'should strip deprecated National digit shape characters' do
48
+ @s.document("a\u206eb\u206fc").must_equal "<html><head></head><body>abc</body></html>\n"
49
+ @s.fragment("a\u206eb\u206fc").must_equal 'abc'
50
+ end
51
+
52
+ it 'should strip interlinear annotation characters' do
53
+ @s.document("a\ufff9b\ufffac\ufffb").must_equal "<html><head></head><body>abc</body></html>\n"
54
+ @s.fragment("a\ufff9b\ufffac\ufffb").must_equal 'abc'
55
+ end
56
+
57
+ it 'should strip BOM/zero-width non-breaking space characters' do
58
+ @s.document("a\ufeffbc").must_equal "<html><head></head><body>abc</body></html>\n"
59
+ @s.fragment("a\ufeffbc").must_equal 'abc'
60
+ end
61
+
62
+ it 'should strip object replacement characters' do
63
+ @s.document("a\ufffcbc").must_equal "<html><head></head><body>abc</body></html>\n"
64
+ @s.fragment("a\ufffcbc").must_equal 'abc'
65
+ end
66
+
67
+ it 'should strip musical notation scoping characters' do
68
+ @s.document("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}")
69
+ .must_equal "<html><head></head><body>abcdefgh</body></html>\n"
70
+
71
+ @s.fragment("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}")
72
+ .must_equal 'abcdefgh'
73
+ end
74
+
75
+ it 'should strip language tag code point characters' do
76
+ str = 'a'
77
+ (0xE0000..0xE007F).each {|n| str << [n].pack('U') }
78
+ str << 'b'
79
+
80
+ @s.document(str).must_equal "<html><head></head><body>ab</body></html>\n"
81
+ @s.fragment(str).must_equal 'ab'
82
+ end
83
+ end
84
+ end