sanitize 2.1.1 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sanitize might be problematic. Click here for more details.

@@ -0,0 +1,235 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ # Miscellaneous attempts to sneak maliciously crafted HTML past Sanitize. Many
5
+ # of these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
6
+ # Sheet.
7
+ #
8
+ # https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
9
+
# Regression suite of known XSS filter-evasion payloads. Unless a nested group
# installs its own sanitizer, every example runs against a sanitizer built from
# Sanitize::Config::RELAXED and asserts on the exact sanitized output.
#
# Expectations are written as `_(value).must_equal expected` because calling
# `must_equal` directly on an arbitrary object is deprecated in Minitest 5 and
# removed in Minitest 6.
describe 'Malicious HTML' do
  make_my_diffs_pretty!
  parallelize_me!

  before do
    @s = Sanitize.new(Sanitize::Config::RELAXED)
  end

  describe 'comments' do
    it 'should not allow script injection via conditional comments' do
      _(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]))
        .must_equal ''
    end
  end

  describe 'interpolation (ERB, PHP, etc.)' do
    it 'should escape ERB-style tags' do
      _(@s.fragment('<% naughty_ruby_code %>'))
        .must_equal '&lt;% naughty_ruby_code %&gt;'

      _(@s.fragment('<%= naughty_ruby_code %>'))
        .must_equal '&lt;%= naughty_ruby_code %&gt;'
    end

    it 'should remove PHP-style tags' do
      _(@s.fragment('<? naughtyPHPCode(); ?>'))
        .must_equal ''

      _(@s.fragment('<?= naughtyPHPCode(); ?>'))
        .must_equal ''
    end
  end

  describe '<body>' do
    it 'should not be possible to inject JS via a malformed event attribute' do
      _(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>'))
        .must_equal "<html><head></head><body></body></html>"
    end
  end

  describe '<iframe>' do
    it 'should not be possible to inject an iframe using an improperly closed tag' do
      _(@s.fragment(%[<iframe src=http://ha.ckers.org/scriptlet.html <]))
        .must_equal ''
    end
  end

  describe '<img>' do
    it 'should not be possible to inject JS via an unquoted <img> src attribute' do
      _(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal '<img>'
    end

    it 'should not be possible to inject JS using grave accents as <img> src delimiters' do
      _(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal '<img>'
    end

    it 'should not be possible to inject <script> via a malformed <img> tag' do
      _(@s.fragment('<img """><script>alert("XSS")</script>">'))
        .must_equal '<img>"&gt;'
    end

    it 'should not be possible to inject protocol-based JS' do
      # Decimal character references, semicolon-terminated.
      _(@s.fragment('<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>'))
        .must_equal '<img>'

      # Zero-padded decimal character references without semicolons.
      _(@s.fragment('<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>'))
        .must_equal '<img>'

      # Hexadecimal character references without semicolons.
      _(@s.fragment('<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>'))
        .must_equal '<img>'

      # Encoded tab character.
      _(@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">]))
        .must_equal '<img>'

      # Encoded newline.
      _(@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">]))
        .must_equal '<img>'

      # Encoded carriage return.
      _(@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">]))
        .must_equal '<img>'

      # Null byte.
      _(@s.fragment(%[<img src=java\0script:alert("XSS")>]))
        .must_equal '<img>'

      # Spaces plus meta char.
      _(@s.fragment(%[<img src=" &#14; javascript:alert('XSS');">]))
        .must_equal '<img>'

      # Mixed spaces and tabs.
      _(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]))
        .must_equal '<img>'
    end

    it 'should not be possible to inject protocol-based JS via whitespace' do
      _(@s.fragment(%[<img src="jav\tascript:alert('XSS');">]))
        .must_equal '<img>'
    end

    it 'should not be possible to inject JS using a half-open <img> tag' do
      _(@s.fragment(%[<img src="javascript:alert('XSS')"]))
        .must_equal ''
    end
  end

  describe '<script>' do
    it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do
      _(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]))
        .must_equal ''
    end

    it 'should not be possible to inject <script> via extraneous open brackets' do
      _(@s.fragment(%[<<script>alert("XSS");//<</script>]))
        .must_equal '&lt;'
    end
  end

  # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
  # attempt to preserve server-side includes. This can result in XSS since an
  # unescaped double quote can allow an attacker to inject a non-allowlisted
  # attribute. Sanitize works around this by implementing its own escaping for
  # affected attributes.
  #
  # The relevant libxml2 code is here:
  # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
  describe 'unsafe libxml2 server-side includes in attributes' do
    # Evaluated once, when the spec class is defined. The exact-output
    # examples below are skipped when this is truthy; the round-trip examples
    # always run.
    using_unpatched_libxml2 = Nokogiri::VersionInfo.instance.libxml2_using_system?

    # For each tag: the attributes whose value is expected to come back
    # URL-escaped, and the attributes expected to be left unescaped.
    tag_configs = [
      {
        tag_name: 'a',
        escaped_attrs: %w[ action href src name ],
        unescaped_attrs: []
      },

      {
        tag_name: 'div',
        escaped_attrs: %w[ action href src ],
        unescaped_attrs: %w[ name ]
      }
    ]

    # Replaces the RELAXED sanitizer from the outer group with a minimal
    # config that allows only the tags and attributes exercised here.
    before do
      @s = Sanitize.new({
        elements: %w[ a div ],

        attributes: {
          all: %w[ action href src name ]
        }
      })
    end

    # Examples are generated at class-definition time, one pair per
    # tag/attribute combination; the blocks close over the loop locals.
    tag_configs.each do |tag_config|
      tag_name = tag_config[:tag_name]

      tag_config[:escaped_attrs].each do |attr_name|
        input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]

        it 'should escape unsafe characters in attributes' do
          skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2

          # This uses Nokogumbo's HTML-compliant serializer rather than
          # libxml2's.
          _(@s.fragment(input))
            .must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])

          # This uses the not-quite-standards-compliant libxml2 serializer via
          # Nokogiri, so the output may be a little different as of Nokogiri
          # 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
          # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
          fragment = Nokogiri::HTML.fragment(input)
          @s.node!(fragment)
          _(fragment.to_html)
            .must_equal(%[<#{tag_name} #{attr_name}="examp&lt;!--%22%20onmouseover=alert(1)&gt;--&gt;le.com">foo</#{tag_name}>])
        end

        it 'should round-trip to the same output' do
          output = @s.fragment(input)
          _(@s.fragment(output)).must_equal(output)
        end
      end

      tag_config[:unescaped_attrs].each do |attr_name|
        input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]

        it 'should not escape characters unnecessarily' do
          skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2

          # This uses Nokogumbo's HTML-compliant serializer rather than
          # libxml2's.
          _(@s.fragment(input))
            .must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])

          # This uses the not-quite-standards-compliant libxml2 serializer via
          # Nokogiri, so the output may be a little different as of Nokogiri
          # 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
          # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
          fragment = Nokogiri::HTML.fragment(input)
          @s.node!(fragment)
          _(fragment.to_html)
            .must_equal(%[<#{tag_name} #{attr_name}='examp&lt;!--" onmouseover=alert(1)&gt;--&gt;le.com'>foo</#{tag_name}>])
        end

        it 'should round-trip to the same output' do
          output = @s.fragment(input)
          _(@s.fragment(output)).must_equal(output)
        end
      end
    end
  end

  # https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
  describe 'foreign content bypass in relaxed config' do
    it 'prevents a sanitization bypass via carefully crafted foreign content' do
      # The same payload is tried inside both <math> and <svg> for each tag.
      %w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
        _(@s.fragment(%[<math><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/]))
          .must_equal ''

        _(@s.fragment(%[<svg><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/]))
          .must_equal ''
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
# Parser-level behaviour of Sanitize.fragment: entity handling, whitespace
# preservation when serializing, unterminated elements, stray angle brackets,
# and traversal of nodes that a transformer inserts while the tree is walked.
#
# Expectations are written as `_(value).must_equal expected` because calling
# `must_equal` directly on an arbitrary object is deprecated in Minitest 5 and
# removed in Minitest 6.
describe 'Parser' do
  make_my_diffs_pretty!
  parallelize_me!

  it 'should translate valid entities into characters' do
    _(Sanitize.fragment("&apos;&eacute;&amp;")).must_equal("'é&amp;")
  end

  it 'should translate orphaned ampersands into entities' do
    _(Sanitize.fragment('at&t')).must_equal('at&amp;t')
  end

  it 'should not add newlines after tags when serializing a fragment' do
    _(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :elements => ['div', 'p']))
      .must_equal "<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>"
  end

  it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do
    _(Sanitize.fragment('foo <script>bar')).must_equal 'foo '
    _(Sanitize.fragment('foo <style>bar')).must_equal 'foo '
  end

  it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
    _(Sanitize.fragment('1 > 2 and 2 < 1')).must_equal '1 &gt; 2 and 2 &lt; 1'
    _(Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D')).must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
  end

  describe 'when siblings are added after a node during traversal' do
    it 'the added siblings should be traversed' do
      html = %[
        <div id="one">
          <div id="one_one">
            <div id="one_one_one"></div>
          </div>
          <div id="one_two"></div>
        </div>
        <div id="two">
          <div id="two_one"><div id="two_one_one"></div></div>
          <div id="two_two"></div>
        </div>
        <div id="three"></div>
      ]

      # Ids of the injected <b> elements, in the order the transformer sees them.
      siblings = []

      # Transformer: after every <div> it inserts a <b id="added_<div id>">
      # sibling, and it records the id of every <b> it is handed. Each visited
      # node is allowlisted via the returned hash.
      record_added_siblings = lambda do |env|
        node = env[:node]

        case node.name
        when 'div'
          node.add_next_sibling(%[<b id="added_#{node['id']}">])
        when 'b'
          siblings << node[:id]
        end

        { :node_allowlist => [node] }
      end

      Sanitize.fragment(html, :transformers => record_added_siblings)

      # All siblings should be traversed, and in the order added.
      _(siblings).must_equal [
        "added_one_one_one",
        "added_one_one",
        "added_one_two",
        "added_one",
        "added_two_one_one",
        "added_two_one",
        "added_two_two",
        "added_two",
        "added_three"
      ]
    end
  end
end