jets-html-sanitizer 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,564 @@
1
+ require "minitest/autorun"
2
+ require "jets-html-sanitizer"
3
+ require "rails/dom/testing/assertions/dom_assertions"
4
+
5
# Minitest suite for the Jets::Html sanitizers.
#
# Covers four entry points:
# * Jets::Html::Sanitizer          - base class; #sanitize must raise.
# * Jets::Html::FullSanitizer      - via the #full_sanitize helper.
# * Jets::Html::LinkSanitizer      - via the #link_sanitize helper.
# * Jets::Html::WhiteListSanitizer - via #white_list_sanitize,
#   #assert_sanitized and #sanitize_css.
#
# Several groups of tests are generated with define_method from a list of
# inputs; the helper methods they rely on live in the protected section at
# the bottom of the class.
class SanitizersTest < Minitest::Test
  include Rails::Dom::Testing::Assertions::DomAssertions

  def test_sanitizer_sanitize_raises_not_implemented_error
    assert_raises NotImplementedError do
      Jets::Html::Sanitizer.new.sanitize('')
    end
  end

  def test_sanitize_nested_script
    sanitizer = Jets::Html::WhiteListSanitizer.new
    assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', sanitizer.sanitize('<script><script></script>alert("XSS");<script><</script>/</script><script>script></script>', tags: %w(em))
  end

  def test_sanitize_nested_script_in_style
    sanitizer = Jets::Html::WhiteListSanitizer.new
    assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', sanitizer.sanitize('<style><script></style>alert("XSS");<style><</style>/</style><style>script></style>', tags: %w(em))
  end

  # Minimal concrete sanitizer used only by the remove_xpaths tests: it parses
  # the input with Loofah and hands the fragment plus options[:xpaths] to
  # remove_xpaths (not defined in this file; presumably inherited from
  # Jets::Html::Sanitizer), returning the result as a string.
  class XpathRemovalTestSanitizer < Jets::Html::Sanitizer
    def sanitize(html, options = {})
      fragment = Loofah.fragment(html)
      remove_xpaths(fragment, options[:xpaths]).to_s
    end
  end

  def test_remove_xpaths_removes_an_xpath
    html = %(<h1>hello <script>code!</script></h1>)
    assert_equal %(<h1>hello </h1>), xpath_sanitize(html, xpaths: %w(.//script))
  end

  def test_remove_xpaths_removes_all_occurrences_of_xpath
    html = %(<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>)
    assert_equal %(<section><header></header><p>hello </p></section>), xpath_sanitize(html, xpaths: %w(.//script))
  end

  def test_remove_xpaths_called_with_faulty_xpath
    assert_raises Nokogiri::XML::XPath::SyntaxError do
      xpath_sanitize('<h1>hello<h1>', xpaths: %w(..faulty_xpath))
    end
  end

  def test_remove_xpaths_called_with_xpath_string
    assert_equal '', xpath_sanitize('<a></a>', xpaths: './/a')
  end

  def test_remove_xpaths_called_with_enumerable_xpaths
    assert_equal '', xpath_sanitize('<a><span></span></a>', xpaths: %w(.//a .//span))
  end

  def test_strip_tags_with_quote
    input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
    assert_equal ' hi', full_sanitize(input)
  end

  def test_strip_invalid_html
    assert_equal "&lt;&lt;", full_sanitize("<<<bad html")
  end

  def test_strip_nested_tags
    expected = "Wei&lt;a onclick='alert(document.cookie);'/&gt;rdos"
    input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
    assert_equal expected, full_sanitize(input)
  end

  def test_strip_tags_multiline
    expected = %{This is a test.\n\n\n\nIt no longer contains any HTML.\n}
    input = %{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}

    assert_equal expected, full_sanitize(input)
  end

  def test_remove_unclosed_tags
    assert_equal "This is ", full_sanitize("This is <-- not\n a comment here.")
  end

  def test_strip_cdata
    assert_equal "This has a ]]&gt; here.", full_sanitize("This has a <![CDATA[<section>]]> here.")
  end

  def test_strip_unclosed_cdata
    assert_equal "This has an unclosed ]] here...", full_sanitize("This has an unclosed <![CDATA[<section>]] here...")
  end

  def test_strip_blank_string
    assert_nil full_sanitize(nil)
    assert_equal "", full_sanitize("")
    assert_equal " ", full_sanitize(" ")
  end

  def test_strip_tags_with_plaintext
    assert_equal "Dont touch me", full_sanitize("Dont touch me")
  end

  def test_strip_tags_with_tags
    assert_equal "This is a test.", full_sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")
  end

  def test_escape_tags_with_many_open_quotes
    assert_equal "&lt;&lt;", full_sanitize("<<<bad html>")
  end

  def test_strip_tags_with_sentence
    assert_equal "This is a test.", full_sanitize("This is a test.")
  end

  def test_strip_tags_with_comment
    # Only the comment node is dropped, so the space on each side of it
    # survives: the expected text has TWO spaces between "a" and "here".
    assert_equal "This has a  here.", full_sanitize("This has a <!-- comment --> here.")
  end

  def test_strip_tags_with_frozen_string
    assert_equal "Frozen string with no tags", full_sanitize("Frozen string with no tags".freeze)
  end

  def test_full_sanitize_respect_html_escaping_of_the_given_string
    assert_equal 'test\r\nstring', full_sanitize('test\r\nstring')
    assert_equal '&amp;', full_sanitize('&')
    assert_equal '&amp;', full_sanitize('&amp;')
    assert_equal '&amp;amp;', full_sanitize('&amp;amp;')
    assert_equal 'omg &lt;script&gt;BOM&lt;/script&gt;', full_sanitize('omg &lt;script&gt;BOM&lt;/script&gt;')
  end

  def test_strip_links_with_tags_in_tags
    expected = "&lt;a href='hello'&gt;all <b>day</b> long&lt;/a&gt;"
    input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
    assert_equal expected, link_sanitize(input)
  end

  def test_strip_links_with_unclosed_tags
    assert_equal "", link_sanitize("<a<a")
  end

  def test_strip_links_with_plaintext
    assert_equal "Dont touch me", link_sanitize("Dont touch me")
  end

  def test_strip_links_with_line_feed_and_uppercase_tag
    assert_equal "on my mind\nall day long", link_sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
  end

  def test_strip_links_leaves_nonlink_tags
    assert_equal "My mind\nall <b>day</b> long", link_sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
  end

  def test_strip_links_with_links
    assert_equal "0wn3d", link_sanitize("<a href='http://www.rubyonjets.com/'><a href='http://www.rubyonjets.com/' onlclick='steal()'>0wn3d</a></a>")
  end

  def test_strip_links_with_linkception
    assert_equal "Magic", link_sanitize("<a href='http://www.rubyonjets.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
  end

  def test_strip_links_with_a_tag_in_href
    assert_equal "FrrFox", link_sanitize("<href onlclick='steal()'>FrrFox</a></href>")
  end

  def test_sanitize_form
    assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ''
  end

  def test_sanitize_plaintext
    assert_sanitized "<plaintext><span>foo</span></plaintext>", "<span>foo</span>"
  end

  def test_sanitize_script
    assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cblah blah blahd e f"
  end

  def test_sanitize_js_handlers
    raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
    assert_sanitized raw, %{onthis="do that" <a href="#" name="foo">hello</a>}
  end

  def test_sanitize_javascript_href
    raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
    assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
  end

  def test_sanitize_image_src
    raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
    assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>}
  end

  # One generated test per element in Loofah's allowed list (minus script and
  # form): the tag and its title attribute must survive while the onclick
  # attribute and the nested <bad> element are stripped.
  tags = Loofah::HTML5::WhiteList::ALLOWED_ELEMENTS - %w(script form)
  tags.each do |tag_name|
    define_method "test_should_allow_#{tag_name}_tag" do
      scope_allowed_tags(tags) do
        assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end)
      end
    end
  end

  def test_should_allow_anchors
    assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href=\"foo\">baz</a>)
  end

  def test_video_poster_sanitization
    scope_allowed_tags(%w(video)) do
      scope_allowed_attributes %w(src poster) do
        assert_sanitized %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>), %(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
        assert_sanitized %(<video src="videofile.ogg" poster=javascript:alert(1)></video>), %(<video src="videofile.ogg"></video>)
      end
    end
  end

  # RFC 3986, sec 4.2
  def test_allow_colons_in_path_component
    assert_sanitized "<a href=\"./this:that\">foo</a>"
  end

  # One generated test per image attribute that must be kept while the
  # onclick handler next to it is removed.
  %w(src width height alt).each do |img_attr|
    define_method "test_should_allow_image_#{img_attr}_attribute" do
      assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />)
    end
  end

  def test_should_handle_non_html
    assert_sanitized 'abc'
  end

  def test_should_handle_blank_text
    [nil, '', ' '].each { |blank| assert_sanitized blank }
  end

  def test_setting_allowed_tags_affects_sanitization
    scope_allowed_tags %w(u) do |sanitizer|
      assert_equal '<u></u>', sanitizer.sanitize('<a><u></u></a>')
    end
  end

  def test_setting_allowed_attributes_affects_sanitization
    scope_allowed_attributes %w(foo) do |sanitizer|
      input = '<a foo="hello" bar="world"></a>'
      assert_equal '<a foo="hello"></a>', sanitizer.sanitize(input)
    end
  end

  def test_custom_tags_overrides_allowed_tags
    # %w(u) builds the array ["u"]; %(u) would build the String "u", which is
    # not the collection every other scope_allowed_tags caller passes.
    scope_allowed_tags %w(u) do |sanitizer|
      input = '<a><u></u></a>'
      assert_equal '<a></a>', sanitizer.sanitize(input, tags: %w(a))
    end
  end

  def test_custom_attributes_overrides_allowed_attributes
    # %w(foo) builds the array ["foo"]; %(foo) would build the String "foo".
    scope_allowed_attributes %w(foo) do |sanitizer|
      input = '<a foo="hello" bar="world"></a>'
      assert_equal '<a bar="world"></a>', sanitizer.sanitize(input, attributes: %w(bar))
    end
  end

  def test_should_allow_custom_tags
    text = "<u>foo</u>"
    assert_equal text, white_list_sanitize(text, tags: %w(u))
  end

  def test_should_allow_only_custom_tags
    text = "<u>foo</u> with <i>bar</i>"
    assert_equal "<u>foo</u> with bar", white_list_sanitize(text, tags: %w(u))
  end

  def test_should_allow_custom_tags_with_attributes
    text = %(<blockquote cite="http://example.com/">foo</blockquote>)
    assert_equal text, white_list_sanitize(text)
  end

  def test_should_allow_custom_tags_with_custom_attributes
    text = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
    assert_equal text, white_list_sanitize(text, attributes: ['foo'])
  end

  def test_scrub_style_if_style_attribute_option_is_passed
    input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>'
    assert_equal '<p style="color: #000;"></p>', white_list_sanitize(input, attributes: %w(style))
  end

  def test_should_raise_argument_error_if_tags_is_not_enumerable
    assert_raises ArgumentError do
      white_list_sanitize('<a>some html</a>', tags: 'foo')
    end
  end

  def test_should_raise_argument_error_if_attributes_is_not_enumerable
    assert_raises ArgumentError do
      white_list_sanitize('<a>some html</a>', attributes: 'foo')
    end
  end

  def test_should_not_accept_non_loofah_inheriting_scrubber
    # A plain Object that merely responds to #scrub must be rejected.
    scrubber = Object.new
    def scrubber.scrub(node); node.name = 'h1'; end

    assert_raises Loofah::ScrubberNotFound do
      white_list_sanitize('<a>some html</a>', scrubber: scrubber)
    end
  end

  def test_should_accept_loofah_inheriting_scrubber
    scrubber = Loofah::Scrubber.new
    def scrubber.scrub(node); node.name = 'h1'; end

    html = "<script>hello!</script>"
    assert_equal "<h1>hello!</h1>", white_list_sanitize(html, scrubber: scrubber)
  end

  def test_should_accept_loofah_scrubber_that_wraps_a_block
    scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' }
    html = "<script>hello!</script>"
    assert_equal "<h1>hello!</h1>", white_list_sanitize(html, scrubber: scrubber)
  end

  def test_custom_scrubber_takes_precedence_over_other_options
    scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' }
    html = "<script>hello!</script>"
    assert_equal "<h1>hello!</h1>", white_list_sanitize(html, scrubber: scrubber, tags: ['foo'])
  end

  # One generated test per (tag, URL attribute) pair: a javascript: URL must
  # be removed while the harmless title attribute is kept.
  [%w(img src), %w(a href)].each do |(tag, attr)|
    define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
      assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>)
    end
  end

  def test_should_block_script_tag
    assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
  end

  def test_should_not_fall_for_xss_image_hack_with_uppercase_tags
    assert_sanitized %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), %(<img>alert("XSS")"&gt;)
  end

  # One generated test per obfuscated javascript: payload in an <img> src;
  # each must be reduced to a bare <img>. The payload itself is embedded in
  # the generated test name.
  [%(<IMG SRC="javascript:alert('XSS');">),
   %(<IMG SRC=javascript:alert('XSS')>),
   %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
   %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
   %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
   %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
   %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
   %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
   %(<IMG SRC="jav\tascript:alert('XSS');">),
   %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
   %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
   %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
   %(<IMG SRC=" &#14; javascript:alert('XSS');">),
   %(<IMG SRC="javascript&#x3a;alert('XSS');">),
   %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each do |img_hack|
    define_method "test_should_not_fall_for_xss_image_hack_#{img_hack}" do
      assert_sanitized img_hack, "<img>"
    end
  end

  def test_should_sanitize_tag_broken_up_by_null
    assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), ""
  end

  def test_should_sanitize_invalid_script_tag
    assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
  end

  def test_should_sanitize_script_tag_with_multiple_open_brackets
    assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;"
    assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), ""
  end

  def test_should_sanitize_unclosed_script
    assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), ""
  end

  def test_should_sanitize_half_open_scripts
    assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>"
  end

  def test_should_not_fall_for_ridiculous_hack
    img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
    assert_sanitized img_hack, "<img>"
  end

  def test_should_sanitize_attributes
    assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="#{CGI.escapeHTML "'><script>alert()</script>"}">blah</span>)
  end

  def test_should_sanitize_illegal_style_properties
    raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
    expected = %(display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;)
    assert_equal expected, sanitize_css(raw)
  end

  def test_should_sanitize_with_trailing_space
    raw = "display:block; "
    expected = "display:block;"
    assert_equal expected, sanitize_css(raw)
  end

  def test_should_sanitize_xul_style_attributes
    raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
    assert_equal '', sanitize_css(raw)
  end

  def test_should_sanitize_invalid_tag_names
    assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
  end

  def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
    assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
  end

  def test_should_sanitize_invalid_tag_names_in_single_tags
    assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />")
  end

  def test_should_sanitize_img_dynsrc_lowsrc
    assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />")
  end

  def test_should_sanitize_div_background_image_unicode_encoded
    raw = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029)
    assert_equal '', sanitize_css(raw)
  end

  def test_should_sanitize_div_style_expression
    raw = %(width: expression(alert('XSS'));)
    assert_equal '', sanitize_css(raw)
  end

  def test_should_sanitize_across_newlines
    raw = %(\nwidth:\nexpression(alert('XSS'));\n)
    assert_equal '', sanitize_css(raw)
  end

  def test_should_sanitize_img_vbscript
    assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />'
  end

  def test_should_sanitize_cdata_section
    assert_sanitized "<![CDATA[<span>section</span>]]>", "section]]&gt;"
  end

  def test_should_sanitize_unterminated_cdata_section
    assert_sanitized "<![CDATA[<span>neverending...", "neverending..."
  end

  def test_should_not_mangle_urls_with_ampersand
    assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>}
  end

  def test_should_sanitize_neverending_attribute
    assert_sanitized "<span class=\"\\", "<span class=\"\\\">"
  end

  # One generated test per spelling of the hex entity for ":" (x3a, x003a,
  # x3A, x003A) inside a javascript: href; each href must be removed.
  [
    %(<a href="javascript&#x3a;alert('XSS');">),
    %(<a href="javascript&#x003a;alert('XSS');">),
    %(<a href="javascript&#x3A;alert('XSS');">),
    %(<a href="javascript&#x003A;alert('XSS');">)
  ].each_with_index do |enc_hack, i|
    define_method "test_x03a_handling_#{i+1}" do
      assert_sanitized enc_hack, "<a>"
    end
  end

  def test_x03a_legitimate
    assert_sanitized %(<a href="http&#x3a;//legit">), %(<a href="http://legit">)
    assert_sanitized %(<a href="http&#x3A;//legit">), %(<a href="http://legit">)
  end

  def test_sanitize_ascii_8bit_string
    white_list_sanitize('<a>hello</a>'.encode('ASCII-8BIT')).tap do |sanitized|
      assert_equal '<a>hello</a>', sanitized
      assert_equal Encoding::UTF_8, sanitized.encoding
    end
  end

  def test_sanitize_data_attributes
    assert_sanitized %(<a href="/blah" data-method="post">foo</a>), %(<a href="/blah">foo</a>)
    assert_sanitized %(<a data-remote="true" data-type="script" data-method="get" data-cross-domain="true" href="attack.js">Launch the missiles</a>), %(<a href="attack.js">Launch the missiles</a>)
  end

  def test_allow_data_attribute_if_requested
    text = %(<a data-foo="foo">foo</a>)
    assert_equal %(<a data-foo="foo">foo</a>), white_list_sanitize(text, attributes: ['data-foo'])
  end

  def test_uri_escaping_of_href_attr_in_a_tag_in_white_list_sanitizer
    html = %{<a href='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}

    text = white_list_sanitize(html)

    assert_equal %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, text
  end

  def test_uri_escaping_of_src_attr_in_a_tag_in_white_list_sanitizer
    html = %{<a src='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}

    text = white_list_sanitize(html)

    assert_equal %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, text
  end

  def test_uri_escaping_of_name_attr_in_a_tag_in_white_list_sanitizer
    html = %{<a name='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}

    text = white_list_sanitize(html)

    assert_equal %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, text
  end

  def test_uri_escaping_of_name_action_in_a_tag_in_white_list_sanitizer
    html = %{<a action='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}

    text = white_list_sanitize(html, attributes: ['action'])

    assert_equal %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, text
  end

  protected

  # Runs +input+ through a fresh XpathRemovalTestSanitizer; +options+ is
  # passed straight through (the tests supply :xpaths).
  def xpath_sanitize(input, options = {})
    XpathRemovalTestSanitizer.new.sanitize(input, options)
  end

  # Runs +input+ through a fresh Jets::Html::FullSanitizer.
  def full_sanitize(input, options = {})
    Jets::Html::FullSanitizer.new.sanitize(input, options)
  end

  # Runs +input+ through a fresh Jets::Html::LinkSanitizer.
  def link_sanitize(input, options = {})
    Jets::Html::LinkSanitizer.new.sanitize(input, options)
  end

  # Runs +input+ through a fresh Jets::Html::WhiteListSanitizer.
  def white_list_sanitize(input, options = {})
    Jets::Html::WhiteListSanitizer.new.sanitize(input, options)
  end

  # Asserts that white-list sanitizing +input+ yields markup DOM-equal to
  # +expected+. When +expected+ is omitted the input is expected to come back
  # unchanged; a nil/false +input+ is expected to sanitize to nil.
  def assert_sanitized(input, expected = nil)
    if input
      assert_dom_equal expected || input, white_list_sanitize(input)
    else
      assert_nil white_list_sanitize(input)
    end
  end

  # Runs +input+ through WhiteListSanitizer#sanitize_css on a fresh instance.
  def sanitize_css(input)
    Jets::Html::WhiteListSanitizer.new.sanitize_css(input)
  end

  # Temporarily replaces the class-level WhiteListSanitizer.allowed_tags with
  # +tags+, yields a new sanitizer instance to the block, and restores the
  # previous value afterwards even if the block raises.
  def scope_allowed_tags(tags)
    old_tags = Jets::Html::WhiteListSanitizer.allowed_tags
    Jets::Html::WhiteListSanitizer.allowed_tags = tags
    yield Jets::Html::WhiteListSanitizer.new
  ensure
    Jets::Html::WhiteListSanitizer.allowed_tags = old_tags
  end

  # Temporarily replaces the class-level
  # WhiteListSanitizer.allowed_attributes with +attributes+, yields a new
  # sanitizer instance to the block, and restores the previous value
  # afterwards even if the block raises.
  def scope_allowed_attributes(attributes)
    old_attributes = Jets::Html::WhiteListSanitizer.allowed_attributes
    Jets::Html::WhiteListSanitizer.allowed_attributes = attributes
    yield Jets::Html::WhiteListSanitizer.new
  ensure
    Jets::Html::WhiteListSanitizer.allowed_attributes = old_attributes
  end
end