loofah 2.3.1 → 2.8.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +91 -40
  3. data/README.md +7 -4
  4. data/lib/loofah.rb +33 -16
  5. data/lib/loofah/elements.rb +74 -73
  6. data/lib/loofah/helpers.rb +5 -4
  7. data/lib/loofah/html/document.rb +1 -0
  8. data/lib/loofah/html/document_fragment.rb +4 -2
  9. data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
  10. data/lib/loofah/html5/safelist.rb +23 -0
  11. data/lib/loofah/html5/scrub.rb +21 -21
  12. data/lib/loofah/instance_methods.rb +5 -3
  13. data/lib/loofah/metahelpers.rb +2 -1
  14. data/lib/loofah/scrubber.rb +8 -7
  15. data/lib/loofah/scrubbers.rb +11 -10
  16. data/lib/loofah/version.rb +5 -0
  17. data/lib/loofah/xml/document.rb +1 -0
  18. data/lib/loofah/xml/document_fragment.rb +2 -1
  19. metadata +27 -93
  20. data/.gemtest +0 -0
  21. data/Gemfile +0 -22
  22. data/Manifest.txt +0 -41
  23. data/Rakefile +0 -81
  24. data/benchmark/benchmark.rb +0 -149
  25. data/benchmark/fragment.html +0 -96
  26. data/benchmark/helper.rb +0 -73
  27. data/benchmark/www.slashdot.com.html +0 -2560
  28. data/test/assets/msword.html +0 -63
  29. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  30. data/test/helper.rb +0 -18
  31. data/test/html5/test_sanitizer.rb +0 -401
  32. data/test/html5/test_scrub.rb +0 -10
  33. data/test/integration/test_ad_hoc.rb +0 -220
  34. data/test/integration/test_helpers.rb +0 -43
  35. data/test/integration/test_html.rb +0 -72
  36. data/test/integration/test_scrubbers.rb +0 -400
  37. data/test/integration/test_xml.rb +0 -55
  38. data/test/unit/test_api.rb +0 -142
  39. data/test/unit/test_encoding.rb +0 -20
  40. data/test/unit/test_helpers.rb +0 -62
  41. data/test/unit/test_scrubber.rb +0 -229
  42. data/test/unit/test_scrubbers.rb +0 -14
@@ -1,43 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestHelpers < Loofah::TestCase
4
- context ".strip_tags" do
5
- context "on safe markup" do
6
- it "strip out tags" do
7
- assert_equal "omgwtfbbq!!1!", Loofah::Helpers.strip_tags("<div>omgwtfbbq</div><span>!!1!</span>")
8
- end
9
- end
10
-
11
- context "on hack attack" do
12
- it "strip escape html entities" do
13
- bad_shit = "&lt;script&gt;alert('evil')&lt;/script&gt;"
14
- assert_equal bad_shit, Loofah::Helpers.strip_tags(bad_shit)
15
- end
16
- end
17
- end
18
-
19
- context ".sanitize" do
20
- context "on safe markup" do
21
- it "render the safe html" do
22
- html = "<div>omgwtfbbq</div><span>!!1!</span>"
23
- assert_equal html, Loofah::Helpers.sanitize(html)
24
- end
25
- end
26
-
27
- context "on hack attack" do
28
- it "strip the unsafe tags" do
29
- assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><span>w00t</span>")
30
- end
31
-
32
- it "strips form tags" do
33
- assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><form action=\"/foo/bar\" method=\"post\"><input></form><span>w00t</span>")
34
- end
35
- end
36
- end
37
-
38
- context ".sanitize_css" do
39
- it "removes unsafe css properties" do
40
- assert_match(/display:\s*block;\s*background-color:\s*blue;/, Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg);background-color:blue"))
41
- end
42
- end
43
- end
@@ -1,72 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestHtml < Loofah::TestCase
4
- context "html fragment" do
5
- context "#to_s" do
6
- it "not include head tags (like style)" do
7
- skip "depends on nokogiri version"
8
- html = Loofah.fragment "<style>foo</style><div>bar</div>"
9
- assert_equal "<div>bar</div>", html.to_s
10
- end
11
- end
12
-
13
- context "#text" do
14
- it "not include head tags (like style)" do
15
- skip "depends on nokogiri version"
16
- html = Loofah.fragment "<style>foo</style><div>bar</div>"
17
- assert_equal "bar", html.text
18
- end
19
- end
20
-
21
- context "#to_text" do
22
- it "add newlines before and after html4 block elements" do
23
- html = Loofah.fragment "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
24
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
25
- end
26
-
27
- it "add newlines before and after html5 block elements" do
28
- html = Loofah.fragment "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
29
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
30
- end
31
-
32
- it "remove extraneous whitespace" do
33
- html = Loofah.fragment "<div>tweedle\n\n\t\n\s\nbeetle</div>"
34
- assert_equal "\ntweedle\n\nbeetle\n", html.to_text
35
- end
36
- end
37
-
38
- context 'with an `encoding` arg' do
39
- it "sets the parent document's encoding to accordingly" do
40
- html = Loofah.fragment "<style>foo</style><div>bar</div>", 'US-ASCII'
41
- assert_equal 'US-ASCII', html.document.encoding
42
- end
43
- end
44
- end
45
-
46
- context "html document" do
47
- context "#text" do
48
- it "not include head tags (like style)" do
49
- html = Loofah.document "<style>foo</style><div>bar</div>"
50
- assert_equal "bar", html.text
51
- end
52
- end
53
-
54
- context "#to_text" do
55
- it "add newlines before and after html4 block elements" do
56
- html = Loofah.document "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
57
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
58
- end
59
-
60
- it "add newlines before and after html5 block elements" do
61
- html = Loofah.document "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
62
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
63
- end
64
-
65
- it "remove extraneous whitespace" do
66
- html = Loofah.document "<div>tweedle\n\n\t\n\s\nbeetle</div>"
67
- assert_equal "\ntweedle\n\nbeetle\n", html.to_text
68
- end
69
- end
70
- end
71
- end
72
-
@@ -1,400 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestScrubbers < Loofah::TestCase
4
-
5
- INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
6
- INVALID_ESCAPED = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
7
- INVALID_PRUNED = "<div>quux</div>"
8
- INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"
9
-
10
- WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid><!--[if gts mso9]><div>microsofty stuff</div><![endif]-->"
11
- WHITEWASH_RESULT = "<div>foo</div>"
12
-
13
- NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
14
- NOFOLLOW_RESULT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
15
-
16
- NOFOLLOW_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
17
- NOFOLLOW_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="noopener nofollow">Click here</a>'
18
-
19
- NOOPENER_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
20
- NOOPENER_RESULT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
21
-
22
- NOOPENER_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
23
- NOOPENER_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="nofollow noopener">Click here</a>'
24
-
25
- UNPRINTABLE_FRAGMENT = "<b>Lo\u2029ofah ro\u2028cks!</b><script>x\u2028y</script>"
26
- UNPRINTABLE_RESULT = "<b>Loofah rocks!</b><script>xy</script>"
27
-
28
- ENTITY_FRAGMENT = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p><div>w00t</div>"
29
- ENTITY_TEXT = %Q(this is < that "&" the other > boo\'yaw00t)
30
-
31
- ENTITY_HACK_ATTACK = "<div><div>Hack attack!</div><div>&lt;script&gt;alert('evil')&lt;/script&gt;</div></div>"
32
- ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!&lt;script&gt;alert('evil')&lt;/script&gt;"
33
- ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!<script>alert('evil')</script>"
34
-
35
- context "Document" do
36
- context "#scrub!" do
37
- context ":escape" do
38
- it "escape bad tags" do
39
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
40
- result = doc.scrub! :escape
41
-
42
- assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
43
- assert_equal doc, result
44
- end
45
- end
46
-
47
- context ":prune" do
48
- it "prune bad tags" do
49
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
50
- result = doc.scrub! :prune
51
-
52
- assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
53
- assert_equal doc, result
54
- end
55
- end
56
-
57
- context ":strip" do
58
- it "strip bad tags" do
59
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
60
- result = doc.scrub! :strip
61
-
62
- assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
63
- assert_equal doc, result
64
- end
65
- end
66
-
67
- context ":whitewash" do
68
- it "whitewash the markup" do
69
- doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
70
- result = doc.scrub! :whitewash
71
-
72
- assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
73
- assert_equal doc, result
74
- end
75
- end
76
-
77
- context ":nofollow" do
78
- it "add a 'nofollow' attribute to hyperlinks" do
79
- doc = Loofah::HTML::Document.parse "<html><body>#{NOFOLLOW_FRAGMENT}</body></html>"
80
- result = doc.scrub! :nofollow
81
-
82
- assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
83
- assert_equal doc, result
84
- end
85
- end
86
-
87
- context ":unprintable" do
88
- it "removes unprintable unicode characters" do
89
- doc = Loofah::HTML::Document.parse "<html><body>#{UNPRINTABLE_FRAGMENT}</body></html>"
90
- result = doc.scrub! :unprintable
91
-
92
- assert_equal UNPRINTABLE_RESULT, doc.xpath("/html/body").inner_html
93
- assert_equal doc, result
94
- end
95
- end
96
- end
97
-
98
- context "#scrub_document" do
99
- it "be a shortcut for parse-and-scrub" do
100
- mock_doc = Object.new
101
- mock(Loofah).document(:string_or_io) { mock_doc }
102
- mock(mock_doc).scrub!(:method)
103
-
104
- Loofah.scrub_document(:string_or_io, :method)
105
- end
106
- end
107
-
108
- context "#text" do
109
- it "leave behind only inner text with html entities still escaped" do
110
- doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
111
- result = doc.text
112
-
113
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
114
- end
115
-
116
- context "with encode_special_chars => false" do
117
- it "leave behind only inner text with html entities unescaped" do
118
- doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
119
- result = doc.text(:encode_special_chars => false)
120
-
121
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
122
- end
123
- end
124
-
125
- context "with encode_special_chars => true" do
126
- it "leave behind only inner text with html entities still escaped" do
127
- doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
128
- result = doc.text(:encode_special_chars => true)
129
-
130
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
131
- end
132
- end
133
- end
134
-
135
- context "#to_s" do
136
- it "generate HTML" do
137
- doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
138
- refute_nil doc.xpath("/html").first
139
- refute_nil doc.xpath("/html/head").first
140
- refute_nil doc.xpath("/html/body").first
141
-
142
- string = doc.to_s
143
- assert_match %r/<!DOCTYPE/, string
144
- assert_match %r/<html>/, string
145
- assert_match %r/<head>/, string
146
- assert_match %r/<body>/, string
147
- end
148
- end
149
-
150
- context "#serialize" do
151
- it "generate HTML" do
152
- doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
153
- refute_nil doc.xpath("/html").first
154
- refute_nil doc.xpath("/html/head").first
155
- refute_nil doc.xpath("/html/body").first
156
-
157
- string = doc.serialize
158
- assert_match %r/<!DOCTYPE/, string
159
- assert_match %r/<html>/, string
160
- assert_match %r/<head>/, string
161
- assert_match %r/<body>/, string
162
- end
163
- end
164
-
165
- context "Node" do
166
- context "#scrub!" do
167
- it "only scrub subtree" do
168
- xml = Loofah.document <<-EOHTML
169
- <html><body>
170
- <div class='scrub'>
171
- <script>I should be removed</script>
172
- </div>
173
- <div class='noscrub'>
174
- <script>I should remain</script>
175
- </div>
176
- </body></html>
177
- EOHTML
178
- node = xml.at_css "div.scrub"
179
- node.scrub!(:prune)
180
- assert_match %r/I should remain/, xml.to_s
181
- refute_match %r/I should be removed/, xml.to_s
182
- end
183
- end
184
- end
185
-
186
- context "NodeSet" do
187
- context "#scrub!" do
188
- it "only scrub subtrees" do
189
- xml = Loofah.document <<-EOHTML
190
- <html><body>
191
- <div class='scrub'>
192
- <script>I should be removed</script>
193
- </div>
194
- <div class='noscrub'>
195
- <script>I should remain</script>
196
- </div>
197
- <div class='scrub'>
198
- <script>I should also be removed</script>
199
- </div>
200
- </body></html>
201
- EOHTML
202
- node_set = xml.css "div.scrub"
203
- assert_equal 2, node_set.length
204
- node_set.scrub!(:prune)
205
- assert_match %r/I should remain/, xml.to_s
206
- refute_match %r/I should be removed/, xml.to_s
207
- refute_match %r/I should also be removed/, xml.to_s
208
- end
209
- end
210
- end
211
- end
212
-
213
- context "DocumentFragment" do
214
- context "#scrub!" do
215
- context ":escape" do
216
- it "escape bad tags" do
217
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
218
- result = doc.scrub! :escape
219
-
220
- assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
221
- assert_equal doc, result
222
- end
223
- end
224
-
225
- context ":prune" do
226
- it "prune bad tags" do
227
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
228
- result = doc.scrub! :prune
229
-
230
- assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
231
- assert_equal doc, result
232
- end
233
- end
234
-
235
- context ":strip" do
236
- it "strip bad tags" do
237
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
238
- result = doc.scrub! :strip
239
-
240
- assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
241
- assert_equal doc, result
242
- end
243
- end
244
-
245
- context ":whitewash" do
246
- it "whitewash the markup" do
247
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
248
- result = doc.scrub! :whitewash
249
-
250
- assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
251
- assert_equal doc, result
252
- end
253
- end
254
-
255
- context ":nofollow" do
256
-
257
- context "for a hyperlink that does not have a rel attribute" do
258
- it "add a 'nofollow' attribute to hyperlinks" do
259
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_FRAGMENT}</div>"
260
- result = doc.scrub! :nofollow
261
-
262
- assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
263
- assert_equal doc, result
264
- end
265
- end
266
-
267
- context "for a hyperlink that does have a rel attribute" do
268
- it "appends nofollow to rel attribute" do
269
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_WITH_REL_FRAGMENT}</div>"
270
- result = doc.scrub! :nofollow
271
-
272
- assert_equal NOFOLLOW_WITH_REL_RESULT, doc.xpath("./div").inner_html
273
- assert_equal doc, result
274
- end
275
- end
276
-
277
-
278
- end
279
-
280
- context ":noopener" do
281
- context "for a hyperlink without a 'rel' attribute" do
282
- it "add a 'noopener' attribute to hyperlinks" do
283
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_FRAGMENT}</div>"
284
- result = doc.scrub! :noopener
285
-
286
- assert_equal NOOPENER_RESULT, doc.xpath("./div").inner_html
287
- assert_equal doc, result
288
- end
289
- end
290
-
291
- context "for a hyperlink that does have a rel attribute" do
292
- it "appends 'noopener' to 'rel' attribute" do
293
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_WITH_REL_FRAGMENT}</div>"
294
- result = doc.scrub! :noopener
295
-
296
- assert_equal NOOPENER_WITH_REL_RESULT, doc.xpath("./div").inner_html
297
- assert_equal doc, result
298
- end
299
- end
300
- end
301
-
302
- context ":unprintable" do
303
- it "removes unprintable unicode characters" do
304
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{UNPRINTABLE_FRAGMENT}</div>"
305
- result = doc.scrub! :unprintable
306
-
307
- assert_equal UNPRINTABLE_RESULT, doc.xpath("./div").inner_html
308
- assert_equal doc, result
309
- end
310
- end
311
- end
312
-
313
- context "#scrub_fragment" do
314
- it "be a shortcut for parse-and-scrub" do
315
- mock_doc = Object.new
316
- mock(Loofah).fragment(:string_or_io) { mock_doc }
317
- mock(mock_doc).scrub!(:method)
318
-
319
- Loofah.scrub_fragment(:string_or_io, :method)
320
- end
321
- end
322
-
323
- context "#text" do
324
- it "leave behind only inner text with html entities still escaped" do
325
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
326
- result = doc.text
327
-
328
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
329
- end
330
-
331
- context "with encode_special_chars => false" do
332
- it "leave behind only inner text with html entities unescaped" do
333
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
334
- result = doc.text(:encode_special_chars => false)
335
-
336
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
337
- end
338
- end
339
-
340
- context "with encode_special_chars => true" do
341
- it "leave behind only inner text with html entities still escaped" do
342
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
343
- result = doc.text(:encode_special_chars => true)
344
-
345
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
346
- end
347
- end
348
- end
349
-
350
- context "#to_s" do
351
- it "not remove entities" do
352
- string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s
353
- assert_match %r/this is &lt;/, string
354
- end
355
- end
356
-
357
- context "Node" do
358
- context "#scrub!" do
359
- it "only scrub subtree" do
360
- xml = Loofah.fragment <<-EOHTML
361
- <div class='scrub'>
362
- <script>I should be removed</script>
363
- </div>
364
- <div class='noscrub'>
365
- <script>I should remain</script>
366
- </div>
367
- EOHTML
368
- node = xml.at_css "div.scrub"
369
- node.scrub!(:prune)
370
- assert_match %r(I should remain), xml.to_s
371
- refute_match %r(I should be removed), xml.to_s
372
- end
373
- end
374
- end
375
-
376
- context "NodeSet" do
377
- context "#scrub!" do
378
- it "only scrub subtrees" do
379
- xml = Loofah.fragment <<-EOHTML
380
- <div class='scrub'>
381
- <script>I should be removed</script>
382
- </div>
383
- <div class='noscrub'>
384
- <script>I should remain</script>
385
- </div>
386
- <div class='scrub'>
387
- <script>I should also be removed</script>
388
- </div>
389
- EOHTML
390
- node_set = xml.css "div.scrub"
391
- assert_equal 2, node_set.length
392
- node_set.scrub!(:prune)
393
- assert_match %r/I should remain/, xml.to_s
394
- refute_match %r/I should be removed/, xml.to_s
395
- refute_match %r/I should also be removed/, xml.to_s
396
- end
397
- end
398
- end
399
- end
400
- end