loofah 2.2.3 → 2.19.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +212 -31
  3. data/README.md +18 -24
  4. data/lib/loofah/elements.rb +79 -75
  5. data/lib/loofah/helpers.rb +18 -7
  6. data/lib/loofah/html/document.rb +1 -0
  7. data/lib/loofah/html/document_fragment.rb +4 -2
  8. data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
  9. data/lib/loofah/html5/safelist.rb +1043 -0
  10. data/lib/loofah/html5/scrub.rb +73 -48
  11. data/lib/loofah/instance_methods.rb +14 -8
  12. data/lib/loofah/metahelpers.rb +2 -1
  13. data/lib/loofah/scrubber.rb +8 -7
  14. data/lib/loofah/scrubbers.rb +19 -13
  15. data/lib/loofah/version.rb +5 -0
  16. data/lib/loofah/xml/document.rb +1 -0
  17. data/lib/loofah/xml/document_fragment.rb +2 -1
  18. data/lib/loofah.rb +35 -18
  19. metadata +52 -138
  20. data/.gemtest +0 -0
  21. data/Gemfile +0 -22
  22. data/Manifest.txt +0 -40
  23. data/Rakefile +0 -79
  24. data/benchmark/benchmark.rb +0 -149
  25. data/benchmark/fragment.html +0 -96
  26. data/benchmark/helper.rb +0 -73
  27. data/benchmark/www.slashdot.com.html +0 -2560
  28. data/lib/loofah/html5/whitelist.rb +0 -186
  29. data/test/assets/msword.html +0 -63
  30. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  31. data/test/helper.rb +0 -18
  32. data/test/html5/test_sanitizer.rb +0 -382
  33. data/test/integration/test_ad_hoc.rb +0 -204
  34. data/test/integration/test_helpers.rb +0 -43
  35. data/test/integration/test_html.rb +0 -72
  36. data/test/integration/test_scrubbers.rb +0 -400
  37. data/test/integration/test_xml.rb +0 -55
  38. data/test/unit/test_api.rb +0 -142
  39. data/test/unit/test_encoding.rb +0 -20
  40. data/test/unit/test_helpers.rb +0 -62
  41. data/test/unit/test_scrubber.rb +0 -229
  42. data/test/unit/test_scrubbers.rb +0 -14
@@ -1,72 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestHtml < Loofah::TestCase
4
- context "html fragment" do
5
- context "#to_s" do
6
- it "not include head tags (like style)" do
7
- skip "depends on nokogiri version"
8
- html = Loofah.fragment "<style>foo</style><div>bar</div>"
9
- assert_equal "<div>bar</div>", html.to_s
10
- end
11
- end
12
-
13
- context "#text" do
14
- it "not include head tags (like style)" do
15
- skip "depends on nokogiri version"
16
- html = Loofah.fragment "<style>foo</style><div>bar</div>"
17
- assert_equal "bar", html.text
18
- end
19
- end
20
-
21
- context "#to_text" do
22
- it "add newlines before and after html4 block elements" do
23
- html = Loofah.fragment "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
24
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
25
- end
26
-
27
- it "add newlines before and after html5 block elements" do
28
- html = Loofah.fragment "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
29
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
30
- end
31
-
32
- it "remove extraneous whitespace" do
33
- html = Loofah.fragment "<div>tweedle\n\n\t\n\s\nbeetle</div>"
34
- assert_equal "\ntweedle\n\nbeetle\n", html.to_text
35
- end
36
- end
37
-
38
- context 'with an `encoding` arg' do
39
- it "sets the parent document's encoding to accordingly" do
40
- html = Loofah.fragment "<style>foo</style><div>bar</div>", 'US-ASCII'
41
- assert_equal 'US-ASCII', html.document.encoding
42
- end
43
- end
44
- end
45
-
46
- context "html document" do
47
- context "#text" do
48
- it "not include head tags (like style)" do
49
- html = Loofah.document "<style>foo</style><div>bar</div>"
50
- assert_equal "bar", html.text
51
- end
52
- end
53
-
54
- context "#to_text" do
55
- it "add newlines before and after html4 block elements" do
56
- html = Loofah.document "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
57
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
58
- end
59
-
60
- it "add newlines before and after html5 block elements" do
61
- html = Loofah.document "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
62
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
63
- end
64
-
65
- it "remove extraneous whitespace" do
66
- html = Loofah.document "<div>tweedle\n\n\t\n\s\nbeetle</div>"
67
- assert_equal "\ntweedle\n\nbeetle\n", html.to_text
68
- end
69
- end
70
- end
71
- end
72
-
@@ -1,400 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestScrubbers < Loofah::TestCase
4
-
5
- INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
6
- INVALID_ESCAPED = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
7
- INVALID_PRUNED = "<div>quux</div>"
8
- INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"
9
-
10
- WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid><!--[if gts mso9]><div>microsofty stuff</div><![endif]-->"
11
- WHITEWASH_RESULT = "<div>foo</div>"
12
-
13
- NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
14
- NOFOLLOW_RESULT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
15
-
16
- NOFOLLOW_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
17
- NOFOLLOW_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="noopener nofollow">Click here</a>'
18
-
19
- NOOPENER_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
20
- NOOPENER_RESULT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
21
-
22
- NOOPENER_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
23
- NOOPENER_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="nofollow noopener">Click here</a>'
24
-
25
- UNPRINTABLE_FRAGMENT = "<b>Lo\u2029ofah ro\u2028cks!</b><script>x\u2028y</script>"
26
- UNPRINTABLE_RESULT = "<b>Loofah rocks!</b><script>xy</script>"
27
-
28
- ENTITY_FRAGMENT = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p><div>w00t</div>"
29
- ENTITY_TEXT = %Q(this is < that "&" the other > boo\'yaw00t)
30
-
31
- ENTITY_HACK_ATTACK = "<div><div>Hack attack!</div><div>&lt;script&gt;alert('evil')&lt;/script&gt;</div></div>"
32
- ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!&lt;script&gt;alert('evil')&lt;/script&gt;"
33
- ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!<script>alert('evil')</script>"
34
-
35
- context "Document" do
36
- context "#scrub!" do
37
- context ":escape" do
38
- it "escape bad tags" do
39
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
40
- result = doc.scrub! :escape
41
-
42
- assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
43
- assert_equal doc, result
44
- end
45
- end
46
-
47
- context ":prune" do
48
- it "prune bad tags" do
49
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
50
- result = doc.scrub! :prune
51
-
52
- assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
53
- assert_equal doc, result
54
- end
55
- end
56
-
57
- context ":strip" do
58
- it "strip bad tags" do
59
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
60
- result = doc.scrub! :strip
61
-
62
- assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
63
- assert_equal doc, result
64
- end
65
- end
66
-
67
- context ":whitewash" do
68
- it "whitewash the markup" do
69
- doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
70
- result = doc.scrub! :whitewash
71
-
72
- assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
73
- assert_equal doc, result
74
- end
75
- end
76
-
77
- context ":nofollow" do
78
- it "add a 'nofollow' attribute to hyperlinks" do
79
- doc = Loofah::HTML::Document.parse "<html><body>#{NOFOLLOW_FRAGMENT}</body></html>"
80
- result = doc.scrub! :nofollow
81
-
82
- assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
83
- assert_equal doc, result
84
- end
85
- end
86
-
87
- context ":unprintable" do
88
- it "removes unprintable unicode characters" do
89
- doc = Loofah::HTML::Document.parse "<html><body>#{UNPRINTABLE_FRAGMENT}</body></html>"
90
- result = doc.scrub! :unprintable
91
-
92
- assert_equal UNPRINTABLE_RESULT, doc.xpath("/html/body").inner_html
93
- assert_equal doc, result
94
- end
95
- end
96
- end
97
-
98
- context "#scrub_document" do
99
- it "be a shortcut for parse-and-scrub" do
100
- mock_doc = Object.new
101
- mock(Loofah).document(:string_or_io) { mock_doc }
102
- mock(mock_doc).scrub!(:method)
103
-
104
- Loofah.scrub_document(:string_or_io, :method)
105
- end
106
- end
107
-
108
- context "#text" do
109
- it "leave behind only inner text with html entities still escaped" do
110
- doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
111
- result = doc.text
112
-
113
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
114
- end
115
-
116
- context "with encode_special_chars => false" do
117
- it "leave behind only inner text with html entities unescaped" do
118
- doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
119
- result = doc.text(:encode_special_chars => false)
120
-
121
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
122
- end
123
- end
124
-
125
- context "with encode_special_chars => true" do
126
- it "leave behind only inner text with html entities still escaped" do
127
- doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
128
- result = doc.text(:encode_special_chars => true)
129
-
130
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
131
- end
132
- end
133
- end
134
-
135
- context "#to_s" do
136
- it "generate HTML" do
137
- doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
138
- refute_nil doc.xpath("/html").first
139
- refute_nil doc.xpath("/html/head").first
140
- refute_nil doc.xpath("/html/body").first
141
-
142
- string = doc.to_s
143
- assert_match %r/<!DOCTYPE/, string
144
- assert_match %r/<html>/, string
145
- assert_match %r/<head>/, string
146
- assert_match %r/<body>/, string
147
- end
148
- end
149
-
150
- context "#serialize" do
151
- it "generate HTML" do
152
- doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
153
- refute_nil doc.xpath("/html").first
154
- refute_nil doc.xpath("/html/head").first
155
- refute_nil doc.xpath("/html/body").first
156
-
157
- string = doc.serialize
158
- assert_match %r/<!DOCTYPE/, string
159
- assert_match %r/<html>/, string
160
- assert_match %r/<head>/, string
161
- assert_match %r/<body>/, string
162
- end
163
- end
164
-
165
- context "Node" do
166
- context "#scrub!" do
167
- it "only scrub subtree" do
168
- xml = Loofah.document <<-EOHTML
169
- <html><body>
170
- <div class='scrub'>
171
- <script>I should be removed</script>
172
- </div>
173
- <div class='noscrub'>
174
- <script>I should remain</script>
175
- </div>
176
- </body></html>
177
- EOHTML
178
- node = xml.at_css "div.scrub"
179
- node.scrub!(:prune)
180
- assert_match %r/I should remain/, xml.to_s
181
- refute_match %r/I should be removed/, xml.to_s
182
- end
183
- end
184
- end
185
-
186
- context "NodeSet" do
187
- context "#scrub!" do
188
- it "only scrub subtrees" do
189
- xml = Loofah.document <<-EOHTML
190
- <html><body>
191
- <div class='scrub'>
192
- <script>I should be removed</script>
193
- </div>
194
- <div class='noscrub'>
195
- <script>I should remain</script>
196
- </div>
197
- <div class='scrub'>
198
- <script>I should also be removed</script>
199
- </div>
200
- </body></html>
201
- EOHTML
202
- node_set = xml.css "div.scrub"
203
- assert_equal 2, node_set.length
204
- node_set.scrub!(:prune)
205
- assert_match %r/I should remain/, xml.to_s
206
- refute_match %r/I should be removed/, xml.to_s
207
- refute_match %r/I should also be removed/, xml.to_s
208
- end
209
- end
210
- end
211
- end
212
-
213
- context "DocumentFragment" do
214
- context "#scrub!" do
215
- context ":escape" do
216
- it "escape bad tags" do
217
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
218
- result = doc.scrub! :escape
219
-
220
- assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
221
- assert_equal doc, result
222
- end
223
- end
224
-
225
- context ":prune" do
226
- it "prune bad tags" do
227
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
228
- result = doc.scrub! :prune
229
-
230
- assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
231
- assert_equal doc, result
232
- end
233
- end
234
-
235
- context ":strip" do
236
- it "strip bad tags" do
237
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
238
- result = doc.scrub! :strip
239
-
240
- assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
241
- assert_equal doc, result
242
- end
243
- end
244
-
245
- context ":whitewash" do
246
- it "whitewash the markup" do
247
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
248
- result = doc.scrub! :whitewash
249
-
250
- assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
251
- assert_equal doc, result
252
- end
253
- end
254
-
255
- context ":nofollow" do
256
-
257
- context "for a hyperlink that does not have a rel attribute" do
258
- it "add a 'nofollow' attribute to hyperlinks" do
259
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_FRAGMENT}</div>"
260
- result = doc.scrub! :nofollow
261
-
262
- assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
263
- assert_equal doc, result
264
- end
265
- end
266
-
267
- context "for a hyperlink that does have a rel attribute" do
268
- it "appends nofollow to rel attribute" do
269
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_WITH_REL_FRAGMENT}</div>"
270
- result = doc.scrub! :nofollow
271
-
272
- assert_equal NOFOLLOW_WITH_REL_RESULT, doc.xpath("./div").inner_html
273
- assert_equal doc, result
274
- end
275
- end
276
-
277
-
278
- end
279
-
280
- context ":noopener" do
281
- context "for a hyperlink without a 'rel' attribute" do
282
- it "add a 'noopener' attribute to hyperlinks" do
283
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_FRAGMENT}</div>"
284
- result = doc.scrub! :noopener
285
-
286
- assert_equal NOOPENER_RESULT, doc.xpath("./div").inner_html
287
- assert_equal doc, result
288
- end
289
- end
290
-
291
- context "for a hyperlink that does have a rel attribute" do
292
- it "appends 'noopener' to 'rel' attribute" do
293
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_WITH_REL_FRAGMENT}</div>"
294
- result = doc.scrub! :noopener
295
-
296
- assert_equal NOOPENER_WITH_REL_RESULT, doc.xpath("./div").inner_html
297
- assert_equal doc, result
298
- end
299
- end
300
- end
301
-
302
- context ":unprintable" do
303
- it "removes unprintable unicode characters" do
304
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{UNPRINTABLE_FRAGMENT}</div>"
305
- result = doc.scrub! :unprintable
306
-
307
- assert_equal UNPRINTABLE_RESULT, doc.xpath("./div").inner_html
308
- assert_equal doc, result
309
- end
310
- end
311
- end
312
-
313
- context "#scrub_fragment" do
314
- it "be a shortcut for parse-and-scrub" do
315
- mock_doc = Object.new
316
- mock(Loofah).fragment(:string_or_io) { mock_doc }
317
- mock(mock_doc).scrub!(:method)
318
-
319
- Loofah.scrub_fragment(:string_or_io, :method)
320
- end
321
- end
322
-
323
- context "#text" do
324
- it "leave behind only inner text with html entities still escaped" do
325
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
326
- result = doc.text
327
-
328
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
329
- end
330
-
331
- context "with encode_special_chars => false" do
332
- it "leave behind only inner text with html entities unescaped" do
333
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
334
- result = doc.text(:encode_special_chars => false)
335
-
336
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
337
- end
338
- end
339
-
340
- context "with encode_special_chars => true" do
341
- it "leave behind only inner text with html entities still escaped" do
342
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
343
- result = doc.text(:encode_special_chars => true)
344
-
345
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
346
- end
347
- end
348
- end
349
-
350
- context "#to_s" do
351
- it "not remove entities" do
352
- string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s
353
- assert_match %r/this is &lt;/, string
354
- end
355
- end
356
-
357
- context "Node" do
358
- context "#scrub!" do
359
- it "only scrub subtree" do
360
- xml = Loofah.fragment <<-EOHTML
361
- <div class='scrub'>
362
- <script>I should be removed</script>
363
- </div>
364
- <div class='noscrub'>
365
- <script>I should remain</script>
366
- </div>
367
- EOHTML
368
- node = xml.at_css "div.scrub"
369
- node.scrub!(:prune)
370
- assert_match %r(I should remain), xml.to_s
371
- refute_match %r(I should be removed), xml.to_s
372
- end
373
- end
374
- end
375
-
376
- context "NodeSet" do
377
- context "#scrub!" do
378
- it "only scrub subtrees" do
379
- xml = Loofah.fragment <<-EOHTML
380
- <div class='scrub'>
381
- <script>I should be removed</script>
382
- </div>
383
- <div class='noscrub'>
384
- <script>I should remain</script>
385
- </div>
386
- <div class='scrub'>
387
- <script>I should also be removed</script>
388
- </div>
389
- EOHTML
390
- node_set = xml.css "div.scrub"
391
- assert_equal 2, node_set.length
392
- node_set.scrub!(:prune)
393
- assert_match %r/I should remain/, xml.to_s
394
- refute_match %r/I should be removed/, xml.to_s
395
- refute_match %r/I should also be removed/, xml.to_s
396
- end
397
- end
398
- end
399
- end
400
- end
@@ -1,55 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestXml < Loofah::TestCase
4
- context "integration test" do
5
- context "xml document" do
6
- context "custom scrubber" do
7
- it "act as expected" do
8
- xml = Loofah.xml_document <<-EOXML
9
- <root>
10
- <employee deceased='true'>Abraham Lincoln</employee>
11
- <employee deceased='false'>Abe Vigoda</employee>
12
- </root>
13
- EOXML
14
- bring_out_your_dead = Loofah::Scrubber.new do |node|
15
- if node.name == "employee" and node["deceased"] == "true"
16
- node.remove
17
- Loofah::Scrubber::STOP # don't bother with the rest of the subtree
18
- end
19
- end
20
- assert_equal 2, xml.css("employee").length
21
-
22
- xml.scrub!(bring_out_your_dead)
23
-
24
- employees = xml.css "employee"
25
- assert_equal 1, employees.length
26
- assert_equal "Abe Vigoda", employees.first.inner_text
27
- end
28
- end
29
- end
30
-
31
- context "xml fragment" do
32
- context "custom scrubber" do
33
- it "act as expected" do
34
- xml = Loofah.xml_fragment <<-EOXML
35
- <employee deceased='true'>Abraham Lincoln</employee>
36
- <employee deceased='false'>Abe Vigoda</employee>
37
- EOXML
38
- bring_out_your_dead = Loofah::Scrubber.new do |node|
39
- if node.name == "employee" and node["deceased"] == "true"
40
- node.remove
41
- Loofah::Scrubber::STOP # don't bother with the rest of the subtree
42
- end
43
- end
44
- assert_equal 2, xml.css("employee").length
45
-
46
- xml.scrub!(bring_out_your_dead)
47
-
48
- employees = xml.css "employee"
49
- assert_equal 1, employees.length
50
- assert_equal "Abe Vigoda", employees.first.inner_text
51
- end
52
- end
53
- end
54
- end
55
- end
@@ -1,142 +0,0 @@
1
- require "helper"
2
-
3
- class UnitTestApi < Loofah::TestCase
4
-
5
- HTML = "<div>a</div>\n<div>b</div>"
6
- XML_FRAGMENT = "<div>a</div>\n<div>b</div>"
7
- XML = "<root>#{XML_FRAGMENT}</root>"
8
-
9
- describe "HTML" do
10
- it "creates documents" do
11
- doc = Loofah.document(HTML)
12
- assert_html_documentish doc
13
- end
14
-
15
- it "creates fragments" do
16
- doc = Loofah.fragment(HTML)
17
- assert_html_fragmentish doc
18
- end
19
-
20
- it "parses documents" do
21
- doc = Loofah::HTML::Document.parse(HTML)
22
- assert_html_documentish doc
23
- end
24
-
25
- it "parses document fragment" do
26
- doc = Loofah::HTML::DocumentFragment.parse(HTML)
27
- assert_html_fragmentish doc
28
- end
29
-
30
- it "scrubs documents" do
31
- doc = Loofah.document(HTML).scrub!(:strip)
32
- assert_html_documentish doc
33
- end
34
-
35
- it "scrubs fragments" do
36
- doc = Loofah.fragment(HTML).scrub!(:strip)
37
- assert_html_fragmentish doc
38
- end
39
-
40
- it "scrubs document nodes" do
41
- doc = Loofah.document(HTML)
42
- assert(node = doc.at_css("div"))
43
- node.scrub!(:strip)
44
- end
45
-
46
- it "scrubs fragment nodes" do
47
- doc = Loofah.fragment(HTML)
48
- assert(node = doc.at_css("div"))
49
- node.scrub!(:strip)
50
- end
51
-
52
- it "scrubs document nodesets" do
53
- doc = Loofah.document(HTML)
54
- assert(node_set = doc.css("div"))
55
- assert_instance_of Nokogiri::XML::NodeSet, node_set
56
- node_set.scrub!(:strip)
57
- end
58
-
59
- it "exposes serialize_root on HTML::DocumentFragment" do
60
- doc = Loofah.fragment(HTML)
61
- assert_equal HTML, doc.serialize_root.to_html
62
- end
63
-
64
- it "exposes serialize_root on HTML::Document" do
65
- doc = Loofah.document(HTML)
66
- assert_equal HTML, doc.serialize_root.children.to_html
67
- end
68
- end
69
-
70
- describe "XML" do
71
- it "creates documents" do
72
- doc = Loofah.xml_document(XML)
73
- assert_xml_documentish doc
74
- end
75
-
76
- it "creates fragments" do
77
- doc = Loofah.xml_fragment(XML_FRAGMENT)
78
- assert_xml_fragmentish doc
79
- end
80
-
81
- it "parses documents" do
82
- doc = Loofah::XML::Document.parse(XML)
83
- assert_xml_documentish doc
84
- end
85
-
86
- it "parses document fragments" do
87
- doc = Loofah::XML::DocumentFragment.parse(XML_FRAGMENT)
88
- assert_xml_fragmentish doc
89
- end
90
-
91
- it "scrubs documents" do
92
- scrubber = Loofah::Scrubber.new { |node| }
93
- doc = Loofah.xml_document(XML).scrub!(scrubber)
94
- assert_xml_documentish doc
95
- end
96
-
97
- it "scrubs fragments" do
98
- scrubber = Loofah::Scrubber.new { |node| }
99
- doc = Loofah.xml_fragment(XML_FRAGMENT).scrub!(scrubber)
100
- assert_xml_fragmentish doc
101
- end
102
-
103
- it "scrubs document nodes" do
104
- doc = Loofah.xml_document(XML)
105
- assert(node = doc.at_css("div"))
106
- node.scrub!(:strip)
107
- end
108
-
109
- it "scrubs fragment nodes" do
110
- doc = Loofah.xml_fragment(XML)
111
- assert(node = doc.at_css("div"))
112
- node.scrub!(:strip)
113
- end
114
- end
115
-
116
- private
117
-
118
- def assert_html_documentish(doc)
119
- assert_kind_of Nokogiri::HTML::Document, doc
120
- assert_kind_of Loofah::HTML::Document, doc
121
- assert_equal HTML, doc.xpath("/html/body").inner_html
122
- end
123
-
124
- def assert_html_fragmentish(doc)
125
- assert_kind_of Nokogiri::HTML::DocumentFragment, doc
126
- assert_kind_of Loofah::HTML::DocumentFragment, doc
127
- assert_equal HTML, doc.inner_html
128
- end
129
-
130
- def assert_xml_documentish(doc)
131
- assert_kind_of Nokogiri::XML::Document, doc
132
- assert_kind_of Loofah::XML::Document, doc
133
- assert_equal XML, doc.root.to_xml
134
- end
135
-
136
- def assert_xml_fragmentish(doc)
137
- assert_kind_of Nokogiri::XML::DocumentFragment, doc
138
- assert_kind_of Loofah::XML::DocumentFragment, doc
139
- assert_equal XML_FRAGMENT, doc.children.to_xml
140
- end
141
-
142
- end
@@ -1,20 +0,0 @@
1
- # :coding: utf-8
2
- require "helper"
3
-
4
- class UnitTestEncoding < Loofah::TestCase
5
- UTF8_STRING = "日本語"
6
-
7
- if String.new.respond_to?(:encoding)
8
- describe "scrub_fragment" do
9
- it "sets the encoding for html" do
10
- escaped = Loofah.scrub_fragment(UTF8_STRING, :escape).to_s
11
- assert_equal UTF8_STRING.encoding, escaped.encoding
12
- end
13
-
14
- it "sets the encoding for xml" do
15
- escaped = Loofah.scrub_xml_fragment(UTF8_STRING, :escape).to_s
16
- assert_equal UTF8_STRING.encoding, escaped.encoding
17
- end
18
- end
19
- end
20
- end