loofah 2.2.3 → 2.21.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +269 -31
  3. data/README.md +109 -124
  4. data/lib/loofah/concerns.rb +207 -0
  5. data/lib/loofah/elements.rb +85 -79
  6. data/lib/loofah/helpers.rb +37 -20
  7. data/lib/loofah/{html → html4}/document.rb +6 -7
  8. data/lib/loofah/html4/document_fragment.rb +15 -0
  9. data/lib/loofah/html5/document.rb +17 -0
  10. data/lib/loofah/html5/document_fragment.rb +15 -0
  11. data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
  12. data/lib/loofah/html5/safelist.rb +1055 -0
  13. data/lib/loofah/html5/scrub.rb +153 -58
  14. data/lib/loofah/metahelpers.rb +11 -6
  15. data/lib/loofah/scrubber.rb +22 -15
  16. data/lib/loofah/scrubbers.rb +66 -55
  17. data/lib/loofah/version.rb +6 -0
  18. data/lib/loofah/xml/document.rb +2 -0
  19. data/lib/loofah/xml/document_fragment.rb +4 -7
  20. data/lib/loofah.rb +131 -38
  21. metadata +28 -216
  22. data/.gemtest +0 -0
  23. data/Gemfile +0 -22
  24. data/Manifest.txt +0 -40
  25. data/Rakefile +0 -79
  26. data/benchmark/benchmark.rb +0 -149
  27. data/benchmark/fragment.html +0 -96
  28. data/benchmark/helper.rb +0 -73
  29. data/benchmark/www.slashdot.com.html +0 -2560
  30. data/lib/loofah/html/document_fragment.rb +0 -40
  31. data/lib/loofah/html5/whitelist.rb +0 -186
  32. data/lib/loofah/instance_methods.rb +0 -127
  33. data/test/assets/msword.html +0 -63
  34. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  35. data/test/helper.rb +0 -18
  36. data/test/html5/test_sanitizer.rb +0 -382
  37. data/test/integration/test_ad_hoc.rb +0 -204
  38. data/test/integration/test_helpers.rb +0 -43
  39. data/test/integration/test_html.rb +0 -72
  40. data/test/integration/test_scrubbers.rb +0 -400
  41. data/test/integration/test_xml.rb +0 -55
  42. data/test/unit/test_api.rb +0 -142
  43. data/test/unit/test_encoding.rb +0 -20
  44. data/test/unit/test_helpers.rb +0 -62
  45. data/test/unit/test_scrubber.rb +0 -229
  46. data/test/unit/test_scrubbers.rb +0 -14
data/test/integration/test_html.rb (deleted)
@@ -1,72 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestHtml < Loofah::TestCase
4
- context "html fragment" do
5
- context "#to_s" do
6
- it "not include head tags (like style)" do
7
- skip "depends on nokogiri version"
8
- html = Loofah.fragment "<style>foo</style><div>bar</div>"
9
- assert_equal "<div>bar</div>", html.to_s
10
- end
11
- end
12
-
13
- context "#text" do
14
- it "not include head tags (like style)" do
15
- skip "depends on nokogiri version"
16
- html = Loofah.fragment "<style>foo</style><div>bar</div>"
17
- assert_equal "bar", html.text
18
- end
19
- end
20
-
21
- context "#to_text" do
22
- it "add newlines before and after html4 block elements" do
23
- html = Loofah.fragment "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
24
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
25
- end
26
-
27
- it "add newlines before and after html5 block elements" do
28
- html = Loofah.fragment "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
29
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
30
- end
31
-
32
- it "remove extraneous whitespace" do
33
- html = Loofah.fragment "<div>tweedle\n\n\t\n\s\nbeetle</div>"
34
- assert_equal "\ntweedle\n\nbeetle\n", html.to_text
35
- end
36
- end
37
-
38
- context 'with an `encoding` arg' do
39
- it "sets the parent document's encoding to accordingly" do
40
- html = Loofah.fragment "<style>foo</style><div>bar</div>", 'US-ASCII'
41
- assert_equal 'US-ASCII', html.document.encoding
42
- end
43
- end
44
- end
45
-
46
- context "html document" do
47
- context "#text" do
48
- it "not include head tags (like style)" do
49
- html = Loofah.document "<style>foo</style><div>bar</div>"
50
- assert_equal "bar", html.text
51
- end
52
- end
53
-
54
- context "#to_text" do
55
- it "add newlines before and after html4 block elements" do
56
- html = Loofah.document "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
57
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
58
- end
59
-
60
- it "add newlines before and after html5 block elements" do
61
- html = Loofah.document "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
62
- assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
63
- end
64
-
65
- it "remove extraneous whitespace" do
66
- html = Loofah.document "<div>tweedle\n\n\t\n\s\nbeetle</div>"
67
- assert_equal "\ntweedle\n\nbeetle\n", html.to_text
68
- end
69
- end
70
- end
71
- end
72
-
data/test/integration/test_scrubbers.rb (deleted)
@@ -1,400 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestScrubbers < Loofah::TestCase
4
-
5
- INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
6
- INVALID_ESCAPED = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
7
- INVALID_PRUNED = "<div>quux</div>"
8
- INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"
9
-
10
- WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid><!--[if gts mso9]><div>microsofty stuff</div><![endif]-->"
11
- WHITEWASH_RESULT = "<div>foo</div>"
12
-
13
- NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
14
- NOFOLLOW_RESULT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
15
-
16
- NOFOLLOW_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
17
- NOFOLLOW_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="noopener nofollow">Click here</a>'
18
-
19
- NOOPENER_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
20
- NOOPENER_RESULT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
21
-
22
- NOOPENER_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
23
- NOOPENER_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="nofollow noopener">Click here</a>'
24
-
25
- UNPRINTABLE_FRAGMENT = "<b>Lo\u2029ofah ro\u2028cks!</b><script>x\u2028y</script>"
26
- UNPRINTABLE_RESULT = "<b>Loofah rocks!</b><script>xy</script>"
27
-
28
- ENTITY_FRAGMENT = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p><div>w00t</div>"
29
- ENTITY_TEXT = %Q(this is < that "&" the other > boo\'yaw00t)
30
-
31
- ENTITY_HACK_ATTACK = "<div><div>Hack attack!</div><div>&lt;script&gt;alert('evil')&lt;/script&gt;</div></div>"
32
- ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!&lt;script&gt;alert('evil')&lt;/script&gt;"
33
- ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!<script>alert('evil')</script>"
34
-
35
- context "Document" do
36
- context "#scrub!" do
37
- context ":escape" do
38
- it "escape bad tags" do
39
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
40
- result = doc.scrub! :escape
41
-
42
- assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
43
- assert_equal doc, result
44
- end
45
- end
46
-
47
- context ":prune" do
48
- it "prune bad tags" do
49
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
50
- result = doc.scrub! :prune
51
-
52
- assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
53
- assert_equal doc, result
54
- end
55
- end
56
-
57
- context ":strip" do
58
- it "strip bad tags" do
59
- doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
60
- result = doc.scrub! :strip
61
-
62
- assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
63
- assert_equal doc, result
64
- end
65
- end
66
-
67
- context ":whitewash" do
68
- it "whitewash the markup" do
69
- doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
70
- result = doc.scrub! :whitewash
71
-
72
- assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
73
- assert_equal doc, result
74
- end
75
- end
76
-
77
- context ":nofollow" do
78
- it "add a 'nofollow' attribute to hyperlinks" do
79
- doc = Loofah::HTML::Document.parse "<html><body>#{NOFOLLOW_FRAGMENT}</body></html>"
80
- result = doc.scrub! :nofollow
81
-
82
- assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
83
- assert_equal doc, result
84
- end
85
- end
86
-
87
- context ":unprintable" do
88
- it "removes unprintable unicode characters" do
89
- doc = Loofah::HTML::Document.parse "<html><body>#{UNPRINTABLE_FRAGMENT}</body></html>"
90
- result = doc.scrub! :unprintable
91
-
92
- assert_equal UNPRINTABLE_RESULT, doc.xpath("/html/body").inner_html
93
- assert_equal doc, result
94
- end
95
- end
96
- end
97
-
98
- context "#scrub_document" do
99
- it "be a shortcut for parse-and-scrub" do
100
- mock_doc = Object.new
101
- mock(Loofah).document(:string_or_io) { mock_doc }
102
- mock(mock_doc).scrub!(:method)
103
-
104
- Loofah.scrub_document(:string_or_io, :method)
105
- end
106
- end
107
-
108
- context "#text" do
109
- it "leave behind only inner text with html entities still escaped" do
110
- doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
111
- result = doc.text
112
-
113
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
114
- end
115
-
116
- context "with encode_special_chars => false" do
117
- it "leave behind only inner text with html entities unescaped" do
118
- doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
119
- result = doc.text(:encode_special_chars => false)
120
-
121
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
122
- end
123
- end
124
-
125
- context "with encode_special_chars => true" do
126
- it "leave behind only inner text with html entities still escaped" do
127
- doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
128
- result = doc.text(:encode_special_chars => true)
129
-
130
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
131
- end
132
- end
133
- end
134
-
135
- context "#to_s" do
136
- it "generate HTML" do
137
- doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
138
- refute_nil doc.xpath("/html").first
139
- refute_nil doc.xpath("/html/head").first
140
- refute_nil doc.xpath("/html/body").first
141
-
142
- string = doc.to_s
143
- assert_match %r/<!DOCTYPE/, string
144
- assert_match %r/<html>/, string
145
- assert_match %r/<head>/, string
146
- assert_match %r/<body>/, string
147
- end
148
- end
149
-
150
- context "#serialize" do
151
- it "generate HTML" do
152
- doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
153
- refute_nil doc.xpath("/html").first
154
- refute_nil doc.xpath("/html/head").first
155
- refute_nil doc.xpath("/html/body").first
156
-
157
- string = doc.serialize
158
- assert_match %r/<!DOCTYPE/, string
159
- assert_match %r/<html>/, string
160
- assert_match %r/<head>/, string
161
- assert_match %r/<body>/, string
162
- end
163
- end
164
-
165
- context "Node" do
166
- context "#scrub!" do
167
- it "only scrub subtree" do
168
- xml = Loofah.document <<-EOHTML
169
- <html><body>
170
- <div class='scrub'>
171
- <script>I should be removed</script>
172
- </div>
173
- <div class='noscrub'>
174
- <script>I should remain</script>
175
- </div>
176
- </body></html>
177
- EOHTML
178
- node = xml.at_css "div.scrub"
179
- node.scrub!(:prune)
180
- assert_match %r/I should remain/, xml.to_s
181
- refute_match %r/I should be removed/, xml.to_s
182
- end
183
- end
184
- end
185
-
186
- context "NodeSet" do
187
- context "#scrub!" do
188
- it "only scrub subtrees" do
189
- xml = Loofah.document <<-EOHTML
190
- <html><body>
191
- <div class='scrub'>
192
- <script>I should be removed</script>
193
- </div>
194
- <div class='noscrub'>
195
- <script>I should remain</script>
196
- </div>
197
- <div class='scrub'>
198
- <script>I should also be removed</script>
199
- </div>
200
- </body></html>
201
- EOHTML
202
- node_set = xml.css "div.scrub"
203
- assert_equal 2, node_set.length
204
- node_set.scrub!(:prune)
205
- assert_match %r/I should remain/, xml.to_s
206
- refute_match %r/I should be removed/, xml.to_s
207
- refute_match %r/I should also be removed/, xml.to_s
208
- end
209
- end
210
- end
211
- end
212
-
213
- context "DocumentFragment" do
214
- context "#scrub!" do
215
- context ":escape" do
216
- it "escape bad tags" do
217
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
218
- result = doc.scrub! :escape
219
-
220
- assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
221
- assert_equal doc, result
222
- end
223
- end
224
-
225
- context ":prune" do
226
- it "prune bad tags" do
227
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
228
- result = doc.scrub! :prune
229
-
230
- assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
231
- assert_equal doc, result
232
- end
233
- end
234
-
235
- context ":strip" do
236
- it "strip bad tags" do
237
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
238
- result = doc.scrub! :strip
239
-
240
- assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
241
- assert_equal doc, result
242
- end
243
- end
244
-
245
- context ":whitewash" do
246
- it "whitewash the markup" do
247
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
248
- result = doc.scrub! :whitewash
249
-
250
- assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
251
- assert_equal doc, result
252
- end
253
- end
254
-
255
- context ":nofollow" do
256
-
257
- context "for a hyperlink that does not have a rel attribute" do
258
- it "add a 'nofollow' attribute to hyperlinks" do
259
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_FRAGMENT}</div>"
260
- result = doc.scrub! :nofollow
261
-
262
- assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
263
- assert_equal doc, result
264
- end
265
- end
266
-
267
- context "for a hyperlink that does have a rel attribute" do
268
- it "appends nofollow to rel attribute" do
269
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_WITH_REL_FRAGMENT}</div>"
270
- result = doc.scrub! :nofollow
271
-
272
- assert_equal NOFOLLOW_WITH_REL_RESULT, doc.xpath("./div").inner_html
273
- assert_equal doc, result
274
- end
275
- end
276
-
277
-
278
- end
279
-
280
- context ":noopener" do
281
- context "for a hyperlink without a 'rel' attribute" do
282
- it "add a 'noopener' attribute to hyperlinks" do
283
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_FRAGMENT}</div>"
284
- result = doc.scrub! :noopener
285
-
286
- assert_equal NOOPENER_RESULT, doc.xpath("./div").inner_html
287
- assert_equal doc, result
288
- end
289
- end
290
-
291
- context "for a hyperlink that does have a rel attribute" do
292
- it "appends 'noopener' to 'rel' attribute" do
293
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_WITH_REL_FRAGMENT}</div>"
294
- result = doc.scrub! :noopener
295
-
296
- assert_equal NOOPENER_WITH_REL_RESULT, doc.xpath("./div").inner_html
297
- assert_equal doc, result
298
- end
299
- end
300
- end
301
-
302
- context ":unprintable" do
303
- it "removes unprintable unicode characters" do
304
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{UNPRINTABLE_FRAGMENT}</div>"
305
- result = doc.scrub! :unprintable
306
-
307
- assert_equal UNPRINTABLE_RESULT, doc.xpath("./div").inner_html
308
- assert_equal doc, result
309
- end
310
- end
311
- end
312
-
313
- context "#scrub_fragment" do
314
- it "be a shortcut for parse-and-scrub" do
315
- mock_doc = Object.new
316
- mock(Loofah).fragment(:string_or_io) { mock_doc }
317
- mock(mock_doc).scrub!(:method)
318
-
319
- Loofah.scrub_fragment(:string_or_io, :method)
320
- end
321
- end
322
-
323
- context "#text" do
324
- it "leave behind only inner text with html entities still escaped" do
325
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
326
- result = doc.text
327
-
328
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
329
- end
330
-
331
- context "with encode_special_chars => false" do
332
- it "leave behind only inner text with html entities unescaped" do
333
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
334
- result = doc.text(:encode_special_chars => false)
335
-
336
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
337
- end
338
- end
339
-
340
- context "with encode_special_chars => true" do
341
- it "leave behind only inner text with html entities still escaped" do
342
- doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
343
- result = doc.text(:encode_special_chars => true)
344
-
345
- assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
346
- end
347
- end
348
- end
349
-
350
- context "#to_s" do
351
- it "not remove entities" do
352
- string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s
353
- assert_match %r/this is &lt;/, string
354
- end
355
- end
356
-
357
- context "Node" do
358
- context "#scrub!" do
359
- it "only scrub subtree" do
360
- xml = Loofah.fragment <<-EOHTML
361
- <div class='scrub'>
362
- <script>I should be removed</script>
363
- </div>
364
- <div class='noscrub'>
365
- <script>I should remain</script>
366
- </div>
367
- EOHTML
368
- node = xml.at_css "div.scrub"
369
- node.scrub!(:prune)
370
- assert_match %r(I should remain), xml.to_s
371
- refute_match %r(I should be removed), xml.to_s
372
- end
373
- end
374
- end
375
-
376
- context "NodeSet" do
377
- context "#scrub!" do
378
- it "only scrub subtrees" do
379
- xml = Loofah.fragment <<-EOHTML
380
- <div class='scrub'>
381
- <script>I should be removed</script>
382
- </div>
383
- <div class='noscrub'>
384
- <script>I should remain</script>
385
- </div>
386
- <div class='scrub'>
387
- <script>I should also be removed</script>
388
- </div>
389
- EOHTML
390
- node_set = xml.css "div.scrub"
391
- assert_equal 2, node_set.length
392
- node_set.scrub!(:prune)
393
- assert_match %r/I should remain/, xml.to_s
394
- refute_match %r/I should be removed/, xml.to_s
395
- refute_match %r/I should also be removed/, xml.to_s
396
- end
397
- end
398
- end
399
- end
400
- end
data/test/integration/test_xml.rb (deleted)
@@ -1,55 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestXml < Loofah::TestCase
4
- context "integration test" do
5
- context "xml document" do
6
- context "custom scrubber" do
7
- it "act as expected" do
8
- xml = Loofah.xml_document <<-EOXML
9
- <root>
10
- <employee deceased='true'>Abraham Lincoln</employee>
11
- <employee deceased='false'>Abe Vigoda</employee>
12
- </root>
13
- EOXML
14
- bring_out_your_dead = Loofah::Scrubber.new do |node|
15
- if node.name == "employee" and node["deceased"] == "true"
16
- node.remove
17
- Loofah::Scrubber::STOP # don't bother with the rest of the subtree
18
- end
19
- end
20
- assert_equal 2, xml.css("employee").length
21
-
22
- xml.scrub!(bring_out_your_dead)
23
-
24
- employees = xml.css "employee"
25
- assert_equal 1, employees.length
26
- assert_equal "Abe Vigoda", employees.first.inner_text
27
- end
28
- end
29
- end
30
-
31
- context "xml fragment" do
32
- context "custom scrubber" do
33
- it "act as expected" do
34
- xml = Loofah.xml_fragment <<-EOXML
35
- <employee deceased='true'>Abraham Lincoln</employee>
36
- <employee deceased='false'>Abe Vigoda</employee>
37
- EOXML
38
- bring_out_your_dead = Loofah::Scrubber.new do |node|
39
- if node.name == "employee" and node["deceased"] == "true"
40
- node.remove
41
- Loofah::Scrubber::STOP # don't bother with the rest of the subtree
42
- end
43
- end
44
- assert_equal 2, xml.css("employee").length
45
-
46
- xml.scrub!(bring_out_your_dead)
47
-
48
- employees = xml.css "employee"
49
- assert_equal 1, employees.length
50
- assert_equal "Abe Vigoda", employees.first.inner_text
51
- end
52
- end
53
- end
54
- end
55
- end
data/test/unit/test_api.rb (deleted)
@@ -1,142 +0,0 @@
1
- require "helper"
2
-
3
- class UnitTestApi < Loofah::TestCase
4
-
5
- HTML = "<div>a</div>\n<div>b</div>"
6
- XML_FRAGMENT = "<div>a</div>\n<div>b</div>"
7
- XML = "<root>#{XML_FRAGMENT}</root>"
8
-
9
- describe "HTML" do
10
- it "creates documents" do
11
- doc = Loofah.document(HTML)
12
- assert_html_documentish doc
13
- end
14
-
15
- it "creates fragments" do
16
- doc = Loofah.fragment(HTML)
17
- assert_html_fragmentish doc
18
- end
19
-
20
- it "parses documents" do
21
- doc = Loofah::HTML::Document.parse(HTML)
22
- assert_html_documentish doc
23
- end
24
-
25
- it "parses document fragment" do
26
- doc = Loofah::HTML::DocumentFragment.parse(HTML)
27
- assert_html_fragmentish doc
28
- end
29
-
30
- it "scrubs documents" do
31
- doc = Loofah.document(HTML).scrub!(:strip)
32
- assert_html_documentish doc
33
- end
34
-
35
- it "scrubs fragments" do
36
- doc = Loofah.fragment(HTML).scrub!(:strip)
37
- assert_html_fragmentish doc
38
- end
39
-
40
- it "scrubs document nodes" do
41
- doc = Loofah.document(HTML)
42
- assert(node = doc.at_css("div"))
43
- node.scrub!(:strip)
44
- end
45
-
46
- it "scrubs fragment nodes" do
47
- doc = Loofah.fragment(HTML)
48
- assert(node = doc.at_css("div"))
49
- node.scrub!(:strip)
50
- end
51
-
52
- it "scrubs document nodesets" do
53
- doc = Loofah.document(HTML)
54
- assert(node_set = doc.css("div"))
55
- assert_instance_of Nokogiri::XML::NodeSet, node_set
56
- node_set.scrub!(:strip)
57
- end
58
-
59
- it "exposes serialize_root on HTML::DocumentFragment" do
60
- doc = Loofah.fragment(HTML)
61
- assert_equal HTML, doc.serialize_root.to_html
62
- end
63
-
64
- it "exposes serialize_root on HTML::Document" do
65
- doc = Loofah.document(HTML)
66
- assert_equal HTML, doc.serialize_root.children.to_html
67
- end
68
- end
69
-
70
- describe "XML" do
71
- it "creates documents" do
72
- doc = Loofah.xml_document(XML)
73
- assert_xml_documentish doc
74
- end
75
-
76
- it "creates fragments" do
77
- doc = Loofah.xml_fragment(XML_FRAGMENT)
78
- assert_xml_fragmentish doc
79
- end
80
-
81
- it "parses documents" do
82
- doc = Loofah::XML::Document.parse(XML)
83
- assert_xml_documentish doc
84
- end
85
-
86
- it "parses document fragments" do
87
- doc = Loofah::XML::DocumentFragment.parse(XML_FRAGMENT)
88
- assert_xml_fragmentish doc
89
- end
90
-
91
- it "scrubs documents" do
92
- scrubber = Loofah::Scrubber.new { |node| }
93
- doc = Loofah.xml_document(XML).scrub!(scrubber)
94
- assert_xml_documentish doc
95
- end
96
-
97
- it "scrubs fragments" do
98
- scrubber = Loofah::Scrubber.new { |node| }
99
- doc = Loofah.xml_fragment(XML_FRAGMENT).scrub!(scrubber)
100
- assert_xml_fragmentish doc
101
- end
102
-
103
- it "scrubs document nodes" do
104
- doc = Loofah.xml_document(XML)
105
- assert(node = doc.at_css("div"))
106
- node.scrub!(:strip)
107
- end
108
-
109
- it "scrubs fragment nodes" do
110
- doc = Loofah.xml_fragment(XML)
111
- assert(node = doc.at_css("div"))
112
- node.scrub!(:strip)
113
- end
114
- end
115
-
116
- private
117
-
118
- def assert_html_documentish(doc)
119
- assert_kind_of Nokogiri::HTML::Document, doc
120
- assert_kind_of Loofah::HTML::Document, doc
121
- assert_equal HTML, doc.xpath("/html/body").inner_html
122
- end
123
-
124
- def assert_html_fragmentish(doc)
125
- assert_kind_of Nokogiri::HTML::DocumentFragment, doc
126
- assert_kind_of Loofah::HTML::DocumentFragment, doc
127
- assert_equal HTML, doc.inner_html
128
- end
129
-
130
- def assert_xml_documentish(doc)
131
- assert_kind_of Nokogiri::XML::Document, doc
132
- assert_kind_of Loofah::XML::Document, doc
133
- assert_equal XML, doc.root.to_xml
134
- end
135
-
136
- def assert_xml_fragmentish(doc)
137
- assert_kind_of Nokogiri::XML::DocumentFragment, doc
138
- assert_kind_of Loofah::XML::DocumentFragment, doc
139
- assert_equal XML_FRAGMENT, doc.children.to_xml
140
- end
141
-
142
- end
data/test/unit/test_encoding.rb (deleted)
@@ -1,20 +0,0 @@
1
- # :coding: utf-8
2
- require "helper"
3
-
4
- class UnitTestEncoding < Loofah::TestCase
5
- UTF8_STRING = "日本語"
6
-
7
- if String.new.respond_to?(:encoding)
8
- describe "scrub_fragment" do
9
- it "sets the encoding for html" do
10
- escaped = Loofah.scrub_fragment(UTF8_STRING, :escape).to_s
11
- assert_equal UTF8_STRING.encoding, escaped.encoding
12
- end
13
-
14
- it "sets the encoding for xml" do
15
- escaped = Loofah.scrub_xml_fragment(UTF8_STRING, :escape).to_s
16
- assert_equal UTF8_STRING.encoding, escaped.encoding
17
- end
18
- end
19
- end
20
- end