loofah 2.2.3 → 2.19.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +212 -31
- data/README.md +18 -24
- data/lib/loofah/elements.rb +79 -75
- data/lib/loofah/helpers.rb +18 -7
- data/lib/loofah/html/document.rb +1 -0
- data/lib/loofah/html/document_fragment.rb +4 -2
- data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
- data/lib/loofah/html5/safelist.rb +1043 -0
- data/lib/loofah/html5/scrub.rb +73 -48
- data/lib/loofah/instance_methods.rb +14 -8
- data/lib/loofah/metahelpers.rb +2 -1
- data/lib/loofah/scrubber.rb +8 -7
- data/lib/loofah/scrubbers.rb +19 -13
- data/lib/loofah/version.rb +5 -0
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +2 -1
- data/lib/loofah.rb +35 -18
- metadata +52 -138
- data/.gemtest +0 -0
- data/Gemfile +0 -22
- data/Manifest.txt +0 -40
- data/Rakefile +0 -79
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/lib/loofah/html5/whitelist.rb +0 -186
- data/test/assets/msword.html +0 -63
- data/test/assets/testdata_sanitizer_tests1.dat +0 -502
- data/test/helper.rb +0 -18
- data/test/html5/test_sanitizer.rb +0 -382
- data/test/integration/test_ad_hoc.rb +0 -204
- data/test/integration/test_helpers.rb +0 -43
- data/test/integration/test_html.rb +0 -72
- data/test/integration/test_scrubbers.rb +0 -400
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -142
- data/test/unit/test_encoding.rb +0 -20
- data/test/unit/test_helpers.rb +0 -62
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
@@ -1,72 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
class IntegrationTestHtml < Loofah::TestCase
|
4
|
-
context "html fragment" do
|
5
|
-
context "#to_s" do
|
6
|
-
it "not include head tags (like style)" do
|
7
|
-
skip "depends on nokogiri version"
|
8
|
-
html = Loofah.fragment "<style>foo</style><div>bar</div>"
|
9
|
-
assert_equal "<div>bar</div>", html.to_s
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
context "#text" do
|
14
|
-
it "not include head tags (like style)" do
|
15
|
-
skip "depends on nokogiri version"
|
16
|
-
html = Loofah.fragment "<style>foo</style><div>bar</div>"
|
17
|
-
assert_equal "bar", html.text
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
context "#to_text" do
|
22
|
-
it "add newlines before and after html4 block elements" do
|
23
|
-
html = Loofah.fragment "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
24
|
-
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
25
|
-
end
|
26
|
-
|
27
|
-
it "add newlines before and after html5 block elements" do
|
28
|
-
html = Loofah.fragment "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
29
|
-
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
30
|
-
end
|
31
|
-
|
32
|
-
it "remove extraneous whitespace" do
|
33
|
-
html = Loofah.fragment "<div>tweedle\n\n\t\n\s\nbeetle</div>"
|
34
|
-
assert_equal "\ntweedle\n\nbeetle\n", html.to_text
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
context 'with an `encoding` arg' do
|
39
|
-
it "sets the parent document's encoding to accordingly" do
|
40
|
-
html = Loofah.fragment "<style>foo</style><div>bar</div>", 'US-ASCII'
|
41
|
-
assert_equal 'US-ASCII', html.document.encoding
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
context "html document" do
|
47
|
-
context "#text" do
|
48
|
-
it "not include head tags (like style)" do
|
49
|
-
html = Loofah.document "<style>foo</style><div>bar</div>"
|
50
|
-
assert_equal "bar", html.text
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
context "#to_text" do
|
55
|
-
it "add newlines before and after html4 block elements" do
|
56
|
-
html = Loofah.document "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
57
|
-
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
58
|
-
end
|
59
|
-
|
60
|
-
it "add newlines before and after html5 block elements" do
|
61
|
-
html = Loofah.document "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
62
|
-
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
63
|
-
end
|
64
|
-
|
65
|
-
it "remove extraneous whitespace" do
|
66
|
-
html = Loofah.document "<div>tweedle\n\n\t\n\s\nbeetle</div>"
|
67
|
-
assert_equal "\ntweedle\n\nbeetle\n", html.to_text
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
@@ -1,400 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
class IntegrationTestScrubbers < Loofah::TestCase
|
4
|
-
|
5
|
-
INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
|
6
|
-
INVALID_ESCAPED = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
|
7
|
-
INVALID_PRUNED = "<div>quux</div>"
|
8
|
-
INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"
|
9
|
-
|
10
|
-
WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid><!--[if gts mso9]><div>microsofty stuff</div><![endif]-->"
|
11
|
-
WHITEWASH_RESULT = "<div>foo</div>"
|
12
|
-
|
13
|
-
NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
|
14
|
-
NOFOLLOW_RESULT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
|
15
|
-
|
16
|
-
NOFOLLOW_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
|
17
|
-
NOFOLLOW_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="noopener nofollow">Click here</a>'
|
18
|
-
|
19
|
-
NOOPENER_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
|
20
|
-
NOOPENER_RESULT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
|
21
|
-
|
22
|
-
NOOPENER_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
|
23
|
-
NOOPENER_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="nofollow noopener">Click here</a>'
|
24
|
-
|
25
|
-
UNPRINTABLE_FRAGMENT = "<b>Lo\u2029ofah ro\u2028cks!</b><script>x\u2028y</script>"
|
26
|
-
UNPRINTABLE_RESULT = "<b>Loofah rocks!</b><script>xy</script>"
|
27
|
-
|
28
|
-
ENTITY_FRAGMENT = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p><div>w00t</div>"
|
29
|
-
ENTITY_TEXT = %Q(this is < that "&" the other > boo\'yaw00t)
|
30
|
-
|
31
|
-
ENTITY_HACK_ATTACK = "<div><div>Hack attack!</div><div>&lt;script&gt;alert('evil')&lt;/script&gt;</div></div>"
|
32
|
-
ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!&lt;script&gt;alert('evil')&lt;/script&gt;"
|
33
|
-
ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!<script>alert('evil')</script>"
|
34
|
-
|
35
|
-
context "Document" do
|
36
|
-
context "#scrub!" do
|
37
|
-
context ":escape" do
|
38
|
-
it "escape bad tags" do
|
39
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
|
40
|
-
result = doc.scrub! :escape
|
41
|
-
|
42
|
-
assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
|
43
|
-
assert_equal doc, result
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
context ":prune" do
|
48
|
-
it "prune bad tags" do
|
49
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
|
50
|
-
result = doc.scrub! :prune
|
51
|
-
|
52
|
-
assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
|
53
|
-
assert_equal doc, result
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
context ":strip" do
|
58
|
-
it "strip bad tags" do
|
59
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
|
60
|
-
result = doc.scrub! :strip
|
61
|
-
|
62
|
-
assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
|
63
|
-
assert_equal doc, result
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
context ":whitewash" do
|
68
|
-
it "whitewash the markup" do
|
69
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
|
70
|
-
result = doc.scrub! :whitewash
|
71
|
-
|
72
|
-
assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
|
73
|
-
assert_equal doc, result
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
context ":nofollow" do
|
78
|
-
it "add a 'nofollow' attribute to hyperlinks" do
|
79
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{NOFOLLOW_FRAGMENT}</body></html>"
|
80
|
-
result = doc.scrub! :nofollow
|
81
|
-
|
82
|
-
assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
|
83
|
-
assert_equal doc, result
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
context ":unprintable" do
|
88
|
-
it "removes unprintable unicode characters" do
|
89
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{UNPRINTABLE_FRAGMENT}</body></html>"
|
90
|
-
result = doc.scrub! :unprintable
|
91
|
-
|
92
|
-
assert_equal UNPRINTABLE_RESULT, doc.xpath("/html/body").inner_html
|
93
|
-
assert_equal doc, result
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
context "#scrub_document" do
|
99
|
-
it "be a shortcut for parse-and-scrub" do
|
100
|
-
mock_doc = Object.new
|
101
|
-
mock(Loofah).document(:string_or_io) { mock_doc }
|
102
|
-
mock(mock_doc).scrub!(:method)
|
103
|
-
|
104
|
-
Loofah.scrub_document(:string_or_io, :method)
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
context "#text" do
|
109
|
-
it "leave behind only inner text with html entities still escaped" do
|
110
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
|
111
|
-
result = doc.text
|
112
|
-
|
113
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
|
114
|
-
end
|
115
|
-
|
116
|
-
context "with encode_special_chars => false" do
|
117
|
-
it "leave behind only inner text with html entities unescaped" do
|
118
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
|
119
|
-
result = doc.text(:encode_special_chars => false)
|
120
|
-
|
121
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
context "with encode_special_chars => true" do
|
126
|
-
it "leave behind only inner text with html entities still escaped" do
|
127
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
|
128
|
-
result = doc.text(:encode_special_chars => true)
|
129
|
-
|
130
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
context "#to_s" do
|
136
|
-
it "generate HTML" do
|
137
|
-
doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
|
138
|
-
refute_nil doc.xpath("/html").first
|
139
|
-
refute_nil doc.xpath("/html/head").first
|
140
|
-
refute_nil doc.xpath("/html/body").first
|
141
|
-
|
142
|
-
string = doc.to_s
|
143
|
-
assert_match %r/<!DOCTYPE/, string
|
144
|
-
assert_match %r/<html>/, string
|
145
|
-
assert_match %r/<head>/, string
|
146
|
-
assert_match %r/<body>/, string
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
context "#serialize" do
|
151
|
-
it "generate HTML" do
|
152
|
-
doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
|
153
|
-
refute_nil doc.xpath("/html").first
|
154
|
-
refute_nil doc.xpath("/html/head").first
|
155
|
-
refute_nil doc.xpath("/html/body").first
|
156
|
-
|
157
|
-
string = doc.serialize
|
158
|
-
assert_match %r/<!DOCTYPE/, string
|
159
|
-
assert_match %r/<html>/, string
|
160
|
-
assert_match %r/<head>/, string
|
161
|
-
assert_match %r/<body>/, string
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
context "Node" do
|
166
|
-
context "#scrub!" do
|
167
|
-
it "only scrub subtree" do
|
168
|
-
xml = Loofah.document <<-EOHTML
|
169
|
-
<html><body>
|
170
|
-
<div class='scrub'>
|
171
|
-
<script>I should be removed</script>
|
172
|
-
</div>
|
173
|
-
<div class='noscrub'>
|
174
|
-
<script>I should remain</script>
|
175
|
-
</div>
|
176
|
-
</body></html>
|
177
|
-
EOHTML
|
178
|
-
node = xml.at_css "div.scrub"
|
179
|
-
node.scrub!(:prune)
|
180
|
-
assert_match %r/I should remain/, xml.to_s
|
181
|
-
refute_match %r/I should be removed/, xml.to_s
|
182
|
-
end
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
context "NodeSet" do
|
187
|
-
context "#scrub!" do
|
188
|
-
it "only scrub subtrees" do
|
189
|
-
xml = Loofah.document <<-EOHTML
|
190
|
-
<html><body>
|
191
|
-
<div class='scrub'>
|
192
|
-
<script>I should be removed</script>
|
193
|
-
</div>
|
194
|
-
<div class='noscrub'>
|
195
|
-
<script>I should remain</script>
|
196
|
-
</div>
|
197
|
-
<div class='scrub'>
|
198
|
-
<script>I should also be removed</script>
|
199
|
-
</div>
|
200
|
-
</body></html>
|
201
|
-
EOHTML
|
202
|
-
node_set = xml.css "div.scrub"
|
203
|
-
assert_equal 2, node_set.length
|
204
|
-
node_set.scrub!(:prune)
|
205
|
-
assert_match %r/I should remain/, xml.to_s
|
206
|
-
refute_match %r/I should be removed/, xml.to_s
|
207
|
-
refute_match %r/I should also be removed/, xml.to_s
|
208
|
-
end
|
209
|
-
end
|
210
|
-
end
|
211
|
-
end
|
212
|
-
|
213
|
-
context "DocumentFragment" do
|
214
|
-
context "#scrub!" do
|
215
|
-
context ":escape" do
|
216
|
-
it "escape bad tags" do
|
217
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
|
218
|
-
result = doc.scrub! :escape
|
219
|
-
|
220
|
-
assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
|
221
|
-
assert_equal doc, result
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
context ":prune" do
|
226
|
-
it "prune bad tags" do
|
227
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
|
228
|
-
result = doc.scrub! :prune
|
229
|
-
|
230
|
-
assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
|
231
|
-
assert_equal doc, result
|
232
|
-
end
|
233
|
-
end
|
234
|
-
|
235
|
-
context ":strip" do
|
236
|
-
it "strip bad tags" do
|
237
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
|
238
|
-
result = doc.scrub! :strip
|
239
|
-
|
240
|
-
assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
|
241
|
-
assert_equal doc, result
|
242
|
-
end
|
243
|
-
end
|
244
|
-
|
245
|
-
context ":whitewash" do
|
246
|
-
it "whitewash the markup" do
|
247
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
|
248
|
-
result = doc.scrub! :whitewash
|
249
|
-
|
250
|
-
assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
|
251
|
-
assert_equal doc, result
|
252
|
-
end
|
253
|
-
end
|
254
|
-
|
255
|
-
context ":nofollow" do
|
256
|
-
|
257
|
-
context "for a hyperlink that does not have a rel attribute" do
|
258
|
-
it "add a 'nofollow' attribute to hyperlinks" do
|
259
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_FRAGMENT}</div>"
|
260
|
-
result = doc.scrub! :nofollow
|
261
|
-
|
262
|
-
assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
|
263
|
-
assert_equal doc, result
|
264
|
-
end
|
265
|
-
end
|
266
|
-
|
267
|
-
context "for a hyperlink that does have a rel attribute" do
|
268
|
-
it "appends nofollow to rel attribute" do
|
269
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_WITH_REL_FRAGMENT}</div>"
|
270
|
-
result = doc.scrub! :nofollow
|
271
|
-
|
272
|
-
assert_equal NOFOLLOW_WITH_REL_RESULT, doc.xpath("./div").inner_html
|
273
|
-
assert_equal doc, result
|
274
|
-
end
|
275
|
-
end
|
276
|
-
|
277
|
-
|
278
|
-
end
|
279
|
-
|
280
|
-
context ":noopener" do
|
281
|
-
context "for a hyperlink without a 'rel' attribute" do
|
282
|
-
it "add a 'noopener' attribute to hyperlinks" do
|
283
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_FRAGMENT}</div>"
|
284
|
-
result = doc.scrub! :noopener
|
285
|
-
|
286
|
-
assert_equal NOOPENER_RESULT, doc.xpath("./div").inner_html
|
287
|
-
assert_equal doc, result
|
288
|
-
end
|
289
|
-
end
|
290
|
-
|
291
|
-
context "for a hyperlink that does have a rel attribute" do
|
292
|
-
it "appends 'noopener' to 'rel' attribute" do
|
293
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_WITH_REL_FRAGMENT}</div>"
|
294
|
-
result = doc.scrub! :noopener
|
295
|
-
|
296
|
-
assert_equal NOOPENER_WITH_REL_RESULT, doc.xpath("./div").inner_html
|
297
|
-
assert_equal doc, result
|
298
|
-
end
|
299
|
-
end
|
300
|
-
end
|
301
|
-
|
302
|
-
context ":unprintable" do
|
303
|
-
it "removes unprintable unicode characters" do
|
304
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{UNPRINTABLE_FRAGMENT}</div>"
|
305
|
-
result = doc.scrub! :unprintable
|
306
|
-
|
307
|
-
assert_equal UNPRINTABLE_RESULT, doc.xpath("./div").inner_html
|
308
|
-
assert_equal doc, result
|
309
|
-
end
|
310
|
-
end
|
311
|
-
end
|
312
|
-
|
313
|
-
context "#scrub_fragment" do
|
314
|
-
it "be a shortcut for parse-and-scrub" do
|
315
|
-
mock_doc = Object.new
|
316
|
-
mock(Loofah).fragment(:string_or_io) { mock_doc }
|
317
|
-
mock(mock_doc).scrub!(:method)
|
318
|
-
|
319
|
-
Loofah.scrub_fragment(:string_or_io, :method)
|
320
|
-
end
|
321
|
-
end
|
322
|
-
|
323
|
-
context "#text" do
|
324
|
-
it "leave behind only inner text with html entities still escaped" do
|
325
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
|
326
|
-
result = doc.text
|
327
|
-
|
328
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
|
329
|
-
end
|
330
|
-
|
331
|
-
context "with encode_special_chars => false" do
|
332
|
-
it "leave behind only inner text with html entities unescaped" do
|
333
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
|
334
|
-
result = doc.text(:encode_special_chars => false)
|
335
|
-
|
336
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
|
337
|
-
end
|
338
|
-
end
|
339
|
-
|
340
|
-
context "with encode_special_chars => true" do
|
341
|
-
it "leave behind only inner text with html entities still escaped" do
|
342
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
|
343
|
-
result = doc.text(:encode_special_chars => true)
|
344
|
-
|
345
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
|
346
|
-
end
|
347
|
-
end
|
348
|
-
end
|
349
|
-
|
350
|
-
context "#to_s" do
|
351
|
-
it "not remove entities" do
|
352
|
-
string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s
|
353
|
-
assert_match %r/this is &lt;/, string
|
354
|
-
end
|
355
|
-
end
|
356
|
-
|
357
|
-
context "Node" do
|
358
|
-
context "#scrub!" do
|
359
|
-
it "only scrub subtree" do
|
360
|
-
xml = Loofah.fragment <<-EOHTML
|
361
|
-
<div class='scrub'>
|
362
|
-
<script>I should be removed</script>
|
363
|
-
</div>
|
364
|
-
<div class='noscrub'>
|
365
|
-
<script>I should remain</script>
|
366
|
-
</div>
|
367
|
-
EOHTML
|
368
|
-
node = xml.at_css "div.scrub"
|
369
|
-
node.scrub!(:prune)
|
370
|
-
assert_match %r(I should remain), xml.to_s
|
371
|
-
refute_match %r(I should be removed), xml.to_s
|
372
|
-
end
|
373
|
-
end
|
374
|
-
end
|
375
|
-
|
376
|
-
context "NodeSet" do
|
377
|
-
context "#scrub!" do
|
378
|
-
it "only scrub subtrees" do
|
379
|
-
xml = Loofah.fragment <<-EOHTML
|
380
|
-
<div class='scrub'>
|
381
|
-
<script>I should be removed</script>
|
382
|
-
</div>
|
383
|
-
<div class='noscrub'>
|
384
|
-
<script>I should remain</script>
|
385
|
-
</div>
|
386
|
-
<div class='scrub'>
|
387
|
-
<script>I should also be removed</script>
|
388
|
-
</div>
|
389
|
-
EOHTML
|
390
|
-
node_set = xml.css "div.scrub"
|
391
|
-
assert_equal 2, node_set.length
|
392
|
-
node_set.scrub!(:prune)
|
393
|
-
assert_match %r/I should remain/, xml.to_s
|
394
|
-
refute_match %r/I should be removed/, xml.to_s
|
395
|
-
refute_match %r/I should also be removed/, xml.to_s
|
396
|
-
end
|
397
|
-
end
|
398
|
-
end
|
399
|
-
end
|
400
|
-
end
|
@@ -1,55 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
class IntegrationTestXml < Loofah::TestCase
|
4
|
-
context "integration test" do
|
5
|
-
context "xml document" do
|
6
|
-
context "custom scrubber" do
|
7
|
-
it "act as expected" do
|
8
|
-
xml = Loofah.xml_document <<-EOXML
|
9
|
-
<root>
|
10
|
-
<employee deceased='true'>Abraham Lincoln</employee>
|
11
|
-
<employee deceased='false'>Abe Vigoda</employee>
|
12
|
-
</root>
|
13
|
-
EOXML
|
14
|
-
bring_out_your_dead = Loofah::Scrubber.new do |node|
|
15
|
-
if node.name == "employee" and node["deceased"] == "true"
|
16
|
-
node.remove
|
17
|
-
Loofah::Scrubber::STOP # don't bother with the rest of the subtree
|
18
|
-
end
|
19
|
-
end
|
20
|
-
assert_equal 2, xml.css("employee").length
|
21
|
-
|
22
|
-
xml.scrub!(bring_out_your_dead)
|
23
|
-
|
24
|
-
employees = xml.css "employee"
|
25
|
-
assert_equal 1, employees.length
|
26
|
-
assert_equal "Abe Vigoda", employees.first.inner_text
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
context "xml fragment" do
|
32
|
-
context "custom scrubber" do
|
33
|
-
it "act as expected" do
|
34
|
-
xml = Loofah.xml_fragment <<-EOXML
|
35
|
-
<employee deceased='true'>Abraham Lincoln</employee>
|
36
|
-
<employee deceased='false'>Abe Vigoda</employee>
|
37
|
-
EOXML
|
38
|
-
bring_out_your_dead = Loofah::Scrubber.new do |node|
|
39
|
-
if node.name == "employee" and node["deceased"] == "true"
|
40
|
-
node.remove
|
41
|
-
Loofah::Scrubber::STOP # don't bother with the rest of the subtree
|
42
|
-
end
|
43
|
-
end
|
44
|
-
assert_equal 2, xml.css("employee").length
|
45
|
-
|
46
|
-
xml.scrub!(bring_out_your_dead)
|
47
|
-
|
48
|
-
employees = xml.css "employee"
|
49
|
-
assert_equal 1, employees.length
|
50
|
-
assert_equal "Abe Vigoda", employees.first.inner_text
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
data/test/unit/test_api.rb
DELETED
@@ -1,142 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
class UnitTestApi < Loofah::TestCase
|
4
|
-
|
5
|
-
HTML = "<div>a</div>\n<div>b</div>"
|
6
|
-
XML_FRAGMENT = "<div>a</div>\n<div>b</div>"
|
7
|
-
XML = "<root>#{XML_FRAGMENT}</root>"
|
8
|
-
|
9
|
-
describe "HTML" do
|
10
|
-
it "creates documents" do
|
11
|
-
doc = Loofah.document(HTML)
|
12
|
-
assert_html_documentish doc
|
13
|
-
end
|
14
|
-
|
15
|
-
it "creates fragments" do
|
16
|
-
doc = Loofah.fragment(HTML)
|
17
|
-
assert_html_fragmentish doc
|
18
|
-
end
|
19
|
-
|
20
|
-
it "parses documents" do
|
21
|
-
doc = Loofah::HTML::Document.parse(HTML)
|
22
|
-
assert_html_documentish doc
|
23
|
-
end
|
24
|
-
|
25
|
-
it "parses document fragment" do
|
26
|
-
doc = Loofah::HTML::DocumentFragment.parse(HTML)
|
27
|
-
assert_html_fragmentish doc
|
28
|
-
end
|
29
|
-
|
30
|
-
it "scrubs documents" do
|
31
|
-
doc = Loofah.document(HTML).scrub!(:strip)
|
32
|
-
assert_html_documentish doc
|
33
|
-
end
|
34
|
-
|
35
|
-
it "scrubs fragments" do
|
36
|
-
doc = Loofah.fragment(HTML).scrub!(:strip)
|
37
|
-
assert_html_fragmentish doc
|
38
|
-
end
|
39
|
-
|
40
|
-
it "scrubs document nodes" do
|
41
|
-
doc = Loofah.document(HTML)
|
42
|
-
assert(node = doc.at_css("div"))
|
43
|
-
node.scrub!(:strip)
|
44
|
-
end
|
45
|
-
|
46
|
-
it "scrubs fragment nodes" do
|
47
|
-
doc = Loofah.fragment(HTML)
|
48
|
-
assert(node = doc.at_css("div"))
|
49
|
-
node.scrub!(:strip)
|
50
|
-
end
|
51
|
-
|
52
|
-
it "scrubs document nodesets" do
|
53
|
-
doc = Loofah.document(HTML)
|
54
|
-
assert(node_set = doc.css("div"))
|
55
|
-
assert_instance_of Nokogiri::XML::NodeSet, node_set
|
56
|
-
node_set.scrub!(:strip)
|
57
|
-
end
|
58
|
-
|
59
|
-
it "exposes serialize_root on HTML::DocumentFragment" do
|
60
|
-
doc = Loofah.fragment(HTML)
|
61
|
-
assert_equal HTML, doc.serialize_root.to_html
|
62
|
-
end
|
63
|
-
|
64
|
-
it "exposes serialize_root on HTML::Document" do
|
65
|
-
doc = Loofah.document(HTML)
|
66
|
-
assert_equal HTML, doc.serialize_root.children.to_html
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
describe "XML" do
|
71
|
-
it "creates documents" do
|
72
|
-
doc = Loofah.xml_document(XML)
|
73
|
-
assert_xml_documentish doc
|
74
|
-
end
|
75
|
-
|
76
|
-
it "creates fragments" do
|
77
|
-
doc = Loofah.xml_fragment(XML_FRAGMENT)
|
78
|
-
assert_xml_fragmentish doc
|
79
|
-
end
|
80
|
-
|
81
|
-
it "parses documents" do
|
82
|
-
doc = Loofah::XML::Document.parse(XML)
|
83
|
-
assert_xml_documentish doc
|
84
|
-
end
|
85
|
-
|
86
|
-
it "parses document fragments" do
|
87
|
-
doc = Loofah::XML::DocumentFragment.parse(XML_FRAGMENT)
|
88
|
-
assert_xml_fragmentish doc
|
89
|
-
end
|
90
|
-
|
91
|
-
it "scrubs documents" do
|
92
|
-
scrubber = Loofah::Scrubber.new { |node| }
|
93
|
-
doc = Loofah.xml_document(XML).scrub!(scrubber)
|
94
|
-
assert_xml_documentish doc
|
95
|
-
end
|
96
|
-
|
97
|
-
it "scrubs fragments" do
|
98
|
-
scrubber = Loofah::Scrubber.new { |node| }
|
99
|
-
doc = Loofah.xml_fragment(XML_FRAGMENT).scrub!(scrubber)
|
100
|
-
assert_xml_fragmentish doc
|
101
|
-
end
|
102
|
-
|
103
|
-
it "scrubs document nodes" do
|
104
|
-
doc = Loofah.xml_document(XML)
|
105
|
-
assert(node = doc.at_css("div"))
|
106
|
-
node.scrub!(:strip)
|
107
|
-
end
|
108
|
-
|
109
|
-
it "scrubs fragment nodes" do
|
110
|
-
doc = Loofah.xml_fragment(XML)
|
111
|
-
assert(node = doc.at_css("div"))
|
112
|
-
node.scrub!(:strip)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
private
|
117
|
-
|
118
|
-
def assert_html_documentish(doc)
|
119
|
-
assert_kind_of Nokogiri::HTML::Document, doc
|
120
|
-
assert_kind_of Loofah::HTML::Document, doc
|
121
|
-
assert_equal HTML, doc.xpath("/html/body").inner_html
|
122
|
-
end
|
123
|
-
|
124
|
-
def assert_html_fragmentish(doc)
|
125
|
-
assert_kind_of Nokogiri::HTML::DocumentFragment, doc
|
126
|
-
assert_kind_of Loofah::HTML::DocumentFragment, doc
|
127
|
-
assert_equal HTML, doc.inner_html
|
128
|
-
end
|
129
|
-
|
130
|
-
def assert_xml_documentish(doc)
|
131
|
-
assert_kind_of Nokogiri::XML::Document, doc
|
132
|
-
assert_kind_of Loofah::XML::Document, doc
|
133
|
-
assert_equal XML, doc.root.to_xml
|
134
|
-
end
|
135
|
-
|
136
|
-
def assert_xml_fragmentish(doc)
|
137
|
-
assert_kind_of Nokogiri::XML::DocumentFragment, doc
|
138
|
-
assert_kind_of Loofah::XML::DocumentFragment, doc
|
139
|
-
assert_equal XML_FRAGMENT, doc.children.to_xml
|
140
|
-
end
|
141
|
-
|
142
|
-
end
|
data/test/unit/test_encoding.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
# :coding: utf-8
|
2
|
-
require "helper"
|
3
|
-
|
4
|
-
class UnitTestEncoding < Loofah::TestCase
|
5
|
-
UTF8_STRING = "日本語"
|
6
|
-
|
7
|
-
if String.new.respond_to?(:encoding)
|
8
|
-
describe "scrub_fragment" do
|
9
|
-
it "sets the encoding for html" do
|
10
|
-
escaped = Loofah.scrub_fragment(UTF8_STRING, :escape).to_s
|
11
|
-
assert_equal UTF8_STRING.encoding, escaped.encoding
|
12
|
-
end
|
13
|
-
|
14
|
-
it "sets the encoding for xml" do
|
15
|
-
escaped = Loofah.scrub_xml_fragment(UTF8_STRING, :escape).to_s
|
16
|
-
assert_equal UTF8_STRING.encoding, escaped.encoding
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|