loofah 2.2.3 → 2.21.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +269 -31
- data/README.md +109 -124
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +85 -79
- data/lib/loofah/helpers.rb +37 -20
- data/lib/loofah/{html → html4}/document.rb +6 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
- data/lib/loofah/html5/safelist.rb +1055 -0
- data/lib/loofah/html5/scrub.rb +153 -58
- data/lib/loofah/metahelpers.rb +11 -6
- data/lib/loofah/scrubber.rb +22 -15
- data/lib/loofah/scrubbers.rb +66 -55
- data/lib/loofah/version.rb +6 -0
- data/lib/loofah/xml/document.rb +2 -0
- data/lib/loofah/xml/document_fragment.rb +4 -7
- data/lib/loofah.rb +131 -38
- metadata +28 -216
- data/.gemtest +0 -0
- data/Gemfile +0 -22
- data/Manifest.txt +0 -40
- data/Rakefile +0 -79
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/lib/loofah/html/document_fragment.rb +0 -40
- data/lib/loofah/html5/whitelist.rb +0 -186
- data/lib/loofah/instance_methods.rb +0 -127
- data/test/assets/msword.html +0 -63
- data/test/assets/testdata_sanitizer_tests1.dat +0 -502
- data/test/helper.rb +0 -18
- data/test/html5/test_sanitizer.rb +0 -382
- data/test/integration/test_ad_hoc.rb +0 -204
- data/test/integration/test_helpers.rb +0 -43
- data/test/integration/test_html.rb +0 -72
- data/test/integration/test_scrubbers.rb +0 -400
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -142
- data/test/unit/test_encoding.rb +0 -20
- data/test/unit/test_helpers.rb +0 -62
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
data/test/integration/test_html.rb
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
class IntegrationTestHtml < Loofah::TestCase
|
4
|
-
context "html fragment" do
|
5
|
-
context "#to_s" do
|
6
|
-
it "not include head tags (like style)" do
|
7
|
-
skip "depends on nokogiri version"
|
8
|
-
html = Loofah.fragment "<style>foo</style><div>bar</div>"
|
9
|
-
assert_equal "<div>bar</div>", html.to_s
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
context "#text" do
|
14
|
-
it "not include head tags (like style)" do
|
15
|
-
skip "depends on nokogiri version"
|
16
|
-
html = Loofah.fragment "<style>foo</style><div>bar</div>"
|
17
|
-
assert_equal "bar", html.text
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
context "#to_text" do
|
22
|
-
it "add newlines before and after html4 block elements" do
|
23
|
-
html = Loofah.fragment "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
24
|
-
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
25
|
-
end
|
26
|
-
|
27
|
-
it "add newlines before and after html5 block elements" do
|
28
|
-
html = Loofah.fragment "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
29
|
-
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
30
|
-
end
|
31
|
-
|
32
|
-
it "remove extraneous whitespace" do
|
33
|
-
html = Loofah.fragment "<div>tweedle\n\n\t\n\s\nbeetle</div>"
|
34
|
-
assert_equal "\ntweedle\n\nbeetle\n", html.to_text
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
context 'with an `encoding` arg' do
|
39
|
-
it "sets the parent document's encoding to accordingly" do
|
40
|
-
html = Loofah.fragment "<style>foo</style><div>bar</div>", 'US-ASCII'
|
41
|
-
assert_equal 'US-ASCII', html.document.encoding
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
context "html document" do
|
47
|
-
context "#text" do
|
48
|
-
it "not include head tags (like style)" do
|
49
|
-
html = Loofah.document "<style>foo</style><div>bar</div>"
|
50
|
-
assert_equal "bar", html.text
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
context "#to_text" do
|
55
|
-
it "add newlines before and after html4 block elements" do
|
56
|
-
html = Loofah.document "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
57
|
-
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
58
|
-
end
|
59
|
-
|
60
|
-
it "add newlines before and after html5 block elements" do
|
61
|
-
html = Loofah.document "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
|
62
|
-
assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
|
63
|
-
end
|
64
|
-
|
65
|
-
it "remove extraneous whitespace" do
|
66
|
-
html = Loofah.document "<div>tweedle\n\n\t\n\s\nbeetle</div>"
|
67
|
-
assert_equal "\ntweedle\n\nbeetle\n", html.to_text
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
data/test/integration/test_scrubbers.rb
DELETED
@@ -1,400 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
class IntegrationTestScrubbers < Loofah::TestCase
|
4
|
-
|
5
|
-
INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
|
6
|
-
INVALID_ESCAPED = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
|
7
|
-
INVALID_PRUNED = "<div>quux</div>"
|
8
|
-
INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"
|
9
|
-
|
10
|
-
WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid><!--[if gts mso9]><div>microsofty stuff</div><![endif]-->"
|
11
|
-
WHITEWASH_RESULT = "<div>foo</div>"
|
12
|
-
|
13
|
-
NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
|
14
|
-
NOFOLLOW_RESULT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
|
15
|
-
|
16
|
-
NOFOLLOW_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
|
17
|
-
NOFOLLOW_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="noopener nofollow">Click here</a>'
|
18
|
-
|
19
|
-
NOOPENER_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
|
20
|
-
NOOPENER_RESULT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
|
21
|
-
|
22
|
-
NOOPENER_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
|
23
|
-
NOOPENER_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="nofollow noopener">Click here</a>'
|
24
|
-
|
25
|
-
UNPRINTABLE_FRAGMENT = "<b>Lo\u2029ofah ro\u2028cks!</b><script>x\u2028y</script>"
|
26
|
-
UNPRINTABLE_RESULT = "<b>Loofah rocks!</b><script>xy</script>"
|
27
|
-
|
28
|
-
ENTITY_FRAGMENT = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p><div>w00t</div>"
|
29
|
-
ENTITY_TEXT = %Q(this is < that "&" the other > boo\'yaw00t)
|
30
|
-
|
31
|
-
ENTITY_HACK_ATTACK = "<div><div>Hack attack!</div><div>&lt;script&gt;alert('evil')&lt;/script&gt;</div></div>"
|
32
|
-
ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!&lt;script&gt;alert('evil')&lt;/script&gt;"
|
33
|
-
ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!<script>alert('evil')</script>"
|
34
|
-
|
35
|
-
context "Document" do
|
36
|
-
context "#scrub!" do
|
37
|
-
context ":escape" do
|
38
|
-
it "escape bad tags" do
|
39
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
|
40
|
-
result = doc.scrub! :escape
|
41
|
-
|
42
|
-
assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
|
43
|
-
assert_equal doc, result
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
context ":prune" do
|
48
|
-
it "prune bad tags" do
|
49
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
|
50
|
-
result = doc.scrub! :prune
|
51
|
-
|
52
|
-
assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
|
53
|
-
assert_equal doc, result
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
context ":strip" do
|
58
|
-
it "strip bad tags" do
|
59
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
|
60
|
-
result = doc.scrub! :strip
|
61
|
-
|
62
|
-
assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
|
63
|
-
assert_equal doc, result
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
context ":whitewash" do
|
68
|
-
it "whitewash the markup" do
|
69
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
|
70
|
-
result = doc.scrub! :whitewash
|
71
|
-
|
72
|
-
assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
|
73
|
-
assert_equal doc, result
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
context ":nofollow" do
|
78
|
-
it "add a 'nofollow' attribute to hyperlinks" do
|
79
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{NOFOLLOW_FRAGMENT}</body></html>"
|
80
|
-
result = doc.scrub! :nofollow
|
81
|
-
|
82
|
-
assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
|
83
|
-
assert_equal doc, result
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
context ":unprintable" do
|
88
|
-
it "removes unprintable unicode characters" do
|
89
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{UNPRINTABLE_FRAGMENT}</body></html>"
|
90
|
-
result = doc.scrub! :unprintable
|
91
|
-
|
92
|
-
assert_equal UNPRINTABLE_RESULT, doc.xpath("/html/body").inner_html
|
93
|
-
assert_equal doc, result
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
context "#scrub_document" do
|
99
|
-
it "be a shortcut for parse-and-scrub" do
|
100
|
-
mock_doc = Object.new
|
101
|
-
mock(Loofah).document(:string_or_io) { mock_doc }
|
102
|
-
mock(mock_doc).scrub!(:method)
|
103
|
-
|
104
|
-
Loofah.scrub_document(:string_or_io, :method)
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
context "#text" do
|
109
|
-
it "leave behind only inner text with html entities still escaped" do
|
110
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
|
111
|
-
result = doc.text
|
112
|
-
|
113
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
|
114
|
-
end
|
115
|
-
|
116
|
-
context "with encode_special_chars => false" do
|
117
|
-
it "leave behind only inner text with html entities unescaped" do
|
118
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
|
119
|
-
result = doc.text(:encode_special_chars => false)
|
120
|
-
|
121
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
context "with encode_special_chars => true" do
|
126
|
-
it "leave behind only inner text with html entities still escaped" do
|
127
|
-
doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
|
128
|
-
result = doc.text(:encode_special_chars => true)
|
129
|
-
|
130
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
context "#to_s" do
|
136
|
-
it "generate HTML" do
|
137
|
-
doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
|
138
|
-
refute_nil doc.xpath("/html").first
|
139
|
-
refute_nil doc.xpath("/html/head").first
|
140
|
-
refute_nil doc.xpath("/html/body").first
|
141
|
-
|
142
|
-
string = doc.to_s
|
143
|
-
assert_match %r/<!DOCTYPE/, string
|
144
|
-
assert_match %r/<html>/, string
|
145
|
-
assert_match %r/<head>/, string
|
146
|
-
assert_match %r/<body>/, string
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
context "#serialize" do
|
151
|
-
it "generate HTML" do
|
152
|
-
doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
|
153
|
-
refute_nil doc.xpath("/html").first
|
154
|
-
refute_nil doc.xpath("/html/head").first
|
155
|
-
refute_nil doc.xpath("/html/body").first
|
156
|
-
|
157
|
-
string = doc.serialize
|
158
|
-
assert_match %r/<!DOCTYPE/, string
|
159
|
-
assert_match %r/<html>/, string
|
160
|
-
assert_match %r/<head>/, string
|
161
|
-
assert_match %r/<body>/, string
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
context "Node" do
|
166
|
-
context "#scrub!" do
|
167
|
-
it "only scrub subtree" do
|
168
|
-
xml = Loofah.document <<-EOHTML
|
169
|
-
<html><body>
|
170
|
-
<div class='scrub'>
|
171
|
-
<script>I should be removed</script>
|
172
|
-
</div>
|
173
|
-
<div class='noscrub'>
|
174
|
-
<script>I should remain</script>
|
175
|
-
</div>
|
176
|
-
</body></html>
|
177
|
-
EOHTML
|
178
|
-
node = xml.at_css "div.scrub"
|
179
|
-
node.scrub!(:prune)
|
180
|
-
assert_match %r/I should remain/, xml.to_s
|
181
|
-
refute_match %r/I should be removed/, xml.to_s
|
182
|
-
end
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
context "NodeSet" do
|
187
|
-
context "#scrub!" do
|
188
|
-
it "only scrub subtrees" do
|
189
|
-
xml = Loofah.document <<-EOHTML
|
190
|
-
<html><body>
|
191
|
-
<div class='scrub'>
|
192
|
-
<script>I should be removed</script>
|
193
|
-
</div>
|
194
|
-
<div class='noscrub'>
|
195
|
-
<script>I should remain</script>
|
196
|
-
</div>
|
197
|
-
<div class='scrub'>
|
198
|
-
<script>I should also be removed</script>
|
199
|
-
</div>
|
200
|
-
</body></html>
|
201
|
-
EOHTML
|
202
|
-
node_set = xml.css "div.scrub"
|
203
|
-
assert_equal 2, node_set.length
|
204
|
-
node_set.scrub!(:prune)
|
205
|
-
assert_match %r/I should remain/, xml.to_s
|
206
|
-
refute_match %r/I should be removed/, xml.to_s
|
207
|
-
refute_match %r/I should also be removed/, xml.to_s
|
208
|
-
end
|
209
|
-
end
|
210
|
-
end
|
211
|
-
end
|
212
|
-
|
213
|
-
context "DocumentFragment" do
|
214
|
-
context "#scrub!" do
|
215
|
-
context ":escape" do
|
216
|
-
it "escape bad tags" do
|
217
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
|
218
|
-
result = doc.scrub! :escape
|
219
|
-
|
220
|
-
assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
|
221
|
-
assert_equal doc, result
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
context ":prune" do
|
226
|
-
it "prune bad tags" do
|
227
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
|
228
|
-
result = doc.scrub! :prune
|
229
|
-
|
230
|
-
assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
|
231
|
-
assert_equal doc, result
|
232
|
-
end
|
233
|
-
end
|
234
|
-
|
235
|
-
context ":strip" do
|
236
|
-
it "strip bad tags" do
|
237
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
|
238
|
-
result = doc.scrub! :strip
|
239
|
-
|
240
|
-
assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
|
241
|
-
assert_equal doc, result
|
242
|
-
end
|
243
|
-
end
|
244
|
-
|
245
|
-
context ":whitewash" do
|
246
|
-
it "whitewash the markup" do
|
247
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
|
248
|
-
result = doc.scrub! :whitewash
|
249
|
-
|
250
|
-
assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
|
251
|
-
assert_equal doc, result
|
252
|
-
end
|
253
|
-
end
|
254
|
-
|
255
|
-
context ":nofollow" do
|
256
|
-
|
257
|
-
context "for a hyperlink that does not have a rel attribute" do
|
258
|
-
it "add a 'nofollow' attribute to hyperlinks" do
|
259
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_FRAGMENT}</div>"
|
260
|
-
result = doc.scrub! :nofollow
|
261
|
-
|
262
|
-
assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
|
263
|
-
assert_equal doc, result
|
264
|
-
end
|
265
|
-
end
|
266
|
-
|
267
|
-
context "for a hyperlink that does have a rel attribute" do
|
268
|
-
it "appends nofollow to rel attribute" do
|
269
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_WITH_REL_FRAGMENT}</div>"
|
270
|
-
result = doc.scrub! :nofollow
|
271
|
-
|
272
|
-
assert_equal NOFOLLOW_WITH_REL_RESULT, doc.xpath("./div").inner_html
|
273
|
-
assert_equal doc, result
|
274
|
-
end
|
275
|
-
end
|
276
|
-
|
277
|
-
|
278
|
-
end
|
279
|
-
|
280
|
-
context ":noopener" do
|
281
|
-
context "for a hyperlink without a 'rel' attribute" do
|
282
|
-
it "add a 'noopener' attribute to hyperlinks" do
|
283
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_FRAGMENT}</div>"
|
284
|
-
result = doc.scrub! :noopener
|
285
|
-
|
286
|
-
assert_equal NOOPENER_RESULT, doc.xpath("./div").inner_html
|
287
|
-
assert_equal doc, result
|
288
|
-
end
|
289
|
-
end
|
290
|
-
|
291
|
-
context "for a hyperlink that does have a rel attribute" do
|
292
|
-
it "appends 'noopener' to 'rel' attribute" do
|
293
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_WITH_REL_FRAGMENT}</div>"
|
294
|
-
result = doc.scrub! :noopener
|
295
|
-
|
296
|
-
assert_equal NOOPENER_WITH_REL_RESULT, doc.xpath("./div").inner_html
|
297
|
-
assert_equal doc, result
|
298
|
-
end
|
299
|
-
end
|
300
|
-
end
|
301
|
-
|
302
|
-
context ":unprintable" do
|
303
|
-
it "removes unprintable unicode characters" do
|
304
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{UNPRINTABLE_FRAGMENT}</div>"
|
305
|
-
result = doc.scrub! :unprintable
|
306
|
-
|
307
|
-
assert_equal UNPRINTABLE_RESULT, doc.xpath("./div").inner_html
|
308
|
-
assert_equal doc, result
|
309
|
-
end
|
310
|
-
end
|
311
|
-
end
|
312
|
-
|
313
|
-
context "#scrub_fragment" do
|
314
|
-
it "be a shortcut for parse-and-scrub" do
|
315
|
-
mock_doc = Object.new
|
316
|
-
mock(Loofah).fragment(:string_or_io) { mock_doc }
|
317
|
-
mock(mock_doc).scrub!(:method)
|
318
|
-
|
319
|
-
Loofah.scrub_fragment(:string_or_io, :method)
|
320
|
-
end
|
321
|
-
end
|
322
|
-
|
323
|
-
context "#text" do
|
324
|
-
it "leave behind only inner text with html entities still escaped" do
|
325
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
|
326
|
-
result = doc.text
|
327
|
-
|
328
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
|
329
|
-
end
|
330
|
-
|
331
|
-
context "with encode_special_chars => false" do
|
332
|
-
it "leave behind only inner text with html entities unescaped" do
|
333
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
|
334
|
-
result = doc.text(:encode_special_chars => false)
|
335
|
-
|
336
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
|
337
|
-
end
|
338
|
-
end
|
339
|
-
|
340
|
-
context "with encode_special_chars => true" do
|
341
|
-
it "leave behind only inner text with html entities still escaped" do
|
342
|
-
doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
|
343
|
-
result = doc.text(:encode_special_chars => true)
|
344
|
-
|
345
|
-
assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
|
346
|
-
end
|
347
|
-
end
|
348
|
-
end
|
349
|
-
|
350
|
-
context "#to_s" do
|
351
|
-
it "not remove entities" do
|
352
|
-
string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s
|
353
|
-
assert_match %r/this is &lt;/, string
|
354
|
-
end
|
355
|
-
end
|
356
|
-
|
357
|
-
context "Node" do
|
358
|
-
context "#scrub!" do
|
359
|
-
it "only scrub subtree" do
|
360
|
-
xml = Loofah.fragment <<-EOHTML
|
361
|
-
<div class='scrub'>
|
362
|
-
<script>I should be removed</script>
|
363
|
-
</div>
|
364
|
-
<div class='noscrub'>
|
365
|
-
<script>I should remain</script>
|
366
|
-
</div>
|
367
|
-
EOHTML
|
368
|
-
node = xml.at_css "div.scrub"
|
369
|
-
node.scrub!(:prune)
|
370
|
-
assert_match %r(I should remain), xml.to_s
|
371
|
-
refute_match %r(I should be removed), xml.to_s
|
372
|
-
end
|
373
|
-
end
|
374
|
-
end
|
375
|
-
|
376
|
-
context "NodeSet" do
|
377
|
-
context "#scrub!" do
|
378
|
-
it "only scrub subtrees" do
|
379
|
-
xml = Loofah.fragment <<-EOHTML
|
380
|
-
<div class='scrub'>
|
381
|
-
<script>I should be removed</script>
|
382
|
-
</div>
|
383
|
-
<div class='noscrub'>
|
384
|
-
<script>I should remain</script>
|
385
|
-
</div>
|
386
|
-
<div class='scrub'>
|
387
|
-
<script>I should also be removed</script>
|
388
|
-
</div>
|
389
|
-
EOHTML
|
390
|
-
node_set = xml.css "div.scrub"
|
391
|
-
assert_equal 2, node_set.length
|
392
|
-
node_set.scrub!(:prune)
|
393
|
-
assert_match %r/I should remain/, xml.to_s
|
394
|
-
refute_match %r/I should be removed/, xml.to_s
|
395
|
-
refute_match %r/I should also be removed/, xml.to_s
|
396
|
-
end
|
397
|
-
end
|
398
|
-
end
|
399
|
-
end
|
400
|
-
end
|
data/test/integration/test_xml.rb
DELETED
@@ -1,55 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
class IntegrationTestXml < Loofah::TestCase
|
4
|
-
context "integration test" do
|
5
|
-
context "xml document" do
|
6
|
-
context "custom scrubber" do
|
7
|
-
it "act as expected" do
|
8
|
-
xml = Loofah.xml_document <<-EOXML
|
9
|
-
<root>
|
10
|
-
<employee deceased='true'>Abraham Lincoln</employee>
|
11
|
-
<employee deceased='false'>Abe Vigoda</employee>
|
12
|
-
</root>
|
13
|
-
EOXML
|
14
|
-
bring_out_your_dead = Loofah::Scrubber.new do |node|
|
15
|
-
if node.name == "employee" and node["deceased"] == "true"
|
16
|
-
node.remove
|
17
|
-
Loofah::Scrubber::STOP # don't bother with the rest of the subtree
|
18
|
-
end
|
19
|
-
end
|
20
|
-
assert_equal 2, xml.css("employee").length
|
21
|
-
|
22
|
-
xml.scrub!(bring_out_your_dead)
|
23
|
-
|
24
|
-
employees = xml.css "employee"
|
25
|
-
assert_equal 1, employees.length
|
26
|
-
assert_equal "Abe Vigoda", employees.first.inner_text
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
context "xml fragment" do
|
32
|
-
context "custom scrubber" do
|
33
|
-
it "act as expected" do
|
34
|
-
xml = Loofah.xml_fragment <<-EOXML
|
35
|
-
<employee deceased='true'>Abraham Lincoln</employee>
|
36
|
-
<employee deceased='false'>Abe Vigoda</employee>
|
37
|
-
EOXML
|
38
|
-
bring_out_your_dead = Loofah::Scrubber.new do |node|
|
39
|
-
if node.name == "employee" and node["deceased"] == "true"
|
40
|
-
node.remove
|
41
|
-
Loofah::Scrubber::STOP # don't bother with the rest of the subtree
|
42
|
-
end
|
43
|
-
end
|
44
|
-
assert_equal 2, xml.css("employee").length
|
45
|
-
|
46
|
-
xml.scrub!(bring_out_your_dead)
|
47
|
-
|
48
|
-
employees = xml.css "employee"
|
49
|
-
assert_equal 1, employees.length
|
50
|
-
assert_equal "Abe Vigoda", employees.first.inner_text
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
data/test/unit/test_api.rb
DELETED
@@ -1,142 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
class UnitTestApi < Loofah::TestCase
|
4
|
-
|
5
|
-
HTML = "<div>a</div>\n<div>b</div>"
|
6
|
-
XML_FRAGMENT = "<div>a</div>\n<div>b</div>"
|
7
|
-
XML = "<root>#{XML_FRAGMENT}</root>"
|
8
|
-
|
9
|
-
describe "HTML" do
|
10
|
-
it "creates documents" do
|
11
|
-
doc = Loofah.document(HTML)
|
12
|
-
assert_html_documentish doc
|
13
|
-
end
|
14
|
-
|
15
|
-
it "creates fragments" do
|
16
|
-
doc = Loofah.fragment(HTML)
|
17
|
-
assert_html_fragmentish doc
|
18
|
-
end
|
19
|
-
|
20
|
-
it "parses documents" do
|
21
|
-
doc = Loofah::HTML::Document.parse(HTML)
|
22
|
-
assert_html_documentish doc
|
23
|
-
end
|
24
|
-
|
25
|
-
it "parses document fragment" do
|
26
|
-
doc = Loofah::HTML::DocumentFragment.parse(HTML)
|
27
|
-
assert_html_fragmentish doc
|
28
|
-
end
|
29
|
-
|
30
|
-
it "scrubs documents" do
|
31
|
-
doc = Loofah.document(HTML).scrub!(:strip)
|
32
|
-
assert_html_documentish doc
|
33
|
-
end
|
34
|
-
|
35
|
-
it "scrubs fragments" do
|
36
|
-
doc = Loofah.fragment(HTML).scrub!(:strip)
|
37
|
-
assert_html_fragmentish doc
|
38
|
-
end
|
39
|
-
|
40
|
-
it "scrubs document nodes" do
|
41
|
-
doc = Loofah.document(HTML)
|
42
|
-
assert(node = doc.at_css("div"))
|
43
|
-
node.scrub!(:strip)
|
44
|
-
end
|
45
|
-
|
46
|
-
it "scrubs fragment nodes" do
|
47
|
-
doc = Loofah.fragment(HTML)
|
48
|
-
assert(node = doc.at_css("div"))
|
49
|
-
node.scrub!(:strip)
|
50
|
-
end
|
51
|
-
|
52
|
-
it "scrubs document nodesets" do
|
53
|
-
doc = Loofah.document(HTML)
|
54
|
-
assert(node_set = doc.css("div"))
|
55
|
-
assert_instance_of Nokogiri::XML::NodeSet, node_set
|
56
|
-
node_set.scrub!(:strip)
|
57
|
-
end
|
58
|
-
|
59
|
-
it "exposes serialize_root on HTML::DocumentFragment" do
|
60
|
-
doc = Loofah.fragment(HTML)
|
61
|
-
assert_equal HTML, doc.serialize_root.to_html
|
62
|
-
end
|
63
|
-
|
64
|
-
it "exposes serialize_root on HTML::Document" do
|
65
|
-
doc = Loofah.document(HTML)
|
66
|
-
assert_equal HTML, doc.serialize_root.children.to_html
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
describe "XML" do
|
71
|
-
it "creates documents" do
|
72
|
-
doc = Loofah.xml_document(XML)
|
73
|
-
assert_xml_documentish doc
|
74
|
-
end
|
75
|
-
|
76
|
-
it "creates fragments" do
|
77
|
-
doc = Loofah.xml_fragment(XML_FRAGMENT)
|
78
|
-
assert_xml_fragmentish doc
|
79
|
-
end
|
80
|
-
|
81
|
-
it "parses documents" do
|
82
|
-
doc = Loofah::XML::Document.parse(XML)
|
83
|
-
assert_xml_documentish doc
|
84
|
-
end
|
85
|
-
|
86
|
-
it "parses document fragments" do
|
87
|
-
doc = Loofah::XML::DocumentFragment.parse(XML_FRAGMENT)
|
88
|
-
assert_xml_fragmentish doc
|
89
|
-
end
|
90
|
-
|
91
|
-
it "scrubs documents" do
|
92
|
-
scrubber = Loofah::Scrubber.new { |node| }
|
93
|
-
doc = Loofah.xml_document(XML).scrub!(scrubber)
|
94
|
-
assert_xml_documentish doc
|
95
|
-
end
|
96
|
-
|
97
|
-
it "scrubs fragments" do
|
98
|
-
scrubber = Loofah::Scrubber.new { |node| }
|
99
|
-
doc = Loofah.xml_fragment(XML_FRAGMENT).scrub!(scrubber)
|
100
|
-
assert_xml_fragmentish doc
|
101
|
-
end
|
102
|
-
|
103
|
-
it "scrubs document nodes" do
|
104
|
-
doc = Loofah.xml_document(XML)
|
105
|
-
assert(node = doc.at_css("div"))
|
106
|
-
node.scrub!(:strip)
|
107
|
-
end
|
108
|
-
|
109
|
-
it "scrubs fragment nodes" do
|
110
|
-
doc = Loofah.xml_fragment(XML)
|
111
|
-
assert(node = doc.at_css("div"))
|
112
|
-
node.scrub!(:strip)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
private
|
117
|
-
|
118
|
-
def assert_html_documentish(doc)
|
119
|
-
assert_kind_of Nokogiri::HTML::Document, doc
|
120
|
-
assert_kind_of Loofah::HTML::Document, doc
|
121
|
-
assert_equal HTML, doc.xpath("/html/body").inner_html
|
122
|
-
end
|
123
|
-
|
124
|
-
def assert_html_fragmentish(doc)
|
125
|
-
assert_kind_of Nokogiri::HTML::DocumentFragment, doc
|
126
|
-
assert_kind_of Loofah::HTML::DocumentFragment, doc
|
127
|
-
assert_equal HTML, doc.inner_html
|
128
|
-
end
|
129
|
-
|
130
|
-
def assert_xml_documentish(doc)
|
131
|
-
assert_kind_of Nokogiri::XML::Document, doc
|
132
|
-
assert_kind_of Loofah::XML::Document, doc
|
133
|
-
assert_equal XML, doc.root.to_xml
|
134
|
-
end
|
135
|
-
|
136
|
-
def assert_xml_fragmentish(doc)
|
137
|
-
assert_kind_of Nokogiri::XML::DocumentFragment, doc
|
138
|
-
assert_kind_of Loofah::XML::DocumentFragment, doc
|
139
|
-
assert_equal XML_FRAGMENT, doc.children.to_xml
|
140
|
-
end
|
141
|
-
|
142
|
-
end
|
data/test/unit/test_encoding.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
# :coding: utf-8
|
2
|
-
require "helper"
|
3
|
-
|
4
|
-
class UnitTestEncoding < Loofah::TestCase
|
5
|
-
UTF8_STRING = "日本語"
|
6
|
-
|
7
|
-
if String.new.respond_to?(:encoding)
|
8
|
-
describe "scrub_fragment" do
|
9
|
-
it "sets the encoding for html" do
|
10
|
-
escaped = Loofah.scrub_fragment(UTF8_STRING, :escape).to_s
|
11
|
-
assert_equal UTF8_STRING.encoding, escaped.encoding
|
12
|
-
end
|
13
|
-
|
14
|
-
it "sets the encoding for xml" do
|
15
|
-
escaped = Loofah.scrub_xml_fragment(UTF8_STRING, :escape).to_s
|
16
|
-
assert_equal UTF8_STRING.encoding, escaped.encoding
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|