loofah 2.3.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.gemtest +0 -0
- data/CHANGELOG.md +336 -0
- data/Gemfile +22 -0
- data/MIT-LICENSE.txt +23 -0
- data/Manifest.txt +41 -0
- data/README.md +363 -0
- data/Rakefile +81 -0
- data/SECURITY.md +18 -0
- data/benchmark/benchmark.rb +149 -0
- data/benchmark/fragment.html +96 -0
- data/benchmark/helper.rb +73 -0
- data/benchmark/www.slashdot.com.html +2560 -0
- data/lib/loofah.rb +83 -0
- data/lib/loofah/elements.rb +92 -0
- data/lib/loofah/helpers.rb +103 -0
- data/lib/loofah/html/document.rb +18 -0
- data/lib/loofah/html/document_fragment.rb +40 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +26 -0
- data/lib/loofah/html5/safelist.rb +796 -0
- data/lib/loofah/html5/scrub.rb +133 -0
- data/lib/loofah/instance_methods.rb +127 -0
- data/lib/loofah/metahelpers.rb +13 -0
- data/lib/loofah/scrubber.rb +133 -0
- data/lib/loofah/scrubbers.rb +297 -0
- data/lib/loofah/xml/document.rb +13 -0
- data/lib/loofah/xml/document_fragment.rb +23 -0
- data/test/assets/msword.html +63 -0
- data/test/assets/testdata_sanitizer_tests1.dat +502 -0
- data/test/helper.rb +18 -0
- data/test/html5/test_sanitizer.rb +401 -0
- data/test/html5/test_scrub.rb +10 -0
- data/test/integration/test_ad_hoc.rb +220 -0
- data/test/integration/test_helpers.rb +43 -0
- data/test/integration/test_html.rb +72 -0
- data/test/integration/test_scrubbers.rb +400 -0
- data/test/integration/test_xml.rb +55 -0
- data/test/unit/test_api.rb +142 -0
- data/test/unit/test_encoding.rb +20 -0
- data/test/unit/test_helpers.rb +62 -0
- data/test/unit/test_scrubber.rb +229 -0
- data/test/unit/test_scrubbers.rb +14 -0
- metadata +287 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
# Integration tests for Loofah::Helpers.strip_tags, .sanitize and
# .sanitize_css.
class IntegrationTestHelpers < Loofah::TestCase
  context ".strip_tags" do
    context "on safe markup" do
      it "strip out tags" do
        assert_equal "omgwtfbbq!!1!", Loofah::Helpers.strip_tags("<div>omgwtfbbq</div><span>!!1!</span>")
      end
    end

    context "on hack attack" do
      it "strip escape html entities" do
        # The input is already entity-escaped text (no real tags), so the
        # assertion checks that strip_tags hands it back unchanged instead of
        # decoding it into a live <script> element.
        escaped_markup = "&lt;script&gt;alert('evil')&lt;/script&gt;"
        assert_equal escaped_markup, Loofah::Helpers.strip_tags(escaped_markup)
      end
    end
  end

  context ".sanitize" do
    context "on safe markup" do
      it "render the safe html" do
        html = "<div>omgwtfbbq</div><span>!!1!</span>"
        assert_equal html, Loofah::Helpers.sanitize(html)
      end
    end

    context "on hack attack" do
      it "strip the unsafe tags" do
        # Only the <script> wrapper is expected to go; its text content stays.
        assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><span>w00t</span>")
      end

      it "strips form tags" do
        assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><form action=\"/foo/bar\" method=\"post\"><input></form><span>w00t</span>")
      end
    end
  end

  context ".sanitize_css" do
    it "removes unsafe css properties" do
      # The background-image declaration must be dropped; the two declarations
      # around it must survive (whitespace after ':' and ';' is not significant).
      assert_match(/display:\s*block;\s*background-color:\s*blue;/, Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg);background-color:blue"))
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
# Integration tests for the string output of parsed HTML: #to_s, #text and
# #to_text on fragments and documents, plus the optional `encoding` argument
# accepted by Loofah.fragment.
class IntegrationTestHtml < Loofah::TestCase
  context "html fragment" do
    context "#to_s" do
      it "not include head tags (like style)" do
        skip("depends on nokogiri version")
        fragment = Loofah.fragment("<style>foo</style><div>bar</div>")
        assert_equal("<div>bar</div>", fragment.to_s)
      end
    end

    context "#text" do
      it "not include head tags (like style)" do
        skip("depends on nokogiri version")
        fragment = Loofah.fragment("<style>foo</style><div>bar</div>")
        assert_equal("bar", fragment.text)
      end
    end

    context "#to_text" do
      it "add newlines before and after html4 block elements" do
        fragment = Loofah.fragment(
          "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
        )
        expected = "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n"
        assert_equal(expected, fragment.to_text)
      end

      it "add newlines before and after html5 block elements" do
        fragment = Loofah.fragment(
          "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
        )
        expected = "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n"
        assert_equal(expected, fragment.to_text)
      end

      it "remove extraneous whitespace" do
        # The input carries a run of newline/tab/space characters between the
        # two words; the expected output keeps just two newlines there.
        fragment = Loofah.fragment("<div>tweedle\n\n\t\n\s\nbeetle</div>")
        assert_equal("\ntweedle\n\nbeetle\n", fragment.to_text)
      end
    end

    context 'with an `encoding` arg' do
      it "sets the parent document's encoding to accordingly" do
        fragment = Loofah.fragment("<style>foo</style><div>bar</div>", 'US-ASCII')
        assert_equal('US-ASCII', fragment.document.encoding)
      end
    end
  end

  context "html document" do
    context "#text" do
      it "not include head tags (like style)" do
        document = Loofah.document("<style>foo</style><div>bar</div>")
        assert_equal("bar", document.text)
      end
    end

    context "#to_text" do
      it "add newlines before and after html4 block elements" do
        document = Loofah.document(
          "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
        )
        expected = "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n"
        assert_equal(expected, document.to_text)
      end

      it "add newlines before and after html5 block elements" do
        document = Loofah.document(
          "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
        )
        expected = "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n"
        assert_equal(expected, document.to_text)
      end

      it "remove extraneous whitespace" do
        document = Loofah.document("<div>tweedle\n\n\t\n\s\nbeetle</div>")
        assert_equal("\ntweedle\n\nbeetle\n", document.to_text)
      end
    end
  end
end
|
72
|
+
|
@@ -0,0 +1,400 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
# Integration tests for the built-in scrubbers (:escape, :prune, :strip,
# :whitewash, :nofollow, :noopener, :unprintable) applied to HTML documents,
# document fragments, single nodes and node sets, together with the #text,
# #to_s and #serialize output paths.
class IntegrationTestScrubbers < Loofah::TestCase
  # Markup containing an unknown <invalid> element, followed by the inner HTML
  # each scrub strategy is expected to leave behind.
  INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
  # :escape keeps the offending subtree as text, so its tags are entity-encoded.
  INVALID_ESCAPED = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
  INVALID_PRUNED = "<div>quux</div>"
  INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"

  # Namespaced tag, id attribute, unknown tag and a conditional comment: only
  # the bare <div> with its text is expected to survive :whitewash.
  WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid><!--[if gts mso9]><div>microsofty stuff</div><![endif]-->"
  WHITEWASH_RESULT = "<div>foo</div>"

  NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
  NOFOLLOW_RESULT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'

  NOFOLLOW_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
  NOFOLLOW_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="noopener nofollow">Click here</a>'

  NOOPENER_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
  NOOPENER_RESULT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'

  NOOPENER_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
  NOOPENER_WITH_REL_RESULT = '<a href="http://www.example.com/" rel="nofollow noopener">Click here</a>'

  # U+2028 / U+2029 are embedded in the input and must be absent from the result.
  UNPRINTABLE_FRAGMENT = "<b>Lo\u2029ofah ro\u2028cks!</b><script>x\u2028y</script>"
  UNPRINTABLE_RESULT = "<b>Loofah rocks!</b><script>xy</script>"

  # Entity-encoded source markup and its decoded plain-text counterpart.
  ENTITY_FRAGMENT = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p><div>w00t</div>"
  ENTITY_TEXT = %Q(this is < that "&" the other > boo\'yaw00t)

  # The <script> here is escaped text inside a <div>, not a real element, so
  # #text sees it as content: escaped by default, raw when
  # :encode_special_chars is false.
  ENTITY_HACK_ATTACK = "<div><div>Hack attack!</div><div>&lt;script&gt;alert('evil')&lt;/script&gt;</div></div>"
  ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!&lt;script&gt;alert('evil')&lt;/script&gt;"
  ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!<script>alert('evil')</script>"

  context "Document" do
    context "#scrub!" do
      context ":escape" do
        it "escape bad tags" do
          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
          result = doc.scrub! :escape

          assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
          # scrub! is expected to return the receiver itself.
          assert_equal doc, result
        end
      end

      context ":prune" do
        it "prune bad tags" do
          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
          result = doc.scrub! :prune

          assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":strip" do
        it "strip bad tags" do
          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
          result = doc.scrub! :strip

          assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":whitewash" do
        it "whitewash the markup" do
          doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
          result = doc.scrub! :whitewash

          assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":nofollow" do
        it "add a 'nofollow' attribute to hyperlinks" do
          doc = Loofah::HTML::Document.parse "<html><body>#{NOFOLLOW_FRAGMENT}</body></html>"
          result = doc.scrub! :nofollow

          assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":unprintable" do
        it "removes unprintable unicode characters" do
          doc = Loofah::HTML::Document.parse "<html><body>#{UNPRINTABLE_FRAGMENT}</body></html>"
          result = doc.scrub! :unprintable

          assert_equal UNPRINTABLE_RESULT, doc.xpath("/html/body").inner_html
          assert_equal doc, result
        end
      end
    end

    context "#scrub_document" do
      it "be a shortcut for parse-and-scrub" do
        # Mock expectations: Loofah.document must be called with the input and
        # scrub! must then be called on whatever it returned.
        mock_doc = Object.new
        mock(Loofah).document(:string_or_io) { mock_doc }
        mock(mock_doc).scrub!(:method)

        Loofah.scrub_document(:string_or_io, :method)
      end
    end

    context "#text" do
      it "leave behind only inner text with html entities still escaped" do
        doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
        result = doc.text

        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
      end

      context "with encode_special_chars => false" do
        it "leave behind only inner text with html entities unescaped" do
          doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
          result = doc.text(:encode_special_chars => false)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
        end
      end

      context "with encode_special_chars => true" do
        it "leave behind only inner text with html entities still escaped" do
          doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
          result = doc.text(:encode_special_chars => true)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
        end
      end
    end

    context "#to_s" do
      it "generate HTML" do
        doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
        refute_nil doc.xpath("/html").first
        refute_nil doc.xpath("/html/head").first
        refute_nil doc.xpath("/html/body").first

        string = doc.to_s
        assert_match %r/<!DOCTYPE/, string
        assert_match %r/<html>/, string
        assert_match %r/<head>/, string
        assert_match %r/<body>/, string
      end
    end

    context "#serialize" do
      it "generate HTML" do
        doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
        refute_nil doc.xpath("/html").first
        refute_nil doc.xpath("/html/head").first
        refute_nil doc.xpath("/html/body").first

        string = doc.serialize
        assert_match %r/<!DOCTYPE/, string
        assert_match %r/<html>/, string
        assert_match %r/<head>/, string
        assert_match %r/<body>/, string
      end
    end

    context "Node" do
      context "#scrub!" do
        it "only scrub subtree" do
          xml = Loofah.document <<-EOHTML
            <html><body>
            <div class='scrub'>
            <script>I should be removed</script>
            </div>
            <div class='noscrub'>
            <script>I should remain</script>
            </div>
            </body></html>
          EOHTML
          # Scrubbing one node must leave its sibling's <script> untouched.
          node = xml.at_css "div.scrub"
          node.scrub!(:prune)
          assert_match %r/I should remain/, xml.to_s
          refute_match %r/I should be removed/, xml.to_s
        end
      end
    end

    context "NodeSet" do
      context "#scrub!" do
        it "only scrub subtrees" do
          xml = Loofah.document <<-EOHTML
            <html><body>
            <div class='scrub'>
            <script>I should be removed</script>
            </div>
            <div class='noscrub'>
            <script>I should remain</script>
            </div>
            <div class='scrub'>
            <script>I should also be removed</script>
            </div>
            </body></html>
          EOHTML
          node_set = xml.css "div.scrub"
          assert_equal 2, node_set.length
          node_set.scrub!(:prune)
          assert_match %r/I should remain/, xml.to_s
          refute_match %r/I should be removed/, xml.to_s
          refute_match %r/I should also be removed/, xml.to_s
        end
      end
    end
  end

  context "DocumentFragment" do
    context "#scrub!" do
      context ":escape" do
        it "escape bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
          result = doc.scrub! :escape

          assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":prune" do
        it "prune bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
          result = doc.scrub! :prune

          assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":strip" do
        it "strip bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
          result = doc.scrub! :strip

          assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":whitewash" do
        it "whitewash the markup" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
          result = doc.scrub! :whitewash

          assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":nofollow" do
        context "for a hyperlink that does not have a rel attribute" do
          it "add a 'nofollow' attribute to hyperlinks" do
            doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_FRAGMENT}</div>"
            result = doc.scrub! :nofollow

            assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
            assert_equal doc, result
          end
        end

        context "for a hyperlink that does have a rel attribute" do
          it "appends nofollow to rel attribute" do
            doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_WITH_REL_FRAGMENT}</div>"
            result = doc.scrub! :nofollow

            assert_equal NOFOLLOW_WITH_REL_RESULT, doc.xpath("./div").inner_html
            assert_equal doc, result
          end
        end
      end

      context ":noopener" do
        context "for a hyperlink without a 'rel' attribute" do
          it "add a 'noopener' attribute to hyperlinks" do
            doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_FRAGMENT}</div>"
            result = doc.scrub! :noopener

            assert_equal NOOPENER_RESULT, doc.xpath("./div").inner_html
            assert_equal doc, result
          end
        end

        context "for a hyperlink that does have a rel attribute" do
          it "appends 'noopener' to 'rel' attribute" do
            doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_WITH_REL_FRAGMENT}</div>"
            result = doc.scrub! :noopener

            assert_equal NOOPENER_WITH_REL_RESULT, doc.xpath("./div").inner_html
            assert_equal doc, result
          end
        end
      end

      context ":unprintable" do
        it "removes unprintable unicode characters" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{UNPRINTABLE_FRAGMENT}</div>"
          result = doc.scrub! :unprintable

          assert_equal UNPRINTABLE_RESULT, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end
    end

    context "#scrub_fragment" do
      it "be a shortcut for parse-and-scrub" do
        # Mock expectations: Loofah.fragment must be called with the input and
        # scrub! must then be called on whatever it returned.
        mock_doc = Object.new
        mock(Loofah).fragment(:string_or_io) { mock_doc }
        mock(mock_doc).scrub!(:method)

        Loofah.scrub_fragment(:string_or_io, :method)
      end
    end

    context "#text" do
      it "leave behind only inner text with html entities still escaped" do
        doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
        result = doc.text

        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
      end

      context "with encode_special_chars => false" do
        it "leave behind only inner text with html entities unescaped" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
          result = doc.text(:encode_special_chars => false)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
        end
      end

      context "with encode_special_chars => true" do
        it "leave behind only inner text with html entities still escaped" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
          result = doc.text(:encode_special_chars => true)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
        end
      end
    end

    context "#to_s" do
      it "not remove entities" do
        string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s
        # The serialized output must still carry the encoded form of '<'.
        assert_match %r/this is &lt;/, string
      end
    end

    context "Node" do
      context "#scrub!" do
        it "only scrub subtree" do
          xml = Loofah.fragment <<-EOHTML
            <div class='scrub'>
            <script>I should be removed</script>
            </div>
            <div class='noscrub'>
            <script>I should remain</script>
            </div>
          EOHTML
          node = xml.at_css "div.scrub"
          node.scrub!(:prune)
          assert_match %r(I should remain), xml.to_s
          refute_match %r(I should be removed), xml.to_s
        end
      end
    end

    context "NodeSet" do
      context "#scrub!" do
        it "only scrub subtrees" do
          xml = Loofah.fragment <<-EOHTML
            <div class='scrub'>
            <script>I should be removed</script>
            </div>
            <div class='noscrub'>
            <script>I should remain</script>
            </div>
            <div class='scrub'>
            <script>I should also be removed</script>
            </div>
          EOHTML
          node_set = xml.css "div.scrub"
          assert_equal 2, node_set.length
          node_set.scrub!(:prune)
          assert_match %r/I should remain/, xml.to_s
          refute_match %r/I should be removed/, xml.to_s
          refute_match %r/I should also be removed/, xml.to_s
        end
      end
    end
  end
end
|