loofah 0.4.2 → 2.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +604 -0
- data/MIT-LICENSE.txt +3 -1
- data/README.md +410 -0
- data/SECURITY.md +18 -0
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +98 -0
- data/lib/loofah/helpers.rb +91 -4
- data/lib/loofah/html4/document.rb +17 -0
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +28 -0
- data/lib/loofah/html5/safelist.rb +1058 -0
- data/lib/loofah/html5/scrub.rb +211 -40
- data/lib/loofah/metahelpers.rb +18 -0
- data/lib/loofah/scrubber.rb +31 -13
- data/lib/loofah/scrubbers.rb +262 -31
- data/lib/loofah/version.rb +6 -0
- data/lib/loofah/xml/document.rb +2 -0
- data/lib/loofah/xml/document_fragment.rb +6 -9
- data/lib/loofah.rb +131 -52
- metadata +79 -158
- data/CHANGELOG.rdoc +0 -92
- data/DEPRECATED.rdoc +0 -12
- data/Manifest.txt +0 -34
- data/README.rdoc +0 -330
- data/Rakefile +0 -61
- data/TODO.rdoc +0 -4
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/init.rb +0 -1
- data/lib/loofah/active_record.rb +0 -62
- data/lib/loofah/html/document.rb +0 -22
- data/lib/loofah/html/document_fragment.rb +0 -46
- data/lib/loofah/html5/whitelist.rb +0 -174
- data/lib/loofah/instance_methods.rb +0 -77
- data/lib/loofah/xss_foliate.rb +0 -212
- data/test/helper.rb +0 -8
- data/test/html5/test_sanitizer.rb +0 -248
- data/test/test_active_record.rb +0 -146
- data/test/test_ad_hoc.rb +0 -272
- data/test/test_api.rb +0 -128
- data/test/test_helpers.rb +0 -28
- data/test/test_scrubber.rb +0 -227
- data/test/test_scrubbers.rb +0 -144
- data/test/test_xss_foliate.rb +0 -171
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -2
|
@@ -1,248 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# these tests taken from the HTML5 sanitization project and modified for use with Loofah
|
|
3
|
-
# see the original here: http://code.google.com/p/html5lib/source/browse/ruby/test/test_sanitizer.rb
|
|
4
|
-
#
|
|
5
|
-
# license text at the bottom of this file
|
|
6
|
-
#
|
|
7
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
|
|
8
|
-
require 'json'
|
|
9
|
-
|
|
10
|
-
class Html5TestSanitizer < Test::Unit::TestCase
|
|
11
|
-
include Loofah
|
|
12
|
-
|
|
13
|
-
def sanitize_xhtml stream
|
|
14
|
-
Loofah.fragment(stream).scrub!(:escape).to_xhtml
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
def sanitize_html stream
|
|
18
|
-
Loofah.fragment(stream).scrub!(:escape).to_html
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
|
22
|
-
## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
|
|
23
|
-
sane = sanitize_html(input).gsub('"',"'")
|
|
24
|
-
|
|
25
|
-
## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that
|
|
26
|
-
## it would require a lot of manual hacking to make the tests match libxml's output.
|
|
27
|
-
## instead, I'm taking the shotgun approach, and trying to match any of the described outputs.
|
|
28
|
-
assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane), input)
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
(HTML5::WhiteList::ALLOWED_ELEMENTS).each do |tag_name|
|
|
32
|
-
define_method "test_should_allow_#{tag_name}_tag" do
|
|
33
|
-
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
|
34
|
-
htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
|
|
35
|
-
xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
|
|
36
|
-
rexmloutput = xhtmloutput
|
|
37
|
-
|
|
38
|
-
if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
|
|
39
|
-
htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
|
|
40
|
-
xhtmloutput = htmloutput
|
|
41
|
-
elsif tag_name == 'col'
|
|
42
|
-
htmloutput = "<col title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
|
|
43
|
-
xhtmloutput = htmloutput
|
|
44
|
-
rexmloutput = "<col title='1' />"
|
|
45
|
-
elsif tag_name == 'table'
|
|
46
|
-
htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt;baz<table title='1'> </table>"
|
|
47
|
-
xhtmloutput = htmloutput
|
|
48
|
-
elsif tag_name == 'image'
|
|
49
|
-
htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
|
|
50
|
-
xhtmloutput = htmloutput
|
|
51
|
-
rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
|
|
52
|
-
elsif HTML5::WhiteList::VOID_ELEMENTS.include?(tag_name)
|
|
53
|
-
htmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
|
|
54
|
-
xhtmloutput = htmloutput
|
|
55
|
-
htmloutput += '<br/>' if tag_name == 'br'
|
|
56
|
-
rexmloutput = "<#{tag_name} title='1' />"
|
|
57
|
-
end
|
|
58
|
-
check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
##
|
|
63
|
-
## libxml2 downcases elements, so this is moot.
|
|
64
|
-
##
|
|
65
|
-
# HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
|
|
66
|
-
# define_method "test_should_forbid_#{tag_name.upcase}_tag" do
|
|
67
|
-
# input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
|
68
|
-
# output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
|
|
69
|
-
# check_sanitization(input, output, output, output)
|
|
70
|
-
# end
|
|
71
|
-
# end
|
|
72
|
-
|
|
73
|
-
HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
|
74
|
-
next if attribute_name == 'style'
|
|
75
|
-
define_method "test_should_allow_#{attribute_name}_attribute" do
|
|
76
|
-
input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
|
|
77
|
-
if %w[checked compact disabled ismap multiple nohref noshade nowrap readonly selected].include?(attribute_name)
|
|
78
|
-
output = "<p #{attribute_name}>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
|
|
79
|
-
htmloutput = "<p #{attribute_name.downcase}>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
|
|
80
|
-
else
|
|
81
|
-
output = "<p #{attribute_name}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
|
|
82
|
-
htmloutput = "<p #{attribute_name.downcase}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
|
|
83
|
-
end
|
|
84
|
-
check_sanitization(input, htmloutput, output, output)
|
|
85
|
-
end
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
##
|
|
89
|
-
## libxml2 downcases attributes, so this is moot.
|
|
90
|
-
##
|
|
91
|
-
# HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
|
92
|
-
# define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
|
|
93
|
-
# input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
|
|
94
|
-
# output = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
|
|
95
|
-
# check_sanitization(input, output, output, output)
|
|
96
|
-
# end
|
|
97
|
-
# end
|
|
98
|
-
|
|
99
|
-
HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
|
|
100
|
-
define_method "test_should_allow_#{protocol}_uris" do
|
|
101
|
-
input = %(<a href="#{protocol}">foo</a>)
|
|
102
|
-
output = "<a href='#{protocol}'>foo</a>"
|
|
103
|
-
check_sanitization(input, output, output, output)
|
|
104
|
-
end
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
|
|
108
|
-
define_method "test_should_allow_uppercase_#{protocol}_uris" do
|
|
109
|
-
input = %(<a href="#{protocol.upcase}">foo</a>)
|
|
110
|
-
output = "<a href='#{protocol.upcase}'>foo</a>"
|
|
111
|
-
check_sanitization(input, output, output, output)
|
|
112
|
-
end
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
|
|
116
|
-
next unless HTML5::WhiteList::ALLOWED_ELEMENTS.include?(tag_name)
|
|
117
|
-
define_method "test_#{tag_name}_should_allow_local_href" do
|
|
118
|
-
input = %(<#{tag_name} xlink:href="#foo"/>)
|
|
119
|
-
output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
|
|
120
|
-
xhtmloutput = "<#{tag_name} xlink:href='#foo'></#{tag_name}>"
|
|
121
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
|
122
|
-
end
|
|
123
|
-
|
|
124
|
-
define_method "test_#{tag_name}_should_allow_local_href_with_newline" do
|
|
125
|
-
input = %(<#{tag_name} xlink:href="\n#foo"/>)
|
|
126
|
-
output = "<#{tag_name.downcase} xlink:href='\n#foo'></#{tag_name.downcase}>"
|
|
127
|
-
xhtmloutput = "<#{tag_name} xlink:href='\n#foo'></#{tag_name}>"
|
|
128
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
define_method "test_#{tag_name}_should_forbid_nonlocal_href" do
|
|
132
|
-
input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>)
|
|
133
|
-
output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
|
|
134
|
-
xhtmloutput = "<#{tag_name}></#{tag_name}>"
|
|
135
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do
|
|
139
|
-
input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>)
|
|
140
|
-
output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
|
|
141
|
-
xhtmloutput = "<#{tag_name}></#{tag_name}>"
|
|
142
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
|
143
|
-
end
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
##
|
|
147
|
-
## as tenderlove says, "care < 0"
|
|
148
|
-
##
|
|
149
|
-
# def test_should_handle_astral_plane_characters
|
|
150
|
-
# input = "<p>&#x1d4b5; &#x1d538;</p>"
|
|
151
|
-
# output = "<p>\360\235\222\265 \360\235\224\270</p>"
|
|
152
|
-
# check_sanitization(input, output, output, output)
|
|
153
|
-
|
|
154
|
-
# input = "<p><tspan>\360\235\224\270</tspan> a</p>"
|
|
155
|
-
# output = "<p><tspan>\360\235\224\270</tspan> a</p>"
|
|
156
|
-
# check_sanitization(input, output, output, output)
|
|
157
|
-
# end
|
|
158
|
-
|
|
159
|
-
# This affects only NS4. Is it worth fixing?
|
|
160
|
-
# def test_javascript_includes
|
|
161
|
-
# input = %(<div size="&{alert('XSS')}">foo</div>)
|
|
162
|
-
# output = "<div>foo</div>"
|
|
163
|
-
# check_sanitization(input, output, output, output)
|
|
164
|
-
# end
|
|
165
|
-
|
|
166
|
-
##
|
|
167
|
-
## these tests primarily test the parser logic, not the sanitizer
|
|
168
|
-
## logic. i call bullshit. we're not writing a test suite for
|
|
169
|
-
## libxml2 here, so let's rely on the unit tests above to take care
|
|
170
|
-
## of our valid elements and attributes.
|
|
171
|
-
##
|
|
172
|
-
# Dir[File.join(File.dirname(__FILE__), 'testdata', '*.*')].each do |filename|
|
|
173
|
-
# JSON::parse(open(filename).read).each do |test|
|
|
174
|
-
# define_method "test_#{test['name']}" do
|
|
175
|
-
# check_sanitization(
|
|
176
|
-
# test['input'],
|
|
177
|
-
# test['output'],
|
|
178
|
-
# test['xhtml'] || test['output'],
|
|
179
|
-
# test['rexml'] || test['output']
|
|
180
|
-
# )
|
|
181
|
-
# end
|
|
182
|
-
# end
|
|
183
|
-
# end
|
|
184
|
-
|
|
185
|
-
## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
|
|
186
|
-
HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
|
|
187
|
-
define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
|
|
188
|
-
input = "<rect fill='url(#foo)' />"
|
|
189
|
-
output = "<rect fill='url(#foo)'></rect>"
|
|
190
|
-
check_sanitization(input, output, output, output)
|
|
191
|
-
end
|
|
192
|
-
|
|
193
|
-
define_method "test_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
|
|
194
|
-
input = "<rect fill='url(http://bad.com/) #fff' />"
|
|
195
|
-
output = "<rect fill=' #fff'></rect>"
|
|
196
|
-
check_sanitization(input, output, output, output)
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
define_method "test_uri_ref_with_space_in_svg_attribute_#{attr_name}" do
|
|
200
|
-
input = "<rect fill='url(\n#foo)' />"
|
|
201
|
-
rexml = "<rect fill='url(\n#foo)'></rect>"
|
|
202
|
-
end
|
|
203
|
-
|
|
204
|
-
define_method "test_absolute_uri_ref_with_space_in_svg_attribute_#{attr_name}" do
|
|
205
|
-
input = "<rect fill=\"url(\nhttp://bad.com/)\" />"
|
|
206
|
-
rexml = "<rect fill=' '></rect>"
|
|
207
|
-
end
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
end
|
|
211
|
-
|
|
212
|
-
# <html5_license>
|
|
213
|
-
#
|
|
214
|
-
# Copyright (c) 2006-2008 The Authors
|
|
215
|
-
#
|
|
216
|
-
# Contributors:
|
|
217
|
-
# James Graham - jg307@cam.ac.uk
|
|
218
|
-
# Anne van Kesteren - annevankesteren@gmail.com
|
|
219
|
-
# Lachlan Hunt - lachlan.hunt@lachy.id.au
|
|
220
|
-
# Matt McDonald - kanashii@kanashii.ca
|
|
221
|
-
# Sam Ruby - rubys@intertwingly.net
|
|
222
|
-
# Ian Hickson (Google) - ian@hixie.ch
|
|
223
|
-
# Thomas Broyer - t.broyer@ltgt.net
|
|
224
|
-
# Jacques Distler - distler@golem.ph.utexas.edu
|
|
225
|
-
# Henri Sivonen - hsivonen@iki.fi
|
|
226
|
-
# The Mozilla Foundation (contributions from Henri Sivonen since 2008)
|
|
227
|
-
#
|
|
228
|
-
# Permission is hereby granted, free of charge, to any person
|
|
229
|
-
# obtaining a copy of this software and associated documentation files
|
|
230
|
-
# (the "Software"), to deal in the Software without restriction,
|
|
231
|
-
# including without limitation the rights to use, copy, modify, merge,
|
|
232
|
-
# publish, distribute, sublicense, and/or sell copies of the Software,
|
|
233
|
-
# and to permit persons to whom the Software is furnished to do so,
|
|
234
|
-
# subject to the following conditions:
|
|
235
|
-
#
|
|
236
|
-
# The above copyright notice and this permission notice shall be
|
|
237
|
-
# included in all copies or substantial portions of the Software.
|
|
238
|
-
#
|
|
239
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
240
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
241
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
242
|
-
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
243
|
-
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
244
|
-
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
245
|
-
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
246
|
-
# SOFTWARE.
|
|
247
|
-
#
|
|
248
|
-
# </html5_license>
|
data/test/test_active_record.rb
DELETED
|
@@ -1,146 +0,0 @@
|
|
|
1
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
|
2
|
-
|
|
3
|
-
require 'loofah/active_record'
|
|
4
|
-
|
|
5
|
-
class TestActiveRecord < Test::Unit::TestCase
|
|
6
|
-
|
|
7
|
-
HTML_STRING = "<div>omgwtfbbq</div>"
|
|
8
|
-
PLAIN_TEXT = "vanilla text"
|
|
9
|
-
|
|
10
|
-
context "with a Post model" do
|
|
11
|
-
|
|
12
|
-
setup do
|
|
13
|
-
ActsAsFu.build_model(:posts) do
|
|
14
|
-
string :plain_text
|
|
15
|
-
string :html_string
|
|
16
|
-
end
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
context "scrubbing a single field as a fragment" do
|
|
20
|
-
context "using a symbol to indicate the attribute" do
|
|
21
|
-
setup do
|
|
22
|
-
Post.html_fragment :html_string, :scrub => :prune
|
|
23
|
-
assert ! Post.xss_foliated?
|
|
24
|
-
@post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
should "scrub the specified field" do
|
|
28
|
-
Loofah.expects(:scrub_fragment).with(HTML_STRING, :prune).once
|
|
29
|
-
Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).never
|
|
30
|
-
@post.valid?
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
should "only call scrub_fragment once" do
|
|
34
|
-
Loofah.expects(:scrub_fragment).once
|
|
35
|
-
@post.valid?
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
should "generate strings" do
|
|
39
|
-
@post.valid?
|
|
40
|
-
assert_equal String, @post.html_string.class
|
|
41
|
-
assert_equal HTML_STRING, @post.html_string
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
context "using a string to indicate the attribute" do
|
|
46
|
-
setup do
|
|
47
|
-
Post.html_fragment 'html_string', :scrub => :prune
|
|
48
|
-
assert ! Post.xss_foliated?
|
|
49
|
-
@post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
should "scrub the specified field" do
|
|
53
|
-
Loofah.expects(:scrub_fragment).with(HTML_STRING, :prune).once
|
|
54
|
-
Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).never
|
|
55
|
-
@post.valid?
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
context "scrubbing a single field as a document" do
|
|
61
|
-
context "using a symbol to indicate the attribute" do
|
|
62
|
-
setup do
|
|
63
|
-
Post.html_document :html_string, :scrub => :strip
|
|
64
|
-
@post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
should "scrub the specified field, but not other fields" do
|
|
68
|
-
Loofah.expects(:scrub_document).with(HTML_STRING, :strip).once
|
|
69
|
-
Loofah.expects(:scrub_document).with(PLAIN_TEXT, :strip).never
|
|
70
|
-
@post.valid?
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
should "only call scrub_document once" do
|
|
74
|
-
Loofah.expects(:scrub_document).once
|
|
75
|
-
@post.valid?
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
should "generate strings" do
|
|
79
|
-
@post.valid?
|
|
80
|
-
assert_equal String, @post.html_string.class
|
|
81
|
-
end
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
context "using a string to indicate the attribute" do
|
|
85
|
-
setup do
|
|
86
|
-
Post.html_document 'html_string', :scrub => :strip
|
|
87
|
-
@post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
should "scrub the specified field, but not other fields" do
|
|
91
|
-
Loofah.expects(:scrub_document).with(HTML_STRING, :strip).once
|
|
92
|
-
Loofah.expects(:scrub_document).with(PLAIN_TEXT, :strip).never
|
|
93
|
-
@post.valid?
|
|
94
|
-
end
|
|
95
|
-
end
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
context "not passing any options" do
|
|
99
|
-
should "raise ArgumentError" do
|
|
100
|
-
assert_raises(ArgumentError) {
|
|
101
|
-
Post.html_fragment :foo
|
|
102
|
-
}
|
|
103
|
-
end
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
context "not passing :scrub option" do
|
|
107
|
-
should "raise ArgumentError" do
|
|
108
|
-
assert_raise(ArgumentError) {
|
|
109
|
-
Post.html_fragment :foo, :bar => :quux
|
|
110
|
-
}
|
|
111
|
-
end
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
context "passing a :scrub option" do
|
|
115
|
-
should "not raise ArgumentError" do
|
|
116
|
-
assert_nothing_raised {
|
|
117
|
-
Post.html_fragment :foo, :scrub => :quux
|
|
118
|
-
}
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
context "passing a Scrubber" do
|
|
123
|
-
setup do
|
|
124
|
-
@called = false
|
|
125
|
-
@scrubber = Loofah::Scrubber.new do |node|
|
|
126
|
-
@called = true
|
|
127
|
-
end
|
|
128
|
-
end
|
|
129
|
-
|
|
130
|
-
should "not raise ArgumentError" do
|
|
131
|
-
assert_nothing_raised {
|
|
132
|
-
Post.html_fragment :html_string, :scrub => @scrubber
|
|
133
|
-
}
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
should "scrub properly" do
|
|
137
|
-
Post.html_fragment :html_string, :scrub => @scrubber
|
|
138
|
-
post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
|
|
139
|
-
post.valid?
|
|
140
|
-
assert @called
|
|
141
|
-
end
|
|
142
|
-
end
|
|
143
|
-
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
end
|
data/test/test_ad_hoc.rb
DELETED
|
@@ -1,272 +0,0 @@
|
|
|
1
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
|
2
|
-
|
|
3
|
-
class TestAdHoc < Test::Unit::TestCase
|
|
4
|
-
|
|
5
|
-
def test_empty_string_with_escape
|
|
6
|
-
assert_equal "", Loofah.scrub_fragment("", :escape).to_xml
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
def test_empty_string_with_prune
|
|
10
|
-
assert_equal Loofah.scrub_document("", :prune).text, ""
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
def test_xml_document_scrub
|
|
14
|
-
xml = Loofah.xml_document <<-EOXML
|
|
15
|
-
<root>
|
|
16
|
-
<employee deceased='true'>Abraham Lincoln</employee>
|
|
17
|
-
<employee deceased='false'>Abe Vigoda</employee>
|
|
18
|
-
</root>
|
|
19
|
-
EOXML
|
|
20
|
-
bring_out_your_dead = Loofah::Scrubber.new do |node|
|
|
21
|
-
if node.name == "employee" and node["deceased"] == "true"
|
|
22
|
-
node.remove
|
|
23
|
-
Loofah::Scrubber::STOP # don't bother with the rest of the subtree
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
assert_equal 2, xml.css("employee").length
|
|
27
|
-
|
|
28
|
-
xml.scrub!(bring_out_your_dead)
|
|
29
|
-
|
|
30
|
-
employees = xml.css "employee"
|
|
31
|
-
assert_equal 1, employees.length
|
|
32
|
-
assert_equal "Abe Vigoda", employees.first.inner_text
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def test_xml_fragment_scrub
|
|
36
|
-
xml = Loofah.xml_fragment <<-EOXML
|
|
37
|
-
<employee deceased='true'>Abraham Lincoln</employee>
|
|
38
|
-
<employee deceased='false'>Abe Vigoda</employee>
|
|
39
|
-
EOXML
|
|
40
|
-
bring_out_your_dead = Loofah::Scrubber.new do |node|
|
|
41
|
-
if node.name == "employee" and node["deceased"] == "true"
|
|
42
|
-
node.remove
|
|
43
|
-
Loofah::Scrubber::STOP # don't bother with the rest of the subtree
|
|
44
|
-
end
|
|
45
|
-
end
|
|
46
|
-
assert_equal 2, xml.css("employee").length
|
|
47
|
-
|
|
48
|
-
xml.scrub!(bring_out_your_dead)
|
|
49
|
-
|
|
50
|
-
employees = xml.css "employee"
|
|
51
|
-
assert_equal 1, employees.length
|
|
52
|
-
assert_equal "Abe Vigoda", employees.first.inner_text
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
def test_html_fragment_to_s_should_not_include_head_tags
|
|
56
|
-
html = Loofah.fragment "<style>foo</style><div>bar</div>"
|
|
57
|
-
assert_equal "<div>bar</div>", html.to_s
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
def test_html_fragment_text_should_not_include_head_tags
|
|
61
|
-
html = Loofah.fragment "<style>foo</style><div>bar</div>"
|
|
62
|
-
assert_equal "bar", html.text
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
def test_html_document_text_should_not_include_head_tags
|
|
66
|
-
html = Loofah.document "<style>foo</style><div>bar</div>"
|
|
67
|
-
assert_equal "bar", html.text
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
def test_node_scrub_should_only_scrub_subtree
|
|
71
|
-
xml = Loofah.document <<-EOHTML
|
|
72
|
-
<html><body>
|
|
73
|
-
<div class='scrub'>
|
|
74
|
-
<script>I should be removed</script>
|
|
75
|
-
</div>
|
|
76
|
-
<div class='noscrub'>
|
|
77
|
-
<script>I should remain</script>
|
|
78
|
-
</div>
|
|
79
|
-
</body></html>
|
|
80
|
-
EOHTML
|
|
81
|
-
node = xml.at_css "div.scrub"
|
|
82
|
-
node.scrub!(:prune)
|
|
83
|
-
assert_contains xml.to_s, /I should remain/
|
|
84
|
-
assert_does_not_contain xml.to_s, /I should be removed/
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
def test_nodeset_scrub_should_only_scrub_subtrees
|
|
88
|
-
xml = Loofah.document <<-EOHTML
|
|
89
|
-
<html><body>
|
|
90
|
-
<div class='scrub'>
|
|
91
|
-
<script>I should be removed</script>
|
|
92
|
-
</div>
|
|
93
|
-
<div class='noscrub'>
|
|
94
|
-
<script>I should remain</script>
|
|
95
|
-
</div>
|
|
96
|
-
<div class='scrub'>
|
|
97
|
-
<script>I should also be removed</script>
|
|
98
|
-
</div>
|
|
99
|
-
</body></html>
|
|
100
|
-
EOHTML
|
|
101
|
-
node_set = xml.css "div.scrub"
|
|
102
|
-
assert_equal 2, node_set.length
|
|
103
|
-
node_set.scrub!(:prune)
|
|
104
|
-
assert_contains xml.to_s, /I should remain/
|
|
105
|
-
assert_does_not_contain xml.to_s, /I should be removed/
|
|
106
|
-
assert_does_not_contain xml.to_s, /I should also be removed/
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
def test_removal_of_illegal_tag
|
|
110
|
-
html = <<-HTML
|
|
111
|
-
following this there should be no jim tag
|
|
112
|
-
<jim>jim</jim>
|
|
113
|
-
was there?
|
|
114
|
-
HTML
|
|
115
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
|
116
|
-
assert sane.xpath("//jim").empty?
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
def test_removal_of_illegal_attribute
|
|
120
|
-
html = "<p class=bar foo=bar abbr=bar />"
|
|
121
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
|
122
|
-
node = sane.xpath("//p").first
|
|
123
|
-
assert node.attributes['class']
|
|
124
|
-
assert node.attributes['abbr']
|
|
125
|
-
assert_nil node.attributes['foo']
|
|
126
|
-
end
|
|
127
|
-
|
|
128
|
-
def test_removal_of_illegal_url_in_href
|
|
129
|
-
html = <<-HTML
|
|
130
|
-
<a href='jimbo://jim.jim/'>this link should have its href removed because of illegal url</a>
|
|
131
|
-
<a href='http://jim.jim/'>this link should be fine</a>
|
|
132
|
-
HTML
|
|
133
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
|
134
|
-
nodes = sane.xpath("//a")
|
|
135
|
-
assert_nil nodes.first.attributes['href']
|
|
136
|
-
assert nodes.last.attributes['href']
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
def test_css_sanitization
|
|
140
|
-
html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
|
|
141
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
|
142
|
-
assert_match(/#000/, sane.inner_html)
|
|
143
|
-
assert_no_match(/foo\.com/, sane.inner_html)
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
def test_fragment_with_no_tags
|
|
147
|
-
assert_equal "This fragment has no tags.", Loofah.scrub_fragment("This fragment has no tags.", :escape).to_xml
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
def test_fragment_in_p_tag
|
|
151
|
-
assert_equal "<p>This fragment is in a p.</p>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>", :escape).to_xml
|
|
152
|
-
end
|
|
153
|
-
|
|
154
|
-
def test_fragment_in_p_tag_plus_stuff
|
|
155
|
-
assert_equal "<p>This fragment is in a p.</p>foo<strong>bar</strong>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>foo<strong>bar</strong>", :escape).to_xml
|
|
156
|
-
end
|
|
157
|
-
|
|
158
|
-
def test_fragment_with_text_nodes_leading_and_trailing
|
|
159
|
-
assert_equal "text<p>fragment</p>text", Loofah.scrub_fragment("text<p>fragment</p>text", :escape).to_xml
|
|
160
|
-
end
|
|
161
|
-
|
|
162
|
-
def test_whitewash_on_fragment
|
|
163
|
-
html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
|
|
164
|
-
whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
|
|
165
|
-
assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
|
|
166
|
-
end
|
|
167
|
-
|
|
168
|
-
MSWORD_HTML = <<-EOHTML
|
|
169
|
-
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
|
|
170
|
-
<w:WordDocument>
|
|
171
|
-
<w:View>Normal</w:View>
|
|
172
|
-
<w:Zoom>0</w:Zoom>
|
|
173
|
-
<w:PunctuationKerning/>
|
|
174
|
-
<w:ValidateAgainstSchemas/>
|
|
175
|
-
<w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
|
|
176
|
-
<w:IgnoreMixedContent>false</w:IgnoreMixedContent>
|
|
177
|
-
<w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
|
|
178
|
-
<w:Compatibility>
|
|
179
|
-
<w:BreakWrappedTables/>
|
|
180
|
-
<w:SnapToGridInCell/>
|
|
181
|
-
<w:WrapTextWithPunct/>
|
|
182
|
-
<w:UseAsianBreakRules/>
|
|
183
|
-
<w:DontGrowAutofit/>
|
|
184
|
-
</w:Compatibility>
|
|
185
|
-
<w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
|
|
186
|
-
</w:WordDocument>
|
|
187
|
-
</xml><![endif]--><!--[if gte mso 9]><xml>
|
|
188
|
-
<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
|
|
189
|
-
</w:LatentStyles>
|
|
190
|
-
</xml><![endif]--><style>
|
|
191
|
-
<!--
|
|
192
|
-
/* Style Definitions */
|
|
193
|
-
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
|
194
|
-
{mso-style-parent:"";
|
|
195
|
-
margin:0in;
|
|
196
|
-
margin-bottom:.0001pt;
|
|
197
|
-
mso-pagination:widow-orphan;
|
|
198
|
-
font-size:12.0pt;
|
|
199
|
-
font-family:"Times New Roman";
|
|
200
|
-
mso-fareast-font-family:"Times New Roman";}
|
|
201
|
-
@page Section1
|
|
202
|
-
{size:8.5in 11.0in;
|
|
203
|
-
margin:1.0in 1.25in 1.0in 1.25in;
|
|
204
|
-
mso-header-margin:.5in;
|
|
205
|
-
mso-footer-margin:.5in;
|
|
206
|
-
mso-paper-source:0;}
|
|
207
|
-
div.Section1
|
|
208
|
-
{page:Section1;}
|
|
209
|
-
-->
|
|
210
|
-
</style><!--[if gte mso 10]>
|
|
211
|
-
<style>
|
|
212
|
-
/* Style Definitions */
|
|
213
|
-
table.MsoNormalTable
|
|
214
|
-
{mso-style-name:"Table Normal";
|
|
215
|
-
mso-tstyle-rowband-size:0;
|
|
216
|
-
mso-tstyle-colband-size:0;
|
|
217
|
-
mso-style-noshow:yes;
|
|
218
|
-
mso-style-parent:"";
|
|
219
|
-
mso-padding-alt:0in 5.4pt 0in 5.4pt;
|
|
220
|
-
mso-para-margin:0in;
|
|
221
|
-
mso-para-margin-bottom:.0001pt;
|
|
222
|
-
mso-pagination:widow-orphan;
|
|
223
|
-
font-size:10.0pt;
|
|
224
|
-
font-family:"Times New Roman";
|
|
225
|
-
mso-ansi-language:#0400;
|
|
226
|
-
mso-fareast-language:#0400;
|
|
227
|
-
mso-bidi-language:#0400;}
|
|
228
|
-
</style>
|
|
229
|
-
<![endif]-->
|
|
230
|
-
|
|
231
|
-
<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
|
|
232
|
-
EOHTML
|
|
233
|
-
|
|
234
|
-
def test_fragment_whitewash_on_microsofty_markup
|
|
235
|
-
whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
|
|
236
|
-
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s
|
|
237
|
-
end
|
|
238
|
-
|
|
239
|
-
def test_document_whitewash_on_microsofty_markup
|
|
240
|
-
whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
|
|
241
|
-
assert_contains whitewashed.to_s, %r(<p>Foo <b>BOLD</b></p>)
|
|
242
|
-
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
|
|
243
|
-
end
|
|
244
|
-
|
|
245
|
-
def test_return_empty_string_when_nothing_left
|
|
246
|
-
assert_equal "", Loofah.scrub_document('<script>test</script>', :prune).text
|
|
247
|
-
end
|
|
248
|
-
|
|
249
|
-
def test_removal_of_all_tags
|
|
250
|
-
html = <<-HTML
|
|
251
|
-
What's up <strong>doc</strong>?
|
|
252
|
-
HTML
|
|
253
|
-
stripped = Loofah.scrub_document(html, :prune).text
|
|
254
|
-
assert_equal "What's up doc?".strip, stripped.strip
|
|
255
|
-
end
|
|
256
|
-
|
|
257
|
-
def test_dont_remove_whitespace
|
|
258
|
-
html = "Foo\nBar"
|
|
259
|
-
assert_equal html, Loofah.scrub_document(html, :prune).text
|
|
260
|
-
end
|
|
261
|
-
|
|
262
|
-
def test_dont_remove_whitespace_between_tags
|
|
263
|
-
html = "<p>Foo</p>\n<p>Bar</p>"
|
|
264
|
-
assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
|
|
265
|
-
end
|
|
266
|
-
|
|
267
|
-
def test_removal_of_entities
|
|
268
|
-
html = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p>"
|
|
269
|
-
assert_equal 'this is < that "&" the other > boo\'ya', Loofah.scrub_document(html, :prune).text
|
|
270
|
-
end
|
|
271
|
-
|
|
272
|
-
end
|