loofah 1.0.0 → 2.19.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +489 -0
- data/MIT-LICENSE.txt +3 -1
- data/README.md +364 -0
- data/SECURITY.md +18 -0
- data/lib/loofah/elements.rb +88 -11
- data/lib/loofah/helpers.rb +76 -2
- data/lib/loofah/html/document.rb +1 -0
- data/lib/loofah/html/document_fragment.rb +9 -2
- data/lib/loofah/html5/libxml2_workarounds.rb +27 -0
- data/lib/loofah/html5/safelist.rb +1042 -0
- data/lib/loofah/html5/scrub.rb +198 -40
- data/lib/loofah/instance_methods.rb +16 -10
- data/lib/loofah/metahelpers.rb +9 -10
- data/lib/loofah/scrubber.rb +22 -6
- data/lib/loofah/scrubbers.rb +96 -16
- data/lib/loofah/version.rb +5 -0
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +5 -2
- data/lib/loofah.rb +38 -25
- metadata +159 -172
- data/CHANGELOG.rdoc +0 -134
- data/Gemfile +0 -1
- data/Manifest.txt +0 -34
- data/README.rdoc +0 -312
- data/Rakefile +0 -53
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/lib/loofah/html5/whitelist.rb +0 -168
- data/test/helper.rb +0 -7
- data/test/html5/test_sanitizer.rb +0 -248
- data/test/integration/test_ad_hoc.rb +0 -176
- data/test/integration/test_helpers.rb +0 -33
- data/test/integration/test_html.rb +0 -51
- data/test/integration/test_scrubbers.rb +0 -331
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -138
- data/test/unit/test_helpers.rb +0 -27
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
@@ -1,168 +0,0 @@
|
|
1
|
-
module Loofah
|
2
|
-
module HTML5 # :nodoc:
|
3
|
-
#
|
4
|
-
# HTML whitelist lifted from HTML5lib sanitizer code:
|
5
|
-
#
|
6
|
-
# http://code.google.com/p/html5lib/
|
7
|
-
#
|
8
|
-
# <html5_license>
|
9
|
-
#
|
10
|
-
# Copyright (c) 2006-2008 The Authors
|
11
|
-
#
|
12
|
-
# Contributors:
|
13
|
-
# James Graham - jg307@cam.ac.uk
|
14
|
-
# Anne van Kesteren - annevankesteren@gmail.com
|
15
|
-
# Lachlan Hunt - lachlan.hunt@lachy.id.au
|
16
|
-
# Matt McDonald - kanashii@kanashii.ca
|
17
|
-
# Sam Ruby - rubys@intertwingly.net
|
18
|
-
# Ian Hickson (Google) - ian@hixie.ch
|
19
|
-
# Thomas Broyer - t.broyer@ltgt.net
|
20
|
-
# Jacques Distler - distler@golem.ph.utexas.edu
|
21
|
-
# Henri Sivonen - hsivonen@iki.fi
|
22
|
-
# The Mozilla Foundation (contributions from Henri Sivonen since 2008)
|
23
|
-
#
|
24
|
-
# Permission is hereby granted, free of charge, to any person
|
25
|
-
# obtaining a copy of this software and associated documentation
|
26
|
-
# files (the "Software"), to deal in the Software without
|
27
|
-
# restriction, including without limitation the rights to use, copy,
|
28
|
-
# modify, merge, publish, distribute, sublicense, and/or sell copies
|
29
|
-
# of the Software, and to permit persons to whom the Software is
|
30
|
-
# furnished to do so, subject to the following conditions:
|
31
|
-
#
|
32
|
-
# The above copyright notice and this permission notice shall be
|
33
|
-
# included in all copies or substantial portions of the Software.
|
34
|
-
#
|
35
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
36
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
37
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
38
|
-
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
39
|
-
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
40
|
-
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
41
|
-
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
42
|
-
# DEALINGS IN THE SOFTWARE.
|
43
|
-
#
|
44
|
-
# </html5_license>
|
45
|
-
module WhiteList
|
46
|
-
ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
|
47
|
-
button caption center cite code col colgroup dd del dfn dir div dl dt
|
48
|
-
em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
|
49
|
-
legend li map menu ol optgroup option p pre q s samp select small span
|
50
|
-
strike strong sub sup table tbody td textarea tfoot th thead tr tt u
|
51
|
-
ul var]
|
52
|
-
|
53
|
-
MATHML_ELEMENTS = %w[annotation annotation-xml maction math merror mfrac
|
54
|
-
mfenced mi mmultiscripts mn mo mover mpadded mphantom mprescripts mroot mrow
|
55
|
-
mspace msqrt mstyle msub msubsup msup mtable mtd mtext mtr munder
|
56
|
-
munderover none semantics]
|
57
|
-
|
58
|
-
SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
|
59
|
-
circle defs desc ellipse font-face font-face-name font-face-src foreignObject
|
60
|
-
g glyph hkern linearGradient line marker metadata missing-glyph
|
61
|
-
mpath path polygon polyline radialGradient rect set stop svg switch
|
62
|
-
text title tspan use]
|
63
|
-
|
64
|
-
ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
|
65
|
-
align alt axis border cellpadding cellspacing char charoff charset
|
66
|
-
checked cite class clear cols colspan color compact coords datetime
|
67
|
-
dir disabled enctype for frame headers height href hreflang hspace id
|
68
|
-
ismap label lang longdesc maxlength media method multiple name nohref
|
69
|
-
noshade nowrap prompt readonly rel rev rows rowspan rules scope
|
70
|
-
selected shape size span src start style summary tabindex target title
|
71
|
-
type usemap valign value vspace width xml:lang]
|
72
|
-
|
73
|
-
MATHML_ATTRIBUTES = %w[actiontype align close columnalign columnalign
|
74
|
-
columnalign columnlines columnspacing columnspan depth display
|
75
|
-
displaystyle encoding equalcolumns equalrows fence fontstyle fontweight
|
76
|
-
frame height linethickness lspace mathbackground mathcolor mathvariant
|
77
|
-
mathvariant maxsize minsize open other rowalign rowalign rowalign rowlines
|
78
|
-
rowspacing rowspan rspace scriptlevel selection separator separators
|
79
|
-
stretchy width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]
|
80
|
-
|
81
|
-
SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
|
82
|
-
arabic-form ascent attributeName attributeType baseProfile bbox begin
|
83
|
-
by calcMode cap-height class color color-rendering content cx cy d dx
|
84
|
-
dy descent display dur end fill fill-opacity fill-rule font-family
|
85
|
-
font-size font-stretch font-style font-variant font-weight from fx fy g1
|
86
|
-
g2 glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
|
87
|
-
ideographic k keyPoints keySplines keyTimes lang marker-end
|
88
|
-
marker-mid marker-start markerHeight markerUnits markerWidth
|
89
|
-
mathematical max min name offset opacity orient origin
|
90
|
-
overline-position overline-thickness panose-1 path pathLength points
|
91
|
-
preserveAspectRatio r refX refY repeatCount repeatDur
|
92
|
-
requiredExtensions requiredFeatures restart rotate rx ry slope stemh
|
93
|
-
stemv stop-color stop-opacity strikethrough-position
|
94
|
-
strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
|
95
|
-
stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
|
96
|
-
stroke-width systemLanguage target text-anchor to transform type u1
|
97
|
-
u2 underline-position underline-thickness unicode unicode-range
|
98
|
-
units-per-em values version viewBox visibility width widths x
|
99
|
-
x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
|
100
|
-
xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
|
101
|
-
xmlns:xlink y y1 y2 zoomAndPan]
|
102
|
-
|
103
|
-
ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]
|
104
|
-
|
105
|
-
SVG_ATTR_VAL_ALLOWS_REF = %w[clip-path color-profile cursor fill
|
106
|
-
filter marker marker-start marker-mid marker-end mask stroke]
|
107
|
-
|
108
|
-
SVG_ALLOW_LOCAL_HREF = %w[altGlyph animate animateColor animateMotion
|
109
|
-
animateTransform cursor feImage filter linearGradient pattern
|
110
|
-
radialGradient textpath tref set use]
|
111
|
-
|
112
|
-
ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
|
113
|
-
border-bottom-color border-collapse border-color border-left-color
|
114
|
-
border-right-color border-top-color clear color cursor direction
|
115
|
-
display elevation float font font-family font-size font-style
|
116
|
-
font-variant font-weight height letter-spacing line-height overflow
|
117
|
-
pause pause-after pause-before pitch pitch-range richness speak
|
118
|
-
speak-header speak-numeral speak-punctuation speech-rate stress
|
119
|
-
text-align text-decoration text-indent unicode-bidi vertical-align
|
120
|
-
voice-family volume white-space width]
|
121
|
-
|
122
|
-
ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
|
123
|
-
brown center collapse dashed dotted fuchsia gray green !important
|
124
|
-
italic left lime maroon medium none navy normal nowrap olive pointer
|
125
|
-
purple red right solid silver teal top transparent underline white
|
126
|
-
yellow]
|
127
|
-
|
128
|
-
ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
|
129
|
-
stroke-width stroke-linecap stroke-linejoin stroke-opacity]
|
130
|
-
|
131
|
-
ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
|
132
|
-
telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]
|
133
|
-
|
134
|
-
# subclasses may define their own versions of these constants
|
135
|
-
ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
|
136
|
-
ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
|
137
|
-
ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
|
138
|
-
ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
|
139
|
-
ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
|
140
|
-
ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
|
141
|
-
|
142
|
-
VOID_ELEMENTS = %w[
|
143
|
-
base
|
144
|
-
link
|
145
|
-
meta
|
146
|
-
hr
|
147
|
-
br
|
148
|
-
img
|
149
|
-
embed
|
150
|
-
param
|
151
|
-
area
|
152
|
-
col
|
153
|
-
input
|
154
|
-
]
|
155
|
-
|
156
|
-
# additional tags we should consider safe since we have libxml2 fixing up our documents.
|
157
|
-
TAGS_SAFE_WITH_LIBXML2 = %w[html head body]
|
158
|
-
ALLOWED_ELEMENTS_WITH_LIBXML2 = ALLOWED_ELEMENTS + TAGS_SAFE_WITH_LIBXML2
|
159
|
-
end
|
160
|
-
|
161
|
-
#
|
162
|
-
# The HTML5lib whitelist arrays, transformed into hashes for faster lookup.
|
163
|
-
#
|
164
|
-
module HashedWhiteList
|
165
|
-
include Loofah::MetaHelpers::HashifiedConstants(WhiteList)
|
166
|
-
end
|
167
|
-
end
|
168
|
-
end
|
data/test/helper.rb
DELETED
@@ -1,248 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# these tests taken from the HTML5 sanitization project and modified for use with Loofah
|
3
|
-
# see the original here: http://code.google.com/p/html5lib/source/browse/ruby/test/test_sanitizer.rb
|
4
|
-
#
|
5
|
-
# license text at the bottom of this file
|
6
|
-
#
|
7
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
|
8
|
-
require 'json'
|
9
|
-
|
10
|
-
class Html5TestSanitizer < Test::Unit::TestCase
|
11
|
-
include Loofah
|
12
|
-
|
13
|
-
def sanitize_xhtml stream
|
14
|
-
Loofah.fragment(stream).scrub!(:escape).to_xhtml
|
15
|
-
end
|
16
|
-
|
17
|
-
def sanitize_html stream
|
18
|
-
Loofah.fragment(stream).scrub!(:escape).to_html
|
19
|
-
end
|
20
|
-
|
21
|
-
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
22
|
-
## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
|
23
|
-
sane = sanitize_html(input).gsub('"',"'")
|
24
|
-
|
25
|
-
## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that
|
26
|
-
## it would require a lot of manual hacking to make the tests match libxml's output.
|
27
|
-
## instead, I'm taking the shotgun approach, and trying to match any of the described outputs.
|
28
|
-
assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane), input)
|
29
|
-
end
|
30
|
-
|
31
|
-
(HTML5::WhiteList::ALLOWED_ELEMENTS).each do |tag_name|
|
32
|
-
define_method "test_should_allow_#{tag_name}_tag" do
|
33
|
-
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
34
|
-
htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.downcase}>"
|
35
|
-
xhtmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
36
|
-
rexmloutput = xhtmloutput
|
37
|
-
|
38
|
-
if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
|
39
|
-
htmloutput = "foo <bad>bar</bad> baz"
|
40
|
-
xhtmloutput = htmloutput
|
41
|
-
elsif tag_name == 'col'
|
42
|
-
htmloutput = "<col title='1'>foo <bad>bar</bad> baz"
|
43
|
-
xhtmloutput = htmloutput
|
44
|
-
rexmloutput = "<col title='1' />"
|
45
|
-
elsif tag_name == 'table'
|
46
|
-
htmloutput = "foo <bad>bar</bad>baz<table title='1'> </table>"
|
47
|
-
xhtmloutput = htmloutput
|
48
|
-
elsif tag_name == 'image'
|
49
|
-
htmloutput = "<img title='1'/>foo <bad>bar</bad> baz"
|
50
|
-
xhtmloutput = htmloutput
|
51
|
-
rexmloutput = "<image title='1'>foo <bad>bar</bad> baz</image>"
|
52
|
-
elsif HTML5::WhiteList::VOID_ELEMENTS.include?(tag_name)
|
53
|
-
htmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz"
|
54
|
-
xhtmloutput = htmloutput
|
55
|
-
htmloutput += '<br/>' if tag_name == 'br'
|
56
|
-
rexmloutput = "<#{tag_name} title='1' />"
|
57
|
-
end
|
58
|
-
check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
##
|
63
|
-
## libxml2 downcases elements, so this is moot.
|
64
|
-
##
|
65
|
-
# HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
|
66
|
-
# define_method "test_should_forbid_#{tag_name.upcase}_tag" do
|
67
|
-
# input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
68
|
-
# output = "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
69
|
-
# check_sanitization(input, output, output, output)
|
70
|
-
# end
|
71
|
-
# end
|
72
|
-
|
73
|
-
HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
74
|
-
next if attribute_name == 'style'
|
75
|
-
define_method "test_should_allow_#{attribute_name}_attribute" do
|
76
|
-
input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
|
77
|
-
if %w[checked compact disabled ismap multiple nohref noshade nowrap readonly selected].include?(attribute_name)
|
78
|
-
output = "<p #{attribute_name}>foo <bad>bar</bad> baz</p>"
|
79
|
-
htmloutput = "<p #{attribute_name.downcase}>foo <bad>bar</bad> baz</p>"
|
80
|
-
else
|
81
|
-
output = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
|
82
|
-
htmloutput = "<p #{attribute_name.downcase}='foo'>foo <bad>bar</bad> baz</p>"
|
83
|
-
end
|
84
|
-
check_sanitization(input, htmloutput, output, output)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
##
|
89
|
-
## libxml2 downcases attributes, so this is moot.
|
90
|
-
##
|
91
|
-
# HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
92
|
-
# define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
|
93
|
-
# input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
|
94
|
-
# output = "<p>foo <bad>bar</bad> baz</p>"
|
95
|
-
# check_sanitization(input, output, output, output)
|
96
|
-
# end
|
97
|
-
# end
|
98
|
-
|
99
|
-
HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
|
100
|
-
define_method "test_should_allow_#{protocol}_uris" do
|
101
|
-
input = %(<a href="#{protocol}">foo</a>)
|
102
|
-
output = "<a href='#{protocol}'>foo</a>"
|
103
|
-
check_sanitization(input, output, output, output)
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
|
108
|
-
define_method "test_should_allow_uppercase_#{protocol}_uris" do
|
109
|
-
input = %(<a href="#{protocol.upcase}">foo</a>)
|
110
|
-
output = "<a href='#{protocol.upcase}'>foo</a>"
|
111
|
-
check_sanitization(input, output, output, output)
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
|
116
|
-
next unless HTML5::WhiteList::ALLOWED_ELEMENTS.include?(tag_name)
|
117
|
-
define_method "test_#{tag_name}_should_allow_local_href" do
|
118
|
-
input = %(<#{tag_name} xlink:href="#foo"/>)
|
119
|
-
output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
|
120
|
-
xhtmloutput = "<#{tag_name} xlink:href='#foo'></#{tag_name}>"
|
121
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
122
|
-
end
|
123
|
-
|
124
|
-
define_method "test_#{tag_name}_should_allow_local_href_with_newline" do
|
125
|
-
input = %(<#{tag_name} xlink:href="\n#foo"/>)
|
126
|
-
output = "<#{tag_name.downcase} xlink:href='\n#foo'></#{tag_name.downcase}>"
|
127
|
-
xhtmloutput = "<#{tag_name} xlink:href='\n#foo'></#{tag_name}>"
|
128
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
129
|
-
end
|
130
|
-
|
131
|
-
define_method "test_#{tag_name}_should_forbid_nonlocal_href" do
|
132
|
-
input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>)
|
133
|
-
output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
|
134
|
-
xhtmloutput = "<#{tag_name}></#{tag_name}>"
|
135
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
136
|
-
end
|
137
|
-
|
138
|
-
define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do
|
139
|
-
input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>)
|
140
|
-
output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
|
141
|
-
xhtmloutput = "<#{tag_name}></#{tag_name}>"
|
142
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
##
|
147
|
-
## as tenderlove says, "care < 0"
|
148
|
-
##
|
149
|
-
# def test_should_handle_astral_plane_characters
|
150
|
-
# input = "<p>𝒵 𝔸</p>"
|
151
|
-
# output = "<p>\360\235\222\265 \360\235\224\270</p>"
|
152
|
-
# check_sanitization(input, output, output, output)
|
153
|
-
|
154
|
-
# input = "<p><tspan>\360\235\224\270</tspan> a</p>"
|
155
|
-
# output = "<p><tspan>\360\235\224\270</tspan> a</p>"
|
156
|
-
# check_sanitization(input, output, output, output)
|
157
|
-
# end
|
158
|
-
|
159
|
-
# This affects only NS4. Is it worth fixing?
|
160
|
-
# def test_javascript_includes
|
161
|
-
# input = %(<div size="&{alert('XSS')}">foo</div>)
|
162
|
-
# output = "<div>foo</div>"
|
163
|
-
# check_sanitization(input, output, output, output)
|
164
|
-
# end
|
165
|
-
|
166
|
-
##
|
167
|
-
## these tests primarily test the parser logic, not the sanitizer
|
168
|
-
## logic. i call bullshit. we're not writing a test suite for
|
169
|
-
## libxml2 here, so let's rely on the unit tests above to take care
|
170
|
-
## of our valid elements and attributes.
|
171
|
-
##
|
172
|
-
# Dir[File.join(File.dirname(__FILE__), 'testdata', '*.*')].each do |filename|
|
173
|
-
# JSON::parse(open(filename).read).each do |test|
|
174
|
-
# define_method "test_#{test['name']}" do
|
175
|
-
# check_sanitization(
|
176
|
-
# test['input'],
|
177
|
-
# test['output'],
|
178
|
-
# test['xhtml'] || test['output'],
|
179
|
-
# test['rexml'] || test['output']
|
180
|
-
# )
|
181
|
-
# end
|
182
|
-
# end
|
183
|
-
# end
|
184
|
-
|
185
|
-
## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
|
186
|
-
HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
|
187
|
-
define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
|
188
|
-
input = "<rect fill='url(#foo)' />"
|
189
|
-
output = "<rect fill='url(#foo)'></rect>"
|
190
|
-
check_sanitization(input, output, output, output)
|
191
|
-
end
|
192
|
-
|
193
|
-
define_method "test_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
|
194
|
-
input = "<rect fill='url(http://bad.com/) #fff' />"
|
195
|
-
output = "<rect fill=' #fff'></rect>"
|
196
|
-
check_sanitization(input, output, output, output)
|
197
|
-
end
|
198
|
-
|
199
|
-
define_method "test_uri_ref_with_space_in_svg_attribute_#{attr_name}" do
|
200
|
-
input = "<rect fill='url(\n#foo)' />"
|
201
|
-
rexml = "<rect fill='url(\n#foo)'></rect>"
|
202
|
-
end
|
203
|
-
|
204
|
-
define_method "test_absolute_uri_ref_with_space_in_svg_attribute_#{attr_name}" do
|
205
|
-
input = "<rect fill=\"url(\nhttp://bad.com/)\" />"
|
206
|
-
rexml = "<rect fill=' '></rect>"
|
207
|
-
end
|
208
|
-
end
|
209
|
-
|
210
|
-
end
|
211
|
-
|
212
|
-
# <html5_license>
|
213
|
-
#
|
214
|
-
# Copyright (c) 2006-2008 The Authors
|
215
|
-
#
|
216
|
-
# Contributors:
|
217
|
-
# James Graham - jg307@cam.ac.uk
|
218
|
-
# Anne van Kesteren - annevankesteren@gmail.com
|
219
|
-
# Lachlan Hunt - lachlan.hunt@lachy.id.au
|
220
|
-
# Matt McDonald - kanashii@kanashii.ca
|
221
|
-
# Sam Ruby - rubys@intertwingly.net
|
222
|
-
# Ian Hickson (Google) - ian@hixie.ch
|
223
|
-
# Thomas Broyer - t.broyer@ltgt.net
|
224
|
-
# Jacques Distler - distler@golem.ph.utexas.edu
|
225
|
-
# Henri Sivonen - hsivonen@iki.fi
|
226
|
-
# The Mozilla Foundation (contributions from Henri Sivonen since 2008)
|
227
|
-
#
|
228
|
-
# Permission is hereby granted, free of charge, to any person
|
229
|
-
# obtaining a copy of this software and associated documentation files
|
230
|
-
# (the "Software"), to deal in the Software without restriction,
|
231
|
-
# including without limitation the rights to use, copy, modify, merge,
|
232
|
-
# publish, distribute, sublicense, and/or sell copies of the Software,
|
233
|
-
# and to permit persons to whom the Software is furnished to do so,
|
234
|
-
# subject to the following conditions:
|
235
|
-
#
|
236
|
-
# The above copyright notice and this permission notice shall be
|
237
|
-
# included in all copies or substantial portions of the Software.
|
238
|
-
#
|
239
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
240
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
241
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
242
|
-
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
243
|
-
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
244
|
-
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
245
|
-
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
246
|
-
# SOFTWARE.
|
247
|
-
#
|
248
|
-
# </html5_license>
|
@@ -1,176 +0,0 @@
|
|
1
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
|
2
|
-
|
3
|
-
class TestAdHoc < Test::Unit::TestCase
|
4
|
-
|
5
|
-
context "blank input string" do
|
6
|
-
context "fragment" do
|
7
|
-
should "return a blank string" do
|
8
|
-
assert_equal "", Loofah.scrub_fragment("", :prune).to_s
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
context "document" do
|
13
|
-
should "return a blank string" do
|
14
|
-
assert_equal "", Loofah.scrub_document("", :prune).root.to_s
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_removal_of_illegal_tag
|
20
|
-
html = <<-HTML
|
21
|
-
following this there should be no jim tag
|
22
|
-
<jim>jim</jim>
|
23
|
-
was there?
|
24
|
-
HTML
|
25
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
26
|
-
assert sane.xpath("//jim").empty?
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_removal_of_illegal_attribute
|
30
|
-
html = "<p class=bar foo=bar abbr=bar />"
|
31
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
32
|
-
node = sane.xpath("//p").first
|
33
|
-
assert node.attributes['class']
|
34
|
-
assert node.attributes['abbr']
|
35
|
-
assert_nil node.attributes['foo']
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_removal_of_illegal_url_in_href
|
39
|
-
html = <<-HTML
|
40
|
-
<a href='jimbo://jim.jim/'>this link should have its href removed because of illegal url</a>
|
41
|
-
<a href='http://jim.jim/'>this link should be fine</a>
|
42
|
-
HTML
|
43
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
44
|
-
nodes = sane.xpath("//a")
|
45
|
-
assert_nil nodes.first.attributes['href']
|
46
|
-
assert nodes.last.attributes['href']
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_css_sanitization
|
50
|
-
html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
|
51
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
52
|
-
assert_match(/#000/, sane.inner_html)
|
53
|
-
assert_no_match(/foo\.com/, sane.inner_html)
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_fragment_with_no_tags
|
57
|
-
assert_equal "This fragment has no tags.", Loofah.scrub_fragment("This fragment has no tags.", :escape).to_xml
|
58
|
-
end
|
59
|
-
|
60
|
-
def test_fragment_in_p_tag
|
61
|
-
assert_equal "<p>This fragment is in a p.</p>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>", :escape).to_xml
|
62
|
-
end
|
63
|
-
|
64
|
-
def test_fragment_in_p_tag_plus_stuff
|
65
|
-
assert_equal "<p>This fragment is in a p.</p>foo<strong>bar</strong>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>foo<strong>bar</strong>", :escape).to_xml
|
66
|
-
end
|
67
|
-
|
68
|
-
def test_fragment_with_text_nodes_leading_and_trailing
|
69
|
-
assert_equal "text<p>fragment</p>text", Loofah.scrub_fragment("text<p>fragment</p>text", :escape).to_xml
|
70
|
-
end
|
71
|
-
|
72
|
-
def test_whitewash_on_fragment
|
73
|
-
html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
|
74
|
-
whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
|
75
|
-
assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
|
76
|
-
end
|
77
|
-
|
78
|
-
MSWORD_HTML = <<-EOHTML
|
79
|
-
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
|
80
|
-
<w:WordDocument>
|
81
|
-
<w:View>Normal</w:View>
|
82
|
-
<w:Zoom>0</w:Zoom>
|
83
|
-
<w:PunctuationKerning/>
|
84
|
-
<w:ValidateAgainstSchemas/>
|
85
|
-
<w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
|
86
|
-
<w:IgnoreMixedContent>false</w:IgnoreMixedContent>
|
87
|
-
<w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
|
88
|
-
<w:Compatibility>
|
89
|
-
<w:BreakWrappedTables/>
|
90
|
-
<w:SnapToGridInCell/>
|
91
|
-
<w:WrapTextWithPunct/>
|
92
|
-
<w:UseAsianBreakRules/>
|
93
|
-
<w:DontGrowAutofit/>
|
94
|
-
</w:Compatibility>
|
95
|
-
<w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
|
96
|
-
</w:WordDocument>
|
97
|
-
</xml><![endif]--><!--[if gte mso 9]><xml>
|
98
|
-
<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
|
99
|
-
</w:LatentStyles>
|
100
|
-
</xml><![endif]--><style>
|
101
|
-
<!--
|
102
|
-
/* Style Definitions */
|
103
|
-
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
104
|
-
{mso-style-parent:"";
|
105
|
-
margin:0in;
|
106
|
-
margin-bottom:.0001pt;
|
107
|
-
mso-pagination:widow-orphan;
|
108
|
-
font-size:12.0pt;
|
109
|
-
font-family:"Times New Roman";
|
110
|
-
mso-fareast-font-family:"Times New Roman";}
|
111
|
-
@page Section1
|
112
|
-
{size:8.5in 11.0in;
|
113
|
-
margin:1.0in 1.25in 1.0in 1.25in;
|
114
|
-
mso-header-margin:.5in;
|
115
|
-
mso-footer-margin:.5in;
|
116
|
-
mso-paper-source:0;}
|
117
|
-
div.Section1
|
118
|
-
{page:Section1;}
|
119
|
-
-->
|
120
|
-
</style><!--[if gte mso 10]>
|
121
|
-
<style>
|
122
|
-
/* Style Definitions */
|
123
|
-
table.MsoNormalTable
|
124
|
-
{mso-style-name:"Table Normal";
|
125
|
-
mso-tstyle-rowband-size:0;
|
126
|
-
mso-tstyle-colband-size:0;
|
127
|
-
mso-style-noshow:yes;
|
128
|
-
mso-style-parent:"";
|
129
|
-
mso-padding-alt:0in 5.4pt 0in 5.4pt;
|
130
|
-
mso-para-margin:0in;
|
131
|
-
mso-para-margin-bottom:.0001pt;
|
132
|
-
mso-pagination:widow-orphan;
|
133
|
-
font-size:10.0pt;
|
134
|
-
font-family:"Times New Roman";
|
135
|
-
mso-ansi-language:#0400;
|
136
|
-
mso-fareast-language:#0400;
|
137
|
-
mso-bidi-language:#0400;}
|
138
|
-
</style>
|
139
|
-
<![endif]-->
|
140
|
-
|
141
|
-
<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
|
142
|
-
EOHTML
|
143
|
-
|
144
|
-
def test_fragment_whitewash_on_microsofty_markup
|
145
|
-
whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
|
146
|
-
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s
|
147
|
-
end
|
148
|
-
|
149
|
-
def test_document_whitewash_on_microsofty_markup
|
150
|
-
whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
|
151
|
-
assert_contains whitewashed.to_s, %r(<p>Foo <b>BOLD</b></p>)
|
152
|
-
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
|
153
|
-
end
|
154
|
-
|
155
|
-
def test_return_empty_string_when_nothing_left
|
156
|
-
assert_equal "", Loofah.scrub_document('<script>test</script>', :prune).text
|
157
|
-
end
|
158
|
-
|
159
|
-
def test_removal_of_all_tags
|
160
|
-
html = <<-HTML
|
161
|
-
What's up <strong>doc</strong>?
|
162
|
-
HTML
|
163
|
-
stripped = Loofah.scrub_document(html, :prune).text
|
164
|
-
assert_equal %Q(What\'s up doc?).strip, stripped.strip
|
165
|
-
end
|
166
|
-
|
167
|
-
def test_dont_remove_whitespace
|
168
|
-
html = "Foo\nBar"
|
169
|
-
assert_equal html, Loofah.scrub_document(html, :prune).text
|
170
|
-
end
|
171
|
-
|
172
|
-
def test_dont_remove_whitespace_between_tags
|
173
|
-
html = "<p>Foo</p>\n<p>Bar</p>"
|
174
|
-
assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
|
175
|
-
end
|
176
|
-
end
|
@@ -1,33 +0,0 @@
|
|
1
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
|
2
|
-
|
3
|
-
class TestHelpers < Test::Unit::TestCase
|
4
|
-
context "#strip_tags" do
|
5
|
-
context "on safe markup" do
|
6
|
-
should "strip out tags" do
|
7
|
-
assert_equal "omgwtfbbq!!1!", Loofah::Helpers.strip_tags("<div>omgwtfbbq</div><span>!!1!</span>")
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
context "on hack attack" do
|
12
|
-
should "strip escape html entities" do
|
13
|
-
bad_shit = "<script>alert('evil')</script>"
|
14
|
-
assert_equal bad_shit, Loofah::Helpers.strip_tags(bad_shit)
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
context "#sanitize" do
|
20
|
-
context "on safe markup" do
|
21
|
-
should "render the safe html" do
|
22
|
-
html = "<div>omgwtfbbq</div><span>!!1!</span>"
|
23
|
-
assert_equal html, Loofah::Helpers.sanitize(html)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
context "on hack attack" do
|
28
|
-
should "strip the unsafe tags" do
|
29
|
-
assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><span>w00t</span>")
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|