loofah 1.0.0 → 2.19.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +489 -0
  3. data/MIT-LICENSE.txt +3 -1
  4. data/README.md +364 -0
  5. data/SECURITY.md +18 -0
  6. data/lib/loofah/elements.rb +88 -11
  7. data/lib/loofah/helpers.rb +76 -2
  8. data/lib/loofah/html/document.rb +1 -0
  9. data/lib/loofah/html/document_fragment.rb +9 -2
  10. data/lib/loofah/html5/libxml2_workarounds.rb +27 -0
  11. data/lib/loofah/html5/safelist.rb +1042 -0
  12. data/lib/loofah/html5/scrub.rb +198 -40
  13. data/lib/loofah/instance_methods.rb +16 -10
  14. data/lib/loofah/metahelpers.rb +9 -10
  15. data/lib/loofah/scrubber.rb +22 -6
  16. data/lib/loofah/scrubbers.rb +96 -16
  17. data/lib/loofah/version.rb +5 -0
  18. data/lib/loofah/xml/document.rb +1 -0
  19. data/lib/loofah/xml/document_fragment.rb +5 -2
  20. data/lib/loofah.rb +38 -25
  21. metadata +159 -172
  22. data/CHANGELOG.rdoc +0 -134
  23. data/Gemfile +0 -1
  24. data/Manifest.txt +0 -34
  25. data/README.rdoc +0 -312
  26. data/Rakefile +0 -53
  27. data/benchmark/benchmark.rb +0 -149
  28. data/benchmark/fragment.html +0 -96
  29. data/benchmark/helper.rb +0 -73
  30. data/benchmark/www.slashdot.com.html +0 -2560
  31. data/lib/loofah/html5/whitelist.rb +0 -168
  32. data/test/helper.rb +0 -7
  33. data/test/html5/test_sanitizer.rb +0 -248
  34. data/test/integration/test_ad_hoc.rb +0 -176
  35. data/test/integration/test_helpers.rb +0 -33
  36. data/test/integration/test_html.rb +0 -51
  37. data/test/integration/test_scrubbers.rb +0 -331
  38. data/test/integration/test_xml.rb +0 -55
  39. data/test/unit/test_api.rb +0 -138
  40. data/test/unit/test_helpers.rb +0 -27
  41. data/test/unit/test_scrubber.rb +0 -229
  42. data/test/unit/test_scrubbers.rb +0 -14
data/lib/loofah/html5/whitelist.rb DELETED
@@ -1,168 +0,0 @@
1
- module Loofah
2
- module HTML5 # :nodoc:
3
- #
4
- # HTML whitelist lifted from HTML5lib sanitizer code:
5
- #
6
- # http://code.google.com/p/html5lib/
7
- #
8
- # <html5_license>
9
- #
10
- # Copyright (c) 2006-2008 The Authors
11
- #
12
- # Contributors:
13
- # James Graham - jg307@cam.ac.uk
14
- # Anne van Kesteren - annevankesteren@gmail.com
15
- # Lachlan Hunt - lachlan.hunt@lachy.id.au
16
- # Matt McDonald - kanashii@kanashii.ca
17
- # Sam Ruby - rubys@intertwingly.net
18
- # Ian Hickson (Google) - ian@hixie.ch
19
- # Thomas Broyer - t.broyer@ltgt.net
20
- # Jacques Distler - distler@golem.ph.utexas.edu
21
- # Henri Sivonen - hsivonen@iki.fi
22
- # The Mozilla Foundation (contributions from Henri Sivonen since 2008)
23
- #
24
- # Permission is hereby granted, free of charge, to any person
25
- # obtaining a copy of this software and associated documentation
26
- # files (the "Software"), to deal in the Software without
27
- # restriction, including without limitation the rights to use, copy,
28
- # modify, merge, publish, distribute, sublicense, and/or sell copies
29
- # of the Software, and to permit persons to whom the Software is
30
- # furnished to do so, subject to the following conditions:
31
- #
32
- # The above copyright notice and this permission notice shall be
33
- # included in all copies or substantial portions of the Software.
34
- #
35
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
38
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
39
- # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
40
- # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
41
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
42
- # DEALINGS IN THE SOFTWARE.
43
- #
44
- # </html5_license>
45
- module WhiteList
46
- ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
47
- button caption center cite code col colgroup dd del dfn dir div dl dt
48
- em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
49
- legend li map menu ol optgroup option p pre q s samp select small span
50
- strike strong sub sup table tbody td textarea tfoot th thead tr tt u
51
- ul var]
52
-
53
- MATHML_ELEMENTS = %w[annotation annotation-xml maction math merror mfrac
54
- mfenced mi mmultiscripts mn mo mover mpadded mphantom mprescripts mroot mrow
55
- mspace msqrt mstyle msub msubsup msup mtable mtd mtext mtr munder
56
- munderover none semantics]
57
-
58
- SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
59
- circle defs desc ellipse font-face font-face-name font-face-src foreignObject
60
- g glyph hkern linearGradient line marker metadata missing-glyph
61
- mpath path polygon polyline radialGradient rect set stop svg switch
62
- text title tspan use]
63
-
64
- ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
65
- align alt axis border cellpadding cellspacing char charoff charset
66
- checked cite class clear cols colspan color compact coords datetime
67
- dir disabled enctype for frame headers height href hreflang hspace id
68
- ismap label lang longdesc maxlength media method multiple name nohref
69
- noshade nowrap prompt readonly rel rev rows rowspan rules scope
70
- selected shape size span src start style summary tabindex target title
71
- type usemap valign value vspace width xml:lang]
72
-
73
- MATHML_ATTRIBUTES = %w[actiontype align close columnalign columnalign
74
- columnalign columnlines columnspacing columnspan depth display
75
- displaystyle encoding equalcolumns equalrows fence fontstyle fontweight
76
- frame height linethickness lspace mathbackground mathcolor mathvariant
77
- mathvariant maxsize minsize open other rowalign rowalign rowalign rowlines
78
- rowspacing rowspan rspace scriptlevel selection separator separators
79
- stretchy width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]
80
-
81
- SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
82
- arabic-form ascent attributeName attributeType baseProfile bbox begin
83
- by calcMode cap-height class color color-rendering content cx cy d dx
84
- dy descent display dur end fill fill-opacity fill-rule font-family
85
- font-size font-stretch font-style font-variant font-weight from fx fy g1
86
- g2 glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
87
- ideographic k keyPoints keySplines keyTimes lang marker-end
88
- marker-mid marker-start markerHeight markerUnits markerWidth
89
- mathematical max min name offset opacity orient origin
90
- overline-position overline-thickness panose-1 path pathLength points
91
- preserveAspectRatio r refX refY repeatCount repeatDur
92
- requiredExtensions requiredFeatures restart rotate rx ry slope stemh
93
- stemv stop-color stop-opacity strikethrough-position
94
- strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
95
- stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
96
- stroke-width systemLanguage target text-anchor to transform type u1
97
- u2 underline-position underline-thickness unicode unicode-range
98
- units-per-em values version viewBox visibility width widths x
99
- x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
100
- xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
101
- xmlns:xlink y y1 y2 zoomAndPan]
102
-
103
- ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]
104
-
105
- SVG_ATTR_VAL_ALLOWS_REF = %w[clip-path color-profile cursor fill
106
- filter marker marker-start marker-mid marker-end mask stroke]
107
-
108
- SVG_ALLOW_LOCAL_HREF = %w[altGlyph animate animateColor animateMotion
109
- animateTransform cursor feImage filter linearGradient pattern
110
- radialGradient textpath tref set use]
111
-
112
- ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
113
- border-bottom-color border-collapse border-color border-left-color
114
- border-right-color border-top-color clear color cursor direction
115
- display elevation float font font-family font-size font-style
116
- font-variant font-weight height letter-spacing line-height overflow
117
- pause pause-after pause-before pitch pitch-range richness speak
118
- speak-header speak-numeral speak-punctuation speech-rate stress
119
- text-align text-decoration text-indent unicode-bidi vertical-align
120
- voice-family volume white-space width]
121
-
122
- ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
123
- brown center collapse dashed dotted fuchsia gray green !important
124
- italic left lime maroon medium none navy normal nowrap olive pointer
125
- purple red right solid silver teal top transparent underline white
126
- yellow]
127
-
128
- ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
129
- stroke-width stroke-linecap stroke-linejoin stroke-opacity]
130
-
131
- ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
132
- telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]
133
-
134
- # subclasses may define their own versions of these constants
135
- ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
136
- ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
137
- ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
138
- ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
139
- ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
140
- ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
141
-
142
- VOID_ELEMENTS = %w[
143
- base
144
- link
145
- meta
146
- hr
147
- br
148
- img
149
- embed
150
- param
151
- area
152
- col
153
- input
154
- ]
155
-
156
- # additional tags we should consider safe since we have libxml2 fixing up our documents.
157
- TAGS_SAFE_WITH_LIBXML2 = %w[html head body]
158
- ALLOWED_ELEMENTS_WITH_LIBXML2 = ALLOWED_ELEMENTS + TAGS_SAFE_WITH_LIBXML2
159
- end
160
-
161
- #
162
- # The HTML5lib whitelist arrays, transformed into hashes for faster lookup.
163
- #
164
- module HashedWhiteList
165
- include Loofah::MetaHelpers::HashifiedConstants(WhiteList)
166
- end
167
- end
168
- end
data/test/helper.rb DELETED
@@ -1,7 +0,0 @@
1
- require 'rubygems'
2
- require 'test/unit'
3
- require 'shoulda'
4
- require 'mocha'
5
- require File.expand_path(File.join(File.dirname(__FILE__), "..", "lib", "loofah"))
6
-
7
- puts "=> testing with Nokogiri #{Nokogiri::VERSION_INFO.inspect}"
data/test/html5/test_sanitizer.rb DELETED
@@ -1,248 +0,0 @@
1
- #
2
- # these tests taken from the HTML5 sanitization project and modified for use with Loofah
3
- # see the original here: http://code.google.com/p/html5lib/source/browse/ruby/test/test_sanitizer.rb
4
- #
5
- # license text at the bottom of this file
6
- #
7
- require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
8
- require 'json'
9
-
10
- class Html5TestSanitizer < Test::Unit::TestCase
11
- include Loofah
12
-
13
- def sanitize_xhtml stream
14
- Loofah.fragment(stream).scrub!(:escape).to_xhtml
15
- end
16
-
17
- def sanitize_html stream
18
- Loofah.fragment(stream).scrub!(:escape).to_html
19
- end
20
-
21
- def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
22
- ## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
23
- sane = sanitize_html(input).gsub('"',"'")
24
-
25
- ## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that
26
- ## it would require a lot of manual hacking to make the tests match libxml's output.
27
- ## instead, I'm taking the shotgun approach, and trying to match any of the described outputs.
28
- assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane), input)
29
- end
30
-
31
- (HTML5::WhiteList::ALLOWED_ELEMENTS).each do |tag_name|
32
- define_method "test_should_allow_#{tag_name}_tag" do
33
- input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
34
- htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
35
- xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
36
- rexmloutput = xhtmloutput
37
-
38
- if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
39
- htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
40
- xhtmloutput = htmloutput
41
- elsif tag_name == 'col'
42
- htmloutput = "<col title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
43
- xhtmloutput = htmloutput
44
- rexmloutput = "<col title='1' />"
45
- elsif tag_name == 'table'
46
- htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt;baz<table title='1'> </table>"
47
- xhtmloutput = htmloutput
48
- elsif tag_name == 'image'
49
- htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
50
- xhtmloutput = htmloutput
51
- rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
52
- elsif HTML5::WhiteList::VOID_ELEMENTS.include?(tag_name)
53
- htmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
54
- xhtmloutput = htmloutput
55
- htmloutput += '<br/>' if tag_name == 'br'
56
- rexmloutput = "<#{tag_name} title='1' />"
57
- end
58
- check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
59
- end
60
- end
61
-
62
- ##
63
- ## libxml2 downcases elements, so this is moot.
64
- ##
65
- # HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
66
- # define_method "test_should_forbid_#{tag_name.upcase}_tag" do
67
- # input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
68
- # output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
69
- # check_sanitization(input, output, output, output)
70
- # end
71
- # end
72
-
73
- HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
74
- next if attribute_name == 'style'
75
- define_method "test_should_allow_#{attribute_name}_attribute" do
76
- input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
77
- if %w[checked compact disabled ismap multiple nohref noshade nowrap readonly selected].include?(attribute_name)
78
- output = "<p #{attribute_name}>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
79
- htmloutput = "<p #{attribute_name.downcase}>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
80
- else
81
- output = "<p #{attribute_name}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
82
- htmloutput = "<p #{attribute_name.downcase}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
83
- end
84
- check_sanitization(input, htmloutput, output, output)
85
- end
86
- end
87
-
88
- ##
89
- ## libxml2 downcases attributes, so this is moot.
90
- ##
91
- # HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
92
- # define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
93
- # input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
94
- # output = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
95
- # check_sanitization(input, output, output, output)
96
- # end
97
- # end
98
-
99
- HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
100
- define_method "test_should_allow_#{protocol}_uris" do
101
- input = %(<a href="#{protocol}">foo</a>)
102
- output = "<a href='#{protocol}'>foo</a>"
103
- check_sanitization(input, output, output, output)
104
- end
105
- end
106
-
107
- HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
108
- define_method "test_should_allow_uppercase_#{protocol}_uris" do
109
- input = %(<a href="#{protocol.upcase}">foo</a>)
110
- output = "<a href='#{protocol.upcase}'>foo</a>"
111
- check_sanitization(input, output, output, output)
112
- end
113
- end
114
-
115
- HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
116
- next unless HTML5::WhiteList::ALLOWED_ELEMENTS.include?(tag_name)
117
- define_method "test_#{tag_name}_should_allow_local_href" do
118
- input = %(<#{tag_name} xlink:href="#foo"/>)
119
- output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
120
- xhtmloutput = "<#{tag_name} xlink:href='#foo'></#{tag_name}>"
121
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
122
- end
123
-
124
- define_method "test_#{tag_name}_should_allow_local_href_with_newline" do
125
- input = %(<#{tag_name} xlink:href="\n#foo"/>)
126
- output = "<#{tag_name.downcase} xlink:href='\n#foo'></#{tag_name.downcase}>"
127
- xhtmloutput = "<#{tag_name} xlink:href='\n#foo'></#{tag_name}>"
128
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
129
- end
130
-
131
- define_method "test_#{tag_name}_should_forbid_nonlocal_href" do
132
- input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>)
133
- output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
134
- xhtmloutput = "<#{tag_name}></#{tag_name}>"
135
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
136
- end
137
-
138
- define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do
139
- input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>)
140
- output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
141
- xhtmloutput = "<#{tag_name}></#{tag_name}>"
142
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
143
- end
144
- end
145
-
146
- ##
147
- ## as tenderlove says, "care < 0"
148
- ##
149
- # def test_should_handle_astral_plane_characters
150
- # input = "<p>&#x1d4b5; &#x1d538;</p>"
151
- # output = "<p>\360\235\222\265 \360\235\224\270</p>"
152
- # check_sanitization(input, output, output, output)
153
-
154
- # input = "<p><tspan>\360\235\224\270</tspan> a</p>"
155
- # output = "<p><tspan>\360\235\224\270</tspan> a</p>"
156
- # check_sanitization(input, output, output, output)
157
- # end
158
-
159
- # This affects only NS4. Is it worth fixing?
160
- # def test_javascript_includes
161
- # input = %(<div size="&{alert('XSS')}">foo</div>)
162
- # output = "<div>foo</div>"
163
- # check_sanitization(input, output, output, output)
164
- # end
165
-
166
- ##
167
- ## these tests primarily test the parser logic, not the sanitizer
168
- ## logic. i call bullshit. we're not writing a test suite for
169
- ## libxml2 here, so let's rely on the unit tests above to take care
170
- ## of our valid elements and attributes.
171
- ##
172
- # Dir[File.join(File.dirname(__FILE__), 'testdata', '*.*')].each do |filename|
173
- # JSON::parse(open(filename).read).each do |test|
174
- # define_method "test_#{test['name']}" do
175
- # check_sanitization(
176
- # test['input'],
177
- # test['output'],
178
- # test['xhtml'] || test['output'],
179
- # test['rexml'] || test['output']
180
- # )
181
- # end
182
- # end
183
- # end
184
-
185
- ## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
186
- HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
187
- define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
188
- input = "<rect fill='url(#foo)' />"
189
- output = "<rect fill='url(#foo)'></rect>"
190
- check_sanitization(input, output, output, output)
191
- end
192
-
193
- define_method "test_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
194
- input = "<rect fill='url(http://bad.com/) #fff' />"
195
- output = "<rect fill=' #fff'></rect>"
196
- check_sanitization(input, output, output, output)
197
- end
198
-
199
- define_method "test_uri_ref_with_space_in_svg_attribute_#{attr_name}" do
200
- input = "<rect fill='url(\n#foo)' />"
201
- rexml = "<rect fill='url(\n#foo)'></rect>"
202
- end
203
-
204
- define_method "test_absolute_uri_ref_with_space_in_svg_attribute_#{attr_name}" do
205
- input = "<rect fill=\"url(\nhttp://bad.com/)\" />"
206
- rexml = "<rect fill=' '></rect>"
207
- end
208
- end
209
-
210
- end
211
-
212
- # <html5_license>
213
- #
214
- # Copyright (c) 2006-2008 The Authors
215
- #
216
- # Contributors:
217
- # James Graham - jg307@cam.ac.uk
218
- # Anne van Kesteren - annevankesteren@gmail.com
219
- # Lachlan Hunt - lachlan.hunt@lachy.id.au
220
- # Matt McDonald - kanashii@kanashii.ca
221
- # Sam Ruby - rubys@intertwingly.net
222
- # Ian Hickson (Google) - ian@hixie.ch
223
- # Thomas Broyer - t.broyer@ltgt.net
224
- # Jacques Distler - distler@golem.ph.utexas.edu
225
- # Henri Sivonen - hsivonen@iki.fi
226
- # The Mozilla Foundation (contributions from Henri Sivonen since 2008)
227
- #
228
- # Permission is hereby granted, free of charge, to any person
229
- # obtaining a copy of this software and associated documentation files
230
- # (the "Software"), to deal in the Software without restriction,
231
- # including without limitation the rights to use, copy, modify, merge,
232
- # publish, distribute, sublicense, and/or sell copies of the Software,
233
- # and to permit persons to whom the Software is furnished to do so,
234
- # subject to the following conditions:
235
- #
236
- # The above copyright notice and this permission notice shall be
237
- # included in all copies or substantial portions of the Software.
238
- #
239
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
240
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
241
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
242
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
243
- # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
244
- # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
245
- # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
246
- # SOFTWARE.
247
- #
248
- # </html5_license>
data/test/integration/test_ad_hoc.rb DELETED
@@ -1,176 +0,0 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
2
-
3
- class TestAdHoc < Test::Unit::TestCase
4
-
5
- context "blank input string" do
6
- context "fragment" do
7
- should "return a blank string" do
8
- assert_equal "", Loofah.scrub_fragment("", :prune).to_s
9
- end
10
- end
11
-
12
- context "document" do
13
- should "return a blank string" do
14
- assert_equal "", Loofah.scrub_document("", :prune).root.to_s
15
- end
16
- end
17
- end
18
-
19
- def test_removal_of_illegal_tag
20
- html = <<-HTML
21
- following this there should be no jim tag
22
- <jim>jim</jim>
23
- was there?
24
- HTML
25
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
26
- assert sane.xpath("//jim").empty?
27
- end
28
-
29
- def test_removal_of_illegal_attribute
30
- html = "<p class=bar foo=bar abbr=bar />"
31
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
32
- node = sane.xpath("//p").first
33
- assert node.attributes['class']
34
- assert node.attributes['abbr']
35
- assert_nil node.attributes['foo']
36
- end
37
-
38
- def test_removal_of_illegal_url_in_href
39
- html = <<-HTML
40
- <a href='jimbo://jim.jim/'>this link should have its href removed because of illegal url</a>
41
- <a href='http://jim.jim/'>this link should be fine</a>
42
- HTML
43
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
44
- nodes = sane.xpath("//a")
45
- assert_nil nodes.first.attributes['href']
46
- assert nodes.last.attributes['href']
47
- end
48
-
49
- def test_css_sanitization
50
- html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
51
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
52
- assert_match(/#000/, sane.inner_html)
53
- assert_no_match(/foo\.com/, sane.inner_html)
54
- end
55
-
56
- def test_fragment_with_no_tags
57
- assert_equal "This fragment has no tags.", Loofah.scrub_fragment("This fragment has no tags.", :escape).to_xml
58
- end
59
-
60
- def test_fragment_in_p_tag
61
- assert_equal "<p>This fragment is in a p.</p>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>", :escape).to_xml
62
- end
63
-
64
- def test_fragment_in_p_tag_plus_stuff
65
- assert_equal "<p>This fragment is in a p.</p>foo<strong>bar</strong>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>foo<strong>bar</strong>", :escape).to_xml
66
- end
67
-
68
- def test_fragment_with_text_nodes_leading_and_trailing
69
- assert_equal "text<p>fragment</p>text", Loofah.scrub_fragment("text<p>fragment</p>text", :escape).to_xml
70
- end
71
-
72
- def test_whitewash_on_fragment
73
- html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
74
- whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
75
- assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
76
- end
77
-
78
- MSWORD_HTML = <<-EOHTML
79
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
80
- <w:WordDocument>
81
- <w:View>Normal</w:View>
82
- <w:Zoom>0</w:Zoom>
83
- <w:PunctuationKerning/>
84
- <w:ValidateAgainstSchemas/>
85
- <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
86
- <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
87
- <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
88
- <w:Compatibility>
89
- <w:BreakWrappedTables/>
90
- <w:SnapToGridInCell/>
91
- <w:WrapTextWithPunct/>
92
- <w:UseAsianBreakRules/>
93
- <w:DontGrowAutofit/>
94
- </w:Compatibility>
95
- <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
96
- </w:WordDocument>
97
- </xml><![endif]--><!--[if gte mso 9]><xml>
98
- <w:LatentStyles DefLockedState="false" LatentStyleCount="156">
99
- </w:LatentStyles>
100
- </xml><![endif]--><style>
101
- <!--
102
- /* Style Definitions */
103
- p.MsoNormal, li.MsoNormal, div.MsoNormal
104
- {mso-style-parent:"";
105
- margin:0in;
106
- margin-bottom:.0001pt;
107
- mso-pagination:widow-orphan;
108
- font-size:12.0pt;
109
- font-family:"Times New Roman";
110
- mso-fareast-font-family:"Times New Roman";}
111
- @page Section1
112
- {size:8.5in 11.0in;
113
- margin:1.0in 1.25in 1.0in 1.25in;
114
- mso-header-margin:.5in;
115
- mso-footer-margin:.5in;
116
- mso-paper-source:0;}
117
- div.Section1
118
- {page:Section1;}
119
- -->
120
- </style><!--[if gte mso 10]>
121
- <style>
122
- /* Style Definitions */
123
- table.MsoNormalTable
124
- {mso-style-name:"Table Normal";
125
- mso-tstyle-rowband-size:0;
126
- mso-tstyle-colband-size:0;
127
- mso-style-noshow:yes;
128
- mso-style-parent:"";
129
- mso-padding-alt:0in 5.4pt 0in 5.4pt;
130
- mso-para-margin:0in;
131
- mso-para-margin-bottom:.0001pt;
132
- mso-pagination:widow-orphan;
133
- font-size:10.0pt;
134
- font-family:"Times New Roman";
135
- mso-ansi-language:#0400;
136
- mso-fareast-language:#0400;
137
- mso-bidi-language:#0400;}
138
- </style>
139
- <![endif]-->
140
-
141
- <p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
142
- EOHTML
143
-
144
- def test_fragment_whitewash_on_microsofty_markup
145
- whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
146
- assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s
147
- end
148
-
149
- def test_document_whitewash_on_microsofty_markup
150
- whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
151
- assert_contains whitewashed.to_s, %r(<p>Foo <b>BOLD</b></p>)
152
- assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
153
- end
154
-
155
- def test_return_empty_string_when_nothing_left
156
- assert_equal "", Loofah.scrub_document('<script>test</script>', :prune).text
157
- end
158
-
159
- def test_removal_of_all_tags
160
- html = <<-HTML
161
- What's up <strong>doc</strong>?
162
- HTML
163
- stripped = Loofah.scrub_document(html, :prune).text
164
- assert_equal %Q(What\'s up doc?).strip, stripped.strip
165
- end
166
-
167
- def test_dont_remove_whitespace
168
- html = "Foo\nBar"
169
- assert_equal html, Loofah.scrub_document(html, :prune).text
170
- end
171
-
172
- def test_dont_remove_whitespace_between_tags
173
- html = "<p>Foo</p>\n<p>Bar</p>"
174
- assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
175
- end
176
- end
data/test/integration/test_helpers.rb DELETED
@@ -1,33 +0,0 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
2
-
3
- class TestHelpers < Test::Unit::TestCase
4
- context "#strip_tags" do
5
- context "on safe markup" do
6
- should "strip out tags" do
7
- assert_equal "omgwtfbbq!!1!", Loofah::Helpers.strip_tags("<div>omgwtfbbq</div><span>!!1!</span>")
8
- end
9
- end
10
-
11
- context "on hack attack" do
12
- should "strip escape html entities" do
13
- bad_shit = "&lt;script&gt;alert('evil')&lt;/script&gt;"
14
- assert_equal bad_shit, Loofah::Helpers.strip_tags(bad_shit)
15
- end
16
- end
17
- end
18
-
19
- context "#sanitize" do
20
- context "on safe markup" do
21
- should "render the safe html" do
22
- html = "<div>omgwtfbbq</div><span>!!1!</span>"
23
- assert_equal html, Loofah::Helpers.sanitize(html)
24
- end
25
- end
26
-
27
- context "on hack attack" do
28
- should "strip the unsafe tags" do
29
- assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><span>w00t</span>")
30
- end
31
- end
32
- end
33
- end