loofah 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -0,0 +1,28 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
2
+
3
+ class TestHelpers < Test::Unit::TestCase
4
+
5
+ HTML_STRING = "<div>omgwtfbbq</div>"
6
+
7
+ context "when calling strip_tags" do
8
+ should "invoke Loofah.fragment.text" do
9
+ mock_doc = mock
10
+ Loofah.expects(:fragment).with(HTML_STRING).returns(mock_doc)
11
+ mock_doc.expects(:text)
12
+
13
+ Loofah::Helpers.strip_tags HTML_STRING
14
+ end
15
+ end
16
+
17
+ context "when calling sanitize" do
18
+ should "invoke Loofah.fragment.scrub!(:strip).to_s" do
19
+ mock_doc = mock
20
+ Loofah.expects(:fragment).with(HTML_STRING).returns(mock_doc)
21
+ mock_doc.expects(:scrub!).with(:strip).returns(mock_doc)
22
+ mock_doc.expects(:to_s)
23
+
24
+ Loofah::Helpers.sanitize HTML_STRING
25
+ end
26
+ end
27
+
28
+ end
@@ -0,0 +1,171 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
2
+
3
+ class TestXssFoliate < Test::Unit::TestCase
4
+
5
+ HTML_STRING = "<div>omgwtfbbq</div>"
6
+ PLAIN_TEXT = "vanilla text"
7
+ INTEGER_VALUE = "1234"
8
+ WHITESPACEY = " <br> "
9
+
10
+ def new_post(overrides={})
11
+ Post.new({:html_string => HTML_STRING, :plain_text => PLAIN_TEXT, :not_a_string => INTEGER_VALUE}.merge(overrides))
12
+ end
13
+
14
+ context "with a Post model" do
15
+ setup do
16
+ ActsAsFu.build_model(:posts) do
17
+ string :plain_text
18
+ string :html_string
19
+ integer :not_a_string
20
+ end
21
+ end
22
+
23
+ context "#xss_foliated?" do
24
+ context "when xss_foliate has not been called" do
25
+ should "return false" do
26
+ assert ! Post.xss_foliated?
27
+ end
28
+ end
29
+
30
+ context "when xss_foliate has been called with no options" do
31
+ setup do
32
+ Post.xss_foliate
33
+ end
34
+
35
+ should "return true" do
36
+ assert Post.xss_foliated?
37
+ end
38
+ end
39
+
40
+ context "when xss_foliate has been called with options" do
41
+ setup do
42
+ Post.xss_foliate :prune => :plain_text
43
+ end
44
+
45
+ should "return true" do
46
+ assert Post.xss_foliated?
47
+ end
48
+ end
49
+ end
50
+
51
+ context "#xss_foliate" do
52
+ context "when passed invalid option" do
53
+ should "raise ArgumentError" do
54
+ assert_raise(ArgumentError) { Post.xss_foliate :quux => [:foo] }
55
+ end
56
+ end
57
+
58
+ context "when passed a symbol" do
59
+ should "do the right thing" do
60
+ assert_nothing_raised(ArgumentError) { Post.xss_foliate :prune => :plain_text }
61
+ Loofah.expects(:scrub_fragment).with(HTML_STRING, :strip).once
62
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).once
63
+ assert new_post.valid?
64
+ end
65
+ end
66
+
67
+ context "when passed an array of symbols" do
68
+ should "do the right thing" do
69
+ assert_nothing_raised(ArgumentError) {
70
+ Post.xss_foliate :prune => [:plain_text, :html_string]
71
+ }
72
+ Loofah.expects(:scrub_fragment).with(HTML_STRING, :prune).once
73
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).once
74
+ assert new_post.valid?
75
+ end
76
+ end
77
+
78
+ context "when passed a string" do
79
+ should "do the right thing" do
80
+ assert_nothing_raised(ArgumentError) { Post.xss_foliate :prune => 'plain_text' }
81
+ Loofah.expects(:scrub_fragment).with(HTML_STRING, :strip).once
82
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).once
83
+ assert new_post.valid?
84
+ end
85
+ end
86
+
87
+ context "when passed an array of strings" do
88
+ should "do the right thing" do
89
+ assert_nothing_raised(ArgumentError) {
90
+ Post.xss_foliate :prune => ['plain_text', 'html_string']
91
+ }
92
+ Loofah.expects(:scrub_fragment).with(HTML_STRING, :prune).once
93
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).once
94
+ assert new_post.valid?
95
+ end
96
+ end
97
+ end
98
+
99
+ context "declaring scrubbed fields" do
100
+ context "on all fields" do
101
+ setup do
102
+ Post.xss_foliate
103
+ end
104
+
105
+ should "scrub all fields" do
106
+ mock_doc = mock
107
+ Loofah.expects(:scrub_fragment).with(HTML_STRING, :strip).once.returns(mock_doc)
108
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :strip).once.returns(mock_doc)
109
+ Loofah.expects(:scrub_fragment).with(INTEGER_VALUE, :strip).never
110
+ mock_doc.expects(:text).twice
111
+ assert new_post.valid?
112
+ end
113
+ end
114
+
115
+ context "omitting one field" do
116
+ setup do
117
+ Post.xss_foliate :except => [:plain_text]
118
+ end
119
+
120
+ should "not scrub omitted field" do
121
+ Loofah.expects(:scrub_fragment).with(HTML_STRING, :strip).once
122
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :strip).never
123
+ Loofah.expects(:scrub_fragment).with(INTEGER_VALUE, :strip).never
124
+ assert new_post.valid?
125
+ end
126
+ end
127
+
128
+ [:strip, :escape, :prune].each do |method|
129
+ context "declaring one field to be scrubbed with #{method}" do
130
+ setup do
131
+ Post.xss_foliate method => [:plain_text]
132
+ end
133
+
134
+ should "scrub that field appropriately" do
135
+ Loofah.expects(:scrub_fragment).with(HTML_STRING, :strip).once
136
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, method).once
137
+ Loofah.expects(:scrub_fragment).with(INTEGER_VALUE, :strip).never
138
+ assert new_post.valid?
139
+ end
140
+ end
141
+ end
142
+
143
+ context "declaring one field to be scrubbed with html5lib_sanitize" do
144
+ setup do
145
+ Post.xss_foliate :html5lib_sanitize => [:plain_text]
146
+ end
147
+
148
+ should "scrub that field appropriately" do
149
+ Loofah.expects(:scrub_fragment).with(HTML_STRING, :strip).once
150
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :escape).once
151
+ Loofah.expects(:scrub_fragment).with(INTEGER_VALUE, :strip).never
152
+ assert new_post.valid?
153
+ end
154
+ end
155
+ end
156
+
157
+ context "invalid model data" do
158
+ setup do
159
+ Post.validates_presence_of :html_string
160
+ Post.xss_foliate
161
+ end
162
+
163
+ should "not be valid after sanitizing" do
164
+ Loofah.expects(:scrub_fragment).with(WHITESPACEY, :strip).once
165
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :strip).once
166
+ assert ! new_post(:html_string => WHITESPACEY).valid?
167
+ end
168
+ end
169
+
170
+ end
171
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loofah
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Dalessio
@@ -31,7 +31,7 @@ cert_chain:
31
31
  FlqnTjy13J3nD30uxy9a1g==
32
32
  -----END CERTIFICATE-----
33
33
 
34
- date: 2009-08-30 00:00:00 -04:00
34
+ date: 2009-10-06 00:00:00 -04:00
35
35
  default_executable:
36
36
  dependencies:
37
37
  - !ruby/object:Gem::Dependency
@@ -55,17 +55,15 @@ dependencies:
55
55
  version: 2.3.3
56
56
  version:
57
57
  description: |-
58
- Loofah is an HTML sanitizer. It will *always* fix broken markup, but
58
+ Loofah is an HTML sanitizer. It will always fix broken markup, but
59
59
  can also sanitize unsafe tags in a few different ways, and transform
60
60
  the markup for storage or display.
61
61
 
62
62
  It's built on top of Nokogiri and libxml2, so it's fast. And it uses
63
63
  html5lib's whitelist, so it most likely won't make your codes less
64
- secure.
64
+ secure. \*
65
65
 
66
- (These statements have not been evaluated by Internet Experts.)
67
-
68
- This library was formerly known as Dryopteris.
66
+ \* These statements have not been evaluated by Netexperts.
69
67
  email:
70
68
  - mike.dalessio@gmail.com
71
69
  - bryan@brynary.com
@@ -78,9 +76,11 @@ extra_rdoc_files:
78
76
  - Manifest.txt
79
77
  - TODO.rdoc
80
78
  - CHANGELOG.rdoc
79
+ - DEPRECATED.rdoc
81
80
  - README.rdoc
82
81
  files:
83
82
  - CHANGELOG.rdoc
83
+ - DEPRECATED.rdoc
84
84
  - MIT-LICENSE.txt
85
85
  - Manifest.txt
86
86
  - README.rdoc
@@ -93,24 +93,24 @@ files:
93
93
  - init.rb
94
94
  - lib/loofah.rb
95
95
  - lib/loofah/active_record.rb
96
- - lib/loofah/deprecated.rb
96
+ - lib/loofah/helpers.rb
97
97
  - lib/loofah/html/document.rb
98
98
  - lib/loofah/html/document_fragment.rb
99
99
  - lib/loofah/html5/scrub.rb
100
100
  - lib/loofah/html5/whitelist.rb
101
101
  - lib/loofah/scrubber.rb
102
+ - lib/loofah/xss_foliate.rb
102
103
  - test/helper.rb
103
- - test/html5/test_deprecated_sanitizer.rb
104
104
  - test/html5/test_sanitizer.rb
105
105
  - test/html5/testdata/tests1.dat
106
106
  - test/test_active_record.rb
107
+ - test/test_ad_hoc.rb
107
108
  - test/test_api.rb
108
- - test/test_deprecated_basic.rb
109
- - test/test_microsofty.rb
109
+ - test/test_helpers.rb
110
110
  - test/test_scrubber.rb
111
- - test/test_strip_tags.rb
111
+ - test/test_xss_foliate.rb
112
112
  has_rdoc: true
113
- homepage: http://loofah.rubyforge.org/
113
+ homepage: http://loofah.rubyforge.org
114
114
  licenses: []
115
115
 
116
116
  post_install_message:
@@ -139,11 +139,10 @@ signing_key:
139
139
  specification_version: 3
140
140
  summary: Loofah is an HTML sanitizer
141
141
  test_files:
142
- - test/test_deprecated_basic.rb
142
+ - test/test_xss_foliate.rb
143
+ - test/test_helpers.rb
143
144
  - test/test_scrubber.rb
144
- - test/test_strip_tags.rb
145
145
  - test/test_api.rb
146
+ - test/test_ad_hoc.rb
146
147
  - test/html5/test_sanitizer.rb
147
- - test/html5/test_deprecated_sanitizer.rb
148
148
  - test/test_active_record.rb
149
- - test/test_microsofty.rb
metadata.gz.sig CHANGED
Binary file
@@ -1,38 +0,0 @@
1
- module Loofah
2
- class << self
3
- def strip_tags(string_or_io) # :nodoc:
4
- warn_once "WARNING: Loofah.strip_tags is deprecated and will be removed in Loofah 0.3.0. Please switch to Loofah.scrub_document(string_or_io, :prune)"
5
- Loofah.scrub_document(string_or_io, :prune).text
6
- end
7
-
8
- def whitewash(string_or_io) # :nodoc:
9
- warn_once "WARNING: Loofah.whitewash is deprecated and will be removed in Loofah 0.3.0. Please switch to Loofah.scrub_fragment(string_or_io, :whitewash)"
10
- Loofah.scrub_fragment(string_or_io, :whitewash).to_s
11
- end
12
-
13
- def whitewash_document(string_or_io) # :nodoc:
14
- warn_once "WARNING: Loofah.whitewash_document is deprecated and will be removed in Loofah 0.3.0. Please switch to Loofah.scrub_document(string_or_io, :whitewash)"
15
- Loofah.scrub_document(string_or_io, :whitewash).to_s
16
- end
17
-
18
- def sanitize(string_or_io) # :nodoc:
19
- warn_once "WARNING: Loofah.sanitize is deprecated and will be removed in Loofah 0.3.0. Please switch to Loofah.scrub_fragment(string_or_io, :escape)"
20
- Loofah.scrub_fragment(string_or_io, :escape).to_xml
21
- end
22
-
23
- def sanitize_document(string_or_io) # :nodoc:
24
- warn_once "WARNING: Loofah.sanitize_document is deprecated and will be removed in Loofah 0.3.0. Please switch to Loofah.scrub_document(string_or_io, :escape)"
25
- Loofah.scrub_document(string_or_io, :escape).to_xml
26
- end
27
-
28
- private
29
-
30
- def warn_once(message) # :nodoc:
31
- @aooga ||= {}
32
- unless @aooga.key?(message)
33
- warn message unless @aooga[message]
34
- @aooga[message] = true
35
- end
36
- end
37
- end
38
- end
@@ -1,185 +0,0 @@
1
- #
2
- # these tests taken from the HTML5 sanitization project and modified for use with Loofah
3
- # see the original here: http://code.google.com/p/html5lib/source/browse/ruby/test/test_sanitizer.rb
4
- #
5
- # license text at the bottom of this file
6
- #
7
- require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
8
-
9
- class HTML5TestDeprecatedSanitizer < Test::Unit::TestCase
10
- include Loofah
11
-
12
- def sanitize_html stream
13
- Loofah.sanitize(stream)
14
- end
15
-
16
- def sanitize_doc stream
17
- Loofah.sanitize_document(stream)
18
- end
19
-
20
- def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
21
- # libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
22
- assert_equal htmloutput, sanitize_html(input).gsub(/"/,"'"), input
23
-
24
- doc = sanitize_doc(input).gsub(/"/,"'")
25
- assert doc.include?(htmloutput), "#{input}:\n#{doc}\nshould include:\n#{htmloutput}"
26
- end
27
-
28
- HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
29
- define_method "test_should_allow_#{tag_name}_tag" do
30
- input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
31
- htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
32
- xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
33
- rexmloutput = xhtmloutput
34
-
35
- ##
36
- ## these special cases are HTML5-tokenizer-dependent.
37
- ## libxml2 cleans up HTML differently, and I trust that.
38
- ##
39
- # if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
40
- # htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
41
- # xhtmloutput = htmloutput
42
- # elsif tag_name == 'col'
43
- # htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
44
- # xhtmloutput = htmloutput
45
- # rexmloutput = "<col title='1' />"
46
- # elsif tag_name == 'table'
47
- # htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt;baz<table title='1'> </table>"
48
- # xhtmloutput = htmloutput
49
- # elsif tag_name == 'image'
50
- # htmloutput = "<image title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
51
- # xhtmloutput = htmloutput
52
- # rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
53
- if HTML5::WhiteList::VOID_ELEMENTS.include?(tag_name)
54
- if Nokogiri::LIBXML_VERSION <= "2.6.16"
55
- htmloutput = "<#{tag_name} title='1'/><p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
56
- else
57
- htmloutput = "<#{tag_name} title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
58
- end
59
- xhtmloutput = htmloutput
60
- # htmloutput += '<br/>' if tag_name == 'br'
61
- rexmloutput = "<#{tag_name} title='1' />"
62
- end
63
- check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
64
- end
65
- end
66
-
67
- ##
68
- ## libxml2 downcases tag names as it parses, so this is unnecessary.
69
- ##
70
- # HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
71
- # define_method "test_should_forbid_#{tag_name.upcase}_tag" do
72
- # input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
73
- # output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
74
- # check_sanitization(input, output, output, output)
75
- # end
76
- # end
77
-
78
- HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
79
- next if attribute_name == 'style'
80
- next if attribute_name =~ /:/ && Nokogiri::LIBXML_VERSION <= '2.6.16'
81
- define_method "test_should_allow_#{attribute_name}_attribute" do
82
- input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
83
- output = "<p #{attribute_name}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
84
- htmloutput = "<p #{attribute_name.downcase}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
85
- check_sanitization(input, htmloutput, output, output)
86
- end
87
- end
88
-
89
- ##
90
- ## libxml2 downcases attributes as it parses, so this is unnecessary.
91
- ##
92
- # HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
93
- # define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
94
- # input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
95
- # output = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
96
- # check_sanitization(input, output, output, output)
97
- # end
98
- # end
99
-
100
- HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
101
- define_method "test_should_allow_#{protocol}_uris" do
102
- input = %(<a href="#{protocol}">foo</a>)
103
- output = "<a href='#{protocol}'>foo</a>"
104
- check_sanitization(input, output, output, output)
105
- end
106
- end
107
-
108
- HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
109
- define_method "test_should_allow_uppercase_#{protocol}_uris" do
110
- input = %(<a href="#{protocol.upcase}">foo</a>)
111
- output = "<a href='#{protocol.upcase}'>foo</a>"
112
- check_sanitization(input, output, output, output)
113
- end
114
- end
115
-
116
- if false # TODO. should we even care about this? libxml2 punt.
117
- def test_should_handle_astral_plane_characters
118
- input = "<p>&#x1d4b5; &#x1d538;</p>"
119
- output = "<p>\360\235\222\265 \360\235\224\270</p>"
120
- check_sanitization(input, output, output, output)
121
-
122
- input = "<p><tspan>\360\235\224\270</tspan> a</p>"
123
- output = "<p><tspan>\360\235\224\270</tspan> a</p>"
124
- check_sanitization(input, output, output, output)
125
- end
126
- end
127
-
128
- # This affects only NS4. Is it worth fixing?
129
- # def test_javascript_includes
130
- # input = %(<div size="&{alert('XSS')}">foo</div>)
131
- # output = "<div>foo</div>"
132
- # check_sanitization(input, output, output, output)
133
- # end
134
-
135
- #html5_test_files('sanitizer').each do |filename|
136
- # JSON::parse(open(filename).read).each do |test|
137
- # define_method "test_#{test['name']}" do
138
- # check_sanitization(
139
- # test['input'],
140
- # test['output'],
141
- # test['xhtml'] || test['output'],
142
- # test['rexml'] || test['output']
143
- # )
144
- # end
145
- # end
146
- #end
147
- end
148
-
149
- # <html5_license>
150
- #
151
- # Copyright (c) 2006-2008 The Authors
152
- #
153
- # Contributors:
154
- # James Graham - jg307@cam.ac.uk
155
- # Anne van Kesteren - annevankesteren@gmail.com
156
- # Lachlan Hunt - lachlan.hunt@lachy.id.au
157
- # Matt McDonald - kanashii@kanashii.ca
158
- # Sam Ruby - rubys@intertwingly.net
159
- # Ian Hickson (Google) - ian@hixie.ch
160
- # Thomas Broyer - t.broyer@ltgt.net
161
- # Jacques Distler - distler@golem.ph.utexas.edu
162
- # Henri Sivonen - hsivonen@iki.fi
163
- # The Mozilla Foundation (contributions from Henri Sivonen since 2008)
164
- #
165
- # Permission is hereby granted, free of charge, to any person
166
- # obtaining a copy of this software and associated documentation files
167
- # (the "Software"), to deal in the Software without restriction,
168
- # including without limitation the rights to use, copy, modify, merge,
169
- # publish, distribute, sublicense, and/or sell copies of the Software,
170
- # and to permit persons to whom the Software is furnished to do so,
171
- # subject to the following conditions:
172
- #
173
- # The above copyright notice and this permission notice shall be
174
- # included in all copies or substantial portions of the Software.
175
- #
176
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
177
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
178
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
179
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
180
- # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
181
- # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
182
- # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
183
- # SOFTWARE.
184
- #
185
- # </html5_license>