loofah 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -1,6 +1,5 @@
1
1
  $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
2
 
3
- require 'rubygems'
4
3
  require 'nokogiri'
5
4
 
6
5
  require 'loofah/html5/whitelist'
@@ -11,8 +10,7 @@ require 'loofah/scrubber'
11
10
  require 'loofah/html/document'
12
11
  require 'loofah/html/document_fragment'
13
12
 
14
- require 'loofah/deprecated'
15
-
13
+ require 'loofah/helpers'
16
14
 
17
15
  #
18
16
  # Loofah is an HTML sanitizer wrapped around Nokogiri[http://nokogiri.org], an excellent
@@ -161,7 +159,7 @@ require 'loofah/deprecated'
161
159
  #
162
160
  module Loofah
163
161
  # The version of Loofah you are using
164
- VERSION = '0.2.2'
162
+ VERSION = '0.3.0'
165
163
 
166
164
  # The minimum required version of Nokogiri
167
165
  REQUIRED_NOKOGIRI_VERSION = '1.3.3'
@@ -196,8 +194,12 @@ if Nokogiri::VERSION < Loofah::REQUIRED_NOKOGIRI_VERSION
196
194
  raise RuntimeError, "Loofah requires Nokogiri #{Loofah::REQUIRED_NOKOGIRI_VERSION} or later (currently #{Nokogiri::VERSION})"
197
195
  end
198
196
 
199
- if defined? Rails.configuration
197
+ if defined? Rails.configuration # rails 2.1 and later
200
198
  Rails.configuration.after_initialize do
201
199
  require 'loofah/active_record'
200
+ require 'loofah/xss_foliate'
202
201
  end
202
+ elsif defined? ActiveRecord::Base # rails 2.0
203
+ require 'loofah/active_record'
204
+ require 'loofah/xss_foliate'
203
205
  end
@@ -20,8 +20,20 @@ module Loofah
20
20
  #
21
21
  module ActiveRecordExtension
22
22
  #
23
- # scrub an ActiveRecord attribute +attr+ as an HTML fragment
24
- # using the method specified in the required +:scrub+ option.
23
+ # :call-seq:
24
+ # model.html_fragment(attribute, :scrub => sanitization_method)
25
+ #
26
+ # Scrub an ActiveRecord attribute +attribute+ as an HTML *fragment*
27
+ # using the method specified by +sanitization_method+.
28
+ #
29
+ # +sanitization_method+ must be one of:
30
+ #
31
+ # * :strip
32
+ # * :prune
33
+ # * :escape
34
+ # * :whitewash
35
+ #
36
+ # See Loofah for an explanation of each sanitization method.
25
37
  #
26
38
  def html_fragment(attr, options={})
27
39
  raise ArgumentError, "html_fragment requires :scrub option" unless method = options[:scrub]
@@ -31,8 +43,20 @@ module Loofah
31
43
  end
32
44
 
33
45
  #
34
- # scrub an ActiveRecord attribute +attr+ as an HTML document
35
- # using the method specified in the required +:scrub+ option.
46
+ # :call-seq:
47
+ # model.html_document(attribute, :scrub => sanitization_method)
48
+ #
49
+ # Scrub an ActiveRecord attribute +attribute+ as an HTML *document*
50
+ # using the method specified by +sanitization_method+.
51
+ #
52
+ # +sanitization_method+ must be one of:
53
+ #
54
+ # * :strip
55
+ # * :prune
56
+ # * :escape
57
+ # * :whitewash
58
+ #
59
+ # See Loofah for an explanation of each sanitization method.
36
60
  #
37
61
  def html_document(attr, options={})
38
62
  raise ArgumentError, "html_document requires :scrub option" unless method = options[:scrub]
@@ -0,0 +1,23 @@
1
+ module Loofah
2
+ module Helpers
3
+ class << self
4
+ #
5
+ # A replacement for Rails's built-in +strip_tags+ helper.
6
+ #
7
+ # Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
8
+ #
9
+ def strip_tags(string_or_io)
10
+ Loofah.fragment(string_or_io).text
11
+ end
12
+
13
+ #
14
+ # A replacement for Rails's built-in +sanitize+ helper.
15
+ #
16
+ # Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>") # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
17
+ #
18
+ def sanitize(string_or_io)
19
+ Loofah.scrub_fragment(string_or_io, :strip).to_s
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,210 @@
1
+ module Loofah
2
+ #
3
+ # A replacement for
4
+ # XssTerminate[http://github.com/look/xss_terminate/tree/master],
5
+ # XssFoliate will strip all tags from your ActiveRecord models'
6
+ # string and text attributes.
7
+ #
8
+ # See Loofah::XssFoliate::ClassMethods for more information.
9
+ #
10
+ module XssFoliate
11
+ #
12
+ # A replacement for
13
+ # XssTerminate[http://github.com/look/xss_terminate/tree/master],
14
+ # XssFoliate will strip all tags from your ActiveRecord models'
15
+ # string and text attributes.
16
+ #
17
+ # Please read the Loofah documentation for an explanation of the
18
+ # different scrubbing methods.
19
+ #
20
+ # If you'd like to scrub all fields in all your models (and perhaps *opt-out* in specific models):
21
+ #
22
+ # # config/environment.rb
23
+ # LOOFAH_XSS_FOLIATE_ALL_MODELS = true
24
+ # Rails::Initializer.run do |config|
25
+ # config.gem "loofah"
26
+ # end
27
+ #
28
+ # # db/schema.rb
29
+ # create_table "posts" do |t|
30
+ # t.string "title"
31
+ # t.text "body"
32
+ # t.string "author"
33
+ # end
34
+ #
35
+ # # app/models/post.rb
36
+ # class Post < ActiveRecord::Base
37
+ # # by default, title, body and author will all be scrubbed down to their inner text
38
+ # end
39
+ #
40
+ # OR
41
+ #
42
+ # # app/models/post.rb
43
+ # class Post < ActiveRecord::Base
44
+ # xss_foliate :except => :author # opt-out of sanitizing author
45
+ # end
46
+ #
47
+ # OR
48
+ #
49
+ # xss_foliate :strip => [:title, :body] # strip unsafe tags from both title and body
50
+ #
51
+ # OR
52
+ #
53
+ # xss_foliate :except => :title # scrub body and author but not title
54
+ #
55
+ # OR
56
+ #
57
+ # # remove all tags from title, remove unsafe tags from body
58
+ # xss_foliate :text => :title, :strip => :body
59
+ #
60
+ # OR
61
+ #
62
+ # # old xss_terminate code will work if you s/_terminate/_foliate/
63
+ # # was: xss_terminate :except => [:title], :sanitize => [:body]
64
+ # xss_foliate :except => [:title], :sanitize => [:body]
65
+ #
66
+ # Alternatively, if you would like to *opt-in* to the models and attributes that are sanitized:
67
+ #
68
+ # # config/environment.rb
69
+ # LOOFAH_XSS_FOLIATE_ALL_MODELS = false # default, this line could be omitted
70
+ # Rails::Initializer.run do |config|
71
+ # config.gem "loofah"
72
+ # end
73
+ #
74
+ # # db/schema.rb
75
+ # create_table "posts" do |t|
76
+ # t.string "title"
77
+ # t.text "body"
78
+ # t.string "author"
79
+ # end
80
+ #
81
+ # # app/models/post.rb
82
+ # class Post < ActiveRecord::Base
83
+ # xss_foliate # scrub title, body and author down to their inner text
84
+ # end
85
+ #
86
+ module ClassMethods
87
+ # :stopdoc:
88
+ VALID_OPTIONS = [:except, :strip, :escape, :prune, :text, :html5lib_sanitize, :sanitize]
89
+ ALIASED_OPTIONS = {:html5lib_sanitize => :escape, :sanitize => :strip}
90
+ REAL_OPTIONS = VALID_OPTIONS - ALIASED_OPTIONS.keys
91
+ # :startdoc:
92
+
93
+ #
94
+ # Annotate your model with this method to specify which fields
95
+ # you want scrubbed, and how you want them scrubbed. XssFoliate
96
+ # assumes all character fields are HTML fragments (as opposed to
97
+ # full documents, see the Loofah[http://loofah.rubyforge.org/]
98
+ # documentation for a full explanation of the difference).
99
+ #
100
+ # Example call:
101
+ #
102
+ # xss_foliate :except => :author, :strip => :body, :prune => [:title, :description]
103
+ #
104
+ # *Note* that the values in the options hash can be either an
105
+ # array of attributes or a single attribute.
106
+ #
107
+ # Options:
108
+ #
109
+ # :except => [fields] # don't scrub these fields
110
+ # :strip => [fields] # strip unsafe tags from these fields
111
+ # :escape => [fields] # escape unsafe tags from these fields
112
+ # :prune => [fields] # prune unsafe tags and subtrees from these fields
113
+ # :text => [fields] # remove everything except the inner text from these fields
114
+ #
115
+ # XssTerminate compatibility options (note that the default
116
+ # behavior in XssTerminate corresponds to :text)
117
+ #
118
+ # :html5lib_sanitize => [fields] # same as :escape
119
+ # :sanitize => [fields] # same as :strip
120
+ #
121
+ # The default is :text for all fields unless otherwise specified.
122
+ #
123
+ def xss_foliate(options = {})
124
+ callback_already_declared = \
125
+ if respond_to?(:before_validation_callback_chain)
126
+ # Rails 2.1 and later
127
+ before_validation_callback_chain.any? {|cb| cb.method == :xss_foliate_fields}
128
+ else
129
+ # Rails 2.0
130
+ cbs = read_inheritable_attribute(:before_validation)
131
+ (! cbs.nil?) && cbs.any? {|cb| cb == :xss_foliate_fields}
132
+ end
133
+
134
+ unless callback_already_declared
135
+ before_validation :xss_foliate_fields
136
+ class_inheritable_reader :xss_foliate_options
137
+ include XssFoliate::InstanceMethods
138
+ end
139
+
140
+ options.keys.each do |option|
141
+ raise ArgumentError, "unknown xss_foliate option #{option}" unless VALID_OPTIONS.include?(option)
142
+ end
143
+
144
+ REAL_OPTIONS.each do |option|
145
+ options[option] = Array(options[option]).collect { |val| val.to_sym }
146
+ end
147
+
148
+ ALIASED_OPTIONS.each do |option, real|
149
+ options[real] += Array(options.delete(option)).collect { |val| val.to_sym } if options[option]
150
+ end
151
+
152
+ write_inheritable_attribute(:xss_foliate_options, options)
153
+ end
154
+
155
+ #
156
+ # Class method to determine whether or not this model is applying
157
+ # xss_foliation to its attributes. Could be useful in test suites.
158
+ #
159
+ def xss_foliated?
160
+ options = read_inheritable_attribute(:xss_foliate_options)
161
+ ! (options.nil? || options.empty?)
162
+ end
163
+ end
164
+
165
+ module InstanceMethods
166
+
167
+ def xss_foliate_fields # :nodoc:
168
+ # fix a bug with Rails internal AR::Base models that get loaded before
169
+ # the plugin, like CGI::Sessions::ActiveRecordStore::Session
170
+ return if xss_foliate_options.nil?
171
+
172
+ self.class.columns.each do |column|
173
+ next unless (column.type == :string || column.type == :text)
174
+
175
+ field = column.name.to_sym
176
+ value = self[field]
177
+
178
+ next if value.nil? || !value.is_a?(String)
179
+
180
+ if xss_foliate_options[:except].include?(field)
181
+ next
182
+
183
+ elsif xss_foliate_options[:strip].include?(field)
184
+ fragment = Loofah.scrub_fragment(value, :strip)
185
+ self[field] = fragment.nil? ? "" : fragment.to_s
186
+
187
+ elsif xss_foliate_options[:prune].include?(field)
188
+ fragment = Loofah.scrub_fragment(value, :prune)
189
+ self[field] = fragment.nil? ? "" : fragment.to_s
190
+
191
+ elsif xss_foliate_options[:escape].include?(field)
192
+ fragment = Loofah.scrub_fragment(value, :escape)
193
+ self[field] = fragment.nil? ? "" : fragment.to_s
194
+
195
+ else # :text
196
+ fragment = Loofah.scrub_fragment(value, :strip)
197
+ self[field] = fragment.nil? ? "" : fragment.text
198
+ end
199
+ end
200
+
201
+ end
202
+ end
203
+ end
204
+ end
205
+
206
+ ActiveRecord::Base.extend(Loofah::XssFoliate::ClassMethods)
207
+
208
+ if defined?(LOOFAH_XSS_FOLIATE_ALL_MODELS) && LOOFAH_XSS_FOLIATE_ALL_MODELS
209
+ ActiveRecord::Base.xss_foliate
210
+ end
@@ -16,40 +16,82 @@ class TestActiveRecord < Test::Unit::TestCase
16
16
  end
17
17
  end
18
18
 
19
- context "scrubbing field as a fragment" do
20
- setup do
21
- Post.html_fragment :html_string, :scrub => :prune
22
- @post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
19
+ context "scrubbing a single field as a fragment" do
20
+ context "using a symbol to indicate the attribute" do
21
+ setup do
22
+ Post.html_fragment :html_string, :scrub => :prune
23
+ assert ! Post.xss_foliated?
24
+ @post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
25
+ end
26
+
27
+ should "scrub the specified field" do
28
+ Loofah.expects(:scrub_fragment).with(HTML_STRING, :prune).once
29
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).never
30
+ @post.valid?
31
+ end
32
+
33
+ should "only call scrub_fragment once" do
34
+ Loofah.expects(:scrub_fragment).once
35
+ @post.valid?
36
+ end
37
+
38
+ should "generate strings" do
39
+ @post.valid?
40
+ assert_equal String, @post.html_string.class
41
+ assert_equal HTML_STRING, @post.html_string
42
+ end
23
43
  end
24
44
 
25
- should "scrub the specified field" do
26
- Loofah.expects(:scrub_fragment).with(HTML_STRING, :prune).once
27
- Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).never
28
- @post.valid?
29
- end
30
-
31
- should "generate strings" do
32
- @post.valid?
33
- assert_equal String, @post.html_string.class
34
- assert_equal HTML_STRING, @post.html_string
45
+ context "using a string to indicate the attribute" do
46
+ setup do
47
+ Post.html_fragment 'html_string', :scrub => :prune
48
+ assert ! Post.xss_foliated?
49
+ @post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
50
+ end
51
+
52
+ should "scrub the specified field" do
53
+ Loofah.expects(:scrub_fragment).with(HTML_STRING, :prune).once
54
+ Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).never
55
+ @post.valid?
56
+ end
35
57
  end
36
58
  end
37
59
 
38
- context "scrubbing field as a document" do
39
- setup do
40
- Post.html_document :html_string, :scrub => :strip
41
- @post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
42
- end
43
-
44
- should "scrub the specified field, but not other fields" do
45
- Loofah.expects(:scrub_document).with(HTML_STRING, :strip).once
46
- Loofah.expects(:scrub_document).with(PLAIN_TEXT, :strip).never
47
- @post.valid?
60
+ context "scrubbing a single field as a document" do
61
+ context "using a symbol to indicate the attribute" do
62
+ setup do
63
+ Post.html_document :html_string, :scrub => :strip
64
+ @post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
65
+ end
66
+
67
+ should "scrub the specified field, but not other fields" do
68
+ Loofah.expects(:scrub_document).with(HTML_STRING, :strip).once
69
+ Loofah.expects(:scrub_document).with(PLAIN_TEXT, :strip).never
70
+ @post.valid?
71
+ end
72
+
73
+ should "only call scrub_document once" do
74
+ Loofah.expects(:scrub_document).once
75
+ @post.valid?
76
+ end
77
+
78
+ should "generate strings" do
79
+ @post.valid?
80
+ assert_equal String, @post.html_string.class
81
+ end
48
82
  end
49
83
 
50
- should "generate strings" do
51
- @post.valid?
52
- assert_equal String, @post.html_string.class
84
+ context "using a string to indicate the attribute" do
85
+ setup do
86
+ Post.html_document 'html_string', :scrub => :strip
87
+ @post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
88
+ end
89
+
90
+ should "scrub the specified field, but not other fields" do
91
+ Loofah.expects(:scrub_document).with(HTML_STRING, :strip).once
92
+ Loofah.expects(:scrub_document).with(PLAIN_TEXT, :strip).never
93
+ @post.valid?
94
+ end
53
95
  end
54
96
  end
55
97
 
@@ -0,0 +1,185 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
2
+
3
+ class TestAdHoc < Test::Unit::TestCase
4
+
5
+ def test_empty_string_with_escape
6
+ assert_equal "", Loofah.scrub_fragment("", :escape).to_xml
7
+ end
8
+
9
+ def test_empty_string_with_prune
10
+ assert_equal Loofah.scrub_document("", :prune).text, ""
11
+ end
12
+
13
+ def test_removal_of_illegal_tag
14
+ html = <<-HTML
15
+ following this there should be no jim tag
16
+ <jim>jim</jim>
17
+ was there?
18
+ HTML
19
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
20
+ assert sane.xpath("//jim").empty?
21
+ end
22
+
23
+ def test_removal_of_illegal_attribute
24
+ html = "<p class=bar foo=bar abbr=bar />"
25
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
26
+ node = sane.xpath("//p").first
27
+ assert node.attributes['class']
28
+ assert node.attributes['abbr']
29
+ assert_nil node.attributes['foo']
30
+ end
31
+
32
+ def test_removal_of_illegal_url_in_href
33
+ html = <<-HTML
34
+ <a href='jimbo://jim.jim/'>this link should have its href removed because of illegal url</a>
35
+ <a href='http://jim.jim/'>this link should be fine</a>
36
+ HTML
37
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
38
+ nodes = sane.xpath("//a")
39
+ assert_nil nodes.first.attributes['href']
40
+ assert nodes.last.attributes['href']
41
+ end
42
+
43
+ def test_css_sanitization
44
+ html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
45
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
46
+ assert_match(/#000/, sane.inner_html)
47
+ assert_no_match(/foo\.com/, sane.inner_html)
48
+ end
49
+
50
+ def test_fragment_with_no_tags
51
+ assert_equal "This fragment has no tags.", Loofah.scrub_fragment("This fragment has no tags.", :escape).to_xml
52
+ end
53
+
54
+ def test_fragment_in_p_tag
55
+ assert_equal "<p>This fragment is in a p.</p>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>", :escape).to_xml
56
+ end
57
+
58
+ def test_fragment_in_p_tag_plus_stuff
59
+ assert_equal "<p>This fragment is in a p.</p>foo<strong>bar</strong>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>foo<strong>bar</strong>", :escape).to_xml
60
+ end
61
+
62
+ def test_fragment_with_text_nodes_leading_and_trailing
63
+ assert_equal "text<p>fragment</p>text", Loofah.scrub_fragment("text<p>fragment</p>text", :escape).to_xml
64
+ end
65
+
66
+ def test_whitewash_on_fragment
67
+ html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
68
+ whitewashed = Loofah.scrub_document(html, :whitewash).to_s
69
+ assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
70
+ end
71
+
72
+ MSWORD_HTML = <<-EOHTML
73
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
74
+ <w:WordDocument>
75
+ <w:View>Normal</w:View>
76
+ <w:Zoom>0</w:Zoom>
77
+ <w:PunctuationKerning/>
78
+ <w:ValidateAgainstSchemas/>
79
+ <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
80
+ <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
81
+ <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
82
+ <w:Compatibility>
83
+ <w:BreakWrappedTables/>
84
+ <w:SnapToGridInCell/>
85
+ <w:WrapTextWithPunct/>
86
+ <w:UseAsianBreakRules/>
87
+ <w:DontGrowAutofit/>
88
+ </w:Compatibility>
89
+ <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
90
+ </w:WordDocument>
91
+ </xml><![endif]--><!--[if gte mso 9]><xml>
92
+ <w:LatentStyles DefLockedState="false" LatentStyleCount="156">
93
+ </w:LatentStyles>
94
+ </xml><![endif]--><style>
95
+ <!--
96
+ /* Style Definitions */
97
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
98
+ {mso-style-parent:"";
99
+ margin:0in;
100
+ margin-bottom:.0001pt;
101
+ mso-pagination:widow-orphan;
102
+ font-size:12.0pt;
103
+ font-family:"Times New Roman";
104
+ mso-fareast-font-family:"Times New Roman";}
105
+ @page Section1
106
+ {size:8.5in 11.0in;
107
+ margin:1.0in 1.25in 1.0in 1.25in;
108
+ mso-header-margin:.5in;
109
+ mso-footer-margin:.5in;
110
+ mso-paper-source:0;}
111
+ div.Section1
112
+ {page:Section1;}
113
+ -->
114
+ </style><!--[if gte mso 10]>
115
+ <style>
116
+ /* Style Definitions */
117
+ table.MsoNormalTable
118
+ {mso-style-name:"Table Normal";
119
+ mso-tstyle-rowband-size:0;
120
+ mso-tstyle-colband-size:0;
121
+ mso-style-noshow:yes;
122
+ mso-style-parent:"";
123
+ mso-padding-alt:0in 5.4pt 0in 5.4pt;
124
+ mso-para-margin:0in;
125
+ mso-para-margin-bottom:.0001pt;
126
+ mso-pagination:widow-orphan;
127
+ font-size:10.0pt;
128
+ font-family:"Times New Roman";
129
+ mso-ansi-language:#0400;
130
+ mso-fareast-language:#0400;
131
+ mso-bidi-language:#0400;}
132
+ </style>
133
+ <![endif]-->
134
+
135
+ <p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
136
+ EOHTML
137
+
138
+ def test_deprecated_whitewash_fragment_on_microsofty_markup
139
+ whitewashed = Loofah.scrub_fragment(MSWORD_HTML.chomp, :whitewash).to_s
140
+ assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed
141
+ end
142
+
143
+ def test_deprecated_whitewash_on_microsofty_markup
144
+ whitewashed = Loofah.scrub_document(MSWORD_HTML, :whitewash).to_s
145
+ assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed
146
+ end
147
+
148
+ def test_fragment_whitewash_on_microsofty_markup
149
+ whitewashed = Loofah.fragment(MSWORD_HTML.chomp).scrub!(:whitewash)
150
+ assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s
151
+ end
152
+
153
+ def test_document_whitewash_on_microsofty_markup
154
+ whitewashed = Loofah.document(MSWORD_HTML.chomp).scrub!(:whitewash)
155
+ assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s
156
+ end
157
+
158
+ def test_return_empty_string_when_nothing_left
159
+ assert_equal "", Loofah.scrub_document('<script>test</script>', :prune).text
160
+ end
161
+
162
+ def test_removal_of_all_tags
163
+ html = <<-HTML
164
+ What's up <strong>doc</strong>?
165
+ HTML
166
+ stripped = Loofah.scrub_document(html, :prune).text
167
+ assert_equal "What's up doc?".strip, stripped.strip
168
+ end
169
+
170
+ def test_dont_remove_whitespace
171
+ html = "Foo\nBar"
172
+ assert_equal html, Loofah.scrub_document(html, :prune).text
173
+ end
174
+
175
+ def test_dont_remove_whitespace_between_tags
176
+ html = "<p>Foo</p>\n<p>Bar</p>"
177
+ assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
178
+ end
179
+
180
+ def test_removal_of_entities
181
+ html = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p>"
182
+ assert_equal 'this is < that "&" the other > boo\'ya', Loofah.scrub_document(html, :prune).text
183
+ end
184
+
185
+ end