loofah 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- data.tar.gz.sig +0 -0
- data/CHANGELOG.rdoc +14 -0
- data/DEPRECATED.rdoc +12 -0
- data/Manifest.txt +6 -5
- data/README.rdoc +86 -20
- data/Rakefile +38 -0
- data/benchmark/benchmark.rb +117 -37
- data/benchmark/helper.rb +36 -0
- data/lib/loofah.rb +7 -5
- data/lib/loofah/active_record.rb +28 -4
- data/lib/loofah/helpers.rb +23 -0
- data/lib/loofah/xss_foliate.rb +210 -0
- data/test/test_active_record.rb +69 -27
- data/test/test_ad_hoc.rb +185 -0
- data/test/test_helpers.rb +28 -0
- data/test/test_xss_foliate.rb +171 -0
- metadata +16 -17
- metadata.gz.sig +0 -0
- data/lib/loofah/deprecated.rb +0 -38
- data/test/html5/test_deprecated_sanitizer.rb +0 -185
- data/test/test_deprecated_basic.rb +0 -68
- data/test/test_microsofty.rb +0 -91
- data/test/test_strip_tags.rb +0 -36
data/lib/loofah.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
2
2
|
|
3
|
-
require 'rubygems'
|
4
3
|
require 'nokogiri'
|
5
4
|
|
6
5
|
require 'loofah/html5/whitelist'
|
@@ -11,8 +10,7 @@ require 'loofah/scrubber'
|
|
11
10
|
require 'loofah/html/document'
|
12
11
|
require 'loofah/html/document_fragment'
|
13
12
|
|
14
|
-
require 'loofah/deprecated'
|
15
|
-
|
13
|
+
require 'loofah/helpers'
|
16
14
|
|
17
15
|
#
|
18
16
|
# Loofah is an HTML sanitizer wrapped around Nokogiri[http://nokogiri.org], an excellent
|
@@ -161,7 +159,7 @@ require 'loofah/deprecated'
|
|
161
159
|
#
|
162
160
|
module Loofah
|
163
161
|
# The version of Loofah you are using
|
164
|
-
VERSION = '0.2.2'
|
162
|
+
VERSION = '0.3.0'
|
165
163
|
|
166
164
|
# The minimum required version of Nokogiri
|
167
165
|
REQUIRED_NOKOGIRI_VERSION = '1.3.3'
|
@@ -196,8 +194,12 @@ if Nokogiri::VERSION < Loofah::REQUIRED_NOKOGIRI_VERSION
|
|
196
194
|
raise RuntimeError, "Loofah requires Nokogiri #{Loofah::REQUIRED_NOKOGIRI_VERSION} or later (currently #{Nokogiri::VERSION})"
|
197
195
|
end
|
198
196
|
|
199
|
-
if defined? Rails.configuration
|
197
|
+
if defined? Rails.configuration # rails 2.1 and later
|
200
198
|
Rails.configuration.after_initialize do
|
201
199
|
require 'loofah/active_record'
|
200
|
+
require 'loofah/xss_foliate'
|
202
201
|
end
|
202
|
+
elsif defined? ActiveRecord::Base # rails 2.0
|
203
|
+
require 'loofah/active_record'
|
204
|
+
require 'loofah/xss_foliate'
|
203
205
|
end
|
data/lib/loofah/active_record.rb
CHANGED
@@ -20,8 +20,20 @@ module Loofah
|
|
20
20
|
#
|
21
21
|
module ActiveRecordExtension
|
22
22
|
#
|
23
|
-
#
|
24
|
-
#
|
23
|
+
# :call-seq:
|
24
|
+
# html_fragment(attribute, :scrub => sanitization_method)
|
25
|
+
#
|
26
|
+
# Scrub an ActiveRecord attribute +attribute+ as an HTML *fragment*
|
27
|
+
# using the method specified by +sanitization_method+.
|
28
|
+
#
|
29
|
+
# +sanitization_method+ must be one of:
|
30
|
+
#
|
31
|
+
# * :strip
|
32
|
+
# * :prune
|
33
|
+
# * :escape
|
34
|
+
# * :whitewash
|
35
|
+
#
|
36
|
+
# See Loofah for an explanation of each sanitization method.
|
25
37
|
#
|
26
38
|
def html_fragment(attr, options={})
|
27
39
|
raise ArgumentError, "html_fragment requires :scrub option" unless method = options[:scrub]
|
@@ -31,8 +43,20 @@ module Loofah
|
|
31
43
|
end
|
32
44
|
|
33
45
|
#
|
34
|
-
#
|
35
|
-
#
|
46
|
+
# :call-seq:
|
47
|
+
# model.html_document(attribute, :scrub => sanitization_method)
|
48
|
+
#
|
49
|
+
# Scrub an ActiveRecord attribute +attribute+ as an HTML *document*
|
50
|
+
# using the method specified by +sanitization_method+.
|
51
|
+
#
|
52
|
+
# +sanitization_method+ must be one of:
|
53
|
+
#
|
54
|
+
# * :strip
|
55
|
+
# * :prune
|
56
|
+
# * :escape
|
57
|
+
# * :whitewash
|
58
|
+
#
|
59
|
+
# See Loofah for an explanation of each sanitization method.
|
36
60
|
#
|
37
61
|
def html_document(attr, options={})
|
38
62
|
raise ArgumentError, "html_document requires :scrub option" unless method = options[:scrub]
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Loofah
|
2
|
+
module Helpers
|
3
|
+
class << self
|
4
|
+
#
|
5
|
+
# A replacement for Rails's built-in +strip_tags+ helper.
|
6
|
+
#
|
7
|
+
# Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
|
8
|
+
#
|
9
|
+
def strip_tags(string_or_io)
|
10
|
+
Loofah.fragment(string_or_io).text
|
11
|
+
end
|
12
|
+
|
13
|
+
#
|
14
|
+
# A replacement for Rails's built-in +sanitize+ helper.
|
15
|
+
#
|
16
|
+
# Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>") # => "<script src=\"http://ha.ckers.org/xss.js\"></script>"
|
17
|
+
#
|
18
|
+
def sanitize(string_or_io)
|
19
|
+
Loofah.scrub_fragment(string_or_io, :strip).to_s
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,210 @@
|
|
1
|
+
module Loofah
|
2
|
+
#
|
3
|
+
# A replacement for
|
4
|
+
# XssTerminate[http://github.com/look/xss_terminate/tree/master],
|
5
|
+
# XssFoliate will strip all tags from your ActiveRecord models'
|
6
|
+
# string and text attributes.
|
7
|
+
#
|
8
|
+
# See Loofah::XssFoliate::ClassMethods for more information.
|
9
|
+
#
|
10
|
+
module XssFoliate
|
11
|
+
#
|
12
|
+
# A replacement for
|
13
|
+
# XssTerminate[http://github.com/look/xss_terminate/tree/master],
|
14
|
+
# XssFoliate will strip all tags from your ActiveRecord models'
|
15
|
+
# string and text attributes.
|
16
|
+
#
|
17
|
+
# Please read the Loofah documentation for an explanation of the
|
18
|
+
# different scrubbing methods.
|
19
|
+
#
|
20
|
+
# If you'd like to scrub all fields in all your models (and perhaps *opt-out* in specific models):
|
21
|
+
#
|
22
|
+
# # config/environment.rb
|
23
|
+
# LOOFAH_XSS_FOLIATE_ALL_MODELS = true
|
24
|
+
# Rails::Initializer.run do |config|
|
25
|
+
# config.gem "loofah"
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# # db/schema.rb
|
29
|
+
# create_table "posts" do |t|
|
30
|
+
# t.string "title"
|
31
|
+
# t.text "body"
|
32
|
+
# t.string "author"
|
33
|
+
# end
|
34
|
+
#
|
35
|
+
# # app/models/post.rb
|
36
|
+
# class Post < ActiveRecord::Base
|
37
|
+
# # by default, title, body and author will all be scrubbed down to their inner text
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# OR
|
41
|
+
#
|
42
|
+
# # app/models/post.rb
|
43
|
+
# class Post < ActiveRecord::Base
|
44
|
+
# xss_foliate :except => :author # opt-out of sanitizing author
|
45
|
+
# end
|
46
|
+
#
|
47
|
+
# OR
|
48
|
+
#
|
49
|
+
# xss_foliate :strip => [:title, :body] # strip unsafe tags from both title and body
|
50
|
+
#
|
51
|
+
# OR
|
52
|
+
#
|
53
|
+
# xss_foliate :except => :title # scrub body and author but not title
|
54
|
+
#
|
55
|
+
# OR
|
56
|
+
#
|
57
|
+
# # remove all tags from title, remove unsafe tags from body
|
58
|
+
# xss_foliate :text => :title, :strip => :body
|
59
|
+
#
|
60
|
+
# OR
|
61
|
+
#
|
62
|
+
# # old xss_terminate code will work if you s/_terminate/_foliate/
|
63
|
+
# # was: xss_terminate :except => [:title], :sanitize => [:body]
|
64
|
+
# xss_foliate :except => [:title], :sanitize => [:body]
|
65
|
+
#
|
66
|
+
# Alternatively, if you would like to *opt-in* to the models and attributes that are sanitized:
|
67
|
+
#
|
68
|
+
# # config/environment.rb
|
69
|
+
# LOOFAH_XSS_FOLIATE_ALL_MODELS = false # default, this line could be omitted
|
70
|
+
# Rails::Initializer.run do |config|
|
71
|
+
# config.gem "loofah"
|
72
|
+
# end
|
73
|
+
#
|
74
|
+
# # db/schema.rb
|
75
|
+
# create_table "posts" do |t|
|
76
|
+
# t.string "title"
|
77
|
+
# t.text "body"
|
78
|
+
# t.string "author"
|
79
|
+
# end
|
80
|
+
#
|
81
|
+
# # app/models/post.rb
|
82
|
+
# class Post < ActiveRecord::Base
|
83
|
+
# xss_foliate # scrub title, body and author down to their inner text
|
84
|
+
# end
|
85
|
+
#
|
86
|
+
module ClassMethods
|
87
|
+
# :stopdoc:
|
88
|
+
VALID_OPTIONS = [:except, :strip, :escape, :prune, :text, :html5lib_sanitize, :sanitize]
|
89
|
+
ALIASED_OPTIONS = {:html5lib_sanitize => :escape, :sanitize => :strip}
|
90
|
+
REAL_OPTIONS = VALID_OPTIONS - ALIASED_OPTIONS.keys
|
91
|
+
# :startdoc:
|
92
|
+
|
93
|
+
#
|
94
|
+
# Annotate your model with this method to specify which fields
|
95
|
+
# you want scrubbed, and how you want them scrubbed. XssFoliate
|
96
|
+
# assumes all character fields are HTML fragments (as opposed to
|
97
|
+
# full documents, see the Loofah[http://loofah.rubyforge.org/]
|
98
|
+
# documentation for a full explanation of the difference).
|
99
|
+
#
|
100
|
+
# Example call:
|
101
|
+
#
|
102
|
+
# xss_foliate :except => :author, :strip => :body, :prune => [:title, :description]
|
103
|
+
#
|
104
|
+
# *Note* that the values in the options hash can be either an
|
105
|
+
# array of attributes or a single attribute.
|
106
|
+
#
|
107
|
+
# Options:
|
108
|
+
#
|
109
|
+
# :except => [fields] # don't scrub these fields
|
110
|
+
# :strip => [fields] # strip unsafe tags from these fields
|
111
|
+
# :escape => [fields] # escape unsafe tags from these fields
|
112
|
+
# :prune => [fields] # prune unsafe tags and subtrees from these fields
|
113
|
+
# :text => [fields] # remove everything except the inner text from these fields
|
114
|
+
#
|
115
|
+
# XssTerminate compatibility options (note that the default
|
116
|
+
# behavior in XssTerminate corresponds to :text)
|
117
|
+
#
|
118
|
+
# :html5lib_sanitize => [fields] # same as :escape
|
119
|
+
# :sanitize => [fields] # same as :strip
|
120
|
+
#
|
121
|
+
# The default is :text for all fields unless otherwise specified.
|
122
|
+
#
|
123
|
+
def xss_foliate(options = {})
|
124
|
+
callback_already_declared = \
|
125
|
+
if respond_to?(:before_validation_callback_chain)
|
126
|
+
# Rails 2.1 and later
|
127
|
+
before_validation_callback_chain.any? {|cb| cb.method == :xss_foliate_fields}
|
128
|
+
else
|
129
|
+
# Rails 2.0
|
130
|
+
cbs = read_inheritable_attribute(:before_validation)
|
131
|
+
(! cbs.nil?) && cbs.any? {|cb| cb == :xss_foliate_fields}
|
132
|
+
end
|
133
|
+
|
134
|
+
unless callback_already_declared
|
135
|
+
before_validation :xss_foliate_fields
|
136
|
+
class_inheritable_reader :xss_foliate_options
|
137
|
+
include XssFoliate::InstanceMethods
|
138
|
+
end
|
139
|
+
|
140
|
+
options.keys.each do |option|
|
141
|
+
raise ArgumentError, "unknown xss_foliate option #{option}" unless VALID_OPTIONS.include?(option)
|
142
|
+
end
|
143
|
+
|
144
|
+
REAL_OPTIONS.each do |option|
|
145
|
+
options[option] = Array(options[option]).collect { |val| val.to_sym }
|
146
|
+
end
|
147
|
+
|
148
|
+
ALIASED_OPTIONS.each do |option, real|
|
149
|
+
options[real] += Array(options.delete(option)).collect { |val| val.to_sym } if options[option]
|
150
|
+
end
|
151
|
+
|
152
|
+
write_inheritable_attribute(:xss_foliate_options, options)
|
153
|
+
end
|
154
|
+
|
155
|
+
#
|
156
|
+
# Class method to determine whether or not this model is applying
|
157
|
+
# xss_foliation to its attributes. Could be useful in test suites.
|
158
|
+
#
|
159
|
+
def xss_foliated?
|
160
|
+
options = read_inheritable_attribute(:xss_foliate_options)
|
161
|
+
! (options.nil? || options.empty?)
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
module InstanceMethods
|
166
|
+
|
167
|
+
def xss_foliate_fields # :nodoc:
|
168
|
+
# fix a bug with Rails internal AR::Base models that get loaded before
|
169
|
+
# the plugin, like CGI::Sessions::ActiveRecordStore::Session
|
170
|
+
return if xss_foliate_options.nil?
|
171
|
+
|
172
|
+
self.class.columns.each do |column|
|
173
|
+
next unless (column.type == :string || column.type == :text)
|
174
|
+
|
175
|
+
field = column.name.to_sym
|
176
|
+
value = self[field]
|
177
|
+
|
178
|
+
next if value.nil? || !value.is_a?(String)
|
179
|
+
|
180
|
+
if xss_foliate_options[:except].include?(field)
|
181
|
+
next
|
182
|
+
|
183
|
+
elsif xss_foliate_options[:strip].include?(field)
|
184
|
+
fragment = Loofah.scrub_fragment(value, :strip)
|
185
|
+
self[field] = fragment.nil? ? "" : fragment.to_s
|
186
|
+
|
187
|
+
elsif xss_foliate_options[:prune].include?(field)
|
188
|
+
fragment = Loofah.scrub_fragment(value, :prune)
|
189
|
+
self[field] = fragment.nil? ? "" : fragment.to_s
|
190
|
+
|
191
|
+
elsif xss_foliate_options[:escape].include?(field)
|
192
|
+
fragment = Loofah.scrub_fragment(value, :escape)
|
193
|
+
self[field] = fragment.nil? ? "" : fragment.to_s
|
194
|
+
|
195
|
+
else # :text
|
196
|
+
fragment = Loofah.scrub_fragment(value, :strip)
|
197
|
+
self[field] = fragment.nil? ? "" : fragment.text
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
ActiveRecord::Base.extend(Loofah::XssFoliate::ClassMethods)
|
207
|
+
|
208
|
+
if defined?(LOOFAH_XSS_FOLIATE_ALL_MODELS) && LOOFAH_XSS_FOLIATE_ALL_MODELS
|
209
|
+
ActiveRecord::Base.xss_foliate
|
210
|
+
end
|
data/test/test_active_record.rb
CHANGED
@@ -16,40 +16,82 @@ class TestActiveRecord < Test::Unit::TestCase
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
-
context "scrubbing field as a fragment" do
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
context "scrubbing a single field as a fragment" do
|
20
|
+
context "using a symbol to indicate the attribute" do
|
21
|
+
setup do
|
22
|
+
Post.html_fragment :html_string, :scrub => :prune
|
23
|
+
assert ! Post.xss_foliated?
|
24
|
+
@post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
|
25
|
+
end
|
26
|
+
|
27
|
+
should "scrub the specified field" do
|
28
|
+
Loofah.expects(:scrub_fragment).with(HTML_STRING, :prune).once
|
29
|
+
Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).never
|
30
|
+
@post.valid?
|
31
|
+
end
|
32
|
+
|
33
|
+
should "only call scrub_fragment once" do
|
34
|
+
Loofah.expects(:scrub_fragment).once
|
35
|
+
@post.valid?
|
36
|
+
end
|
37
|
+
|
38
|
+
should "generate strings" do
|
39
|
+
@post.valid?
|
40
|
+
assert_equal String, @post.html_string.class
|
41
|
+
assert_equal HTML_STRING, @post.html_string
|
42
|
+
end
|
23
43
|
end
|
24
44
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
45
|
+
context "using a string to indicate the attribute" do
|
46
|
+
setup do
|
47
|
+
Post.html_fragment 'html_string', :scrub => :prune
|
48
|
+
assert ! Post.xss_foliated?
|
49
|
+
@post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
|
50
|
+
end
|
51
|
+
|
52
|
+
should "scrub the specified field" do
|
53
|
+
Loofah.expects(:scrub_fragment).with(HTML_STRING, :prune).once
|
54
|
+
Loofah.expects(:scrub_fragment).with(PLAIN_TEXT, :prune).never
|
55
|
+
@post.valid?
|
56
|
+
end
|
35
57
|
end
|
36
58
|
end
|
37
59
|
|
38
|
-
context "scrubbing field as a document" do
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
60
|
+
context "scrubbing a single field as a document" do
|
61
|
+
context "using a symbol to indicate the attribute" do
|
62
|
+
setup do
|
63
|
+
Post.html_document :html_string, :scrub => :strip
|
64
|
+
@post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
|
65
|
+
end
|
66
|
+
|
67
|
+
should "scrub the specified field, but not other fields" do
|
68
|
+
Loofah.expects(:scrub_document).with(HTML_STRING, :strip).once
|
69
|
+
Loofah.expects(:scrub_document).with(PLAIN_TEXT, :strip).never
|
70
|
+
@post.valid?
|
71
|
+
end
|
72
|
+
|
73
|
+
should "only call scrub_document once" do
|
74
|
+
Loofah.expects(:scrub_document).once
|
75
|
+
@post.valid?
|
76
|
+
end
|
77
|
+
|
78
|
+
should "generate strings" do
|
79
|
+
@post.valid?
|
80
|
+
assert_equal String, @post.html_string.class
|
81
|
+
end
|
48
82
|
end
|
49
83
|
|
50
|
-
|
51
|
-
|
52
|
-
|
84
|
+
context "using a string to indicate the attribute" do
|
85
|
+
setup do
|
86
|
+
Post.html_document 'html_string', :scrub => :strip
|
87
|
+
@post = Post.new :html_string => HTML_STRING, :plain_text => PLAIN_TEXT
|
88
|
+
end
|
89
|
+
|
90
|
+
should "scrub the specified field, but not other fields" do
|
91
|
+
Loofah.expects(:scrub_document).with(HTML_STRING, :strip).once
|
92
|
+
Loofah.expects(:scrub_document).with(PLAIN_TEXT, :strip).never
|
93
|
+
@post.valid?
|
94
|
+
end
|
53
95
|
end
|
54
96
|
end
|
55
97
|
|
data/test/test_ad_hoc.rb
ADDED
@@ -0,0 +1,185 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
|
3
|
+
class TestAdHoc < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def test_empty_string_with_escape
|
6
|
+
assert_equal "", Loofah.scrub_fragment("", :escape).to_xml
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_empty_string_with_prune
|
10
|
+
assert_equal Loofah.scrub_document("", :prune).text, ""
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_removal_of_illegal_tag
|
14
|
+
html = <<-HTML
|
15
|
+
following this there should be no jim tag
|
16
|
+
<jim>jim</jim>
|
17
|
+
was there?
|
18
|
+
HTML
|
19
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
20
|
+
assert sane.xpath("//jim").empty?
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_removal_of_illegal_attribute
|
24
|
+
html = "<p class=bar foo=bar abbr=bar />"
|
25
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
26
|
+
node = sane.xpath("//p").first
|
27
|
+
assert node.attributes['class']
|
28
|
+
assert node.attributes['abbr']
|
29
|
+
assert_nil node.attributes['foo']
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_removal_of_illegal_url_in_href
|
33
|
+
html = <<-HTML
|
34
|
+
<a href='jimbo://jim.jim/'>this link should have its href removed because of illegal url</a>
|
35
|
+
<a href='http://jim.jim/'>this link should be fine</a>
|
36
|
+
HTML
|
37
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
38
|
+
nodes = sane.xpath("//a")
|
39
|
+
assert_nil nodes.first.attributes['href']
|
40
|
+
assert nodes.last.attributes['href']
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_css_sanitization
|
44
|
+
html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
|
45
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
46
|
+
assert_match(/#000/, sane.inner_html)
|
47
|
+
assert_no_match(/foo\.com/, sane.inner_html)
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_fragment_with_no_tags
|
51
|
+
assert_equal "This fragment has no tags.", Loofah.scrub_fragment("This fragment has no tags.", :escape).to_xml
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_fragment_in_p_tag
|
55
|
+
assert_equal "<p>This fragment is in a p.</p>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>", :escape).to_xml
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_fragment_in_p_tag_plus_stuff
|
59
|
+
assert_equal "<p>This fragment is in a p.</p>foo<strong>bar</strong>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>foo<strong>bar</strong>", :escape).to_xml
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_fragment_with_text_nodes_leading_and_trailing
|
63
|
+
assert_equal "text<p>fragment</p>text", Loofah.scrub_fragment("text<p>fragment</p>text", :escape).to_xml
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_whitewash_on_fragment
|
67
|
+
html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
|
68
|
+
whitewashed = Loofah.scrub_document(html, :whitewash).to_s
|
69
|
+
assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
|
70
|
+
end
|
71
|
+
|
72
|
+
MSWORD_HTML = <<-EOHTML
|
73
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
|
74
|
+
<w:WordDocument>
|
75
|
+
<w:View>Normal</w:View>
|
76
|
+
<w:Zoom>0</w:Zoom>
|
77
|
+
<w:PunctuationKerning/>
|
78
|
+
<w:ValidateAgainstSchemas/>
|
79
|
+
<w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
|
80
|
+
<w:IgnoreMixedContent>false</w:IgnoreMixedContent>
|
81
|
+
<w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
|
82
|
+
<w:Compatibility>
|
83
|
+
<w:BreakWrappedTables/>
|
84
|
+
<w:SnapToGridInCell/>
|
85
|
+
<w:WrapTextWithPunct/>
|
86
|
+
<w:UseAsianBreakRules/>
|
87
|
+
<w:DontGrowAutofit/>
|
88
|
+
</w:Compatibility>
|
89
|
+
<w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
|
90
|
+
</w:WordDocument>
|
91
|
+
</xml><![endif]--><!--[if gte mso 9]><xml>
|
92
|
+
<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
|
93
|
+
</w:LatentStyles>
|
94
|
+
</xml><![endif]--><style>
|
95
|
+
<!--
|
96
|
+
/* Style Definitions */
|
97
|
+
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
98
|
+
{mso-style-parent:"";
|
99
|
+
margin:0in;
|
100
|
+
margin-bottom:.0001pt;
|
101
|
+
mso-pagination:widow-orphan;
|
102
|
+
font-size:12.0pt;
|
103
|
+
font-family:"Times New Roman";
|
104
|
+
mso-fareast-font-family:"Times New Roman";}
|
105
|
+
@page Section1
|
106
|
+
{size:8.5in 11.0in;
|
107
|
+
margin:1.0in 1.25in 1.0in 1.25in;
|
108
|
+
mso-header-margin:.5in;
|
109
|
+
mso-footer-margin:.5in;
|
110
|
+
mso-paper-source:0;}
|
111
|
+
div.Section1
|
112
|
+
{page:Section1;}
|
113
|
+
-->
|
114
|
+
</style><!--[if gte mso 10]>
|
115
|
+
<style>
|
116
|
+
/* Style Definitions */
|
117
|
+
table.MsoNormalTable
|
118
|
+
{mso-style-name:"Table Normal";
|
119
|
+
mso-tstyle-rowband-size:0;
|
120
|
+
mso-tstyle-colband-size:0;
|
121
|
+
mso-style-noshow:yes;
|
122
|
+
mso-style-parent:"";
|
123
|
+
mso-padding-alt:0in 5.4pt 0in 5.4pt;
|
124
|
+
mso-para-margin:0in;
|
125
|
+
mso-para-margin-bottom:.0001pt;
|
126
|
+
mso-pagination:widow-orphan;
|
127
|
+
font-size:10.0pt;
|
128
|
+
font-family:"Times New Roman";
|
129
|
+
mso-ansi-language:#0400;
|
130
|
+
mso-fareast-language:#0400;
|
131
|
+
mso-bidi-language:#0400;}
|
132
|
+
</style>
|
133
|
+
<![endif]-->
|
134
|
+
|
135
|
+
<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
|
136
|
+
EOHTML
|
137
|
+
|
138
|
+
def test_deprecated_whitewash_fragment_on_microsofty_markup
|
139
|
+
whitewashed = Loofah.scrub_fragment(MSWORD_HTML.chomp, :whitewash).to_s
|
140
|
+
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed
|
141
|
+
end
|
142
|
+
|
143
|
+
def test_deprecated_whitewash_on_microsofty_markup
|
144
|
+
whitewashed = Loofah.scrub_document(MSWORD_HTML, :whitewash).to_s
|
145
|
+
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed
|
146
|
+
end
|
147
|
+
|
148
|
+
def test_fragment_whitewash_on_microsofty_markup
|
149
|
+
whitewashed = Loofah.fragment(MSWORD_HTML.chomp).scrub!(:whitewash)
|
150
|
+
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_document_whitewash_on_microsofty_markup
|
154
|
+
whitewashed = Loofah.document(MSWORD_HTML.chomp).scrub!(:whitewash)
|
155
|
+
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s
|
156
|
+
end
|
157
|
+
|
158
|
+
def test_return_empty_string_when_nothing_left
|
159
|
+
assert_equal "", Loofah.scrub_document('<script>test</script>', :prune).text
|
160
|
+
end
|
161
|
+
|
162
|
+
def test_removal_of_all_tags
|
163
|
+
html = <<-HTML
|
164
|
+
What's up <strong>doc</strong>?
|
165
|
+
HTML
|
166
|
+
stripped = Loofah.scrub_document(html, :prune).text
|
167
|
+
assert_equal "What's up doc?".strip, stripped.strip
|
168
|
+
end
|
169
|
+
|
170
|
+
def test_dont_remove_whitespace
|
171
|
+
html = "Foo\nBar"
|
172
|
+
assert_equal html, Loofah.scrub_document(html, :prune).text
|
173
|
+
end
|
174
|
+
|
175
|
+
def test_dont_remove_whitespace_between_tags
|
176
|
+
html = "<p>Foo</p>\n<p>Bar</p>"
|
177
|
+
assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
|
178
|
+
end
|
179
|
+
|
180
|
+
def test_removal_of_entities
|
181
|
+
html = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p>"
|
182
|
+
assert_equal 'this is < that "&" the other > boo\'ya', Loofah.scrub_document(html, :prune).text
|
183
|
+
end
|
184
|
+
|
185
|
+
end
|