loofah 0.4.1 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- data.tar.gz.sig +0 -0
- data/CHANGELOG.rdoc +20 -2
- data/README.rdoc +21 -6
- data/Rakefile +21 -23
- data/benchmark/benchmark.rb +21 -1
- data/benchmark/helper.rb +5 -0
- data/lib/loofah.rb +2 -2
- data/lib/loofah/active_record.rb +2 -2
- data/lib/loofah/html/document.rb +10 -7
- data/lib/loofah/html/document_fragment.rb +15 -7
- data/lib/loofah/html5/whitelist.rb +4 -0
- data/lib/loofah/instance_methods.rb +69 -39
- data/lib/loofah/scrubber.rb +1 -1
- data/lib/loofah/scrubbers.rb +1 -1
- data/lib/loofah/xml/document.rb +3 -9
- data/lib/loofah/xml/document_fragment.rb +3 -10
- data/test/html5/test_sanitizer.rb +1 -1
- data/test/test_ad_hoc.rb +54 -0
- data/test/test_api.rb +31 -0
- data/test/test_scrubber.rb +2 -2
- metadata +2 -2
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/CHANGELOG.rdoc
CHANGED
@@ -1,12 +1,30 @@
|
|
1
1
|
= Changelog
|
2
2
|
|
3
|
-
== 0.4.1
|
3
|
+
== 0.4.2 (2010-01-22)
|
4
|
+
|
5
|
+
Enhancements:
|
6
|
+
|
7
|
+
* Implemented Node#scrub! for scrubbing subtrees.
|
8
|
+
* Implemented NodeSet#scrub! for scrubbing a set of subtrees.
|
9
|
+
* Document.text now only serializes <body> contents (ignores <head>)
|
10
|
+
* <head>, <html> and <body> added to the HTML5lib whitelist.
|
11
|
+
|
12
|
+
Bug fixes:
|
13
|
+
|
14
|
+
* Supporting Rails apps that aren't loading ActiveRecord. GH #10
|
15
|
+
|
16
|
+
Miscellaneous:
|
17
|
+
|
18
|
+
* Mailing list is now loofah@librelist.com / http://librelist.com
|
19
|
+
* IRC channel is now \#loofah on freenode.
|
20
|
+
|
21
|
+
== 0.4.1 (2009-11-23)
|
4
22
|
|
5
23
|
Bugfix:
|
6
24
|
|
7
25
|
* Manifest fixed. Whoops.
|
8
26
|
|
9
|
-
== 0.4.0
|
27
|
+
== 0.4.0 (2009-11-21)
|
10
28
|
|
11
29
|
Enhancements:
|
12
30
|
|
data/README.rdoc
CHANGED
@@ -33,13 +33,14 @@ not been evaluated by Netexperts.)
|
|
33
33
|
|
34
34
|
== Compare and Contrast
|
35
35
|
|
36
|
-
Loofah is
|
37
|
-
well-formed and valid markup
|
36
|
+
Loofah is one of two known Ruby XSS/sanitization solutions that
|
37
|
+
guarantees well-formed and valid markup (the other is Sanitize, which
|
38
|
+
also uses Nokogiri).
|
38
39
|
|
39
40
|
Loofah works fine on XML, XHTML and HTML documents.
|
40
41
|
|
41
42
|
Also, it's pretty fast. Here is a benchmark comparing Loofah to other
|
42
|
-
commonly-used libraries (ActionView, Sanitize and
|
43
|
+
commonly-used libraries (ActionView, Sanitize, HTML5lib and HTMLfilter):
|
43
44
|
|
44
45
|
* http://gist.github.com/170193
|
45
46
|
|
@@ -131,6 +132,20 @@ parse an XML document and an XML fragment, respectively.
|
|
131
132
|
Loofah.xml_document(bad_xml).is_a?(Nokogiri::XML::Document) # => true
|
132
133
|
Loofah.xml_fragment(bad_xml).is_a?(Nokogiri::XML::DocumentFragment) # => true
|
133
134
|
|
135
|
+
=== Nodes and NodeSets
|
136
|
+
|
137
|
+
Nokogiri::XML::Node and Nokogiri::XML::NodeSet also get a +scrub!+
|
138
|
+
method, which makes it easy to scrub subtrees.
|
139
|
+
|
140
|
+
The following code will apply the +employee_scrubber+ only to the
|
141
|
+
+employee+ nodes (and their subtrees) in the document:
|
142
|
+
|
143
|
+
Loofah.xml_document(bad_xml).xpath("//employee").scrub!(employee_scrubber)
|
144
|
+
|
145
|
+
And this code will only scrub the first +employee+ node and its subtree:
|
146
|
+
|
147
|
+
Loofah.xml_document(bad_xml).at_xpath("//employee").scrub!(employee_scrubber)
|
148
|
+
|
134
149
|
=== Loofah::Scrubber
|
135
150
|
|
136
151
|
A Scrubber wraps up a block (or method) that is run on a document node:
|
@@ -256,11 +271,11 @@ The bug tracker is available here:
|
|
256
271
|
|
257
272
|
* http://github.com/flavorjones/loofah/issues
|
258
273
|
|
259
|
-
|
274
|
+
And the mailing list is on librelist:
|
260
275
|
|
261
|
-
* http://
|
276
|
+
* loofah@librelist.com / http://librelist.com
|
262
277
|
|
263
|
-
And the IRC channel is
|
278
|
+
And the IRC channel is \#loofah on freenode.
|
264
279
|
|
265
280
|
== Related Links
|
266
281
|
|
data/Rakefile
CHANGED
@@ -35,29 +35,27 @@ task :redocs => :fix_css
|
|
35
35
|
task :docs => :fix_css
|
36
36
|
task :fix_css do
|
37
37
|
better_css = <<-EOT
|
38
|
-
.method-description pre {
|
39
|
-
|
40
|
-
}
|
41
|
-
|
42
|
-
.method-description ul {
|
43
|
-
|
44
|
-
}
|
45
|
-
|
46
|
-
.method-description p {
|
47
|
-
|
48
|
-
}
|
49
|
-
|
50
|
-
#main ul, div#documentation ul {
|
51
|
-
|
52
|
-
|
53
|
-
}
|
54
|
-
|
55
|
-
h2 + ul {
|
56
|
-
|
57
|
-
}
|
58
|
-
|
59
|
-
EOT
|
38
|
+
.method-description pre {
|
39
|
+
margin : 1em 0 ;
|
40
|
+
}
|
41
|
+
|
42
|
+
.method-description ul {
|
43
|
+
padding : .5em 0 .5em 2em ;
|
44
|
+
}
|
45
|
+
|
46
|
+
.method-description p {
|
47
|
+
margin-top : .5em ;
|
48
|
+
}
|
49
|
+
|
50
|
+
#main ul, div#documentation ul {
|
51
|
+
list-style-type : disc ! IMPORTANT ;
|
52
|
+
list-style-position : inside ! IMPORTANT ;
|
53
|
+
}
|
54
|
+
|
55
|
+
h2 + ul {
|
56
|
+
margin-top : 1em;
|
57
|
+
}
|
58
|
+
EOT
|
60
59
|
puts "* fixing css"
|
61
60
|
File.open("doc/rdoc.css", "a") { |f| f.write better_css }
|
62
61
|
end
|
63
|
-
|
data/benchmark/benchmark.rb
CHANGED
@@ -3,7 +3,7 @@ require "#{File.dirname(__FILE__)}/helper.rb"
|
|
3
3
|
|
4
4
|
def compare_scrub_methods
|
5
5
|
snip = "<div>foo</div><foo>fuxx <b>quux</b></foo><script>i have a chair</script>"
|
6
|
-
puts "starting with
|
6
|
+
puts "starting with:\n#{snip}"
|
7
7
|
puts
|
8
8
|
puts RailsSanitize.new.sanitize(snip) # => Rails.sanitize / scrub!(:prune).to_s
|
9
9
|
puts Loofah::Helpers.sanitize(snip)
|
@@ -17,6 +17,9 @@ def compare_scrub_methods
|
|
17
17
|
puts HTML5libSanitize.new.sanitize(snip) # => scrub!(:escape).to_s
|
18
18
|
puts Loofah.scrub_fragment(snip, :escape).to_s
|
19
19
|
puts "--"
|
20
|
+
puts HTMLFilter.new.filter(snip)
|
21
|
+
puts Loofah.scrub_fragment(snip, :strip).to_s
|
22
|
+
puts
|
20
23
|
end
|
21
24
|
|
22
25
|
module TestSet
|
@@ -115,6 +118,22 @@ class HeadToHeadHtml5LibSanitize < Measure
|
|
115
118
|
end
|
116
119
|
end
|
117
120
|
|
121
|
+
class HeadToHeadHTMLFilter < Measure
|
122
|
+
include TestSet
|
123
|
+
def bench(content, ntimes, fragment_p)
|
124
|
+
clear_measure
|
125
|
+
|
126
|
+
measure "Loofah::Helpers.sanitize", ntimes do
|
127
|
+
Loofah::Helpers.sanitize content
|
128
|
+
end
|
129
|
+
|
130
|
+
sanitizer = HTMLFilter.new
|
131
|
+
measure "HTMLFilter.filter", ntimes do
|
132
|
+
sanitizer.filter(content)
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
118
137
|
puts "Nokogiri version: #{Nokogiri::VERSION_INFO.inspect}"
|
119
138
|
puts "Loofah version: #{Loofah::VERSION.inspect}"
|
120
139
|
|
@@ -123,6 +142,7 @@ benches << HeadToHeadRailsSanitize.new
|
|
123
142
|
benches << HeadToHeadRailsStripTags.new
|
124
143
|
benches << HeadToHeadSanitizerSanitize.new
|
125
144
|
benches << HeadToHeadHtml5LibSanitize.new
|
145
|
+
benches << HeadToHeadHTMLFilter.new
|
126
146
|
puts "---------- rehearsal ----------"
|
127
147
|
benches.each { |bench| bench.test_set :rehearse => true }
|
128
148
|
puts "---------- realsies ----------"
|
data/benchmark/helper.rb
CHANGED
@@ -7,6 +7,11 @@ require "action_view"
|
|
7
7
|
require "action_controller/vendor/html-scanner"
|
8
8
|
require "sanitize"
|
9
9
|
require 'hitimes'
|
10
|
+
require 'htmlfilter'
|
11
|
+
|
12
|
+
unless defined?(HTMLFilter)
|
13
|
+
HTMLFilter = HtmlFilter
|
14
|
+
end
|
10
15
|
|
11
16
|
class RailsSanitize
|
12
17
|
include ActionView::Helpers::SanitizeHelper
|
data/lib/loofah.rb
CHANGED
@@ -26,7 +26,7 @@ require 'loofah/helpers'
|
|
26
26
|
#
|
27
27
|
module Loofah
|
28
28
|
# The version of Loofah you are using
|
29
|
-
VERSION = '0.4.1'
|
29
|
+
VERSION = '0.4.2'
|
30
30
|
|
31
31
|
# The minimum required version of Nokogiri
|
32
32
|
REQUIRED_NOKOGIRI_VERSION = '1.3.3'
|
@@ -83,7 +83,7 @@ if Nokogiri::VERSION < Loofah::REQUIRED_NOKOGIRI_VERSION
|
|
83
83
|
raise RuntimeError, "Loofah requires Nokogiri #{Loofah::REQUIRED_NOKOGIRI_VERSION} or later (currently #{Nokogiri::VERSION})"
|
84
84
|
end
|
85
85
|
|
86
|
-
if defined? Rails.configuration # rails 2.1 and later
|
86
|
+
if defined? Rails.configuration and Rails.configuration.frameworks.include?([:active_record]) # rails 2.1 and later
|
87
87
|
Rails.configuration.after_initialize do
|
88
88
|
require 'loofah/active_record'
|
89
89
|
require 'loofah/xss_foliate'
|
data/lib/loofah/active_record.rb
CHANGED
@@ -26,7 +26,7 @@ module Loofah
|
|
26
26
|
# Scrub an ActiveRecord attribute +attribute+ as an HTML *fragment*
|
27
27
|
# using the method specified by +scrubber_specification+.
|
28
28
|
#
|
29
|
-
# +scrubber_specification+ must be an argument acceptable to Loofah::
|
29
|
+
# +scrubber_specification+ must be an argument acceptable to Loofah::ScrubBehavior.scrub!, namely:
|
30
30
|
#
|
31
31
|
# * a symbol for one of the built-in scrubbers (see Loofah::Scrubbers for a full list)
|
32
32
|
# * or a Scrubber instance. (see Loofah::Scrubber for help on implementing a custom scrubber)
|
@@ -45,7 +45,7 @@ module Loofah
|
|
45
45
|
# Scrub an ActiveRecord attribute +attribute+ as an HTML *document*
|
46
46
|
# using the method specified by +scrubber_specification+.
|
47
47
|
#
|
48
|
-
# +scrubber_specification+ must be an argument acceptable to Loofah::
|
48
|
+
# +scrubber_specification+ must be an argument acceptable to Loofah::ScrubBehavior.scrub!, namely:
|
49
49
|
#
|
50
50
|
# * a symbol for one of the built-in scrubbers (see Loofah::Scrubbers for a full list)
|
51
51
|
# * or a Scrubber instance.
|
data/lib/loofah/html/document.rb
CHANGED
@@ -3,17 +3,20 @@ module Loofah
|
|
3
3
|
#
|
4
4
|
# Subclass of Nokogiri::HTML::Document.
|
5
5
|
#
|
6
|
-
# See Loofah::
|
6
|
+
# See Loofah::ScrubBehavior and Loofah::DocumentDecorator for additional methods.
|
7
7
|
#
|
8
8
|
class Document < Nokogiri::HTML::Document
|
9
|
-
include Loofah::
|
9
|
+
include Loofah::ScrubBehavior::Node
|
10
|
+
include Loofah::DocumentDecorator
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
#
|
13
|
+
# Returns a plain-text version of the markup contained by the document
|
14
|
+
#
|
15
|
+
def text
|
16
|
+
xpath("/html/body").inner_text
|
15
17
|
end
|
16
|
-
|
18
|
+
alias :inner_text :text
|
19
|
+
alias :to_str :text
|
17
20
|
end
|
18
21
|
end
|
19
22
|
end
|
@@ -1,12 +1,12 @@
|
|
1
1
|
module Loofah
|
2
2
|
module HTML # :nodoc:
|
3
3
|
#
|
4
|
-
# Subclass of Nokogiri::HTML::DocumentFragment.
|
4
|
+
# Subclass of Nokogiri::HTML::DocumentFragment.
|
5
5
|
#
|
6
|
-
# See Loofah::
|
6
|
+
# See Loofah::ScrubBehavior for additional methods.
|
7
7
|
#
|
8
8
|
class DocumentFragment < Nokogiri::HTML::DocumentFragment
|
9
|
-
include Loofah::
|
9
|
+
include Loofah::ScrubBehavior::Node
|
10
10
|
|
11
11
|
class << self
|
12
12
|
#
|
@@ -20,19 +20,27 @@ module Loofah
|
|
20
20
|
end
|
21
21
|
|
22
22
|
#
|
23
|
-
# Returns the HTML markup contained by the fragment
|
23
|
+
# Returns the HTML markup contained by the fragment
|
24
24
|
#
|
25
25
|
def to_s
|
26
|
-
|
26
|
+
serialize_roots.children.to_s
|
27
27
|
end
|
28
28
|
alias :serialize :to_s
|
29
29
|
|
30
|
+
#
|
31
|
+
# Returns a plain-text version of the markup contained by the fragment
|
32
|
+
#
|
33
|
+
def text
|
34
|
+
serialize_roots.children.inner_text
|
35
|
+
end
|
36
|
+
alias :inner_text :text
|
37
|
+
alias :to_str :text
|
38
|
+
|
30
39
|
private
|
31
40
|
|
32
|
-
def
|
41
|
+
def serialize_roots # :nodoc:
|
33
42
|
xpath("./body").first || self
|
34
43
|
end
|
35
|
-
|
36
44
|
end
|
37
45
|
end
|
38
46
|
end
|
@@ -152,6 +152,10 @@ module Loofah
|
|
152
152
|
col
|
153
153
|
input
|
154
154
|
]
|
155
|
+
|
156
|
+
# additional tags we should consider safe since we have libxml2 fixing up our documents.
|
157
|
+
TAGS_SAFE_WITH_LIBXML2 = %w[html head body]
|
158
|
+
ALLOWED_ELEMENTS_WITH_LIBXML2 = ALLOWED_ELEMENTS + TAGS_SAFE_WITH_LIBXML2
|
155
159
|
end
|
156
160
|
|
157
161
|
#
|
@@ -1,47 +1,77 @@
|
|
1
1
|
module Loofah
|
2
2
|
#
|
3
|
-
#
|
4
|
-
#
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
#
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
#
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
#
|
31
|
-
|
3
|
+
# Mixes +scrub!+ into Document, DocumentFragment, Node and NodeSet.
|
4
|
+
#
|
5
|
+
# Traverse the document or fragment, invoking the +scrubber+ on
|
6
|
+
# each node.
|
7
|
+
#
|
8
|
+
# +scrubber+ must either be one of the symbols representing the
|
9
|
+
# built-in scrubbers (see Scrubbers), or a Scrubber instance.
|
10
|
+
#
|
11
|
+
# span2div = Loofah::Scrubber.new do |node|
|
12
|
+
# node.name = "div" if node.name == "span"
|
13
|
+
# end
|
14
|
+
# Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
|
15
|
+
# # => "<div>foo</div><p>bar</p>"
|
16
|
+
#
|
17
|
+
# or
|
18
|
+
#
|
19
|
+
# unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
|
20
|
+
# Loofah.fragment(unsafe_html).scrub!(:strip).to_s
|
21
|
+
# # => "ohai! <div>div is safe</div> "
|
22
|
+
#
|
23
|
+
# Note that this method is called implicitly from
|
24
|
+
# Loofah.scrub_fragment and Loofah.scrub_document.
|
25
|
+
#
|
26
|
+
# Please see Scrubber for more information on implementation and traversal, and
|
27
|
+
# README.rdoc for more example usage.
|
28
|
+
#
|
29
|
+
module ScrubBehavior
|
30
|
+
# see Loofah::ScrubBehavior
|
31
|
+
module Node
|
32
|
+
def scrub!(scrubber)
|
33
|
+
#
|
34
|
+
# yes. this should be three separate methods. but nokogiri
|
35
|
+
# decorates (or not) based on whether the module name has
|
36
|
+
# already been included. and since documents get decorated
|
37
|
+
# just like their constituent nodes, we need to jam all the
|
38
|
+
# logic into a single module.
|
39
|
+
#
|
40
|
+
scrubber = ScrubBehavior.resolve_scrubber(scrubber)
|
41
|
+
case self
|
42
|
+
when Nokogiri::XML::Document
|
43
|
+
scrubber.traverse(root) if root
|
44
|
+
when Nokogiri::XML::DocumentFragment
|
45
|
+
children.each { |node| node.scrub!(scrubber) } # TODO: children.scrub! once Nokogiri 1.4.2 is out
|
46
|
+
else
|
47
|
+
scrubber.traverse(self)
|
48
|
+
end
|
49
|
+
self
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# see Loofah::ScrubBehavior
|
54
|
+
module NodeSet
|
55
|
+
def scrub!(scrubber)
|
56
|
+
each { |node| node.scrub!(scrubber) }
|
57
|
+
self
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
def ScrubBehavior.resolve_scrubber(scrubber) # :nodoc:
|
32
62
|
scrubber = Scrubbers::MAP[scrubber].new if Scrubbers::MAP[scrubber]
|
33
|
-
|
34
|
-
|
35
|
-
|
63
|
+
unless scrubber.is_a?(Loofah::Scrubber)
|
64
|
+
raise Loofah::ScrubberNotFound, "not a Scrubber or a scrubber name: #{scrubber.inspect}"
|
65
|
+
end
|
66
|
+
scrubber
|
36
67
|
end
|
68
|
+
end
|
37
69
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
70
|
+
module DocumentDecorator # :nodoc:
|
71
|
+
def initialize(*args, &block)
|
72
|
+
super
|
73
|
+
self.decorators(Nokogiri::XML::Node) << ScrubBehavior::Node
|
74
|
+
self.decorators(Nokogiri::XML::NodeSet) << ScrubBehavior::NodeSet
|
43
75
|
end
|
44
|
-
alias :inner_text :text
|
45
|
-
alias :to_str :text
|
46
76
|
end
|
47
77
|
end
|
data/lib/loofah/scrubber.rb
CHANGED
@@ -91,7 +91,7 @@ module Loofah
|
|
91
91
|
def html5lib_sanitize(node)
|
92
92
|
case node.type
|
93
93
|
when Nokogiri::XML::Node::ELEMENT_NODE
|
94
|
-
if HTML5::HashedWhiteList::
|
94
|
+
if HTML5::HashedWhiteList::ALLOWED_ELEMENTS_WITH_LIBXML2[node.name]
|
95
95
|
HTML5::Scrub.scrub_attributes node
|
96
96
|
return Scrubber::CONTINUE
|
97
97
|
end
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -152,7 +152,7 @@ module Loofah
|
|
152
152
|
def scrub(node)
|
153
153
|
case node.type
|
154
154
|
when Nokogiri::XML::Node::ELEMENT_NODE
|
155
|
-
if HTML5::HashedWhiteList::
|
155
|
+
if HTML5::HashedWhiteList::ALLOWED_ELEMENTS_WITH_LIBXML2[node.name]
|
156
156
|
node.attributes.each { |attr| node.remove_attribute(attr.first) }
|
157
157
|
return CONTINUE if node.namespaces.empty?
|
158
158
|
end
|
data/lib/loofah/xml/document.rb
CHANGED
@@ -3,17 +3,11 @@ module Loofah
|
|
3
3
|
#
|
4
4
|
# Subclass of Nokogiri::XML::Document.
|
5
5
|
#
|
6
|
-
# See Loofah::
|
6
|
+
# See Loofah::ScrubBehavior and Loofah::DocumentDecorator for additional methods.
|
7
7
|
#
|
8
8
|
class Document < Nokogiri::XML::Document
|
9
|
-
include Loofah::
|
10
|
-
|
11
|
-
private
|
12
|
-
|
13
|
-
def sanitize_roots # :nodoc:
|
14
|
-
self
|
15
|
-
end
|
16
|
-
|
9
|
+
include Loofah::ScrubBehavior::Node
|
10
|
+
include Loofah::DocumentDecorator
|
17
11
|
end
|
18
12
|
end
|
19
13
|
end
|
@@ -1,12 +1,12 @@
|
|
1
1
|
module Loofah
|
2
2
|
module XML # :nodoc:
|
3
3
|
#
|
4
|
-
# Subclass of Nokogiri::XML::DocumentFragment.
|
4
|
+
# Subclass of Nokogiri::XML::DocumentFragment.
|
5
5
|
#
|
6
|
-
# See Loofah::
|
6
|
+
# See Loofah::ScrubBehavior for additional methods.
|
7
7
|
#
|
8
8
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
9
|
-
include Loofah::
|
9
|
+
include Loofah::ScrubBehavior::Node
|
10
10
|
|
11
11
|
class << self
|
12
12
|
#
|
@@ -18,13 +18,6 @@ module Loofah
|
|
18
18
|
self.new(Loofah::XML::Document.new, tags)
|
19
19
|
end
|
20
20
|
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def sanitize_roots # :nodoc:
|
25
|
-
self
|
26
|
-
end
|
27
|
-
|
28
21
|
end
|
29
22
|
end
|
30
23
|
end
|
@@ -28,7 +28,7 @@ class Html5TestSanitizer < Test::Unit::TestCase
|
|
28
28
|
assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane), input)
|
29
29
|
end
|
30
30
|
|
31
|
-
HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
|
31
|
+
(HTML5::WhiteList::ALLOWED_ELEMENTS).each do |tag_name|
|
32
32
|
define_method "test_should_allow_#{tag_name}_tag" do
|
33
33
|
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
34
34
|
htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.downcase}>"
|
data/test/test_ad_hoc.rb
CHANGED
@@ -52,6 +52,60 @@ class TestAdHoc < Test::Unit::TestCase
|
|
52
52
|
assert_equal "Abe Vigoda", employees.first.inner_text
|
53
53
|
end
|
54
54
|
|
55
|
+
def test_html_fragment_to_s_should_not_include_head_tags
|
56
|
+
html = Loofah.fragment "<style>foo</style><div>bar</div>"
|
57
|
+
assert_equal "<div>bar</div>", html.to_s
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_html_fragment_text_should_not_include_head_tags
|
61
|
+
html = Loofah.fragment "<style>foo</style><div>bar</div>"
|
62
|
+
assert_equal "bar", html.text
|
63
|
+
end
|
64
|
+
|
65
|
+
def test_html_document_text_should_not_include_head_tags
|
66
|
+
html = Loofah.document "<style>foo</style><div>bar</div>"
|
67
|
+
assert_equal "bar", html.text
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_node_scrub_should_only_scrub_subtree
|
71
|
+
xml = Loofah.document <<-EOHTML
|
72
|
+
<html><body>
|
73
|
+
<div class='scrub'>
|
74
|
+
<script>I should be removed</script>
|
75
|
+
</div>
|
76
|
+
<div class='noscrub'>
|
77
|
+
<script>I should remain</script>
|
78
|
+
</div>
|
79
|
+
</body></html>
|
80
|
+
EOHTML
|
81
|
+
node = xml.at_css "div.scrub"
|
82
|
+
node.scrub!(:prune)
|
83
|
+
assert_contains xml.to_s, /I should remain/
|
84
|
+
assert_does_not_contain xml.to_s, /I should be removed/
|
85
|
+
end
|
86
|
+
|
87
|
+
def test_nodeset_scrub_should_only_scrub_subtrees
|
88
|
+
xml = Loofah.document <<-EOHTML
|
89
|
+
<html><body>
|
90
|
+
<div class='scrub'>
|
91
|
+
<script>I should be removed</script>
|
92
|
+
</div>
|
93
|
+
<div class='noscrub'>
|
94
|
+
<script>I should remain</script>
|
95
|
+
</div>
|
96
|
+
<div class='scrub'>
|
97
|
+
<script>I should also be removed</script>
|
98
|
+
</div>
|
99
|
+
</body></html>
|
100
|
+
EOHTML
|
101
|
+
node_set = xml.css "div.scrub"
|
102
|
+
assert_equal 2, node_set.length
|
103
|
+
node_set.scrub!(:prune)
|
104
|
+
assert_contains xml.to_s, /I should remain/
|
105
|
+
assert_does_not_contain xml.to_s, /I should be removed/
|
106
|
+
assert_does_not_contain xml.to_s, /I should also be removed/
|
107
|
+
end
|
108
|
+
|
55
109
|
def test_removal_of_illegal_tag
|
56
110
|
html = <<-HTML
|
57
111
|
following this there should be no jim tag
|
data/test/test_api.rb
CHANGED
@@ -68,6 +68,37 @@ class TestApi < Test::Unit::TestCase
|
|
68
68
|
assert_xml_fragmentish doc
|
69
69
|
end
|
70
70
|
|
71
|
+
def test_loofah_html_document_node_scrub!
|
72
|
+
doc = Loofah.document(HTML)
|
73
|
+
assert(node = doc.at_css("div"))
|
74
|
+
node.scrub!(:strip)
|
75
|
+
end
|
76
|
+
|
77
|
+
def test_loofah_html_fragment_node_scrub!
|
78
|
+
doc = Loofah.fragment(HTML)
|
79
|
+
assert(node = doc.at_css("div"))
|
80
|
+
node.scrub!(:strip)
|
81
|
+
end
|
82
|
+
|
83
|
+
def test_loofah_xml_document_node_scrub!
|
84
|
+
doc = Loofah.document(XML)
|
85
|
+
assert(node = doc.at_css("div"))
|
86
|
+
node.scrub!(:strip)
|
87
|
+
end
|
88
|
+
|
89
|
+
def test_loofah_xml_fragment_node_scrub!
|
90
|
+
doc = Loofah.fragment(XML)
|
91
|
+
assert(node = doc.at_css("div"))
|
92
|
+
node.scrub!(:strip)
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_loofah_nodeset_scrub!
|
96
|
+
doc = Loofah.document(HTML)
|
97
|
+
assert(node_set = doc.css("div"))
|
98
|
+
assert_instance_of Nokogiri::XML::NodeSet, node_set
|
99
|
+
node_set.scrub!(:strip)
|
100
|
+
end
|
101
|
+
|
71
102
|
private
|
72
103
|
|
73
104
|
def assert_html_documentish(doc)
|
data/test/test_scrubber.rb
CHANGED
@@ -6,8 +6,8 @@ class TestScrubber < Test::Unit::TestCase
|
|
6
6
|
FRAGMENT_NODE_COUNT = 4 # span, text, span, text
|
7
7
|
FRAGMENT_NODE_STOP_TOP_DOWN = 2 # span, span
|
8
8
|
DOCUMENT = "<html><head><link></link></head><body><span>hello</span><span>goodbye</span></body></html>"
|
9
|
-
DOCUMENT_NODE_COUNT =
|
10
|
-
DOCUMENT_NODE_STOP_TOP_DOWN =
|
9
|
+
DOCUMENT_NODE_COUNT = 8 # html, head, link, body, span, text, span, text
|
10
|
+
DOCUMENT_NODE_STOP_TOP_DOWN = 1 # html
|
11
11
|
|
12
12
|
context "receiving a block" do
|
13
13
|
setup do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: loofah
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Dalessio
|
@@ -31,7 +31,7 @@ cert_chain:
|
|
31
31
|
FlqnTjy13J3nD30uxy9a1g==
|
32
32
|
-----END CERTIFICATE-----
|
33
33
|
|
34
|
-
date:
|
34
|
+
date: 2010-01-23 00:00:00 -05:00
|
35
35
|
default_executable:
|
36
36
|
dependencies:
|
37
37
|
- !ruby/object:Gem::Dependency
|
metadata.gz.sig
CHANGED
Binary file
|