loofah 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- data.tar.gz.sig +0 -0
- data/CHANGELOG.rdoc +9 -0
- data/Manifest.txt +3 -1
- data/README.rdoc +223 -92
- data/Rakefile +11 -3
- data/TODO.rdoc +0 -5
- data/lib/loofah.rb +27 -138
- data/lib/loofah/active_record.rb +10 -18
- data/lib/loofah/html/document.rb +4 -4
- data/lib/loofah/html/document_fragment.rb +5 -5
- data/lib/loofah/html5/scrub.rb +1 -1
- data/lib/loofah/html5/whitelist.rb +1 -1
- data/lib/loofah/instance_methods.rb +47 -0
- data/lib/loofah/scrubber.rb +98 -76
- data/lib/loofah/scrubbers.rb +199 -0
- data/lib/loofah/xss_foliate.rb +71 -69
- data/test/html5/test_sanitizer.rb +12 -9
- data/test/test_active_record.rb +22 -0
- data/test/test_ad_hoc.rb +42 -0
- data/test/test_api.rb +47 -1
- data/test/test_scrubber.rb +204 -102
- data/test/test_scrubbers.rb +144 -0
- metadata +44 -12
- metadata.gz.sig +0 -0
- data/test/html5/testdata/tests1.dat +0 -501
data/TODO.rdoc
CHANGED
@@ -1,9 +1,4 @@
|
|
1
1
|
= TODO
|
2
2
|
|
3
|
-
* Allow developers to implement their own sanitizations.
|
4
|
-
* Implement a proper visitor pattern.
|
5
|
-
* Make internal loofah methods available.
|
6
|
-
|
7
3
|
* Allow a <tt>text</tt> option to insert nice newlines after headers and block elements.
|
8
|
-
|
9
4
|
* <tt>to_markdown</tt>
|
data/lib/loofah.rb
CHANGED
@@ -6,94 +6,16 @@ require 'loofah/html5/whitelist'
|
|
6
6
|
require 'loofah/html5/scrub'
|
7
7
|
|
8
8
|
require 'loofah/scrubber'
|
9
|
+
require 'loofah/scrubbers'
|
9
10
|
|
11
|
+
require 'loofah/instance_methods'
|
12
|
+
require 'loofah/xml/document'
|
13
|
+
require 'loofah/xml/document_fragment'
|
10
14
|
require 'loofah/html/document'
|
11
15
|
require 'loofah/html/document_fragment'
|
12
16
|
|
13
17
|
require 'loofah/helpers'
|
14
18
|
|
15
|
-
#
|
16
|
-
# Loofah is an HTML sanitizer wrapped around Nokogiri[http://nokogiri.org], an excellent
|
17
|
-
# HTML/XML parser. If you don't know how Nokogiri[http://nokogiri.org]
|
18
|
-
# works, you might want to pause for a moment and go check it out. I'll
|
19
|
-
# wait.
|
20
|
-
#
|
21
|
-
# A Loofah::HTML::Document is a subclass of Nokogiri::HTML::Document,
|
22
|
-
# so a parsed document gives you all the markup fixer-uppery and API
|
23
|
-
# goodness of Nokogiri.
|
24
|
-
#
|
25
|
-
# Loofah.document(unsafe_html).is_a?(Nokogiri::HTML::Document) # => true
|
26
|
-
# Loofah.fragment(unsafe_html).is_a?(Nokogiri::HTML::DocumentFragment) # => true
|
27
|
-
#
|
28
|
-
# Loofah adds a +scrub!+ method, which can clean up your HTML in a few
|
29
|
-
# different ways by modifying the document in-place:
|
30
|
-
#
|
31
|
-
# doc.scrub!(:strip) # replaces unknown/unsafe tags with their inner text
|
32
|
-
# doc.scrub!(:prune) # removes unknown/unsafe tags and their children
|
33
|
-
# doc.scrub!(:whitewash) # removes unknown/unsafe/namespaced tags and their children,
|
34
|
-
# # and strips all node attributes
|
35
|
-
# doc.scrub!(:escape) # escapes unknown/unsafe tags, like this: <script>
|
36
|
-
#
|
37
|
-
# Loofah overrides +to_s+ to return html:
|
38
|
-
#
|
39
|
-
# unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
|
40
|
-
#
|
41
|
-
# doc = Loofah.fragment(unsafe_html).scrub!(:strip)
|
42
|
-
# doc.to_s # => "ohai! <div>div is safe</div> "
|
43
|
-
#
|
44
|
-
# and +text+ to return plain text:
|
45
|
-
#
|
46
|
-
# doc.text # => "ohai! div is safe "
|
47
|
-
#
|
48
|
-
# Or, if you prefer, you can use the shorthand methods +scrub_fragment+ and +scrub_document+:
|
49
|
-
#
|
50
|
-
# Loofah.scrub_fragment(unsafe_html, :prune).to_s
|
51
|
-
# Loofah.scrub_document(unsafe_html, :strip).text
|
52
|
-
#
|
53
|
-
# == Usage
|
54
|
-
#
|
55
|
-
# Let's say you have a Web 2.0 application, and you allow people to
|
56
|
-
# send HTML snippets to each other.
|
57
|
-
#
|
58
|
-
# Let's also say some script-kiddie from Norland sends this to your
|
59
|
-
# users, in an effort to swipe some credit cards:
|
60
|
-
#
|
61
|
-
# <script src=http://ha.ckers.org/xss.js></script>
|
62
|
-
#
|
63
|
-
# Oooh, that could be bad. Here's how to fix it:
|
64
|
-
#
|
65
|
-
# Loofah.scrub_fragment(dangerous_html, :escape).to_s
|
66
|
-
#
|
67
|
-
# # => "<script src=\"http://ha.ckers.org/xss.js\"></script>"
|
68
|
-
#
|
69
|
-
# Loofah also makes available the sanitized markup in both HTML and
|
70
|
-
# plain-text formats without incurring the overhead of multiple
|
71
|
-
# parsings:
|
72
|
-
#
|
73
|
-
# safe_fragment = Loofah.scrub_fragment(dangerous_html, :strip)
|
74
|
-
# safe_fragment.to_s # => HTML output
|
75
|
-
# safe_fragment.text # => plain text output
|
76
|
-
#
|
77
|
-
# And you can modify the HTML using Nokogiri's API, if you like:
|
78
|
-
#
|
79
|
-
# stylized_fragment = Loofah.fragment(dangerous_html)
|
80
|
-
# stylized_fragment.xpath("//a/text()").wrap("<span></span>")
|
81
|
-
# stylized_fragment.scrub!(:strip)
|
82
|
-
#
|
83
|
-
# == Fragments vs Documents
|
84
|
-
#
|
85
|
-
# Generally speaking, unless you expect to have \<html\> and
|
86
|
-
# \<body\> tags in your HTML, you don't have a *document*, you
|
87
|
-
# have a *fragment*.
|
88
|
-
#
|
89
|
-
# For parsing fragments, you should use Loofah.fragment. Nokogiri
|
90
|
-
# won't wrap the result in +html+ and +body+ tags, and will ignore
|
91
|
-
# +head+ elements.
|
92
|
-
#
|
93
|
-
# Full HTML documents should be parsed with Loofah.document, which
|
94
|
-
# will add the DOCTYPE declaration, and properly handle +head+ and
|
95
|
-
# +body+ elements.
|
96
|
-
#
|
97
19
|
# == Strings and IO Objects as Input
|
98
20
|
#
|
99
21
|
# Loofah.document and Loofah.fragment accept any IO object in addition
|
@@ -102,64 +24,9 @@ require 'loofah/helpers'
|
|
102
24
|
# +close+. Which makes it particularly easy to sanitize mass
|
103
25
|
# quantities of docs.
|
104
26
|
#
|
105
|
-
# == Scrubbing Methods
|
106
|
-
#
|
107
|
-
# Given:
|
108
|
-
# unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
109
|
-
#
|
110
|
-
# === scrub!(:strip)
|
111
|
-
#
|
112
|
-
# +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
|
113
|
-
#
|
114
|
-
# Loofah.fragment(unsafe_html).scrub!(:strip)
|
115
|
-
# # or
|
116
|
-
# Loofah.scrub_fragment(unsafe_html, :strip)
|
117
|
-
#
|
118
|
-
# => "ohai! <div>div is safe</div> but foo is <b>not</b>"
|
119
|
-
#
|
120
|
-
# === scrub!(:prune)
|
121
|
-
#
|
122
|
-
# +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
|
123
|
-
#
|
124
|
-
# Loofah.fragment(unsafe_html).scrub!(:prune)
|
125
|
-
# # or
|
126
|
-
# Loofah.scrub_fragment(unsafe_html, :prune)
|
127
|
-
#
|
128
|
-
# => "ohai! <div>div is safe</div> "
|
129
|
-
#
|
130
|
-
# === scrub!(:escape)
|
131
|
-
#
|
132
|
-
# +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
|
133
|
-
#
|
134
|
-
# Loofah.fragment(unsafe_html).scrub!(:escape)
|
135
|
-
# # or
|
136
|
-
# Loofah.scrub_fragment(unsafe_html, :escape)
|
137
|
-
#
|
138
|
-
# => "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
|
139
|
-
#
|
140
|
-
# === scrub!(:whitewash)
|
141
|
-
#
|
142
|
-
# +:whitewash+ removes all comments, styling and attributes in
|
143
|
-
# addition to doing markup-fixer-uppery and pruning unsafe tags. I
|
144
|
-
# like to call this "whitewashing", since it's like putting a new
|
145
|
-
# layer of paint on top of the HTML input to make it look nice.
|
146
|
-
#
|
147
|
-
# messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
|
148
|
-
#
|
149
|
-
# Loofah.fragment(messy_markup).scrub!(:whitewash)
|
150
|
-
# # or
|
151
|
-
# Loofah.scrub_fragment(messy_markup, :whitewash)
|
152
|
-
#
|
153
|
-
# => "ohai! <div>div with attributes</div>"
|
154
|
-
#
|
155
|
-
# One use case for this feature is to clean up HTML that was
|
156
|
-
# cut-and-pasted from Microsoft Word into a WYSIWYG editor or a rich
|
157
|
-
# text editor. Microsoft's software is famous for injecting all kinds
|
158
|
-
# of cruft into its HTML output. Who needs that? Certainly not me.
|
159
|
-
#
|
160
27
|
module Loofah
|
161
28
|
# The version of Loofah you are using
|
162
|
-
VERSION = '0.3.1'
|
29
|
+
VERSION = '0.4.0'
|
163
30
|
|
164
31
|
# The minimum required version of Nokogiri
|
165
32
|
REQUIRED_NOKOGIRI_VERSION = '1.3.3'
|
@@ -187,6 +54,28 @@ module Loofah
|
|
187
54
|
Loofah.document(string_or_io).scrub!(method)
|
188
55
|
end
|
189
56
|
|
57
|
+
# Shortcut for Loofah::XML::Document.parse
|
58
|
+
# This method accepts the same parameters as Nokogiri::XML::Document.parse
|
59
|
+
def xml_document(*args, &block)
|
60
|
+
Loofah::XML::Document.parse(*args, &block)
|
61
|
+
end
|
62
|
+
|
63
|
+
# Shortcut for Loofah::XML::DocumentFragment.parse
|
64
|
+
# This method accepts the same parameters as Nokogiri::XML::DocumentFragment.parse
|
65
|
+
def xml_fragment(*args, &block)
|
66
|
+
Loofah::XML::DocumentFragment.parse(*args, &block)
|
67
|
+
end
|
68
|
+
|
69
|
+
# Shortcut for Loofah.xml_fragment(string_or_io).scrub!(method)
|
70
|
+
def scrub_xml_fragment(string_or_io, method)
|
71
|
+
Loofah.xml_fragment(string_or_io).scrub!(method)
|
72
|
+
end
|
73
|
+
|
74
|
+
# Shortcut for Loofah.xml_document(string_or_io).scrub!(method)
|
75
|
+
def scrub_xml_document(string_or_io, method)
|
76
|
+
Loofah.xml_document(string_or_io).scrub!(method)
|
77
|
+
end
|
78
|
+
|
190
79
|
end
|
191
80
|
end
|
192
81
|
|
data/lib/loofah/active_record.rb
CHANGED
@@ -21,19 +21,15 @@ module Loofah
|
|
21
21
|
module ActiveRecordExtension
|
22
22
|
#
|
23
23
|
# :call-seq:
|
24
|
-
# html_fragment(attribute, :scrub =>
|
24
|
+
# html_fragment(attribute, :scrub => scrubber_specification)
|
25
25
|
#
|
26
26
|
# Scrub an ActiveRecord attribute +attribute+ as an HTML *fragment*
|
27
|
-
# using the method specified by +
|
27
|
+
# using the method specified by +scrubber_specification+.
|
28
28
|
#
|
29
|
-
# +
|
29
|
+
# +scrubber_specification+ must be an argument acceptable to Loofah::InstanceMethods.scrub!, namely:
|
30
30
|
#
|
31
|
-
# *
|
32
|
-
# *
|
33
|
-
# * :escape
|
34
|
-
# * :whitewash
|
35
|
-
#
|
36
|
-
# See Loofah for an explanation of each sanitization method.
|
31
|
+
# * a symbol for one of the built-in scrubbers (see Loofah::Scrubbers for a full list)
|
32
|
+
# * or a Scrubber instance. (see Loofah::Scrubber for help on implementing a custom scrubber)
|
37
33
|
#
|
38
34
|
def html_fragment(attr, options={})
|
39
35
|
raise ArgumentError, "html_fragment requires :scrub option" unless method = options[:scrub]
|
@@ -44,19 +40,15 @@ module Loofah
|
|
44
40
|
|
45
41
|
#
|
46
42
|
# :call-seq:
|
47
|
-
# model.html_document(attribute, :scrub =>
|
43
|
+
# model.html_document(attribute, :scrub => scrubber_specification)
|
48
44
|
#
|
49
45
|
# Scrub an ActiveRecord attribute +attribute+ as an HTML *document*
|
50
|
-
# using the method specified by +
|
51
|
-
#
|
52
|
-
# +sanitization_method+ must be one of:
|
46
|
+
# using the method specified by +scrubber_specification+.
|
53
47
|
#
|
54
|
-
#
|
55
|
-
# * :prune
|
56
|
-
# * :escape
|
57
|
-
# * :whitewash
|
48
|
+
# +scrubber_specification+ must be an argument acceptable to Loofah::InstanceMethods.scrub!, namely:
|
58
49
|
#
|
59
|
-
#
|
50
|
+
# * a symbol for one of the built-in scrubbers (see Loofah::Scrubbers for a full list)
|
51
|
+
# * or a Scrubber instance.
|
60
52
|
#
|
61
53
|
def html_document(attr, options={})
|
62
54
|
raise ArgumentError, "html_document requires :scrub option" unless method = options[:scrub]
|
data/lib/loofah/html/document.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
module Loofah
|
2
|
-
module HTML
|
2
|
+
module HTML # :nodoc:
|
3
3
|
#
|
4
4
|
# Subclass of Nokogiri::HTML::Document.
|
5
5
|
#
|
6
|
-
# See Loofah::
|
6
|
+
# See Loofah::InstanceMethods for additional methods.
|
7
7
|
#
|
8
8
|
class Document < Nokogiri::HTML::Document
|
9
|
-
include Loofah::
|
9
|
+
include Loofah::InstanceMethods
|
10
10
|
|
11
11
|
private
|
12
12
|
|
13
|
-
def
|
13
|
+
def sanitize_roots # :nodoc:
|
14
14
|
xpath("/html/head","/html/body")
|
15
15
|
end
|
16
16
|
|
@@ -1,12 +1,12 @@
|
|
1
1
|
module Loofah
|
2
|
-
module HTML
|
2
|
+
module HTML # :nodoc:
|
3
3
|
#
|
4
4
|
# Subclass of Nokogiri::HTML::DocumentFragment. Also includes Loofah::ScrubberInstanceMethods.
|
5
5
|
#
|
6
|
-
# See Loofah::
|
6
|
+
# See Loofah::InstanceMethods for additional methods.
|
7
7
|
#
|
8
8
|
class DocumentFragment < Nokogiri::HTML::DocumentFragment
|
9
|
-
include Loofah::
|
9
|
+
include Loofah::InstanceMethods
|
10
10
|
|
11
11
|
class << self
|
12
12
|
#
|
@@ -23,13 +23,13 @@ module Loofah
|
|
23
23
|
# Returns the HTML markup contained by the fragment or document
|
24
24
|
#
|
25
25
|
def to_s
|
26
|
-
|
26
|
+
sanitize_roots.children.to_s
|
27
27
|
end
|
28
28
|
alias :serialize :to_s
|
29
29
|
|
30
30
|
private
|
31
31
|
|
32
|
-
def
|
32
|
+
def sanitize_roots # :nodoc:
|
33
33
|
xpath("./body").first || self
|
34
34
|
end
|
35
35
|
|
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -0,0 +1,47 @@
|
|
1
|
+
module Loofah
|
2
|
+
#
|
3
|
+
# Methods that are mixed into Loofah::HTML::Document and Loofah::HTML::DocumentFragment.
|
4
|
+
#
|
5
|
+
module InstanceMethods
|
6
|
+
#
|
7
|
+
# Traverse the document or fragment, invoking the +scrubber+ on
|
8
|
+
# each node.
|
9
|
+
#
|
10
|
+
# +scrubber+ must either be one of the symbols representing the
|
11
|
+
# built-in scrubbers (see Scrubbers), or a Scrubber instance.
|
12
|
+
#
|
13
|
+
# span2div = Loofah::Scrubber.new do |node|
|
14
|
+
# node.name = "div" if node.name == "span"
|
15
|
+
# end
|
16
|
+
# Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
|
17
|
+
# # => "<div>foo</div><p>bar</p>"
|
18
|
+
#
|
19
|
+
# or
|
20
|
+
#
|
21
|
+
# unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
|
22
|
+
# Loofah.fragment(unsafe_html).scrub!(:strip).to_s
|
23
|
+
# # => "ohai! <div>div is safe</div> "
|
24
|
+
#
|
25
|
+
# Note that this method is called implicitly from
|
26
|
+
# Loofah.scrub_fragment and Loofah.scrub_document.
|
27
|
+
#
|
28
|
+
# Please see Scrubber for more information on implementation and traversal, and
|
29
|
+
# README.rdoc for more example usage.
|
30
|
+
#
|
31
|
+
def scrub!(scrubber)
|
32
|
+
scrubber = Scrubbers::MAP[scrubber].new if Scrubbers::MAP[scrubber]
|
33
|
+
raise Loofah::ScrubberNotFound, "not a Scrubber or a scrubber name: #{scrubber.inspect}" unless scrubber.is_a?(Loofah::Scrubber)
|
34
|
+
sanitize_roots.children.each { |node| scrubber.traverse(node) }
|
35
|
+
self
|
36
|
+
end
|
37
|
+
|
38
|
+
#
|
39
|
+
# Returns a plain-text version of the markup contained by the fragment or document
|
40
|
+
#
|
41
|
+
def text
|
42
|
+
sanitize_roots.children.inner_text
|
43
|
+
end
|
44
|
+
alias :inner_text :text
|
45
|
+
alias :to_str :text
|
46
|
+
end
|
47
|
+
end
|
data/lib/loofah/scrubber.rb
CHANGED
@@ -1,100 +1,122 @@
|
|
1
1
|
module Loofah
|
2
2
|
#
|
3
|
-
#
|
3
|
+
# A RuntimeError raised when Loofah could not find an appropriate scrubber.
|
4
4
|
#
|
5
|
-
|
5
|
+
class ScrubberNotFound < RuntimeError ; end
|
6
|
+
|
7
|
+
#
|
8
|
+
# A Scrubber wraps up a block (or method) that is run on an HTML node (element):
|
9
|
+
#
|
10
|
+
# # change all <span> tags to <div> tags
|
11
|
+
# span2div = Loofah::Scrubber.new do |node|
|
12
|
+
# node.name = "div" if node.name == "span"
|
13
|
+
# end
|
14
|
+
#
|
15
|
+
# Alternatively, this scrubber could have been implemented as:
|
16
|
+
#
|
17
|
+
# class Span2Div < Loofah::Scrubber
|
18
|
+
# def scrub(node)
|
19
|
+
# node.name = "div" if node.name == "span"
|
20
|
+
# end
|
21
|
+
# end
|
22
|
+
# span2div = Span2Div.new
|
23
|
+
#
|
24
|
+
# This can then be run on a document:
|
25
|
+
#
|
26
|
+
# Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
|
27
|
+
# # => "<div>foo</div><p>bar</p>"
|
28
|
+
#
|
29
|
+
# Scrubbers can be run on a document in either a top-down traversal (the
|
30
|
+
# default) or bottom-up. Top-down scrubbers can optionally return
|
31
|
+
# Scrubber::STOP to terminate the traversal of a subtree.
|
32
|
+
#
|
33
|
+
class Scrubber
|
34
|
+
|
35
|
+
# Top-down Scrubbers may return CONTINUE to indicate that the subtree should be traversed.
|
36
|
+
CONTINUE = Object.new.freeze
|
37
|
+
|
38
|
+
# Top-down Scrubbers may return STOP to indicate that the subtree should not be traversed.
|
39
|
+
STOP = Object.new.freeze
|
40
|
+
|
41
|
+
# When a scrubber is initialized, the :direction may be specified
|
42
|
+
# as :top_down (the default) or :bottom_up.
|
43
|
+
attr_reader :direction
|
44
|
+
|
45
|
+
# When a scrubber is initialized, the optional block is saved as
|
46
|
+
# :block. Note that, if no block is passed, then the +scrub+
|
47
|
+
# method is assumed to have been implemented.
|
48
|
+
attr_reader :block
|
6
49
|
|
7
50
|
#
|
8
|
-
#
|
51
|
+
# Options may include
|
52
|
+
# :direction => :top_down (the default)
|
53
|
+
# or
|
54
|
+
# :direction => :bottom_up
|
9
55
|
#
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
raise ArgumentError, "
|
56
|
+
# For top_down traversals, if the block returns
|
57
|
+
# Loofah::Scrubber::STOP, then the traversal will be terminated
|
58
|
+
# for the current node's subtree.
|
59
|
+
#
|
60
|
+
# Alternatively, a Scrubber may inherit from Loofah::Scrubber,
|
61
|
+
# and implement +scrub+, which is slightly faster than using a
|
62
|
+
# block.
|
63
|
+
#
|
64
|
+
def initialize(options = {}, &block)
|
65
|
+
direction = options[:direction] || :top_down
|
66
|
+
unless [:top_down, :bottom_up].include?(direction)
|
67
|
+
raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
|
22
68
|
end
|
23
|
-
|
69
|
+
@direction, @block = direction, block
|
24
70
|
end
|
25
71
|
|
26
72
|
#
|
27
|
-
#
|
73
|
+
# Calling +traverse+ will cause the document to be traversed by
|
74
|
+
# either the lambda passed to the initializer or the +scrub+
|
75
|
+
# method, in the direction specified at +new+ time.
|
28
76
|
#
|
29
|
-
def
|
30
|
-
|
77
|
+
def traverse(node)
|
78
|
+
direction == :bottom_up ? traverse_conditionally_bottom_up(node) : traverse_conditionally_top_down(node)
|
31
79
|
end
|
32
|
-
alias :inner_text :text
|
33
|
-
alias :to_str :text
|
34
|
-
end
|
35
|
-
|
36
|
-
module Scrubber
|
37
|
-
class << self
|
38
|
-
|
39
|
-
def sanitize(node)
|
40
|
-
case node.type
|
41
|
-
when Nokogiri::XML::Node::ELEMENT_NODE
|
42
|
-
if HTML5::HashedWhiteList::ALLOWED_ELEMENTS[node.name]
|
43
|
-
HTML5::Scrub.scrub_attributes node
|
44
|
-
return false
|
45
|
-
end
|
46
|
-
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
|
47
|
-
return false
|
48
|
-
end
|
49
|
-
true
|
50
|
-
end
|
51
80
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
def prune(node)
|
61
|
-
return false unless sanitize(node)
|
62
|
-
node.remove
|
63
|
-
return true
|
64
|
-
end
|
81
|
+
#
|
82
|
+
# When +new+ is not passed a block, the class may implement
|
83
|
+
# +scrub+, which will be called for each document node.
|
84
|
+
#
|
85
|
+
def scrub(node)
|
86
|
+
raise ScrubberNotFound, "No scrub method has been defined on #{self.class.to_s}"
|
87
|
+
end
|
65
88
|
|
66
|
-
|
67
|
-
return false unless sanitize(node)
|
68
|
-
replacement_killer = node.before node.inner_html
|
69
|
-
node.remove
|
70
|
-
return true
|
71
|
-
end
|
89
|
+
private
|
72
90
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
end
|
80
|
-
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
|
81
|
-
return false
|
91
|
+
def html5lib_sanitize(node)
|
92
|
+
case node.type
|
93
|
+
when Nokogiri::XML::Node::ELEMENT_NODE
|
94
|
+
if HTML5::HashedWhiteList::ALLOWED_ELEMENTS[node.name]
|
95
|
+
HTML5::Scrub.scrub_attributes node
|
96
|
+
return Scrubber::CONTINUE
|
82
97
|
end
|
83
|
-
|
84
|
-
return
|
98
|
+
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
|
99
|
+
return Scrubber::CONTINUE
|
85
100
|
end
|
101
|
+
Scrubber::STOP
|
102
|
+
end
|
86
103
|
|
87
|
-
|
88
|
-
|
89
|
-
|
104
|
+
def traverse_conditionally_top_down(node)
|
105
|
+
if block
|
106
|
+
return if block.call(node) == STOP
|
107
|
+
else
|
108
|
+
return if scrub(node) == STOP
|
90
109
|
end
|
110
|
+
node.children.each {|j| traverse_conditionally_top_down(j)}
|
111
|
+
end
|
91
112
|
|
92
|
-
|
93
|
-
|
94
|
-
|
113
|
+
def traverse_conditionally_bottom_up(node)
|
114
|
+
node.children.each {|j| traverse_conditionally_bottom_up(j)}
|
115
|
+
if block
|
116
|
+
block.call(node)
|
117
|
+
else
|
118
|
+
scrub(node)
|
95
119
|
end
|
96
|
-
|
97
120
|
end
|
98
|
-
|
99
121
|
end
|
100
122
|
end
|