sanitize 1.2.1.dev.20100122 → 1.2.1.dev.20100124
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- data/HISTORY +4 -1
- data/README.rdoc +12 -5
- data/lib/sanitize/config.rb +11 -3
- data/lib/sanitize/version.rb +1 -1
- data/lib/sanitize.rb +35 -14
- metadata +2 -2
data/HISTORY
CHANGED
@@ -2,8 +2,11 @@ Sanitize History
|
|
2
2
|
================================================================================
|
3
3
|
|
4
4
|
Version 1.2.1 (git)
|
5
|
+
* Added an :escape_only config setting. If set to true, Sanitize will escape
|
6
|
+
non-whitelisted elements and their contents instead of removing them.
|
5
7
|
* Added a :remove_contents config setting. If set to true, Sanitize will
|
6
|
-
remove the contents of
|
8
|
+
remove the contents of non-whitelisted elements in addition to the elements
|
9
|
+
themselves.
|
7
10
|
* The environment hash passed into transformers now includes a :node_name item
|
8
11
|
containing the lowercase name of the current HTML node (e.g. "div").
|
9
12
|
* Returning anything other than a Hash or nil from a transformer will now
|
data/README.rdoc
CHANGED
@@ -11,8 +11,7 @@ that you don't explicitly allow will be removed.
|
|
11
11
|
|
12
12
|
Because it's based on Nokogiri, a full-fledged HTML parser, rather than a bunch
|
13
13
|
of fragile regular expressions, Sanitize has no trouble dealing with malformed
|
14
|
-
or maliciously-formed HTML. When in doubt, Sanitize always errs on the side of
|
15
|
-
caution.
|
14
|
+
or maliciously-formed HTML, and will always output valid HTML or XHTML.
|
16
15
|
|
17
16
|
*Author*:: Ryan Grove (mailto:ryan@wonko.com)
|
18
17
|
*Version*:: 1.2.1.dev (git)
|
@@ -134,6 +133,11 @@ Array of element names to allow. Specify all names in lowercase.
|
|
134
133
|
'sup', 'u', 'ul'
|
135
134
|
]
|
136
135
|
|
136
|
+
==== :escape_only (boolean)
|
137
|
+
|
138
|
+
If set to +true+, Sanitize will escape non-whitelisted elements and their
|
139
|
+
contents rather than removing them.
|
140
|
+
|
137
141
|
==== :output (Symbol)
|
138
142
|
|
139
143
|
Output format. Supported formats are <code>:html</code> and <code>:xhtml</code>,
|
@@ -159,9 +163,12 @@ include the symbol <code>:relative</code> in the protocol array:
|
|
159
163
|
|
160
164
|
==== :remove_contents (boolean)
|
161
165
|
|
162
|
-
If set to
|
163
|
-
|
164
|
-
parts of
|
166
|
+
If set to +true+, Sanitize will remove the contents of any non-whitelisted
|
167
|
+
elements in addition to the elements themselves. By default, Sanitize leaves the
|
168
|
+
safe parts of an element's contents behind when the element is removed.
|
169
|
+
|
170
|
+
If both <code>:escape_only</code> and <code>:remove_contents</code> are enabled,
|
171
|
+
<code>:remove_contents</code> will take precedence.
|
165
172
|
|
166
173
|
==== :transformers
|
167
174
|
|
data/lib/sanitize/config.rb
CHANGED
@@ -40,6 +40,10 @@ class Sanitize
|
|
40
40
|
# that all HTML will be stripped).
|
41
41
|
:elements => [],
|
42
42
|
|
43
|
+
# If this is true, Sanitize will escape non-whitelisted elements and their
|
44
|
+
# contents rather than removing them.
|
45
|
+
:escape_only => false,
|
46
|
+
|
43
47
|
# Output format. Supported formats are :html and :xhtml (which is the
|
44
48
|
# default).
|
45
49
|
:output => :xhtml,
|
@@ -49,9 +53,13 @@ class Sanitize
|
|
49
53
|
# to allow relative URLs sans protocol.
|
50
54
|
:protocols => {},
|
51
55
|
|
52
|
-
# If this is true, Sanitize will remove the contents of any filtered
|
53
|
-
# in addition to the
|
54
|
-
# safe parts of
|
56
|
+
# If this is true, Sanitize will remove the contents of any filtered
|
57
|
+
# elements in addition to the elements themselves. By default, Sanitize
|
58
|
+
# leaves the safe parts of an element's contents behind when the element
|
59
|
+
# is removed.
|
60
|
+
#
|
61
|
+
# If both :escape_only and :remove_contents are true, :remove_contents
|
62
|
+
# will take precedence.
|
55
63
|
:remove_contents => false,
|
56
64
|
|
57
65
|
# Transformers allow you to filter or alter nodes using custom logic. See
|
data/lib/sanitize/version.rb
CHANGED
data/lib/sanitize.rb
CHANGED
@@ -72,6 +72,15 @@ class Sanitize
|
|
72
72
|
@config = Config::DEFAULT.merge(config)
|
73
73
|
@config[:transformers] = Array(@config[:transformers])
|
74
74
|
|
75
|
+
# :remove_contents takes precedence over :escape_only.
|
76
|
+
if @config[:remove_contents] && @config[:escape_only]
|
77
|
+
@config[:escape_only] = false
|
78
|
+
end
|
79
|
+
|
80
|
+
# Convert the list of allowed elements to a Hash for faster lookup.
|
81
|
+
@allowed_elements = {}
|
82
|
+
@config[:elements].each {|el| @allowed_elements[el] = true }
|
83
|
+
|
75
84
|
# Specific nodes to whitelist (along with all their attributes). This array
|
76
85
|
# is generated at runtime by transformers, and is cleared before and after
|
77
86
|
# a fragment is cleaned (so it applies only to a specific fragment).
|
@@ -87,10 +96,8 @@ class Sanitize
|
|
87
96
|
# Performs clean in place, returning _html_, or +nil+ if no changes were
|
88
97
|
# made.
|
89
98
|
def clean!(html)
|
90
|
-
@whitelist_nodes = []
|
91
99
|
fragment = Nokogiri::HTML::DocumentFragment.parse(html)
|
92
100
|
clean_node!(fragment)
|
93
|
-
@whitelist_nodes = []
|
94
101
|
|
95
102
|
output_method_params = {:encoding => 'utf-8', :indent => 0}
|
96
103
|
|
@@ -116,17 +123,26 @@ class Sanitize
|
|
116
123
|
def clean_node!(node)
|
117
124
|
raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)
|
118
125
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
elsif
|
125
|
-
|
126
|
-
|
126
|
+
@whitelist_nodes = []
|
127
|
+
|
128
|
+
node.traverse do |child|
|
129
|
+
if child.element?
|
130
|
+
clean_element!(child)
|
131
|
+
elsif child.comment?
|
132
|
+
unless @config[:allow_comments]
|
133
|
+
if @config[:escape_only]
|
134
|
+
child.replace(Nokogiri::XML::Text.new(child.to_s, child.document))
|
135
|
+
else
|
136
|
+
child.unlink
|
137
|
+
end
|
138
|
+
end
|
139
|
+
elsif child.cdata?
|
140
|
+
child.replace(Nokogiri::XML::Text.new(child.text, child.document))
|
127
141
|
end
|
128
142
|
end
|
129
143
|
|
144
|
+
@whitelist_nodes = []
|
145
|
+
|
130
146
|
node
|
131
147
|
end
|
132
148
|
|
@@ -143,12 +159,17 @@ class Sanitize
|
|
143
159
|
name = node.name.to_s.downcase
|
144
160
|
|
145
161
|
# Delete any element that isn't in the whitelist.
|
146
|
-
unless transform[:whitelist] || @
|
147
|
-
|
148
|
-
node.
|
162
|
+
unless transform[:whitelist] || @allowed_elements[name]
|
163
|
+
if @config[:escape_only]
|
164
|
+
node.replace(Nokogiri::XML::Text.new(node.to_s, node.document))
|
165
|
+
else
|
166
|
+
unless @config[:remove_contents]
|
167
|
+
node.children.each { |n| node.add_previous_sibling(n) }
|
168
|
+
end
|
169
|
+
|
170
|
+
node.unlink
|
149
171
|
end
|
150
172
|
|
151
|
-
node.unlink
|
152
173
|
return
|
153
174
|
end
|
154
175
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1.dev.20100122
|
4
|
+
version: 1.2.1.dev.20100124
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Grove
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-22 00:00:00 -08:00
|
12
|
+
date: 2010-01-24 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|