sanitize 2.1.1 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +520 -55
- data/LICENSE +1 -1
- data/README.md +438 -168
- data/lib/sanitize/config/basic.rb +12 -32
- data/lib/sanitize/config/default.rb +118 -0
- data/lib/sanitize/config/relaxed.rb +716 -53
- data/lib/sanitize/config/restricted.rb +3 -23
- data/lib/sanitize/config.rb +53 -79
- data/lib/sanitize/css.rb +348 -0
- data/lib/sanitize/transformers/clean_cdata.rb +3 -3
- data/lib/sanitize/transformers/clean_comment.rb +6 -3
- data/lib/sanitize/transformers/clean_css.rb +57 -0
- data/lib/sanitize/transformers/clean_doctype.rb +19 -0
- data/lib/sanitize/transformers/clean_element.rb +192 -124
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +172 -143
- data/test/common.rb +3 -0
- data/test/test_clean_comment.rb +47 -0
- data/test/test_clean_css.rb +67 -0
- data/test/test_clean_doctype.rb +71 -0
- data/test/test_clean_element.rb +545 -0
- data/test/test_config.rb +65 -0
- data/test/test_malicious_css.rb +42 -0
- data/test/test_malicious_html.rb +235 -0
- data/test/test_parser.rb +75 -0
- data/test/test_sanitize.rb +151 -675
- data/test/test_sanitize_css.rb +424 -0
- data/test/test_transformers.rb +230 -0
- metadata +44 -41
@@ -1,29 +1,9 @@
|
|
1
|
-
|
2
|
-
# Copyright (c) 2013 Ryan Grove <ryan@wonko.com>
|
3
|
-
#
|
4
|
-
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
-
# of this software and associated documentation files (the 'Software'), to deal
|
6
|
-
# in the Software without restriction, including without limitation the rights
|
7
|
-
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
-
# copies of the Software, and to permit persons to whom the Software is
|
9
|
-
# furnished to do so, subject to the following conditions:
|
10
|
-
#
|
11
|
-
# The above copyright notice and this permission notice shall be included in all
|
12
|
-
# copies or substantial portions of the Software.
|
13
|
-
#
|
14
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
-
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
-
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
-
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
-
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
-
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
-
# SOFTWARE.
|
21
|
-
#++
|
1
|
+
# encoding: utf-8
|
22
2
|
|
23
3
|
class Sanitize
|
24
4
|
module Config
|
25
|
-
RESTRICTED =
|
5
|
+
RESTRICTED = freeze_config(
|
26
6
|
:elements => %w[b em i strong u]
|
27
|
-
|
7
|
+
)
|
28
8
|
end
|
29
9
|
end
|
data/lib/sanitize/config.rb
CHANGED
@@ -1,86 +1,60 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
-
# of this software and associated documentation files (the 'Software'), to deal
|
6
|
-
# in the Software without restriction, including without limitation the rights
|
7
|
-
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
-
# copies of the Software, and to permit persons to whom the Software is
|
9
|
-
# furnished to do so, subject to the following conditions:
|
10
|
-
#
|
11
|
-
# The above copyright notice and this permission notice shall be included in all
|
12
|
-
# copies or substantial portions of the Software.
|
13
|
-
#
|
14
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
-
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
-
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
-
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
-
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
-
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
-
# SOFTWARE.
|
21
|
-
#++
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'set'
|
22
4
|
|
23
5
|
class Sanitize
|
24
6
|
module Config
|
25
|
-
DEFAULT = {
|
26
|
-
|
27
|
-
# Whether or not to allow HTML comments. Allowing comments is strongly
|
28
|
-
# discouraged, since IE allows script execution within conditional
|
29
|
-
# comments.
|
30
|
-
:allow_comments => false,
|
31
|
-
|
32
|
-
# HTML attributes to add to specific elements. By default, no attributes
|
33
|
-
# are added.
|
34
|
-
:add_attributes => {},
|
35
|
-
|
36
|
-
# HTML attributes to allow in specific elements. By default, no attributes
|
37
|
-
# are allowed. Use the symbol :data to indicate that arbitrary HTML5
|
38
|
-
# data-* attributes should be allowed.
|
39
|
-
:attributes => {},
|
40
|
-
|
41
|
-
# HTML elements to allow. By default, no elements are allowed (which means
|
42
|
-
# that all HTML will be stripped).
|
43
|
-
:elements => [],
|
44
|
-
|
45
|
-
# Output format. Supported formats are :html and :xhtml. Default is :html.
|
46
|
-
:output => :html,
|
47
|
-
|
48
|
-
# Character encoding to use for HTML output. Default is 'utf-8'.
|
49
|
-
:output_encoding => 'utf-8',
|
50
|
-
|
51
|
-
# URL handling protocols to allow in specific attributes. By default, no
|
52
|
-
# protocols are allowed. Use :relative in place of a protocol if you want
|
53
|
-
# to allow relative URLs sans protocol.
|
54
|
-
:protocols => {},
|
55
|
-
|
56
|
-
# If this is true, Sanitize will remove the contents of any filtered
|
57
|
-
# elements in addition to the elements themselves. By default, Sanitize
|
58
|
-
# leaves the safe parts of an element's contents behind when the element
|
59
|
-
# is removed.
|
60
|
-
#
|
61
|
-
# If this is an Array of element names, then only the contents of the
|
62
|
-
# specified elements (when filtered) will be removed, and the contents of
|
63
|
-
# all other filtered elements will be left behind.
|
64
|
-
:remove_contents => false,
|
65
|
-
|
66
|
-
# Transformers allow you to filter or alter nodes using custom logic. See
|
67
|
-
# README.rdoc for details and examples.
|
68
|
-
:transformers => [],
|
69
|
-
|
70
|
-
# By default, transformers perform depth-first traversal (deepest node
|
71
|
-
# upward). This setting allows you to specify transformers that should
|
72
|
-
# perform breadth-first traversal (top node downward).
|
73
|
-
:transformers_breadth => [],
|
74
7
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
8
|
+
# Deeply freezes and returns the given configuration Hash.
|
9
|
+
def self.freeze_config(config)
  # Freeze the members first so the whole structure ends up immutable,
  # then freeze the container (or scalar) itself.
  case config
  when Hash
    config.each_value { |value| freeze_config(value) }
  when Array, Set
    config.each { |item| freeze_config(item) }
  end

  config.freeze
end
|
18
|
+
|
19
|
+
# Returns a new Hash containing the result of deeply merging *other_config*
|
20
|
+
# into *config*. Does not modify *config* or *other_config*.
|
21
|
+
#
|
22
|
+
# This is the safest way to use a built-in Sanitize config as the basis for
|
23
|
+
# your own custom config.
|
24
|
+
def self.merge(config, other_config = {})
  raise ArgumentError, 'config must be a Hash' unless Hash === config
  raise ArgumentError, 'other_config must be a Hash' unless Hash === other_config

  # Walk the union of keys (config's keys first, then any new ones) and build
  # a brand new Hash so that neither argument is modified.
  (config.keys | other_config.keys).each_with_object({}) do |key, merged|
    oldval = config[key]

    # Keys that are not overridden are carried over as copies.
    unless other_config.has_key?(key)
      merged[key] = can_dupe?(oldval) ? oldval.dup : oldval
      next
    end

    newval = other_config[key]

    merged[key] =
      if Hash === oldval && Hash === newval
        # Nothing to merge into an empty Hash, so a copy of the override wins.
        oldval.empty? ? newval.dup : merge(oldval, newval)
      elsif Array === newval && key != :transformers
        # Arrays become Sets, except for :transformers which stays as given.
        Set.new(newval)
      elsif can_dupe?(newval)
        newval.dup
      else
        newval
      end
  end
end
|
51
|
+
|
52
|
+
# Returns `true` if `dup` may be safely called on _value_, `false`
|
53
|
+
# otherwise.
|
54
|
+
def self.can_dupe?(value)
  # Singletons (nil, true, false) are never duplicated.
  return false if value.nil? || true == value || false == value

  # Neither are methods, numbers or symbols.
  [Method, Numeric, Symbol].none? { |klass| klass === value }
end
|
57
|
+
private_class_method :can_dupe?
|
83
58
|
|
84
|
-
}
|
85
59
|
end
|
86
60
|
end
|
data/lib/sanitize/css.rb
ADDED
@@ -0,0 +1,348 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'crass'
|
4
|
+
require 'set'
|
5
|
+
|
6
|
+
class Sanitize; class CSS
|
7
|
+
attr_reader :config
|
8
|
+
|
9
|
+
# -- Class Methods -----------------------------------------------------------
|
10
|
+
|
11
|
+
# Sanitizes inline CSS style properties.
|
12
|
+
#
|
13
|
+
# This is most useful for sanitizing non-stylesheet fragments of CSS like you
|
14
|
+
# would find in the `style` attribute of an HTML element. To sanitize a full
|
15
|
+
# CSS stylesheet, use {.stylesheet}.
|
16
|
+
#
|
17
|
+
# @example
|
18
|
+
# Sanitize::CSS.properties("background: url(foo.png); color: #fff;")
|
19
|
+
#
|
20
|
+
# @return [String] Sanitized CSS properties.
|
21
|
+
def self.properties(css, config = {})
  # One-shot convenience: build a sanitizer for this call only.
  sanitizer = new(config)
  sanitizer.properties(css)
end
|
24
|
+
|
25
|
+
# Sanitizes a full CSS stylesheet.
|
26
|
+
#
|
27
|
+
# A stylesheet may include selectors, at-rules, and comments. To sanitize only
|
28
|
+
# inline style properties such as the contents of an HTML `style` attribute,
|
29
|
+
# use {.properties}.
|
30
|
+
#
|
31
|
+
# @example
|
32
|
+
# css = %[
|
33
|
+
# .foo {
|
34
|
+
# background: url(foo.png);
|
35
|
+
# color: #fff;
|
36
|
+
# }
|
37
|
+
#
|
38
|
+
# #bar {
|
39
|
+
# font: 42pt 'Comic Sans MS';
|
40
|
+
# }
|
41
|
+
# ]
|
42
|
+
#
|
43
|
+
# Sanitize::CSS.stylesheet(css, Sanitize::Config::RELAXED)
|
44
|
+
#
|
45
|
+
# @return [String] Sanitized CSS stylesheet.
|
46
|
+
def self.stylesheet(css, config = {})
  # One-shot convenience: build a sanitizer for this call only.
  sanitizer = new(config)
  sanitizer.stylesheet(css)
end
|
49
|
+
|
50
|
+
# Sanitizes the given Crass CSS parse tree and all its children, modifying it
|
51
|
+
# in place.
|
52
|
+
#
|
53
|
+
# @example
|
54
|
+
# css = %[
|
55
|
+
# .foo {
|
56
|
+
# background: url(foo.png);
|
57
|
+
# color: #fff;
|
58
|
+
# }
|
59
|
+
#
|
60
|
+
# #bar {
|
61
|
+
# font: 42pt 'Comic Sans MS';
|
62
|
+
# }
|
63
|
+
# ]
|
64
|
+
#
|
65
|
+
# tree = Crass.parse(css)
|
66
|
+
# Sanitize::CSS.tree!(tree, Sanitize::Config::RELAXED)
|
67
|
+
#
|
68
|
+
# @return [Array] Sanitized Crass CSS parse tree.
|
69
|
+
def self.tree!(tree, config = {})
  # One-shot convenience: build a sanitizer for this call only.
  sanitizer = new(config)
  sanitizer.tree!(tree)
end
|
72
|
+
|
73
|
+
# -- Instance Methods --------------------------------------------------------
|
74
|
+
|
75
|
+
# Returns a new Sanitize::CSS object initialized with the settings in
|
76
|
+
# _config_.
|
77
|
+
def initialize(config = {})
  # Accept either a full Sanitize config (CSS settings under :css) or a bare
  # CSS config Hash.
  css_overrides = config[:css] || config
  @config = Config.merge(Config::DEFAULT[:css], css_overrides)

  # Cache the at-rule lists as Sets.
  @at_rules, @at_rules_with_properties, @at_rules_with_styles =
    [:at_rules, :at_rules_with_properties, :at_rules_with_styles].map do |key|
      Set.new(@config[key])
    end

  @import_url_validator = @config[:import_url_validator]
end
|
85
|
+
|
86
|
+
# Sanitizes inline CSS style properties.
|
87
|
+
#
|
88
|
+
# This is most useful for sanitizing non-stylesheet fragments of CSS like you
|
89
|
+
# would find in the `style` attribute of an HTML element. To sanitize a full
|
90
|
+
# CSS stylesheet, use {#stylesheet}.
|
91
|
+
#
|
92
|
+
# @example
|
93
|
+
# scss = Sanitize::CSS.new(Sanitize::Config::RELAXED)
|
94
|
+
# scss.properties("background: url(foo.png); color: #fff;")
|
95
|
+
#
|
96
|
+
# @return [String] Sanitized CSS properties.
|
97
|
+
def properties(css)
  # Parser flags mirror this sanitizer's comment and hack settings.
  parser_options = {
    :preserve_comments => @config[:allow_comments],
    :preserve_hacks    => @config[:allow_hacks]
  }

  parsed = Crass.parse_properties(css, parser_options)

  # Sanitize the parsed nodes in place, then serialize them back to text.
  tree!(parsed)
  Crass::Parser.stringify(parsed)
end
|
105
|
+
|
106
|
+
# Sanitizes a full CSS stylesheet.
|
107
|
+
#
|
108
|
+
# A stylesheet may include selectors, at-rules, and comments. To sanitize only
|
109
|
+
# inline style properties such as the contents of an HTML `style` attribute,
|
110
|
+
# use {#properties}.
|
111
|
+
#
|
112
|
+
# @example
|
113
|
+
# css = %[
|
114
|
+
# .foo {
|
115
|
+
# background: url(foo.png);
|
116
|
+
# color: #fff;
|
117
|
+
# }
|
118
|
+
#
|
119
|
+
# #bar {
|
120
|
+
# font: 42pt 'Comic Sans MS';
|
121
|
+
# }
|
122
|
+
# ]
|
123
|
+
#
|
124
|
+
# scss = Sanitize::CSS.new(Sanitize::Config::RELAXED)
|
125
|
+
# scss.stylesheet(css)
|
126
|
+
#
|
127
|
+
# @return [String] Sanitized CSS stylesheet.
|
128
|
+
def stylesheet(css)
  # Parser flags mirror this sanitizer's comment and hack settings.
  parser_options = {
    :preserve_comments => @config[:allow_comments],
    :preserve_hacks    => @config[:allow_hacks]
  }

  parsed = Crass.parse(css, parser_options)

  # Sanitize the parsed nodes in place, then serialize them back to text.
  tree!(parsed)
  Crass::Parser.stringify(parsed)
end
|
136
|
+
|
137
|
+
# Sanitizes the given Crass CSS parse tree and all its children, modifying it
|
138
|
+
# in place.
|
139
|
+
#
|
140
|
+
# @example
|
141
|
+
# css = %[
|
142
|
+
# .foo {
|
143
|
+
# background: url(foo.png);
|
144
|
+
# color: #fff;
|
145
|
+
# }
|
146
|
+
#
|
147
|
+
# #bar {
|
148
|
+
# font: 42pt 'Comic Sans MS';
|
149
|
+
# }
|
150
|
+
# ]
|
151
|
+
#
|
152
|
+
# scss = Sanitize::CSS.new(Sanitize::Config::RELAXED)
|
153
|
+
# tree = Crass.parse(css)
|
154
|
+
#
|
155
|
+
# scss.tree!(tree)
|
156
|
+
#
|
157
|
+
# @return [Array] Sanitized Crass CSS parse tree.
|
158
|
+
def tree!(tree)
  # True only while the most recently visited property survived sanitizing.
  after_property = false

  # Every node is replaced by itself, by a sanitized version, or by nil.
  tree.map! do |node|
    next nil if node.nil?

    case node[:node]
    when :whitespace
      node

    when :comment
      @config[:allow_comments] ? node : nil

    when :at_rule
      after_property = false
      at_rule!(node)

    when :style_rule
      after_property = false
      tree!(node[:children])
      node

    when :property
      kept = property!(node)
      after_property = !kept.nil?
      kept

    when :semicolon
      # Keep a semicolon only when it terminates a property that survived;
      # dropping the rest avoids leaving redundant semicolons behind.
      keep = after_property
      after_property = false
      keep ? node : nil
    end
  end

  tree
end
|
199
|
+
|
200
|
+
# -- Protected Instance Methods ----------------------------------------------
|
201
|
+
protected
|
202
|
+
|
203
|
+
# Sanitizes a CSS at-rule node. Returns the sanitized node, or `nil` if the
|
204
|
+
# current config doesn't allow this at-rule.
|
205
|
+
def at_rule!(rule)
  rule_name = rule[:name].downcase

  # At-rules whose block holds nested style rules: re-parse the block as
  # rules and sanitize them.
  if @at_rules_with_styles.include?(rule_name)
    nested_rules = Crass::Parser.parse_rules(rule[:block],
      :preserve_comments => @config[:allow_comments],
      :preserve_hacks    => @config[:allow_hacks])

    rule[:block] = tree!(nested_rules)
    return rule
  end

  # At-rules whose block holds plain properties: re-parse the block as
  # properties and sanitize them.
  if @at_rules_with_properties.include?(rule_name)
    nested_props = Crass::Parser.parse_properties(rule[:block],
      :preserve_comments => @config[:allow_comments],
      :preserve_hacks    => @config[:allow_hacks])

    rule[:block] = tree!(nested_props)
    return rule
  end

  # Anything else must be an allowed block-less at-rule, and an @import must
  # additionally pass the URL check.
  return nil unless @at_rules.include?(rule_name)
  return nil if rule_name == "import" && !import_url_allowed?(rule)
  return nil if rule.has_key?(:block)

  rule
end
|
231
|
+
|
232
|
+
# Passes the URL value of an @import rule to a block to ensure
|
233
|
+
# it's an allowed URL
|
234
|
+
def import_url_allowed?(rule)
  # Without a configured validator every @import is accepted.
  validator = @import_url_validator
  return true unless validator

  # The URL is carried by the first :url or :string token of the rule.
  target = rule[:tokens].find { |token| [:url, :string].include?(token[:node]) }
  import_url = target && target[:value]

  # An @import with no URL value is never allowed.
  return false unless import_url

  validator.call(import_url)
end
|
244
|
+
|
245
|
+
# Sanitizes a CSS property node. Returns the sanitized node, or `nil` if the
|
246
|
+
# current config doesn't allow this property.
|
247
|
+
def property!(prop)
  prop_name = prop[:name].downcase

  # When IE * and _ hacks are allowed, match the name without its prefix.
  prop_name.slice!(0) if @config[:allow_hacks] && prop_name =~ /\A[*_]/

  return nil unless @config[:properties].include?(prop_name)

  # Work queue of value nodes. Function arguments are appended while
  # iterating so nested values are inspected as well.
  pending   = prop[:children].dup
  seen_text = String.new

  pending.each do |child|
    value = child[:value]

    case child[:node]
    when :bad_url
      return nil

    when :url
      return nil unless valid_url?(child)

    when :ident
      seen_text << value.downcase if String === value

    when :function
      if child.key?(:name)
        func_name = child[:name].downcase
        return nil if func_name == 'url' && !valid_url?(child)

        # Reject `expression`, whether named directly or spelled out by the
        # text accumulated so far.
        seen_text << func_name
        return nil if func_name == 'expression' || seen_text == 'expression'
      end

      case value
      when Array
        pending.concat(value)
      when String
        lowered = value.downcase
        seen_text << lowered
        return nil if lowered == 'expression' || seen_text == 'expression'
      end
    end
  end

  prop
end
|
297
|
+
|
298
|
+
# Returns `true` if the given node (which may be of type `:url` or
|
299
|
+
# `:function`, since the CSS syntax can produce both) uses an allowlisted
|
300
|
+
# protocol.
|
301
|
+
def valid_url?(node)
  # Step 1: pull the raw URL text out of the node. Anything that is not a
  # well-formed :url or url() function node is rejected outright.
  case node[:node]
  when :function
    return false unless node.key?(:name) && node[:name].downcase == 'url'
    return false unless Array === node[:value]

    # A URL function's `:value` should be an array containing no more than one
    # `:string` node and any number of `:whitespace` nodes.
    #
    # If it contains more than one `:string` node, or if it contains any other
    # nodes except `:whitespace` nodes, it's not valid.
    string_token = nil

    node[:value].each do |token|
      return false unless Hash === token

      case token[:node]
      when :whitespace
        next
      when :string
        return false unless string_token.nil?
        string_token = token
      else
        return false
      end
    end

    return false if string_token.nil?
    url = string_token[:value]

  when :url
    url = node[:value]

  else
    return false
  end

  # Step 2: a URL with an explicit protocol must use a configured protocol;
  # a URL without one is only valid when :relative is configured. This is
  # the method's result (the old trailing `false` could never be reached).
  if url =~ Sanitize::REGEX_PROTOCOL
    @config[:protocols].include?(Regexp.last_match(1).downcase)
  else
    @config[:protocols].include?(:relative)
  end
end
|
347
|
+
|
348
|
+
end; end
|
@@ -1,11 +1,11 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
class Sanitize; module Transformers
|
2
4
|
|
3
5
|
CleanCDATA = lambda do |env|
|
4
|
-
return if env[:is_whitelisted]
|
5
|
-
|
6
6
|
node = env[:node]
|
7
7
|
|
8
|
-
if node.
|
8
|
+
if node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
|
9
9
|
node.replace(Nokogiri::XML::Text.new(node.text, node.document))
|
10
10
|
end
|
11
11
|
end
|
@@ -1,10 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
class Sanitize; module Transformers
|
2
4
|
|
3
5
|
CleanComment = lambda do |env|
|
4
|
-
return if env[:is_whitelisted]
|
5
|
-
|
6
6
|
node = env[:node]
|
7
|
-
|
7
|
+
|
8
|
+
if node.type == Nokogiri::XML::Node::COMMENT_NODE
|
9
|
+
node.unlink unless env[:is_allowlisted]
|
10
|
+
end
|
8
11
|
end
|
9
12
|
|
10
13
|
end; end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
class Sanitize; module Transformers; module CSS
|
2
|
+
|
3
|
+
# Enforces a CSS allowlist on the contents of `style` attributes.
|
4
|
+
class CleanAttribute
  # Accepts either a ready-made Sanitize::CSS sanitizer or a config used to
  # build one.
  def initialize(sanitizer_or_config)
    @scss =
      if Sanitize::CSS === sanitizer_or_config
        sanitizer_or_config
      else
        Sanitize::CSS.new(sanitizer_or_config)
      end
  end

  # Sanitizes the `style` attribute of an element node, unless the node is
  # already allowlisted. The attribute is removed when nothing survives.
  def call(env)
    node = env[:node]

    return unless node.type == Nokogiri::XML::Node::ELEMENT_NODE
    return unless node.key?('style')
    return if env[:is_allowlisted]

    style_attr = node.attribute('style')
    sanitized  = @scss.properties(style_attr.value)

    if sanitized.strip.empty?
      style_attr.unlink
    else
      style_attr.value = sanitized
    end
  end
end
|
29
|
+
|
30
|
+
# Enforces a CSS allowlist on the contents of `<style>` elements.
|
31
|
+
class CleanElement
  # Accepts either a ready-made Sanitize::CSS sanitizer or a config used to
  # build one.
  def initialize(sanitizer_or_config)
    already_built = Sanitize::CSS === sanitizer_or_config
    @scss = already_built ? sanitizer_or_config : Sanitize::CSS.new(sanitizer_or_config)
  end

  # Sanitizes the stylesheet text of a `<style>` element. The element is
  # removed when nothing survives; otherwise its children are replaced by a
  # single text node holding the sanitized CSS.
  def call(env)
    node = env[:node]

    return unless node.type == Nokogiri::XML::Node::ELEMENT_NODE
    return unless env[:node_name] == 'style'

    sanitized = @scss.stylesheet(node.content)

    if sanitized.strip.empty?
      node.unlink
    else
      node.children.unlink
      node << Nokogiri::XML::Text.new(sanitized, node.document)
    end
  end
end
|
56
|
+
|
57
|
+
end; end; end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class Sanitize; module Transformers
|
4
|
+
|
5
|
+
CleanDoctype = lambda do |env|
  # Nodes that an earlier transformer allowlisted are left alone.
  next if env[:is_allowlisted]

  doctype = env[:node]
  next unless doctype.type == Nokogiri::XML::Node::DTD_NODE

  # An allowed doctype has its name set to 'html'; a disallowed one is
  # removed from the document.
  if env[:config][:allow_doctype]
    doctype.name = 'html'
  else
    doctype.unlink
  end
end
|
18
|
+
|
19
|
+
end; end
|