sanitize 2.1.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/HISTORY.md +93 -14
- data/README.md +346 -134
- data/lib/sanitize.rb +177 -132
- data/lib/sanitize/config.rb +53 -79
- data/lib/sanitize/config/basic.rb +12 -32
- data/lib/sanitize/config/default.rb +103 -0
- data/lib/sanitize/config/relaxed.rb +517 -52
- data/lib/sanitize/config/restricted.rb +3 -23
- data/lib/sanitize/css.rb +218 -0
- data/lib/sanitize/transformers/clean_cdata.rb +3 -3
- data/lib/sanitize/transformers/clean_comment.rb +6 -3
- data/lib/sanitize/transformers/clean_css.rb +57 -0
- data/lib/sanitize/transformers/clean_doctype.rb +13 -0
- data/lib/sanitize/transformers/clean_element.rb +99 -129
- data/lib/sanitize/version.rb +3 -1
- data/test/common.rb +34 -0
- data/test/test_clean_comment.rb +51 -0
- data/test/test_clean_css.rb +66 -0
- data/test/test_clean_doctype.rb +71 -0
- data/test/test_clean_element.rb +399 -0
- data/test/test_config.rb +65 -0
- data/test/test_malicious_css.rb +42 -0
- data/test/test_malicious_html.rb +128 -0
- data/test/test_parser.rb +104 -0
- data/test/test_sanitize.rb +65 -693
- data/test/test_sanitize_css.rb +222 -0
- data/test/test_transformers.rb +144 -0
- data/test/test_unicode.rb +84 -0
- metadata +56 -8
@@ -1,29 +1,9 @@
|
|
1
|
-
|
2
|
-
# Copyright (c) 2013 Ryan Grove <ryan@wonko.com>
|
3
|
-
#
|
4
|
-
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
-
# of this software and associated documentation files (the 'Software'), to deal
|
6
|
-
# in the Software without restriction, including without limitation the rights
|
7
|
-
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
-
# copies of the Software, and to permit persons to whom the Software is
|
9
|
-
# furnished to do so, subject to the following conditions:
|
10
|
-
#
|
11
|
-
# The above copyright notice and this permission notice shall be included in all
|
12
|
-
# copies or substantial portions of the Software.
|
13
|
-
#
|
14
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
-
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
-
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
-
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
-
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
-
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
-
# SOFTWARE.
|
21
|
-
#++
|
1
|
+
# encoding: utf-8
|
22
2
|
|
23
3
|
class Sanitize
|
24
4
|
module Config
|
25
|
-
RESTRICTED =
|
5
|
+
RESTRICTED = freeze_config(
|
26
6
|
:elements => %w[b em i strong u]
|
27
|
-
|
7
|
+
)
|
28
8
|
end
|
29
9
|
end
|
data/lib/sanitize/css.rb
ADDED
@@ -0,0 +1,218 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'crass'
|
4
|
+
require 'set'
|
5
|
+
|
6
|
+
class Sanitize; class CSS
|
7
|
+
attr_reader :config
|
8
|
+
|
9
|
+
# Names of CSS at-rules whose blocks may contain properties. Frozen so this
# shared lookup set cannot be modified at runtime.
AT_RULES_WITH_PROPERTIES = Set.new(%w[font-face page]).freeze

# Names of CSS at-rules whose blocks may contain style rules. Frozen so this
# shared lookup set cannot be modified at runtime.
AT_RULES_WITH_STYLES = Set.new(%w[document media supports]).freeze
|
14
|
+
|
15
|
+
# -- Class Methods -----------------------------------------------------------
|
16
|
+
|
17
|
+
# One-shot helper for sanitizing inline CSS style properties.
#
# Builds a Sanitize::CSS instance from _config_ and hands _css_ to its
# {#properties} method. Intended for property lists such as the value of an
# HTML `style` attribute; for a complete stylesheet use {.stylesheet}.
#
# @example
#   Sanitize::CSS.properties("background: url(foo.png); color: #fff;")
#
# @return [String] Sanitized CSS properties.
def self.properties(css, config = {})
  sanitizer = new(config)
  sanitizer.properties(css)
end
|
30
|
+
|
31
|
+
# One-shot helper for sanitizing a full CSS stylesheet.
#
# Builds a Sanitize::CSS instance from _config_ and returns the result of
# calling {#stylesheet} on it with _css_.
def self.stylesheet(css, config = {})
  sanitizer = new(config)
  sanitizer.stylesheet(css)
end
|
34
|
+
|
35
|
+
# One-shot helper for sanitizing a Crass parse tree in place.
#
# Builds a Sanitize::CSS instance from _config_ and returns the result of
# calling {#tree!} on it with _tree_.
def self.tree!(tree, config = {})
  sanitizer = new(config)
  sanitizer.tree!(tree)
end
|
38
|
+
|
39
|
+
# -- Instance Methods --------------------------------------------------------
|
40
|
+
|
41
|
+
# Creates a Sanitize::CSS object configured from _config_.
#
# The CSS-specific settings are read from `config[:css]` when that entry is
# truthy; otherwise _config_ itself is treated as the CSS settings. They are
# passed to `Config.merge` together with `Config::DEFAULT[:css]`.
def initialize(config = {})
  overrides = config[:css] || config
  @config   = Config.merge(Config::DEFAULT[:css], overrides)
end
|
46
|
+
|
47
|
+
# Sanitizes a list of inline CSS style properties.
#
# Use this for CSS fragments that are not full stylesheets, such as the value
# of an HTML element's `style` attribute. For a complete stylesheet, use
# {#stylesheet} instead.
#
# @example
#   scss = Sanitize::CSS.new(Sanitize::Config::RELAXED)
#   scss.properties("background: url(foo.png); color: #fff;")
#
# @return [String] Sanitized CSS properties.
def properties(css)
  parser_options = {
    :preserve_comments => @config[:allow_comments],
    :preserve_hacks    => @config[:allow_hacks]
  }

  parsed = Crass.parse_properties(css, parser_options)

  # tree! filters the parsed nodes in place before they are serialized.
  tree!(parsed)
  Crass::Parser.stringify(parsed)
end
|
66
|
+
|
67
|
+
# Sanitizes a complete CSS stylesheet.
#
# A stylesheet may contain selectors, @ rules, and comments. When you only
# need to clean inline style properties (for example the contents of an HTML
# `style` attribute), use {#properties} instead.
#
# @example
#   css = %[
#     .foo {
#       background: url(foo.png);
#       color: #fff;
#     }
#
#     #bar {
#       font: 42pt 'Comic Sans MS';
#     }
#   ]
#
#   scss = Sanitize::CSS.new(Sanitize::Config::RELAXED)
#   scss.stylesheet(css)
#
# @return [String] Sanitized CSS stylesheet.
def stylesheet(css)
  parser_options = {
    :preserve_comments => @config[:allow_comments],
    :preserve_hacks    => @config[:allow_hacks]
  }

  parsed = Crass.parse(css, parser_options)

  # tree! filters the parsed nodes in place before they are serialized.
  tree!(parsed)
  Crass::Parser.stringify(parsed)
end
|
97
|
+
|
98
|
+
# Sanitizes a Crass CSS parse tree, including nested children, by rewriting
# the given array in place.
#
# Each entry is replaced either by its sanitized node or by `nil` when the
# node is not allowed (or is of a type this method does not handle).
#
# @example
#   scss = Sanitize::CSS.new(Sanitize::Config::RELAXED)
#   tree = Crass.parse(css)
#
#   scss.tree!(tree)
#
# @return [Array] Sanitized Crass CSS parse tree.
def tree!(tree)
  tree.map! do |node|
    if node.nil?
      nil
    else
      case node[:node]
      when :at_rule
        at_rule!(node)

      when :comment
        # Comments survive only when the config explicitly allows them.
        @config[:allow_comments] ? node : nil

      when :property
        property!(node)

      when :style_rule
        # Clean the rule's children in place, then keep the rule itself.
        tree!(node[:children])
        node

      when :whitespace
        node
      end
      # Any other node type falls out of the case as nil and is dropped.
    end
  end

  tree
end
|
135
|
+
|
136
|
+
# -- Protected Instance Methods ----------------------------------------------
|
137
|
+
protected
|
138
|
+
|
139
|
+
# Sanitizes a CSS at-rule node.
#
# * At-rules whose lowercased name is not listed in `@config[:at_rules]` are
#   rejected.
# * At-rules listed in AT_RULES_WITH_STYLES have their block re-parsed as
#   style rules and sanitized with {#tree!}.
# * At-rules listed in AT_RULES_WITH_PROPERTIES have their block re-parsed as
#   properties and sanitized with {#tree!}.
# * Any other allowed at-rule has its block (if any) removed.
#
# An at-rule that should carry a block but has none (for example
# `@media screen;`) is rejected rather than dereferencing the missing block.
#
# @param rule [Hash] at-rule node; modified in place.
# @return [Hash, nil] the sanitized node, or `nil` if the current config
#   doesn't allow this at-rule or the rule lacks a required block.
def at_rule!(rule)
  name = rule[:name].downcase
  return nil unless @config[:at_rules].include?(name)

  expects_styles     = AT_RULES_WITH_STYLES.include?(name)
  expects_properties = !expects_styles && AT_RULES_WITH_PROPERTIES.include?(name)

  unless expects_styles || expects_properties
    # This at-rule isn't one whose block we know how to sanitize, so any
    # block it carries is discarded.
    rule.delete(:block)
    return rule
  end

  block = rule[:block]

  # Nothing to parse: treat a block-less rule of this kind as invalid.
  return nil if block.nil?

  parser_options = {
    :preserve_comments => @config[:allow_comments],
    :preserve_hacks    => @config[:allow_hacks]
  }

  parsed =
    if expects_styles
      Crass::Parser.parse_rules(block[:value], parser_options)
    else
      Crass::Parser.parse_properties(block[:value], parser_options)
    end

  block[:value] = tree!(parsed)
  rule
end
|
165
|
+
|
166
|
+
# Sanitizes a CSS property node.
#
# The property is rejected when:
#
# * its lowercased name (minus a leading `*` or `_` hack character when
#   `@config[:allow_hacks]` is set) is not listed in `@config[:properties]`;
# * its value contains an `expression` function, including one whose name is
#   split across preceding identifiers (e.g. `ex/**/pression(...)`);
# * it contains a URL whose protocol is not listed in `@config[:protocols]`
#   (URLs without a protocol require `:relative` to be listed);
# * it contains a `:bad_url` node.
#
# @param prop [Hash] property node; it is not modified.
# @return [Hash, nil] _prop_ itself, or `nil` if the current config doesn't
#   allow this property.
def property!(prop)
  name = prop[:name].downcase

  # Preserve IE * and _ hacks if desired: the hack character is ignored when
  # looking the property up in the whitelist.
  if @config[:allow_hacks]
    name = name[1..-1] if name.start_with?('*', '_')
  end

  return nil unless @config[:properties].include?(name)

  # Work queue of nodes still to inspect. It starts as a copy of the
  # property's children so that appending nested nodes never touches the
  # parse tree itself.
  queue = prop[:children].dup

  # Concatenation of the textual values seen so far, used to spot an
  # `expression` keyword that has been split into several tokens.
  combined_value = ''.dup

  until queue.empty?
    child = queue.shift
    value = child[:value]

    case child[:node]
    when :ident
      combined_value << value if String === value

    when :function
      if child.key?(:name)
        function_name = child[:name].downcase

        return nil if function_name == 'expression'

        # Also catch a function name that only becomes `expression` once it
        # is joined with the text accumulated before it.
        return nil if (combined_value + function_name).downcase == 'expression'
      end

      if Array === value
        # Nested nodes are inspected after everything already queued.
        queue.concat(value)
      elsif String === value
        combined_value << value

        if value.downcase == 'expression' || combined_value.downcase == 'expression'
          return nil
        end
      end

    when :url
      allowed =
        if value =~ Sanitize::REGEX_PROTOCOL
          @config[:protocols].include?($1.downcase)
        else
          @config[:protocols].include?(:relative)
        end

      return nil unless allowed

    when :bad_url
      return nil
    end
  end

  prop
end
|
217
|
+
|
218
|
+
end; end
|
@@ -1,11 +1,11 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
class Sanitize; module Transformers
|
2
4
|
|
3
5
|
CleanCDATA = lambda do |env|
|
4
|
-
return if env[:is_whitelisted]
|
5
|
-
|
6
6
|
node = env[:node]
|
7
7
|
|
8
|
-
if node.
|
8
|
+
if node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
|
9
9
|
node.replace(Nokogiri::XML::Text.new(node.text, node.document))
|
10
10
|
end
|
11
11
|
end
|
@@ -1,10 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
class Sanitize; module Transformers
|
2
4
|
|
3
5
|
CleanComment = lambda do |env|
|
4
|
-
return if env[:is_whitelisted]
|
5
|
-
|
6
6
|
node = env[:node]
|
7
|
-
|
7
|
+
|
8
|
+
if node.type == Nokogiri::XML::Node::COMMENT_NODE
|
9
|
+
node.unlink unless env[:is_whitelisted]
|
10
|
+
end
|
8
11
|
end
|
9
12
|
|
10
13
|
end; end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
class Sanitize; module Transformers; module CSS
|
2
|
+
|
3
|
+
# Transformer that enforces a CSS whitelist on the contents of `style`
# attributes.
class CleanAttribute
  # Accepts either a Sanitize::CSS instance, which is used directly, or any
  # other object, which is passed to Sanitize::CSS.new as its config.
  def initialize(sanitizer_or_config)
    @scss =
      case sanitizer_or_config
      when Sanitize::CSS then sanitizer_or_config
      else Sanitize::CSS.new(sanitizer_or_config)
      end
  end

  # Sanitizes the `style` attribute of the element in `env[:node]`.
  #
  # Does nothing for non-element nodes, for elements without a `style`
  # attribute, or when `env[:is_whitelisted]` is set. If the sanitized CSS is
  # blank the attribute is removed; otherwise its value is replaced.
  def call(env)
    node = env[:node]

    return if node.type != Nokogiri::XML::Node::ELEMENT_NODE
    return unless node.key?('style')
    return if env[:is_whitelisted]

    style_attr = node.attribute('style')
    cleaned    = @scss.properties(style_attr.value)

    if cleaned.strip.empty?
      style_attr.unlink
    else
      style_attr.value = cleaned
    end
  end
end
|
29
|
+
|
30
|
+
# Transformer that enforces a CSS whitelist on the contents of `<style>`
# elements.
class CleanElement
  # Accepts either a Sanitize::CSS instance, which is used directly, or any
  # other object, which is passed to Sanitize::CSS.new as its config.
  def initialize(sanitizer_or_config)
    @scss =
      case sanitizer_or_config
      when Sanitize::CSS then sanitizer_or_config
      else Sanitize::CSS.new(sanitizer_or_config)
      end
  end

  # Sanitizes the stylesheet inside the `<style>` element in `env[:node]`.
  #
  # Does nothing unless the node is an element and `env[:node_name]` is
  # 'style'. If the sanitized CSS is blank the whole element is removed;
  # otherwise its children are replaced by a single text node holding the
  # sanitized CSS.
  def call(env)
    node = env[:node]

    return if node.type != Nokogiri::XML::Node::ELEMENT_NODE
    return unless env[:node_name] == 'style'

    cleaned = @scss.stylesheet(node.content)

    if cleaned.strip.empty?
      node.unlink
    else
      node.children.unlink
      node << Nokogiri::XML::Text.new(cleaned, node.document)
    end
  end
end
|
56
|
+
|
57
|
+
end; end; end
|
@@ -1,155 +1,125 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
# For faster lookups.
|
34
|
-
@add_attributes = config[:add_attributes]
|
35
|
-
@allowed_elements = Set.new(config[:elements])
|
36
|
-
@attributes = config[:attributes]
|
37
|
-
@protocols = config[:protocols]
|
38
|
-
@remove_all_contents = false
|
39
|
-
@remove_element_contents = Set.new
|
40
|
-
@whitespace_elements = Set.new(config[:whitespace_elements])
|
41
|
-
|
42
|
-
if config[:remove_contents].is_a?(Array)
|
43
|
-
@remove_element_contents.merge(config[:remove_contents].map(&:to_s))
|
44
|
-
else
|
45
|
-
@remove_all_contents = !!config[:remove_contents]
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
class Sanitize; module Transformers; class CleanElement
|
6
|
+
|
7
|
+
# Matches a valid HTML5 data attribute name. The unicode ranges included here
|
8
|
+
# are a conservative subset of the full range of characters that are
|
9
|
+
# technically allowed, with the intent of matching the most common characters
|
10
|
+
# used in data attribute names while excluding uncommon or potentially
|
11
|
+
# misleading characters, or characters with the potential to be normalized
|
12
|
+
# into unsafe or confusing forms.
|
13
|
+
#
|
14
|
+
# If you need data attr names with characters that aren't included here (such
|
15
|
+
# as combining marks, full-width characters, or CJK), please consider creating
|
16
|
+
# a custom transformer to validate attributes according to your needs.
|
17
|
+
#
|
18
|
+
# http://www.whatwg.org/specs/web-apps/current-work/multipage/elements.html#embedding-custom-non-visible-data-with-the-data-*-attributes
|
19
|
+
REGEX_DATA_ATTR = /\Adata-(?!xml)[a-z_][\w.\u00E0-\u00F6\u00F8-\u017F\u01DD-\u02AF-]*\z/u
|
20
|
+
|
21
|
+
def initialize(config)
|
22
|
+
@add_attributes = config[:add_attributes]
|
23
|
+
@attributes = config[:attributes].dup
|
24
|
+
@elements = config[:elements]
|
25
|
+
@protocols = config[:protocols]
|
26
|
+
@remove_all_contents = false
|
27
|
+
@remove_element_contents = Set.new
|
28
|
+
@whitespace_elements = {}
|
29
|
+
|
30
|
+
@attributes.each do |element_name, attrs|
|
31
|
+
unless element_name == :all
|
32
|
+
@attributes[element_name] = Set.new(attrs).merge(@attributes[:all] || [])
|
46
33
|
end
|
47
34
|
end
|
48
35
|
|
49
|
-
|
50
|
-
|
51
|
-
|
36
|
+
# Backcompat: if :whitespace_elements is a Set, convert it to a hash.
|
37
|
+
if config[:whitespace_elements].is_a?(Set)
|
38
|
+
config[:whitespace_elements].each do |element|
|
39
|
+
@whitespace_elements[element] = {:before => ' ', :after => ' '}
|
40
|
+
end
|
41
|
+
else
|
42
|
+
@whitespace_elements = config[:whitespace_elements]
|
43
|
+
end
|
52
44
|
|
53
|
-
|
45
|
+
if config[:remove_contents].is_a?(Set)
|
46
|
+
@remove_element_contents.merge(config[:remove_contents].map(&:to_s))
|
47
|
+
else
|
48
|
+
@remove_all_contents = !!config[:remove_contents]
|
49
|
+
end
|
50
|
+
end
|
54
51
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
# order to preserve readability.
|
59
|
-
if @whitespace_elements.include?(name)
|
60
|
-
node.add_previous_sibling(Nokogiri::XML::Text.new(' ', node.document))
|
52
|
+
def call(env)
|
53
|
+
node = env[:node]
|
54
|
+
return if node.type != Nokogiri::XML::Node::ELEMENT_NODE || env[:is_whitelisted]
|
61
55
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
56
|
+
name = env[:node_name]
|
57
|
+
|
58
|
+
# Delete any element that isn't in the config whitelist.
|
59
|
+
unless @elements.include?(name)
|
60
|
+
# Elements like br, div, p, etc. need to be replaced with whitespace in
|
61
|
+
# order to preserve readability.
|
62
|
+
if @whitespace_elements.include?(name)
|
63
|
+
node.add_previous_sibling(Nokogiri::XML::Text.new(@whitespace_elements[name][:before].to_s, node.document))
|
66
64
|
|
67
|
-
unless
|
68
|
-
node.
|
65
|
+
unless node.children.empty?
|
66
|
+
node.add_next_sibling(Nokogiri::XML::Text.new(@whitespace_elements[name][:after].to_s, node.document))
|
69
67
|
end
|
68
|
+
end
|
70
69
|
|
71
|
-
|
72
|
-
|
70
|
+
unless @remove_all_contents || @remove_element_contents.include?(name)
|
71
|
+
node.children.each {|n| node.add_previous_sibling(n) }
|
73
72
|
end
|
74
73
|
|
75
|
-
|
76
|
-
|
74
|
+
node.unlink
|
75
|
+
return
|
76
|
+
end
|
77
77
|
|
78
|
-
|
78
|
+
attr_whitelist = @attributes[name] || @attributes[:all]
|
79
79
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
node.attribute_nodes.each do |attr|
|
86
|
-
attr_name = attr.name.downcase
|
87
|
-
|
88
|
-
unless attr_whitelist.include?(attr_name)
|
89
|
-
# The attribute isn't explicitly whitelisted.
|
90
|
-
|
91
|
-
if allow_data_attributes && attr_name.start_with?('data-')
|
92
|
-
# Arbitrary data attributes are allowed. Verify that the attribute
|
93
|
-
# is a valid data attribute.
|
94
|
-
attr.unlink unless attr_name =~ REGEX_DATA_ATTR
|
95
|
-
else
|
96
|
-
# Either the attribute isn't a data attribute, or arbitrary data
|
97
|
-
# attributes aren't allowed. Remove the attribute.
|
98
|
-
attr.unlink
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
80
|
+
if attr_whitelist.nil?
|
81
|
+
# Delete all attributes from elements with no whitelisted attributes.
|
82
|
+
node.attribute_nodes.each {|attr| attr.unlink }
|
83
|
+
else
|
84
|
+
allow_data_attributes = attr_whitelist.include?(:data)
|
102
85
|
|
103
|
-
|
104
|
-
|
105
|
-
|
86
|
+
# Delete any attribute that isn't allowed on this element.
|
87
|
+
node.attribute_nodes.each do |attr|
|
88
|
+
attr_name = attr.name.downcase
|
106
89
|
|
107
|
-
|
108
|
-
|
109
|
-
next false unless protocol.has_key?(attr_name)
|
90
|
+
if attr_whitelist.include?(attr_name)
|
91
|
+
# The attribute is whitelisted.
|
110
92
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
!protocol[attr_name].include?(:relative)
|
115
|
-
end
|
93
|
+
# Remove any attributes that use unacceptable protocols.
|
94
|
+
if @protocols.include?(name) && @protocols[name].include?(attr_name)
|
95
|
+
attr_protocols = @protocols[name][attr_name]
|
116
96
|
|
117
|
-
if
|
118
|
-
attr.unlink
|
97
|
+
if attr.value.to_s.downcase =~ REGEX_PROTOCOL
|
98
|
+
attr.unlink unless attr_protocols.include?($1.downcase)
|
119
99
|
else
|
120
|
-
|
121
|
-
# time. Stripping it here prevents it from being escaped by the
|
122
|
-
# libxml2 workaround below.
|
123
|
-
attr.value = attr.value.strip
|
100
|
+
attr.unlink unless attr_protocols.include?(:relative)
|
124
101
|
end
|
125
102
|
end
|
103
|
+
else
|
104
|
+
# The attribute isn't whitelisted.
|
105
|
+
|
106
|
+
if allow_data_attributes && attr_name.start_with?('data-')
|
107
|
+
# Arbitrary data attributes are allowed. Verify that the attribute
|
108
|
+
# is a valid data attribute.
|
109
|
+
attr.unlink unless attr_name =~ REGEX_DATA_ATTR
|
110
|
+
else
|
111
|
+
# Either the attribute isn't a data attribute, or arbitrary data
|
112
|
+
# attributes aren't allowed. Remove the attribute.
|
113
|
+
attr.unlink
|
114
|
+
end
|
126
115
|
end
|
127
116
|
end
|
117
|
+
end
|
128
118
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
# non-whitelisted attribute.
|
133
|
-
#
|
134
|
-
# Sanitize works around this by implementing its own escaping for
|
135
|
-
# affected attributes, some of which can exist on any element and some
|
136
|
-
# of which can only exist on `<a>` elements.
|
137
|
-
#
|
138
|
-
# The relevant libxml2 code is here:
|
139
|
-
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
140
|
-
node.attribute_nodes.each do |attr|
|
141
|
-
attr_name = attr.name.downcase
|
142
|
-
if UNSAFE_LIBXML_ATTRS_GLOBAL.include?(attr_name) ||
|
143
|
-
(name == 'a' && UNSAFE_LIBXML_ATTRS_A.include?(attr_name))
|
144
|
-
attr.value = attr.value.gsub(UNSAFE_LIBXML_ESCAPE_REGEX, UNSAFE_LIBXML_ESCAPE_CHARS)
|
145
|
-
end
|
146
|
-
end
|
147
|
-
|
148
|
-
# Add required attributes.
|
149
|
-
if @add_attributes.has_key?(name)
|
150
|
-
@add_attributes[name].each {|key, val| node[key] = val }
|
151
|
-
end
|
119
|
+
# Add required attributes.
|
120
|
+
if @add_attributes.include?(name)
|
121
|
+
@add_attributes[name].each {|key, val| node[key] = val }
|
152
122
|
end
|
153
123
|
end
|
154
124
|
|
155
|
-
end; end
|
125
|
+
end; end; end
|