sanitize 2.1.1 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +520 -55
- data/LICENSE +1 -1
- data/README.md +438 -168
- data/lib/sanitize/config/basic.rb +12 -32
- data/lib/sanitize/config/default.rb +118 -0
- data/lib/sanitize/config/relaxed.rb +716 -53
- data/lib/sanitize/config/restricted.rb +3 -23
- data/lib/sanitize/config.rb +53 -79
- data/lib/sanitize/css.rb +348 -0
- data/lib/sanitize/transformers/clean_cdata.rb +3 -3
- data/lib/sanitize/transformers/clean_comment.rb +6 -3
- data/lib/sanitize/transformers/clean_css.rb +57 -0
- data/lib/sanitize/transformers/clean_doctype.rb +19 -0
- data/lib/sanitize/transformers/clean_element.rb +192 -124
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +172 -143
- data/test/common.rb +3 -0
- data/test/test_clean_comment.rb +47 -0
- data/test/test_clean_css.rb +67 -0
- data/test/test_clean_doctype.rb +71 -0
- data/test/test_clean_element.rb +545 -0
- data/test/test_config.rb +65 -0
- data/test/test_malicious_css.rb +42 -0
- data/test/test_malicious_html.rb +235 -0
- data/test/test_parser.rb +75 -0
- data/test/test_sanitize.rb +151 -675
- data/test/test_sanitize_css.rb +424 -0
- data/test/test_transformers.rb +230 -0
- metadata +44 -41
data/lib/sanitize.rb
CHANGED
@@ -1,94 +1,87 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
#--
|
3
|
-
# Copyright (c) 2013 Ryan Grove <ryan@wonko.com>
|
4
|
-
#
|
5
|
-
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
-
# of this software and associated documentation files (the 'Software'), to deal
|
7
|
-
# in the Software without restriction, including without limitation the rights
|
8
|
-
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
-
# copies of the Software, and to permit persons to whom the Software is
|
10
|
-
# furnished to do so, subject to the following conditions:
|
11
|
-
#
|
12
|
-
# The above copyright notice and this permission notice shall be included in all
|
13
|
-
# copies or substantial portions of the Software.
|
14
|
-
#
|
15
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
-
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
-
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
-
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
-
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
-
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
-
# SOFTWARE.
|
22
|
-
#++
|
23
2
|
|
3
|
+
require 'nokogiri'
|
24
4
|
require 'set'
|
25
5
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
6
|
+
require_relative 'sanitize/version'
|
7
|
+
require_relative 'sanitize/config'
|
8
|
+
require_relative 'sanitize/config/default'
|
9
|
+
require_relative 'sanitize/config/restricted'
|
10
|
+
require_relative 'sanitize/config/basic'
|
11
|
+
require_relative 'sanitize/config/relaxed'
|
12
|
+
require_relative 'sanitize/css'
|
13
|
+
require_relative 'sanitize/transformers/clean_cdata'
|
14
|
+
require_relative 'sanitize/transformers/clean_comment'
|
15
|
+
require_relative 'sanitize/transformers/clean_css'
|
16
|
+
require_relative 'sanitize/transformers/clean_doctype'
|
17
|
+
require_relative 'sanitize/transformers/clean_element'
|
35
18
|
|
36
19
|
class Sanitize
|
37
20
|
attr_reader :config
|
38
21
|
|
39
|
-
# Matches
|
40
|
-
#
|
41
|
-
# technically allowed, with the intent of matching the most common characters
|
42
|
-
# used in data attribute names while excluding uncommon or potentially
|
43
|
-
# misleading characters, or characters with the potential to be normalized
|
44
|
-
# into unsafe or confusing forms.
|
22
|
+
# Matches one or more control characters that should be removed from HTML
|
23
|
+
# before parsing, as defined by the HTML living standard.
|
45
24
|
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
|
25
|
+
# - https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
26
|
+
# - https://infra.spec.whatwg.org/#control
|
27
|
+
REGEX_HTML_CONTROL_CHARACTERS = /[\u0001-\u0008\u000b\u000e-\u001f\u007f-\u009f]+/u
|
28
|
+
|
29
|
+
# Matches one or more non-characters that should be removed from HTML before
|
30
|
+
# parsing, as defined by the HTML living standard.
|
49
31
|
#
|
50
|
-
#
|
51
|
-
|
32
|
+
# - https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
33
|
+
# - https://infra.spec.whatwg.org/#noncharacter
|
34
|
+
REGEX_HTML_NON_CHARACTERS = /[\ufdd0-\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}]+/u
|
52
35
|
|
53
36
|
# Matches an attribute value that could be treated by a browser as a URL
|
54
37
|
# with a protocol prefix, such as "http:" or "javascript:". Any string of zero
|
55
38
|
# or more characters followed by a colon is considered a match, even if the
|
56
39
|
# colon is encoded as an entity and even if it's an incomplete entity (which
|
57
40
|
# IE6 and Opera will still parse).
|
58
|
-
REGEX_PROTOCOL = /\A([^\/#]*?)(?:\:|&#0*58|&#x0*3a)/i
|
41
|
+
REGEX_PROTOCOL = /\A\s*([^\/#]*?)(?:\:|&#0*58|&#x0*3a)/i
|
42
|
+
|
43
|
+
# Matches one or more characters that should be stripped from HTML before
|
44
|
+
# parsing. This is a combination of `REGEX_HTML_CONTROL_CHARACTERS` and
|
45
|
+
# `REGEX_HTML_NON_CHARACTERS`.
|
46
|
+
#
|
47
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
48
|
+
REGEX_UNSUITABLE_CHARS = /(?:#{REGEX_HTML_CONTROL_CHARACTERS}|#{REGEX_HTML_NON_CHARACTERS})/u
|
59
49
|
|
60
50
|
#--
|
61
51
|
# Class Methods
|
62
52
|
#++
|
63
53
|
|
64
|
-
# Returns a sanitized copy of _html_, using the
|
65
|
-
# specified.
|
66
|
-
|
67
|
-
|
54
|
+
# Returns a sanitized copy of the given full _html_ document, using the
|
55
|
+
# settings in _config_ if specified.
|
56
|
+
#
|
57
|
+
# When sanitizing a document, the `<html>` element must be allowlisted or an
|
58
|
+
# error will be raised. If this is undesirable, you should probably use
|
59
|
+
# {#fragment} instead.
|
60
|
+
def self.document(html, config = {})
|
61
|
+
Sanitize.new(config).document(html)
|
68
62
|
end
|
69
63
|
|
70
|
-
#
|
71
|
-
#
|
72
|
-
def self.
|
73
|
-
Sanitize.new(config).
|
64
|
+
# Returns a sanitized copy of the given _html_ fragment, using the settings in
|
65
|
+
# _config_ if specified.
|
66
|
+
def self.fragment(html, config = {})
|
67
|
+
Sanitize.new(config).fragment(html)
|
74
68
|
end
|
75
69
|
|
76
|
-
#
|
77
|
-
|
78
|
-
|
79
|
-
def self.clean_document(html, config = {})
|
80
|
-
Sanitize.new(config).clean_document(html)
|
70
|
+
# Sanitizes the given `Nokogiri::XML::Node` instance and all its children.
|
71
|
+
def self.node!(node, config = {})
|
72
|
+
Sanitize.new(config).node!(node)
|
81
73
|
end
|
82
74
|
|
83
|
-
#
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
end
|
75
|
+
# Aliases for pre-3.0.0 backcompat.
|
76
|
+
class << Sanitize
|
77
|
+
# @deprecated Use {.document} instead.
|
78
|
+
alias_method :clean_document, :document
|
88
79
|
|
89
|
-
|
90
|
-
|
91
|
-
|
80
|
+
# @deprecated Use {.fragment} instead.
|
81
|
+
alias_method :clean, :fragment
|
82
|
+
|
83
|
+
# @deprecated Use {.node!} instead.
|
84
|
+
alias_method :clean_node!, :node!
|
92
85
|
end
|
93
86
|
|
94
87
|
#--
|
@@ -97,118 +90,154 @@ class Sanitize
|
|
97
90
|
|
98
91
|
# Returns a new Sanitize object initialized with the settings in _config_.
|
99
92
|
def initialize(config = {})
|
100
|
-
@config = Config
|
101
|
-
|
102
|
-
@transformers = {
|
103
|
-
:breadth => Array(@config[:transformers_breadth].dup),
|
104
|
-
:depth => Array(@config[:transformers]) + Array(@config[:transformers_depth])
|
105
|
-
}
|
93
|
+
@config = Config.merge(Config::DEFAULT, config)
|
106
94
|
|
107
|
-
|
108
|
-
# after any custom transformers.
|
109
|
-
@transformers[:depth] << Transformers::CleanComment unless @config[:allow_comments]
|
95
|
+
@transformers = Array(@config[:transformers]).dup
|
110
96
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
97
|
+
# Default transformers always run at the end of the chain, after any custom
|
98
|
+
# transformers.
|
99
|
+
@transformers << Transformers::CleanElement.new(@config)
|
100
|
+
@transformers << Transformers::CleanComment unless @config[:allow_comments]
|
115
101
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
dupe = html.dup
|
120
|
-
clean!(dupe) || dupe
|
102
|
+
if @config[:elements].include?('style')
|
103
|
+
scss = Sanitize::CSS.new(config)
|
104
|
+
@transformers << Transformers::CSS::CleanElement.new(scss)
|
121
105
|
end
|
122
|
-
end
|
123
106
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
fragment = parser.parse(html)
|
128
|
-
clean_node!(fragment)
|
129
|
-
|
130
|
-
output_method_params = {:encoding => @config[:output_encoding], :indent => 0}
|
131
|
-
|
132
|
-
if @config[:output] == :xhtml
|
133
|
-
output_method = fragment.method(:to_xhtml)
|
134
|
-
output_method_params[:save_with] = Nokogiri::XML::Node::SaveOptions::AS_XHTML
|
135
|
-
elsif @config[:output] == :html
|
136
|
-
output_method = fragment.method(:to_html)
|
137
|
-
else
|
138
|
-
raise Error, "unsupported output format: #{@config[:output]}"
|
107
|
+
if @config[:attributes].values.any? {|attr| attr.include?('style') }
|
108
|
+
scss ||= Sanitize::CSS.new(config)
|
109
|
+
@transformers << Transformers::CSS::CleanAttribute.new(scss)
|
139
110
|
end
|
140
111
|
|
141
|
-
|
112
|
+
@transformers << Transformers::CleanDoctype
|
113
|
+
@transformers << Transformers::CleanCDATA
|
142
114
|
|
143
|
-
|
115
|
+
@transformer_config = { config: @config }
|
144
116
|
end
|
145
117
|
|
146
|
-
# Returns a sanitized copy of the given
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
118
|
+
# Returns a sanitized copy of the given _html_ document.
|
119
|
+
#
|
120
|
+
# When sanitizing a document, the `<html>` element must be allowlisted or an
|
121
|
+
# error will be raised. If this is undesirable, you should probably use
|
122
|
+
# {#fragment} instead.
|
123
|
+
def document(html)
|
124
|
+
return '' unless html
|
125
|
+
|
126
|
+
doc = Nokogiri::HTML5.parse(preprocess(html), **@config[:parser_options])
|
127
|
+
node!(doc)
|
128
|
+
to_html(doc)
|
151
129
|
end
|
152
130
|
|
153
|
-
#
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
# it moves its children to the root document context
|
160
|
-
end
|
131
|
+
# @deprecated Use {#document} instead.
|
132
|
+
alias_method :clean_document, :document
|
133
|
+
|
134
|
+
# Returns a sanitized copy of the given _html_ fragment.
|
135
|
+
def fragment(html)
|
136
|
+
return '' unless html
|
161
137
|
|
162
|
-
|
138
|
+
frag = Nokogiri::HTML5.fragment(preprocess(html), **@config[:parser_options])
|
139
|
+
node!(frag)
|
140
|
+
to_html(frag)
|
163
141
|
end
|
164
142
|
|
165
|
-
#
|
166
|
-
|
143
|
+
# @deprecated Use {#fragment} instead.
|
144
|
+
alias_method :clean, :fragment
|
145
|
+
|
146
|
+
# Sanitizes the given `Nokogiri::XML::Node` and all its children, modifying it
|
147
|
+
# in place.
|
148
|
+
#
|
149
|
+
# If _node_ is a `Nokogiri::XML::Document`, the `<html>` element must be
|
150
|
+
# allowlisted or an error will be raised.
|
151
|
+
def node!(node)
|
167
152
|
raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)
|
168
153
|
|
169
|
-
|
154
|
+
if node.is_a?(Nokogiri::XML::Document)
|
155
|
+
unless @config[:elements].include?('html')
|
156
|
+
raise Error, 'When sanitizing a document, "<html>" must be allowlisted.'
|
157
|
+
end
|
158
|
+
end
|
170
159
|
|
171
|
-
|
172
|
-
|
160
|
+
node_allowlist = Set.new
|
161
|
+
|
162
|
+
traverse(node) do |n|
|
163
|
+
transform_node!(n, node_allowlist)
|
173
164
|
end
|
174
165
|
|
175
|
-
traverse_depth(node) {|n| transform_node!(n, node_whitelist, :depth) }
|
176
166
|
node
|
177
167
|
end
|
178
168
|
|
169
|
+
# @deprecated Use {#node!} instead.
|
170
|
+
alias_method :clean_node!, :node!
|
171
|
+
|
179
172
|
private
|
180
173
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
:
|
188
|
-
:
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
174
|
+
# Preprocesses HTML before parsing to remove undesirable Unicode chars.
|
175
|
+
def preprocess(html)
|
176
|
+
html = html.to_s.dup
|
177
|
+
|
178
|
+
unless html.encoding.name == 'UTF-8'
|
179
|
+
html.encode!('UTF-8',
|
180
|
+
:invalid => :replace,
|
181
|
+
:undef => :replace)
|
182
|
+
end
|
183
|
+
|
184
|
+
html.gsub!(REGEX_UNSUITABLE_CHARS, '')
|
185
|
+
html
|
186
|
+
end
|
187
|
+
|
188
|
+
def to_html(node)
|
189
|
+
node.to_html(preserve_newline: true)
|
190
|
+
end
|
191
|
+
|
192
|
+
def transform_node!(node, node_allowlist)
|
193
|
+
@transformers.each do |transformer|
|
194
|
+
# Since transform_node! may be called in a tight loop to process thousands
|
195
|
+
# of items, we can optimize both memory and CPU performance by:
|
196
|
+
#
|
197
|
+
# 1. Reusing the same config hash for each transformer
|
198
|
+
# 2. Directly assigning values to hash instead of using merge!. Not only
|
199
|
+
# does merge! create a new hash, it is also 2.6x slower:
|
200
|
+
# https://github.com/JuanitoFatas/fast-ruby#hashmerge-vs-hashmerge-code
|
201
|
+
config = @transformer_config
|
202
|
+
config[:is_allowlisted] = config[:is_whitelisted] = node_allowlist.include?(node)
|
203
|
+
config[:node] = node
|
204
|
+
config[:node_name] = node.name.downcase
|
205
|
+
config[:node_allowlist] = config[:node_whitelist] = node_allowlist
|
206
|
+
|
207
|
+
result = transformer.call(**config)
|
208
|
+
|
209
|
+
if result.is_a?(Hash)
|
210
|
+
result_allowlist = result[:node_allowlist] || result[:node_whitelist]
|
211
|
+
|
212
|
+
if result_allowlist.respond_to?(:each)
|
213
|
+
node_allowlist.merge(result_allowlist)
|
214
|
+
end
|
194
215
|
end
|
195
216
|
end
|
196
217
|
|
197
218
|
node
|
198
219
|
end
|
199
220
|
|
200
|
-
# Performs
|
201
|
-
# traversing
|
202
|
-
def
|
203
|
-
|
204
|
-
node.children.each {|child| traverse_breadth(child, &block) }
|
205
|
-
end
|
221
|
+
# Performs top-down traversal of the given node, operating first on the node
|
222
|
+
# itself, then traversing each child (if any) in order.
|
223
|
+
def traverse(node, &block)
|
224
|
+
yield node
|
206
225
|
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
226
|
+
child = node.child
|
227
|
+
|
228
|
+
while child do
|
229
|
+
prev = child.previous_sibling
|
230
|
+
traverse(child, &block)
|
231
|
+
|
232
|
+
if child.parent == node
|
233
|
+
child = child.next_sibling
|
234
|
+
else
|
235
|
+
# The child was unlinked or reparented, so traverse the previous node's
|
236
|
+
# next sibling, or the parent's first child if there is no previous
|
237
|
+
# node.
|
238
|
+
child = prev ? prev.next_sibling : node.child
|
239
|
+
end
|
240
|
+
end
|
212
241
|
end
|
213
242
|
|
214
243
|
class Error < StandardError; end
|
data/test/test_clean_comment.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
describe 'Sanitize::Transformers::CleanComment' do
|
5
|
+
make_my_diffs_pretty!
|
6
|
+
parallelize_me!
|
7
|
+
|
8
|
+
describe 'when :allow_comments is false' do
|
9
|
+
before do
|
10
|
+
@s = Sanitize.new(:allow_comments => false, :elements => ['div'])
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'should remove comments' do
|
14
|
+
@s.fragment('foo <!-- comment --> bar').must_equal 'foo  bar'
|
15
|
+
@s.fragment('foo <!-- ').must_equal 'foo '
|
16
|
+
@s.fragment('foo <!-- - -> bar').must_equal 'foo '
|
17
|
+
@s.fragment("foo <!--\n\n\n\n-->bar").must_equal 'foo bar'
|
18
|
+
@s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo --> -->bar'
|
19
|
+
@s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
|
20
|
+
|
21
|
+
# Special case: the comment markup is inside a <script>, which makes it
|
22
|
+
# text content and not an actual HTML comment.
|
23
|
+
@s.fragment("<script><!-- comment --></script>").must_equal ''
|
24
|
+
|
25
|
+
Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script'])
|
26
|
+
.must_equal '<script><!-- comment --></script>'
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe 'when :allow_comments is true' do
|
31
|
+
before do
|
32
|
+
@s = Sanitize.new(:allow_comments => true, :elements => ['div'])
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'should allow comments' do
|
36
|
+
@s.fragment('foo <!-- comment --> bar').must_equal 'foo <!-- comment --> bar'
|
37
|
+
@s.fragment('foo <!-- ').must_equal 'foo <!-- -->'
|
38
|
+
@s.fragment('foo <!-- - -> bar').must_equal 'foo <!-- - -> bar-->'
|
39
|
+
@s.fragment("foo <!--\n\n\n\n-->bar").must_equal "foo <!--\n\n\n\n-->bar"
|
40
|
+
@s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo <!-- <!-- <!-- --> --> -->bar'
|
41
|
+
@s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
|
42
|
+
|
43
|
+
Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script'])
|
44
|
+
.must_equal '<script><!-- comment --></script>'
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
describe 'Sanitize::Transformers::CSS::CleanAttribute' do
|
5
|
+
make_my_diffs_pretty!
|
6
|
+
parallelize_me!
|
7
|
+
|
8
|
+
before do
|
9
|
+
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should sanitize CSS properties in style attributes' do
|
13
|
+
@s.fragment(%[
|
14
|
+
<div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
|
15
|
+
].strip).must_equal %[
|
16
|
+
<div style="color: #fff; /* <-- evil! */"></div>
|
17
|
+
].strip
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should remove the style attribute if the sanitized CSS is empty' do
|
21
|
+
@s.fragment('<div style="width: expression(alert(1))"></div>').
|
22
|
+
must_equal '<div></div>'
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
describe 'Sanitize::Transformers::CSS::CleanElement' do
|
27
|
+
make_my_diffs_pretty!
|
28
|
+
parallelize_me!
|
29
|
+
|
30
|
+
before do
|
31
|
+
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'should sanitize CSS stylesheets in <style> elements' do
|
35
|
+
html = %[
|
36
|
+
<style>@import url(evil.css);
|
37
|
+
/* Yay CSS! */
|
38
|
+
.foo { color: #fff; }
|
39
|
+
#bar { background: url(yay.jpg); bogus: wtf; }
|
40
|
+
.evil { width: expression(xss()); }
|
41
|
+
|
42
|
+
@media screen (max-width:480px) {
|
43
|
+
.foo { width: 400px; }
|
44
|
+
#bar:not(.baz) { height: 100px; }
|
45
|
+
}
|
46
|
+
</style>
|
47
|
+
].strip
|
48
|
+
|
49
|
+
@s.fragment(html).must_equal %[
|
50
|
+
<style>
|
51
|
+
/* Yay CSS! */
|
52
|
+
.foo { color: #fff; }
|
53
|
+
#bar { background: url(yay.jpg); }
|
54
|
+
.evil { }
|
55
|
+
|
56
|
+
@media screen (max-width:480px) {
|
57
|
+
.foo { width: 400px; }
|
58
|
+
#bar:not(.baz) { height: 100px; }
|
59
|
+
}
|
60
|
+
</style>
|
61
|
+
].strip
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'should remove the <style> element if the sanitized CSS is empty' do
|
65
|
+
@s.fragment('<style></style>').must_equal ''
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
describe 'Sanitize::Transformers::CleanDoctype' do
|
5
|
+
make_my_diffs_pretty!
|
6
|
+
parallelize_me!
|
7
|
+
|
8
|
+
describe 'when :allow_doctype is false' do
|
9
|
+
before do
|
10
|
+
@s = Sanitize.new(:allow_doctype => false, :elements => ['html'])
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'should remove doctype declarations' do
|
14
|
+
@s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html>"
|
15
|
+
@s.fragment('<!DOCTYPE html>foo').must_equal 'foo'
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'should not allow doctype definitions in fragments' do
|
19
|
+
@s.fragment('<!DOCTYPE html><html>foo</html>')
|
20
|
+
.must_equal "foo"
|
21
|
+
|
22
|
+
@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
23
|
+
.must_equal "foo"
|
24
|
+
|
25
|
+
@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
26
|
+
.must_equal "foo"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe 'when :allow_doctype is true' do
|
31
|
+
before do
|
32
|
+
@s = Sanitize.new(:allow_doctype => true, :elements => ['html'])
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'should allow doctype declarations in documents' do
|
36
|
+
@s.document('<!DOCTYPE html><html>foo</html>')
|
37
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
38
|
+
|
39
|
+
@s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
40
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
41
|
+
|
42
|
+
@s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
43
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'should not allow obviously invalid doctype declarations in documents' do
|
47
|
+
@s.document('<!DOCTYPE blah blah blah><html>foo</html>')
|
48
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
49
|
+
|
50
|
+
@s.document('<!DOCTYPE blah><html>foo</html>')
|
51
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
52
|
+
|
53
|
+
@s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
54
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
55
|
+
|
56
|
+
@s.document('<!whatever><html>foo</html>')
|
57
|
+
.must_equal "<html>foo</html>"
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'should not allow doctype definitions in fragments' do
|
61
|
+
@s.fragment('<!DOCTYPE html><html>foo</html>')
|
62
|
+
.must_equal "foo"
|
63
|
+
|
64
|
+
@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
65
|
+
.must_equal "foo"
|
66
|
+
|
67
|
+
@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
68
|
+
.must_equal "foo"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|