sanitize 5.1.0 → 6.0.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +155 -18
- data/LICENSE +1 -1
- data/README.md +67 -74
- data/lib/sanitize/config/default.rb +6 -1
- data/lib/sanitize/config/relaxed.rb +1 -1
- data/lib/sanitize/css.rb +2 -2
- data/lib/sanitize/transformers/clean_comment.rb +1 -1
- data/lib/sanitize/transformers/clean_css.rb +3 -3
- data/lib/sanitize/transformers/clean_doctype.rb +1 -1
- data/lib/sanitize/transformers/clean_element.rb +62 -20
- data/lib/sanitize/version.rb +1 -1
- data/lib/sanitize.rb +17 -13
- data/test/test_clean_comment.rb +16 -16
- data/test/test_clean_css.rb +5 -5
- data/test/test_clean_doctype.rb +15 -15
- data/test/test_clean_element.rb +130 -97
- data/test/test_config.rb +9 -9
- data/test/test_malicious_css.rb +7 -7
- data/test/test_malicious_html.rb +153 -30
- data/test/test_parser.rb +9 -9
- data/test/test_sanitize.rb +29 -29
- data/test/test_sanitize_css.rb +57 -57
- data/test/test_transformers.rb +48 -42
- metadata +17 -31
@@ -1,6 +1,6 @@
|
|
1
1
|
class Sanitize; module Transformers; module CSS
|
2
2
|
|
3
|
-
# Enforces a CSS whitelist on the contents of `style` attributes.
|
3
|
+
# Enforces a CSS allowlist on the contents of `style` attributes.
|
4
4
|
class CleanAttribute
|
5
5
|
def initialize(sanitizer_or_config)
|
6
6
|
if Sanitize::CSS === sanitizer_or_config
|
@@ -14,7 +14,7 @@ class CleanAttribute
|
|
14
14
|
node = env[:node]
|
15
15
|
|
16
16
|
return unless node.type == Nokogiri::XML::Node::ELEMENT_NODE &&
|
17
|
-
node.key?('style') && !env[:is_whitelisted]
|
17
|
+
node.key?('style') && !env[:is_allowlisted]
|
18
18
|
|
19
19
|
attr = node.attribute('style')
|
20
20
|
css = @scss.properties(attr.value)
|
@@ -27,7 +27,7 @@ class CleanAttribute
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
# Enforces a CSS whitelist on the contents of `<style>` elements.
|
30
|
+
# Enforces a CSS allowlist on the contents of `<style>` elements.
|
31
31
|
class CleanElement
|
32
32
|
def initialize(sanitizer_or_config)
|
33
33
|
if Sanitize::CSS === sanitizer_or_config
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require 'cgi'
|
3
4
|
require 'set'
|
4
5
|
|
5
6
|
class Sanitize; module Transformers; class CleanElement
|
@@ -18,6 +19,18 @@ class Sanitize; module Transformers; class CleanElement
|
|
18
19
|
# http://www.whatwg.org/specs/web-apps/current-work/multipage/elements.html#embedding-custom-non-visible-data-with-the-data-*-attributes
|
19
20
|
REGEX_DATA_ATTR = /\Adata-(?!xml)[a-z_][\w.\u00E0-\u00F6\u00F8-\u017F\u01DD-\u02AF-]*\z/u
|
20
21
|
|
22
|
+
# Elements whose content is treated as unescaped text by HTML parsers.
|
23
|
+
UNESCAPED_TEXT_ELEMENTS = Set.new(%w[
|
24
|
+
iframe
|
25
|
+
noembed
|
26
|
+
noframes
|
27
|
+
noscript
|
28
|
+
plaintext
|
29
|
+
script
|
30
|
+
style
|
31
|
+
xmp
|
32
|
+
])
|
33
|
+
|
21
34
|
# Attributes that need additional escaping on `<a>` elements due to unsafe
|
22
35
|
# libxml2 behavior.
|
23
36
|
UNSAFE_LIBXML_ATTRS_A = Set.new(%w[
|
@@ -76,11 +89,11 @@ class Sanitize; module Transformers; class CleanElement
|
|
76
89
|
|
77
90
|
def call(env)
|
78
91
|
node = env[:node]
|
79
|
-
return if node.type != Nokogiri::XML::Node::ELEMENT_NODE || env[:is_whitelisted]
|
92
|
+
return if node.type != Nokogiri::XML::Node::ELEMENT_NODE || env[:is_allowlisted]
|
80
93
|
|
81
94
|
name = env[:node_name]
|
82
95
|
|
83
|
-
# Delete any element that isn't in the config whitelist, unless the node has
|
96
|
+
# Delete any element that isn't in the config allowlist, unless the node has
|
84
97
|
# already been deleted from the document.
|
85
98
|
#
|
86
99
|
# It's important that we not try to reparent the children of a node that has
|
@@ -107,34 +120,31 @@ class Sanitize; module Transformers; class CleanElement
|
|
107
120
|
return
|
108
121
|
end
|
109
122
|
|
110
|
-
|
123
|
+
attr_allowlist = @attributes[name] || @attributes[:all]
|
111
124
|
|
112
|
-
if
|
113
|
-
# Delete all attributes from elements with no whitelisted attributes.
|
125
|
+
if attr_allowlist.nil?
|
126
|
+
# Delete all attributes from elements with no allowlisted attributes.
|
114
127
|
node.attribute_nodes.each {|attr| attr.unlink }
|
115
128
|
else
|
116
|
-
allow_data_attributes =
|
129
|
+
allow_data_attributes = attr_allowlist.include?(:data)
|
117
130
|
|
118
131
|
# Delete any attribute that isn't allowed on this element.
|
119
132
|
node.attribute_nodes.each do |attr|
|
120
133
|
attr_name = attr.name.downcase
|
121
134
|
|
122
|
-
unless
|
123
|
-
# The attribute isn't
|
135
|
+
unless attr_allowlist.include?(attr_name)
|
136
|
+
# The attribute isn't in the allowlist, but may still be allowed if
|
137
|
+
# it's a data attribute.
|
124
138
|
|
125
|
-
|
126
|
-
#
|
127
|
-
#
|
128
|
-
|
139
|
+
unless allow_data_attributes && attr_name.start_with?('data-') && attr_name =~ REGEX_DATA_ATTR
|
140
|
+
# Either the attribute isn't a data attribute or arbitrary data
|
141
|
+
# attributes aren't allowed. Remove the attribute.
|
142
|
+
attr.unlink
|
143
|
+
next
|
129
144
|
end
|
130
|
-
|
131
|
-
# Either the attribute isn't a data attribute or arbitrary data
|
132
|
-
# attributes aren't allowed. Remove the attribute.
|
133
|
-
attr.unlink
|
134
|
-
next
|
135
145
|
end
|
136
146
|
|
137
|
-
# The attribute is whitelisted.
|
147
|
+
# The attribute is allowed.
|
138
148
|
|
139
149
|
# Remove any attributes that use unacceptable protocols.
|
140
150
|
if @protocols.include?(name) && @protocols[name].include?(attr_name)
|
@@ -162,7 +172,7 @@ class Sanitize; module Transformers; class CleanElement
|
|
162
172
|
# libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
|
163
173
|
# attempt to preserve server-side includes. This can result in XSS since
|
164
174
|
# an unescaped double quote can allow an attacker to inject a
|
165
|
-
# non-whitelisted attribute.
|
175
|
+
# non-allowlisted attribute.
|
166
176
|
#
|
167
177
|
# Sanitize works around this by implementing its own escaping for
|
168
178
|
# affected attributes, some of which can exist on any element and some
|
@@ -188,10 +198,32 @@ class Sanitize; module Transformers; class CleanElement
|
|
188
198
|
@add_attributes[name].each {|key, val| node[key] = val }
|
189
199
|
end
|
190
200
|
|
201
|
+
# Make a best effort to ensure that text nodes in invalid "unescaped text"
|
202
|
+
# elements that are inside a math or svg namespace are properly escaped so
|
203
|
+
# that they don't get parsed as HTML.
|
204
|
+
#
|
205
|
+
# Sanitize is explicitly documented as not supporting MathML or SVG, but
|
206
|
+
# people sometimes allow `<math>` and `<svg>` elements in their custom
|
207
|
+
# configs without realizing that it's not safe. This workaround makes it
|
208
|
+
# slightly less unsafe, but you still shouldn't allow `<math>` or `<svg>`
|
209
|
+
# because Nokogiri doesn't parse them the same way browsers do and Sanitize
|
210
|
+
# can't guarantee that their contents are safe.
|
211
|
+
unless node.namespace.nil?
|
212
|
+
prefix = node.namespace.prefix
|
213
|
+
|
214
|
+
if (prefix == 'math' || prefix == 'svg') && UNESCAPED_TEXT_ELEMENTS.include?(name)
|
215
|
+
node.children.each do |child|
|
216
|
+
if child.type == Nokogiri::XML::Node::TEXT_NODE
|
217
|
+
child.content = CGI.escapeHTML(child.content)
|
218
|
+
end
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
191
223
|
# Element-specific special cases.
|
192
224
|
case name
|
193
225
|
|
194
|
-
# If this is a whitelisted iframe that has children, remove all its
|
226
|
+
# If this is an allowlisted iframe that has children, remove all its
|
195
227
|
# children. The HTML standard says iframes shouldn't have content, but when
|
196
228
|
# they do, this content is parsed as text and is serialized verbatim without
|
197
229
|
# being escaped, which is unsafe because legacy browsers may still render it
|
@@ -220,6 +252,16 @@ class Sanitize; module Transformers; class CleanElement
|
|
220
252
|
|
221
253
|
node['content'] = node['content'].gsub(/;\s*charset\s*=.+\z/, ';charset=utf-8')
|
222
254
|
end
|
255
|
+
|
256
|
+
# A `<noscript>` element's content is parsed differently in browsers
|
257
|
+
# depending on whether or not scripting is enabled. Since Nokogiri doesn't
|
258
|
+
# support scripting, it always parses `<noscript>` elements as if scripting
|
259
|
+
# is disabled. This results in edge cases where it's not possible to
|
260
|
+
# reliably sanitize the contents of a `<noscript>` element because Nokogiri
|
261
|
+
# can't fully replicate the parsing behavior of a scripting-enabled browser.
|
262
|
+
# The safest thing to do is to simply remove all `<noscript>` elements.
|
263
|
+
when 'noscript'
|
264
|
+
node.unlink
|
223
265
|
end
|
224
266
|
end
|
225
267
|
|
data/lib/sanitize/version.rb
CHANGED
data/lib/sanitize.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require '
|
3
|
+
require 'nokogiri'
|
4
4
|
require 'set'
|
5
5
|
|
6
6
|
require_relative 'sanitize/version'
|
@@ -54,7 +54,7 @@ class Sanitize
|
|
54
54
|
# Returns a sanitized copy of the given full _html_ document, using the
|
55
55
|
# settings in _config_ if specified.
|
56
56
|
#
|
57
|
-
# When sanitizing a document, the `<html>` element must be whitelisted or an
|
57
|
+
# When sanitizing a document, the `<html>` element must be allowlisted or an
|
58
58
|
# error will be raised. If this is undesirable, you should probably use
|
59
59
|
# {#fragment} instead.
|
60
60
|
def self.document(html, config = {})
|
@@ -117,7 +117,7 @@ class Sanitize
|
|
117
117
|
|
118
118
|
# Returns a sanitized copy of the given _html_ document.
|
119
119
|
#
|
120
|
-
# When sanitizing a document, the `<html>` element must be whitelisted or an
|
120
|
+
# When sanitizing a document, the `<html>` element must be allowlisted or an
|
121
121
|
# error will be raised. If this is undesirable, you should probably use
|
122
122
|
# {#fragment} instead.
|
123
123
|
def document(html)
|
@@ -147,20 +147,20 @@ class Sanitize
|
|
147
147
|
# in place.
|
148
148
|
#
|
149
149
|
# If _node_ is a `Nokogiri::XML::Document`, the `<html>` element must be
|
150
|
-
# whitelisted or an error will be raised.
|
150
|
+
# allowlisted or an error will be raised.
|
151
151
|
def node!(node)
|
152
152
|
raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)
|
153
153
|
|
154
154
|
if node.is_a?(Nokogiri::XML::Document)
|
155
155
|
unless @config[:elements].include?('html')
|
156
|
-
raise Error, 'When sanitizing a document, "<html>" must be whitelisted.'
|
156
|
+
raise Error, 'When sanitizing a document, "<html>" must be allowlisted.'
|
157
157
|
end
|
158
158
|
end
|
159
159
|
|
160
|
-
|
160
|
+
node_allowlist = Set.new
|
161
161
|
|
162
162
|
traverse(node) do |n|
|
163
|
-
transform_node!(n, node_whitelist)
|
163
|
+
transform_node!(n, node_allowlist)
|
164
164
|
end
|
165
165
|
|
166
166
|
node
|
@@ -189,7 +189,7 @@ class Sanitize
|
|
189
189
|
node.to_html(preserve_newline: true)
|
190
190
|
end
|
191
191
|
|
192
|
-
def transform_node!(node, node_whitelist)
|
192
|
+
def transform_node!(node, node_allowlist)
|
193
193
|
@transformers.each do |transformer|
|
194
194
|
# Since transform_node! may be called in a tight loop to process thousands
|
195
195
|
# of items, we can optimize both memory and CPU performance by:
|
@@ -199,15 +199,19 @@ class Sanitize
|
|
199
199
|
# does merge! create a new hash, it is also 2.6x slower:
|
200
200
|
# https://github.com/JuanitoFatas/fast-ruby#hashmerge-vs-hashmerge-code
|
201
201
|
config = @transformer_config
|
202
|
-
config[:is_whitelisted] = node_whitelist.include?(node)
|
202
|
+
config[:is_allowlisted] = config[:is_whitelisted] = node_allowlist.include?(node)
|
203
203
|
config[:node] = node
|
204
204
|
config[:node_name] = node.name.downcase
|
205
|
-
config[:node_whitelist] = node_whitelist
|
205
|
+
config[:node_allowlist] = config[:node_whitelist] = node_allowlist
|
206
206
|
|
207
|
-
result = transformer.call(config)
|
207
|
+
result = transformer.call(**config)
|
208
208
|
|
209
|
-
if result.is_a?(Hash)
|
210
|
-
|
209
|
+
if result.is_a?(Hash)
|
210
|
+
result_allowlist = result[:node_allowlist] || result[:node_whitelist]
|
211
|
+
|
212
|
+
if result_allowlist.respond_to?(:each)
|
213
|
+
node_allowlist.merge(result_allowlist)
|
214
|
+
end
|
211
215
|
end
|
212
216
|
end
|
213
217
|
|
data/test/test_clean_comment.rb
CHANGED
@@ -11,18 +11,18 @@ describe 'Sanitize::Transformers::CleanComment' do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'should remove comments' do
|
14
|
-
@s.fragment('foo <!-- comment --> bar').must_equal 'foo bar'
|
15
|
-
@s.fragment('foo <!-- ').must_equal 'foo '
|
16
|
-
@s.fragment('foo <!-- - -> bar').must_equal 'foo '
|
17
|
-
@s.fragment("foo <!--\n\n\n\n-->bar").must_equal 'foo bar'
|
18
|
-
@s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo --> -->bar'
|
19
|
-
@s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
|
14
|
+
_(@s.fragment('foo <!-- comment --> bar')).must_equal 'foo bar'
|
15
|
+
_(@s.fragment('foo <!-- ')).must_equal 'foo '
|
16
|
+
_(@s.fragment('foo <!-- - -> bar')).must_equal 'foo '
|
17
|
+
_(@s.fragment("foo <!--\n\n\n\n-->bar")).must_equal 'foo bar'
|
18
|
+
_(@s.fragment("foo <!-- <!-- <!-- --> --> -->bar")).must_equal 'foo --> -->bar'
|
19
|
+
_(@s.fragment("foo <div <!-- comment -->>bar</div>")).must_equal 'foo <div>>bar</div>'
|
20
20
|
|
21
21
|
# Special case: the comment markup is inside a <script>, which makes it
|
22
22
|
# text content and not an actual HTML comment.
|
23
|
-
@s.fragment("<script><!-- comment --></script>").must_equal ''
|
23
|
+
_(@s.fragment("<script><!-- comment --></script>")).must_equal ''
|
24
24
|
|
25
|
-
Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script'])
|
25
|
+
_(Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script']))
|
26
26
|
.must_equal '<script><!-- comment --></script>'
|
27
27
|
end
|
28
28
|
end
|
@@ -33,14 +33,14 @@ describe 'Sanitize::Transformers::CleanComment' do
|
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'should allow comments' do
|
36
|
-
@s.fragment('foo <!-- comment --> bar').must_equal 'foo <!-- comment --> bar'
|
37
|
-
@s.fragment('foo <!-- ').must_equal 'foo <!-- -->'
|
38
|
-
@s.fragment('foo <!-- - -> bar').must_equal 'foo <!-- - -> bar-->'
|
39
|
-
@s.fragment("foo <!--\n\n\n\n-->bar").must_equal "foo <!--\n\n\n\n-->bar"
|
40
|
-
@s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo <!-- <!-- <!-- --> --> -->bar'
|
41
|
-
@s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
|
42
|
-
|
43
|
-
Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script'])
|
36
|
+
_(@s.fragment('foo <!-- comment --> bar')).must_equal 'foo <!-- comment --> bar'
|
37
|
+
_(@s.fragment('foo <!-- ')).must_equal 'foo <!-- -->'
|
38
|
+
_(@s.fragment('foo <!-- - -> bar')).must_equal 'foo <!-- - -> bar-->'
|
39
|
+
_(@s.fragment("foo <!--\n\n\n\n-->bar")).must_equal "foo <!--\n\n\n\n-->bar"
|
40
|
+
_(@s.fragment("foo <!-- <!-- <!-- --> --> -->bar")).must_equal 'foo <!-- <!-- <!-- --> --> -->bar'
|
41
|
+
_(@s.fragment("foo <div <!-- comment -->>bar</div>")).must_equal 'foo <div>>bar</div>'
|
42
|
+
|
43
|
+
_(Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script']))
|
44
44
|
.must_equal '<script><!-- comment --></script>'
|
45
45
|
end
|
46
46
|
end
|
data/test/test_clean_css.rb
CHANGED
@@ -10,15 +10,15 @@ describe 'Sanitize::Transformers::CSS::CleanAttribute' do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
it 'should sanitize CSS properties in style attributes' do
|
13
|
-
@s.fragment(%[
|
13
|
+
_(@s.fragment(%[
|
14
14
|
<div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
|
15
|
-
].strip).must_equal %[
|
15
|
+
].strip)).must_equal %[
|
16
16
|
<div style="color: #fff; /* <-- evil! */"></div>
|
17
17
|
].strip
|
18
18
|
end
|
19
19
|
|
20
20
|
it 'should remove the style attribute if the sanitized CSS is empty' do
|
21
|
-
@s.fragment('<div style="width: expression(alert(1))"></div>').
|
21
|
+
_(@s.fragment('<div style="width: expression(alert(1))"></div>')).
|
22
22
|
must_equal '<div></div>'
|
23
23
|
end
|
24
24
|
end
|
@@ -46,7 +46,7 @@ describe 'Sanitize::Transformers::CSS::CleanElement' do
|
|
46
46
|
</style>
|
47
47
|
].strip
|
48
48
|
|
49
|
-
@s.fragment(html).must_equal %[
|
49
|
+
_(@s.fragment(html)).must_equal %[
|
50
50
|
<style>
|
51
51
|
/* Yay CSS! */
|
52
52
|
.foo { color: #fff; }
|
@@ -62,6 +62,6 @@ describe 'Sanitize::Transformers::CSS::CleanElement' do
|
|
62
62
|
end
|
63
63
|
|
64
64
|
it 'should remove the <style> element if the sanitized CSS is empty' do
|
65
|
-
@s.fragment('<style></style>').must_equal ''
|
65
|
+
_(@s.fragment('<style></style>')).must_equal ''
|
66
66
|
end
|
67
67
|
end
|
data/test/test_clean_doctype.rb
CHANGED
@@ -11,18 +11,18 @@ describe 'Sanitize::Transformers::CleanDoctype' do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'should remove doctype declarations' do
|
14
|
-
@s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html>"
|
15
|
-
@s.fragment('<!DOCTYPE html>foo').must_equal 'foo'
|
14
|
+
_(@s.document('<!DOCTYPE html><html>foo</html>')).must_equal "<html>foo</html>"
|
15
|
+
_(@s.fragment('<!DOCTYPE html>foo')).must_equal 'foo'
|
16
16
|
end
|
17
17
|
|
18
18
|
it 'should not allow doctype definitions in fragments' do
|
19
|
-
@s.fragment('<!DOCTYPE html><html>foo</html>')
|
19
|
+
_(@s.fragment('<!DOCTYPE html><html>foo</html>'))
|
20
20
|
.must_equal "foo"
|
21
21
|
|
22
|
-
@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
22
|
+
_(@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
23
23
|
.must_equal "foo"
|
24
24
|
|
25
|
-
@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
25
|
+
_(@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
|
26
26
|
.must_equal "foo"
|
27
27
|
end
|
28
28
|
end
|
@@ -33,38 +33,38 @@ describe 'Sanitize::Transformers::CleanDoctype' do
|
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'should allow doctype declarations in documents' do
|
36
|
-
@s.document('<!DOCTYPE html><html>foo</html>')
|
36
|
+
_(@s.document('<!DOCTYPE html><html>foo</html>'))
|
37
37
|
.must_equal "<!DOCTYPE html><html>foo</html>"
|
38
38
|
|
39
|
-
@s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
39
|
+
_(@s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
40
40
|
.must_equal "<!DOCTYPE html><html>foo</html>"
|
41
41
|
|
42
|
-
@s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
42
|
+
_(@s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
|
43
43
|
.must_equal "<!DOCTYPE html><html>foo</html>"
|
44
44
|
end
|
45
45
|
|
46
46
|
it 'should not allow obviously invalid doctype declarations in documents' do
|
47
|
-
@s.document('<!DOCTYPE blah blah blah><html>foo</html>')
|
47
|
+
_(@s.document('<!DOCTYPE blah blah blah><html>foo</html>'))
|
48
48
|
.must_equal "<!DOCTYPE html><html>foo</html>"
|
49
49
|
|
50
|
-
@s.document('<!DOCTYPE blah><html>foo</html>')
|
50
|
+
_(@s.document('<!DOCTYPE blah><html>foo</html>'))
|
51
51
|
.must_equal "<!DOCTYPE html><html>foo</html>"
|
52
52
|
|
53
|
-
@s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
53
|
+
_(@s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
54
54
|
.must_equal "<!DOCTYPE html><html>foo</html>"
|
55
55
|
|
56
|
-
@s.document('<!whatever><html>foo</html>')
|
56
|
+
_(@s.document('<!whatever><html>foo</html>'))
|
57
57
|
.must_equal "<html>foo</html>"
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'should not allow doctype definitions in fragments' do
|
61
|
-
@s.fragment('<!DOCTYPE html><html>foo</html>')
|
61
|
+
_(@s.fragment('<!DOCTYPE html><html>foo</html>'))
|
62
62
|
.must_equal "foo"
|
63
63
|
|
64
|
-
@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
64
|
+
_(@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
65
65
|
.must_equal "foo"
|
66
66
|
|
67
|
-
@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
67
|
+
_(@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
|
68
68
|
.must_equal "foo"
|
69
69
|
end
|
70
70
|
end
|