sanitize 4.6.6 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +176 -16
- data/LICENSE +1 -1
- data/README.md +65 -67
- data/lib/sanitize/config/default.rb +10 -4
- data/lib/sanitize/config/relaxed.rb +1 -1
- data/lib/sanitize/css.rb +2 -2
- data/lib/sanitize/transformers/clean_comment.rb +1 -1
- data/lib/sanitize/transformers/clean_css.rb +3 -3
- data/lib/sanitize/transformers/clean_doctype.rb +1 -1
- data/lib/sanitize/transformers/clean_element.rb +60 -22
- data/lib/sanitize/version.rb +1 -1
- data/lib/sanitize.rb +39 -63
- data/test/common.rb +0 -31
- data/test/test_clean_comment.rb +1 -5
- data/test/test_clean_css.rb +1 -1
- data/test/test_clean_doctype.rb +8 -8
- data/test/test_clean_element.rb +137 -26
- data/test/test_malicious_html.rb +50 -7
- data/test/test_parser.rb +3 -32
- data/test/test_sanitize.rb +103 -18
- data/test/test_sanitize_css.rb +43 -16
- data/test/test_transformers.rb +29 -23
- metadata +17 -33
- data/test/test_unicode.rb +0 -95
@@ -1,6 +1,6 @@
|
|
1
1
|
class Sanitize; module Transformers; module CSS
|
2
2
|
|
3
|
-
# Enforces a CSS whitelist on the contents of `style` attributes.
|
3
|
+
# Enforces a CSS allowlist on the contents of `style` attributes.
|
4
4
|
class CleanAttribute
|
5
5
|
def initialize(sanitizer_or_config)
|
6
6
|
if Sanitize::CSS === sanitizer_or_config
|
@@ -14,7 +14,7 @@ class CleanAttribute
|
|
14
14
|
node = env[:node]
|
15
15
|
|
16
16
|
return unless node.type == Nokogiri::XML::Node::ELEMENT_NODE &&
|
17
|
-
node.key?('style') && !env[:is_whitelisted]
|
17
|
+
node.key?('style') && !env[:is_allowlisted]
|
18
18
|
|
19
19
|
attr = node.attribute('style')
|
20
20
|
css = @scss.properties(attr.value)
|
@@ -27,7 +27,7 @@ class CleanAttribute
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
# Enforces a CSS whitelist on the contents of `<style>` elements.
|
30
|
+
# Enforces a CSS allowlist on the contents of `<style>` elements.
|
31
31
|
class CleanElement
|
32
32
|
def initialize(sanitizer_or_config)
|
33
33
|
if Sanitize::CSS === sanitizer_or_config
|
@@ -67,7 +67,7 @@ class Sanitize; module Transformers; class CleanElement
|
|
67
67
|
@whitespace_elements = config[:whitespace_elements]
|
68
68
|
end
|
69
69
|
|
70
|
-
if config[:remove_contents].is_a?(
|
70
|
+
if config[:remove_contents].is_a?(Enumerable)
|
71
71
|
@remove_element_contents.merge(config[:remove_contents].map(&:to_s))
|
72
72
|
else
|
73
73
|
@remove_all_contents = !!config[:remove_contents]
|
@@ -76,11 +76,11 @@ class Sanitize; module Transformers; class CleanElement
|
|
76
76
|
|
77
77
|
def call(env)
|
78
78
|
node = env[:node]
|
79
|
-
return if node.type != Nokogiri::XML::Node::ELEMENT_NODE || env[:is_whitelisted]
|
79
|
+
return if node.type != Nokogiri::XML::Node::ELEMENT_NODE || env[:is_allowlisted]
|
80
80
|
|
81
81
|
name = env[:node_name]
|
82
82
|
|
83
|
-
# Delete any element that isn't in the config whitelist, unless the node has
|
83
|
+
# Delete any element that isn't in the config allowlist, unless the node has
|
84
84
|
# already been deleted from the document.
|
85
85
|
#
|
86
86
|
# It's important that we not try to reparent the children of a node that has
|
@@ -97,42 +97,41 @@ class Sanitize; module Transformers; class CleanElement
|
|
97
97
|
end
|
98
98
|
end
|
99
99
|
|
100
|
-
unless
|
101
|
-
|
100
|
+
unless node.children.empty?
|
101
|
+
unless @remove_all_contents || @remove_element_contents.include?(name)
|
102
|
+
node.add_previous_sibling(node.children)
|
103
|
+
end
|
102
104
|
end
|
103
105
|
|
104
106
|
node.unlink
|
105
107
|
return
|
106
108
|
end
|
107
109
|
|
108
|
-
|
110
|
+
attr_allowlist = @attributes[name] || @attributes[:all]
|
109
111
|
|
110
|
-
if
|
111
|
-
# Delete all attributes from elements with no
|
112
|
+
if attr_allowlist.nil?
|
113
|
+
# Delete all attributes from elements with no allowlisted attributes.
|
112
114
|
node.attribute_nodes.each {|attr| attr.unlink }
|
113
115
|
else
|
114
|
-
allow_data_attributes =
|
116
|
+
allow_data_attributes = attr_allowlist.include?(:data)
|
115
117
|
|
116
118
|
# Delete any attribute that isn't allowed on this element.
|
117
119
|
node.attribute_nodes.each do |attr|
|
118
120
|
attr_name = attr.name.downcase
|
119
121
|
|
120
|
-
unless
|
121
|
-
# The attribute isn't
|
122
|
+
unless attr_allowlist.include?(attr_name)
|
123
|
+
# The attribute isn't in the allowlist, but may still be allowed if
|
124
|
+
# it's a data attribute.
|
122
125
|
|
123
|
-
|
124
|
-
#
|
125
|
-
#
|
126
|
-
|
126
|
+
unless allow_data_attributes && attr_name.start_with?('data-') && attr_name =~ REGEX_DATA_ATTR
|
127
|
+
# Either the attribute isn't a data attribute or arbitrary data
|
128
|
+
# attributes aren't allowed. Remove the attribute.
|
129
|
+
attr.unlink
|
130
|
+
next
|
127
131
|
end
|
128
|
-
|
129
|
-
# Either the attribute isn't a data attribute or arbitrary data
|
130
|
-
# attributes aren't allowed. Remove the attribute.
|
131
|
-
attr.unlink
|
132
|
-
next
|
133
132
|
end
|
134
133
|
|
135
|
-
# The attribute is
|
134
|
+
# The attribute is allowed.
|
136
135
|
|
137
136
|
# Remove any attributes that use unacceptable protocols.
|
138
137
|
if @protocols.include?(name) && @protocols[name].include?(attr_name)
|
@@ -160,12 +159,17 @@ class Sanitize; module Transformers; class CleanElement
|
|
160
159
|
# libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
|
161
160
|
# attempt to preserve server-side includes. This can result in XSS since
|
162
161
|
# an unescaped double quote can allow an attacker to inject a
|
163
|
-
# non-
|
162
|
+
# non-allowlisted attribute.
|
164
163
|
#
|
165
164
|
# Sanitize works around this by implementing its own escaping for
|
166
165
|
# affected attributes, some of which can exist on any element and some
|
167
166
|
# of which can only exist on `<a>` elements.
|
168
167
|
#
|
168
|
+
# This fix is technically no longer necessary with Nokogumbo >= 2.0
|
169
|
+
# since it no longer uses libxml2's serializer, but it's retained to
|
170
|
+
# avoid breaking use cases where people might be sanitizing individual
|
171
|
+
# Nokogiri nodes and then serializing them manually without Nokogumbo.
|
172
|
+
#
|
169
173
|
# The relevant libxml2 code is here:
|
170
174
|
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
171
175
|
if UNSAFE_LIBXML_ATTRS_GLOBAL.include?(attr_name) ||
|
@@ -180,6 +184,40 @@ class Sanitize; module Transformers; class CleanElement
|
|
180
184
|
if @add_attributes.include?(name)
|
181
185
|
@add_attributes[name].each {|key, val| node[key] = val }
|
182
186
|
end
|
187
|
+
|
188
|
+
# Element-specific special cases.
|
189
|
+
case name
|
190
|
+
|
191
|
+
# If this is an allowlisted iframe that has children, remove all its
|
192
|
+
# children. The HTML standard says iframes shouldn't have content, but when
|
193
|
+
# they do, this content is parsed as text and is serialized verbatim without
|
194
|
+
# being escaped, which is unsafe because legacy browsers may still render it
|
195
|
+
# and execute `<script>` content. So the safe and correct thing to do is to
|
196
|
+
# always remove iframe content.
|
197
|
+
when 'iframe'
|
198
|
+
if !node.children.empty?
|
199
|
+
node.children.each do |child|
|
200
|
+
child.unlink
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
# Prevent the use of `<meta>` elements that set a charset other than UTF-8,
|
205
|
+
# since Sanitize's output is always UTF-8.
|
206
|
+
when 'meta'
|
207
|
+
if node.has_attribute?('charset') &&
|
208
|
+
node['charset'].downcase != 'utf-8'
|
209
|
+
|
210
|
+
node['charset'] = 'utf-8'
|
211
|
+
end
|
212
|
+
|
213
|
+
if node.has_attribute?('http-equiv') &&
|
214
|
+
node.has_attribute?('content') &&
|
215
|
+
node['http-equiv'].downcase == 'content-type' &&
|
216
|
+
node['content'].downcase =~ /;\s*charset\s*=\s*(?!utf-8)/
|
217
|
+
|
218
|
+
node['content'] = node['content'].gsub(/;\s*charset\s*=.+\z/, ';charset=utf-8')
|
219
|
+
end
|
220
|
+
end
|
183
221
|
end
|
184
222
|
|
185
223
|
end; end; end
|
data/lib/sanitize/version.rb
CHANGED
data/lib/sanitize.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require '
|
3
|
+
require 'nokogiri'
|
4
4
|
require 'set'
|
5
5
|
|
6
6
|
require_relative 'sanitize/version'
|
@@ -19,6 +19,20 @@ require_relative 'sanitize/transformers/clean_element'
|
|
19
19
|
class Sanitize
|
20
20
|
attr_reader :config
|
21
21
|
|
22
|
+
# Matches one or more control characters that should be removed from HTML
|
23
|
+
# before parsing, as defined by the HTML living standard.
|
24
|
+
#
|
25
|
+
# - https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
26
|
+
# - https://infra.spec.whatwg.org/#control
|
27
|
+
REGEX_HTML_CONTROL_CHARACTERS = /[\u0001-\u0008\u000b\u000e-\u001f\u007f-\u009f]+/u
|
28
|
+
|
29
|
+
# Matches one or more non-characters that should be removed from HTML before
|
30
|
+
# parsing, as defined by the HTML living standard.
|
31
|
+
#
|
32
|
+
# - https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
33
|
+
# - https://infra.spec.whatwg.org/#noncharacter
|
34
|
+
REGEX_HTML_NON_CHARACTERS = /[\ufdd0-\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}]+/u
|
35
|
+
|
22
36
|
# Matches an attribute value that could be treated by a browser as a URL
|
23
37
|
# with a protocol prefix, such as "http:" or "javascript:". Any string of zero
|
24
38
|
# or more characters followed by a colon is considered a match, even if the
|
@@ -26,11 +40,12 @@ class Sanitize
|
|
26
40
|
# IE6 and Opera will still parse).
|
27
41
|
REGEX_PROTOCOL = /\A\s*([^\/#]*?)(?:\:|&#0*58|&#x0*3a)/i
|
28
42
|
|
29
|
-
# Matches
|
30
|
-
#
|
43
|
+
# Matches one or more characters that should be stripped from HTML before
|
44
|
+
# parsing. This is a combination of `REGEX_HTML_CONTROL_CHARACTERS` and
|
45
|
+
# `REGEX_HTML_NON_CHARACTERS`.
|
31
46
|
#
|
32
|
-
#
|
33
|
-
REGEX_UNSUITABLE_CHARS = /
|
47
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
48
|
+
REGEX_UNSUITABLE_CHARS = /(?:#{REGEX_HTML_CONTROL_CHARACTERS}|#{REGEX_HTML_NON_CHARACTERS})/u
|
34
49
|
|
35
50
|
#--
|
36
51
|
# Class Methods
|
@@ -39,7 +54,7 @@ class Sanitize
|
|
39
54
|
# Returns a sanitized copy of the given full _html_ document, using the
|
40
55
|
# settings in _config_ if specified.
|
41
56
|
#
|
42
|
-
# When sanitizing a document, the `<html>` element must be whitelisted or an
|
57
|
+
# When sanitizing a document, the `<html>` element must be allowlisted or an
|
43
58
|
# error will be raised. If this is undesirable, you should probably use
|
44
59
|
# {#fragment} instead.
|
45
60
|
def self.document(html, config = {})
|
@@ -102,13 +117,13 @@ class Sanitize
|
|
102
117
|
|
103
118
|
# Returns a sanitized copy of the given _html_ document.
|
104
119
|
#
|
105
|
-
# When sanitizing a document, the `<html>` element must be whitelisted or an
|
120
|
+
# When sanitizing a document, the `<html>` element must be allowlisted or an
|
106
121
|
# error will be raised. If this is undesirable, you should probably use
|
107
122
|
# {#fragment} instead.
|
108
123
|
def document(html)
|
109
124
|
return '' unless html
|
110
125
|
|
111
|
-
doc = Nokogiri::HTML5.parse(preprocess(html))
|
126
|
+
doc = Nokogiri::HTML5.parse(preprocess(html), **@config[:parser_options])
|
112
127
|
node!(doc)
|
113
128
|
to_html(doc)
|
114
129
|
end
|
@@ -120,20 +135,7 @@ class Sanitize
|
|
120
135
|
def fragment(html)
|
121
136
|
return '' unless html
|
122
137
|
|
123
|
-
|
124
|
-
doc = Nokogiri::HTML5.parse("<html><body>#{html}")
|
125
|
-
|
126
|
-
# Hack to allow fragments containing <body>. Borrowed from
|
127
|
-
# Nokogiri::HTML::DocumentFragment.
|
128
|
-
if html =~ /\A<body(?:\s|>)/i
|
129
|
-
path = '/html/body'
|
130
|
-
else
|
131
|
-
path = '/html/body/node()'
|
132
|
-
end
|
133
|
-
|
134
|
-
frag = doc.fragment
|
135
|
-
frag << doc.xpath(path)
|
136
|
-
|
138
|
+
frag = Nokogiri::HTML5.fragment(preprocess(html), **@config[:parser_options])
|
137
139
|
node!(frag)
|
138
140
|
to_html(frag)
|
139
141
|
end
|
@@ -145,20 +147,20 @@ class Sanitize
|
|
145
147
|
# in place.
|
146
148
|
#
|
147
149
|
# If _node_ is a `Nokogiri::XML::Document`, the `<html>` element must be
|
148
|
-
#
|
150
|
+
# allowlisted or an error will be raised.
|
149
151
|
def node!(node)
|
150
152
|
raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)
|
151
153
|
|
152
154
|
if node.is_a?(Nokogiri::XML::Document)
|
153
155
|
unless @config[:elements].include?('html')
|
154
|
-
raise Error, 'When sanitizing a document, "<html>" must be whitelisted.'
|
156
|
+
raise Error, 'When sanitizing a document, "<html>" must be allowlisted.'
|
155
157
|
end
|
156
158
|
end
|
157
159
|
|
158
|
-
|
160
|
+
node_allowlist = Set.new
|
159
161
|
|
160
162
|
traverse(node) do |n|
|
161
|
-
transform_node!(n,
|
163
|
+
transform_node!(n, node_allowlist)
|
162
164
|
end
|
163
165
|
|
164
166
|
node
|
@@ -184,40 +186,10 @@ class Sanitize
|
|
184
186
|
end
|
185
187
|
|
186
188
|
def to_html(node)
|
187
|
-
|
188
|
-
|
189
|
-
# Hacky workaround for a libxml2 bug that adds an undesired Content-Type
|
190
|
-
# meta tag to all serialized HTML documents.
|
191
|
-
#
|
192
|
-
# https://github.com/sparklemotion/nokogiri/issues/1008
|
193
|
-
if node.type == Nokogiri::XML::Node::DOCUMENT_NODE ||
|
194
|
-
node.type == Nokogiri::XML::Node::HTML_DOCUMENT_NODE
|
195
|
-
|
196
|
-
regex_meta = %r|(<html[^>]*>\s*<head[^>]*>\s*)<meta http-equiv="Content-Type" content="text/html; charset=utf-8">|i
|
197
|
-
|
198
|
-
# Only replace the content-type meta tag if <meta> isn't whitelisted or
|
199
|
-
# the original document didn't actually include a content-type meta tag.
|
200
|
-
replace_meta = !@config[:elements].include?('meta') ||
|
201
|
-
node.xpath('/html/head/meta[@http-equiv]').none? do |meta|
|
202
|
-
meta['http-equiv'].casecmp('content-type').zero?
|
203
|
-
end
|
204
|
-
end
|
205
|
-
|
206
|
-
so = Nokogiri::XML::Node::SaveOptions
|
207
|
-
|
208
|
-
# Serialize to HTML without any formatting to prevent Nokogiri from adding
|
209
|
-
# newlines after certain tags.
|
210
|
-
html = node.to_html(
|
211
|
-
:encoding => 'utf-8',
|
212
|
-
:indent => 0,
|
213
|
-
:save_with => so::NO_DECLARATION | so::NO_EMPTY_TAGS | so::AS_HTML
|
214
|
-
)
|
215
|
-
|
216
|
-
html.gsub!(regex_meta, '\1') if replace_meta
|
217
|
-
html
|
189
|
+
node.to_html(preserve_newline: true)
|
218
190
|
end
|
219
191
|
|
220
|
-
def transform_node!(node,
|
192
|
+
def transform_node!(node, node_allowlist)
|
221
193
|
@transformers.each do |transformer|
|
222
194
|
# Since transform_node! may be called in a tight loop to process thousands
|
223
195
|
# of items, we can optimize both memory and CPU performance by:
|
@@ -227,15 +199,19 @@ class Sanitize
|
|
227
199
|
# does merge! create a new hash, it is also 2.6x slower:
|
228
200
|
# https://github.com/JuanitoFatas/fast-ruby#hashmerge-vs-hashmerge-code
|
229
201
|
config = @transformer_config
|
230
|
-
config[:is_whitelisted] =
|
202
|
+
config[:is_allowlisted] = config[:is_whitelisted] = node_allowlist.include?(node)
|
231
203
|
config[:node] = node
|
232
204
|
config[:node_name] = node.name.downcase
|
233
|
-
config[:node_whitelist] =
|
205
|
+
config[:node_allowlist] = config[:node_whitelist] = node_allowlist
|
234
206
|
|
235
|
-
result = transformer.call(config)
|
207
|
+
result = transformer.call(**config)
|
236
208
|
|
237
|
-
if result.is_a?(Hash)
|
238
|
-
|
209
|
+
if result.is_a?(Hash)
|
210
|
+
result_allowlist = result[:node_allowlist] || result[:node_whitelist]
|
211
|
+
|
212
|
+
if result_allowlist.respond_to?(:each)
|
213
|
+
node_allowlist.merge(result_allowlist)
|
214
|
+
end
|
239
215
|
end
|
240
216
|
end
|
241
217
|
|
data/test/common.rb
CHANGED
@@ -1,34 +1,3 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
gem 'minitest'
|
3
2
|
require 'minitest/autorun'
|
4
|
-
|
5
3
|
require_relative '../lib/sanitize'
|
6
|
-
|
7
|
-
# Helper to stub an instance method. Shamelessly stolen from
|
8
|
-
# https://github.com/codeodor/minitest-stub_any_instance/
|
9
|
-
class Object
|
10
|
-
def self.stub_instance(name, value, &block)
|
11
|
-
old_method = "__stubbed_method_#{name}__"
|
12
|
-
|
13
|
-
class_eval do
|
14
|
-
alias_method old_method, name
|
15
|
-
|
16
|
-
define_method(name) do |*args|
|
17
|
-
if value.respond_to?(:call) then
|
18
|
-
value.call(*args)
|
19
|
-
else
|
20
|
-
value
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
yield
|
26
|
-
|
27
|
-
ensure
|
28
|
-
class_eval do
|
29
|
-
undef_method name
|
30
|
-
alias_method name, old_method
|
31
|
-
undef_method old_method
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
data/test/test_clean_comment.rb
CHANGED
@@ -20,7 +20,7 @@ describe 'Sanitize::Transformers::CleanComment' do
|
|
20
20
|
|
21
21
|
# Special case: the comment markup is inside a <script>, which makes it
|
22
22
|
# text content and not an actual HTML comment.
|
23
|
-
@s.fragment("<script><!-- comment --></script>").must_equal '
|
23
|
+
@s.fragment("<script><!-- comment --></script>").must_equal ''
|
24
24
|
|
25
25
|
Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script'])
|
26
26
|
.must_equal '<script><!-- comment --></script>'
|
@@ -40,10 +40,6 @@ describe 'Sanitize::Transformers::CleanComment' do
|
|
40
40
|
@s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo <!-- <!-- <!-- --> --> -->bar'
|
41
41
|
@s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
|
42
42
|
|
43
|
-
# Special case: the comment markup is inside a <script>, which makes it
|
44
|
-
# text content and not an actual HTML comment.
|
45
|
-
@s.fragment("<script><!-- comment --></script>").must_equal '<!-- comment -->'
|
46
|
-
|
47
43
|
Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script'])
|
48
44
|
.must_equal '<script><!-- comment --></script>'
|
49
45
|
end
|
data/test/test_clean_css.rb
CHANGED
@@ -13,7 +13,7 @@ describe 'Sanitize::Transformers::CSS::CleanAttribute' do
|
|
13
13
|
@s.fragment(%[
|
14
14
|
<div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
|
15
15
|
].strip).must_equal %[
|
16
|
-
<div style="color: #fff; /*
|
16
|
+
<div style="color: #fff; /* <-- evil! */"></div>
|
17
17
|
].strip
|
18
18
|
end
|
19
19
|
|
data/test/test_clean_doctype.rb
CHANGED
@@ -11,7 +11,7 @@ describe 'Sanitize::Transformers::CleanDoctype' do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'should remove doctype declarations' do
|
14
|
-
@s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html
|
14
|
+
@s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html>"
|
15
15
|
@s.fragment('<!DOCTYPE html>foo').must_equal 'foo'
|
16
16
|
end
|
17
17
|
|
@@ -34,27 +34,27 @@ describe 'Sanitize::Transformers::CleanDoctype' do
|
|
34
34
|
|
35
35
|
it 'should allow doctype declarations in documents' do
|
36
36
|
@s.document('<!DOCTYPE html><html>foo</html>')
|
37
|
-
.must_equal "<!DOCTYPE html
|
37
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
38
38
|
|
39
39
|
@s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
40
|
-
.must_equal "<!DOCTYPE html
|
40
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
41
41
|
|
42
42
|
@s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
43
|
-
.must_equal "<!DOCTYPE html
|
43
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
44
44
|
end
|
45
45
|
|
46
46
|
it 'should not allow obviously invalid doctype declarations in documents' do
|
47
47
|
@s.document('<!DOCTYPE blah blah blah><html>foo</html>')
|
48
|
-
.must_equal "<!DOCTYPE html
|
48
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
49
49
|
|
50
50
|
@s.document('<!DOCTYPE blah><html>foo</html>')
|
51
|
-
.must_equal "<!DOCTYPE html
|
51
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
52
52
|
|
53
53
|
@s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
54
|
-
.must_equal "<!DOCTYPE html
|
54
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
55
55
|
|
56
56
|
@s.document('<!whatever><html>foo</html>')
|
57
|
-
.must_equal "<html>foo</html
|
57
|
+
.must_equal "<html>foo</html>"
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'should not allow doctype definitions in fragments' do
|