sanitize 4.6.5 → 6.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +235 -16
- data/LICENSE +1 -1
- data/README.md +89 -76
- data/lib/sanitize/config/default.rb +15 -4
- data/lib/sanitize/config/relaxed.rb +1 -1
- data/lib/sanitize/css.rb +2 -2
- data/lib/sanitize/transformers/clean_comment.rb +1 -1
- data/lib/sanitize/transformers/clean_css.rb +3 -3
- data/lib/sanitize/transformers/clean_doctype.rb +1 -1
- data/lib/sanitize/transformers/clean_element.rb +105 -22
- data/lib/sanitize/version.rb +1 -1
- data/lib/sanitize.rb +53 -68
- data/test/common.rb +0 -31
- data/test/test_clean_comment.rb +16 -20
- data/test/test_clean_css.rb +6 -6
- data/test/test_clean_doctype.rb +22 -22
- data/test/test_clean_element.rb +200 -82
- data/test/test_config.rb +9 -9
- data/test/test_malicious_css.rb +7 -7
- data/test/test_malicious_html.rb +179 -32
- data/test/test_parser.rb +9 -38
- data/test/test_sanitize.rb +114 -29
- data/test/test_sanitize_css.rb +88 -61
- data/test/test_transformers.rb +52 -46
- metadata +17 -33
- data/test/test_unicode.rb +0 -95
data/lib/sanitize.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require 'nokogumbo'
|
3
|
+
require 'nokogiri'
|
4
4
|
require 'set'
|
5
5
|
|
6
6
|
require_relative 'sanitize/version'
|
@@ -19,6 +19,20 @@ require_relative 'sanitize/transformers/clean_element'
|
|
19
19
|
class Sanitize
|
20
20
|
attr_reader :config
|
21
21
|
|
22
|
+
# Matches one or more control characters that should be removed from HTML
|
23
|
+
# before parsing, as defined by the HTML living standard.
|
24
|
+
#
|
25
|
+
# - https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
26
|
+
# - https://infra.spec.whatwg.org/#control
|
27
|
+
REGEX_HTML_CONTROL_CHARACTERS = /[\u0001-\u0008\u000b\u000e-\u001f\u007f-\u009f]+/u
|
28
|
+
|
29
|
+
# Matches one or more non-characters that should be removed from HTML before
|
30
|
+
# parsing, as defined by the HTML living standard.
|
31
|
+
#
|
32
|
+
# - https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
33
|
+
# - https://infra.spec.whatwg.org/#noncharacter
|
34
|
+
REGEX_HTML_NON_CHARACTERS = /[\ufdd0-\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}]+/u
|
35
|
+
|
22
36
|
# Matches an attribute value that could be treated by a browser as a URL
|
23
37
|
# with a protocol prefix, such as "http:" or "javascript:". Any string of zero
|
24
38
|
# or more characters followed by a colon is considered a match, even if the
|
@@ -26,11 +40,12 @@ class Sanitize
|
|
26
40
|
# IE6 and Opera will still parse).
|
27
41
|
REGEX_PROTOCOL = /\A\s*([^\/#]*?)(?:\:|&#0*58|&#x0*3a)/i
|
28
42
|
|
29
|
-
# Matches
|
30
|
-
#
|
43
|
+
# Matches one or more characters that should be stripped from HTML before
|
44
|
+
# parsing. This is a combination of `REGEX_HTML_CONTROL_CHARACTERS` and
|
45
|
+
# `REGEX_HTML_NON_CHARACTERS`.
|
31
46
|
#
|
32
|
-
#
|
33
|
-
REGEX_UNSUITABLE_CHARS = /
|
47
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
48
|
+
REGEX_UNSUITABLE_CHARS = /(?:#{REGEX_HTML_CONTROL_CHARACTERS}|#{REGEX_HTML_NON_CHARACTERS})/u
|
34
49
|
|
35
50
|
#--
|
36
51
|
# Class Methods
|
@@ -39,7 +54,7 @@ class Sanitize
|
|
39
54
|
# Returns a sanitized copy of the given full _html_ document, using the
|
40
55
|
# settings in _config_ if specified.
|
41
56
|
#
|
42
|
-
# When sanitizing a document, the `<html>` element must be whitelisted or an
|
57
|
+
# When sanitizing a document, the `<html>` element must be allowlisted or an
|
43
58
|
# error will be raised. If this is undesirable, you should probably use
|
44
59
|
# {#fragment} instead.
|
45
60
|
def self.document(html, config = {})
|
@@ -96,17 +111,19 @@ class Sanitize
|
|
96
111
|
|
97
112
|
@transformers << Transformers::CleanDoctype
|
98
113
|
@transformers << Transformers::CleanCDATA
|
114
|
+
|
115
|
+
@transformer_config = { config: @config }
|
99
116
|
end
|
100
117
|
|
101
118
|
# Returns a sanitized copy of the given _html_ document.
|
102
119
|
#
|
103
|
-
# When sanitizing a document, the `<html>` element must be whitelisted or an
|
120
|
+
# When sanitizing a document, the `<html>` element must be allowlisted or an
|
104
121
|
# error will be raised. If this is undesirable, you should probably use
|
105
122
|
# {#fragment} instead.
|
106
123
|
def document(html)
|
107
124
|
return '' unless html
|
108
125
|
|
109
|
-
doc = Nokogiri::HTML5.parse(preprocess(html))
|
126
|
+
doc = Nokogiri::HTML5.parse(preprocess(html), **@config[:parser_options])
|
110
127
|
node!(doc)
|
111
128
|
to_html(doc)
|
112
129
|
end
|
@@ -118,20 +135,7 @@ class Sanitize
|
|
118
135
|
def fragment(html)
|
119
136
|
return '' unless html
|
120
137
|
|
121
|
-
|
122
|
-
doc = Nokogiri::HTML5.parse("<html><body>#{html}")
|
123
|
-
|
124
|
-
# Hack to allow fragments containing <body>. Borrowed from
|
125
|
-
# Nokogiri::HTML::DocumentFragment.
|
126
|
-
if html =~ /\A<body(?:\s|>)/i
|
127
|
-
path = '/html/body'
|
128
|
-
else
|
129
|
-
path = '/html/body/node()'
|
130
|
-
end
|
131
|
-
|
132
|
-
frag = doc.fragment
|
133
|
-
frag << doc.xpath(path)
|
134
|
-
|
138
|
+
frag = Nokogiri::HTML5.fragment(preprocess(html), **@config[:parser_options])
|
135
139
|
node!(frag)
|
136
140
|
to_html(frag)
|
137
141
|
end
|
@@ -143,20 +147,20 @@ class Sanitize
|
|
143
147
|
# in place.
|
144
148
|
#
|
145
149
|
# If _node_ is a `Nokogiri::XML::Document`, the `<html>` element must be
|
146
|
-
#
|
150
|
+
# allowlisted or an error will be raised.
|
147
151
|
def node!(node)
|
148
152
|
raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)
|
149
153
|
|
150
154
|
if node.is_a?(Nokogiri::XML::Document)
|
151
155
|
unless @config[:elements].include?('html')
|
152
|
-
raise Error, 'When sanitizing a document, "<html>" must be whitelisted.'
|
156
|
+
raise Error, 'When sanitizing a document, "<html>" must be allowlisted.'
|
153
157
|
end
|
154
158
|
end
|
155
159
|
|
156
|
-
|
160
|
+
node_allowlist = Set.new
|
157
161
|
|
158
162
|
traverse(node) do |n|
|
159
|
-
transform_node!(n,
|
163
|
+
transform_node!(n, node_allowlist)
|
160
164
|
end
|
161
165
|
|
162
166
|
node
|
@@ -182,51 +186,32 @@ class Sanitize
|
|
182
186
|
end
|
183
187
|
|
184
188
|
def to_html(node)
|
185
|
-
|
186
|
-
|
187
|
-
# Hacky workaround for a libxml2 bug that adds an undesired Content-Type
|
188
|
-
# meta tag to all serialized HTML documents.
|
189
|
-
#
|
190
|
-
# https://github.com/sparklemotion/nokogiri/issues/1008
|
191
|
-
if node.type == Nokogiri::XML::Node::DOCUMENT_NODE ||
|
192
|
-
node.type == Nokogiri::XML::Node::HTML_DOCUMENT_NODE
|
193
|
-
|
194
|
-
regex_meta = %r|(<html[^>]*>\s*<head[^>]*>\s*)<meta http-equiv="Content-Type" content="text/html; charset=utf-8">|i
|
195
|
-
|
196
|
-
# Only replace the content-type meta tag if <meta> isn't whitelisted or
|
197
|
-
# the original document didn't actually include a content-type meta tag.
|
198
|
-
replace_meta = !@config[:elements].include?('meta') ||
|
199
|
-
node.xpath('/html/head/meta[@http-equiv]').none? do |meta|
|
200
|
-
meta['http-equiv'].casecmp('content-type').zero?
|
201
|
-
end
|
202
|
-
end
|
203
|
-
|
204
|
-
so = Nokogiri::XML::Node::SaveOptions
|
205
|
-
|
206
|
-
# Serialize to HTML without any formatting to prevent Nokogiri from adding
|
207
|
-
# newlines after certain tags.
|
208
|
-
html = node.to_html(
|
209
|
-
:encoding => 'utf-8',
|
210
|
-
:indent => 0,
|
211
|
-
:save_with => so::NO_DECLARATION | so::NO_EMPTY_TAGS | so::AS_HTML
|
212
|
-
)
|
213
|
-
|
214
|
-
html.gsub!(regex_meta, '\1') if replace_meta
|
215
|
-
html
|
189
|
+
node.to_html(preserve_newline: true)
|
216
190
|
end
|
217
191
|
|
218
|
-
def transform_node!(node,
|
192
|
+
def transform_node!(node, node_allowlist)
|
219
193
|
@transformers.each do |transformer|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
194
|
+
# Since transform_node! may be called in a tight loop to process thousands
|
195
|
+
# of items, we can optimize both memory and CPU performance by:
|
196
|
+
#
|
197
|
+
# 1. Reusing the same config hash for each transformer
|
198
|
+
# 2. Directly assigning values to hash instead of using merge!. Not only
|
199
|
+
# does merge! create a new hash, it is also 2.6x slower:
|
200
|
+
# https://github.com/JuanitoFatas/fast-ruby#hashmerge-vs-hashmerge-code
|
201
|
+
config = @transformer_config
|
202
|
+
config[:is_allowlisted] = config[:is_whitelisted] = node_allowlist.include?(node)
|
203
|
+
config[:node] = node
|
204
|
+
config[:node_name] = node.name.downcase
|
205
|
+
config[:node_allowlist] = config[:node_whitelist] = node_allowlist
|
206
|
+
|
207
|
+
result = transformer.call(**config)
|
208
|
+
|
209
|
+
if result.is_a?(Hash)
|
210
|
+
result_allowlist = result[:node_allowlist] || result[:node_whitelist]
|
211
|
+
|
212
|
+
if result_allowlist.respond_to?(:each)
|
213
|
+
node_allowlist.merge(result_allowlist)
|
214
|
+
end
|
230
215
|
end
|
231
216
|
end
|
232
217
|
|
data/test/common.rb
CHANGED
@@ -1,34 +1,3 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
gem 'minitest'
|
3
2
|
require 'minitest/autorun'
|
4
|
-
|
5
3
|
require_relative '../lib/sanitize'
|
6
|
-
|
7
|
-
# Helper to stub an instance method. Shamelessly stolen from
|
8
|
-
# https://github.com/codeodor/minitest-stub_any_instance/
|
9
|
-
class Object
|
10
|
-
def self.stub_instance(name, value, &block)
|
11
|
-
old_method = "__stubbed_method_#{name}__"
|
12
|
-
|
13
|
-
class_eval do
|
14
|
-
alias_method old_method, name
|
15
|
-
|
16
|
-
define_method(name) do |*args|
|
17
|
-
if value.respond_to?(:call) then
|
18
|
-
value.call(*args)
|
19
|
-
else
|
20
|
-
value
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
yield
|
26
|
-
|
27
|
-
ensure
|
28
|
-
class_eval do
|
29
|
-
undef_method name
|
30
|
-
alias_method name, old_method
|
31
|
-
undef_method old_method
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
data/test/test_clean_comment.rb
CHANGED
@@ -11,18 +11,18 @@ describe 'Sanitize::Transformers::CleanComment' do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'should remove comments' do
|
14
|
-
@s.fragment('foo <!-- comment --> bar').must_equal 'foo bar'
|
15
|
-
@s.fragment('foo <!-- ').must_equal 'foo '
|
16
|
-
@s.fragment('foo <!-- - -> bar').must_equal 'foo '
|
17
|
-
@s.fragment("foo <!--\n\n\n\n-->bar").must_equal 'foo bar'
|
18
|
-
@s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo --> -->bar'
|
19
|
-
@s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
|
14
|
+
_(@s.fragment('foo <!-- comment --> bar')).must_equal 'foo bar'
|
15
|
+
_(@s.fragment('foo <!-- ')).must_equal 'foo '
|
16
|
+
_(@s.fragment('foo <!-- - -> bar')).must_equal 'foo '
|
17
|
+
_(@s.fragment("foo <!--\n\n\n\n-->bar")).must_equal 'foo bar'
|
18
|
+
_(@s.fragment("foo <!-- <!-- <!-- --> --> -->bar")).must_equal 'foo --> -->bar'
|
19
|
+
_(@s.fragment("foo <div <!-- comment -->>bar</div>")).must_equal 'foo <div>>bar</div>'
|
20
20
|
|
21
21
|
# Special case: the comment markup is inside a <script>, which makes it
|
22
22
|
# text content and not an actual HTML comment.
|
23
|
-
@s.fragment("<script><!-- comment --></script>").must_equal '
|
23
|
+
_(@s.fragment("<script><!-- comment --></script>")).must_equal ''
|
24
24
|
|
25
|
-
Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script'])
|
25
|
+
_(Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script']))
|
26
26
|
.must_equal '<script><!-- comment --></script>'
|
27
27
|
end
|
28
28
|
end
|
@@ -33,18 +33,14 @@ describe 'Sanitize::Transformers::CleanComment' do
|
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'should allow comments' do
|
36
|
-
@s.fragment('foo <!-- comment --> bar').must_equal 'foo <!-- comment --> bar'
|
37
|
-
@s.fragment('foo <!-- ').must_equal 'foo <!-- -->'
|
38
|
-
@s.fragment('foo <!-- - -> bar').must_equal 'foo <!-- - -> bar-->'
|
39
|
-
@s.fragment("foo <!--\n\n\n\n-->bar").must_equal "foo <!--\n\n\n\n-->bar"
|
40
|
-
@s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo <!-- <!-- <!-- --> --> -->bar'
|
41
|
-
@s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
|
42
|
-
|
43
|
-
|
44
|
-
# text content and not an actual HTML comment.
|
45
|
-
@s.fragment("<script><!-- comment --></script>").must_equal '<!-- comment -->'
|
46
|
-
|
47
|
-
Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script'])
|
36
|
+
_(@s.fragment('foo <!-- comment --> bar')).must_equal 'foo <!-- comment --> bar'
|
37
|
+
_(@s.fragment('foo <!-- ')).must_equal 'foo <!-- -->'
|
38
|
+
_(@s.fragment('foo <!-- - -> bar')).must_equal 'foo <!-- - -> bar-->'
|
39
|
+
_(@s.fragment("foo <!--\n\n\n\n-->bar")).must_equal "foo <!--\n\n\n\n-->bar"
|
40
|
+
_(@s.fragment("foo <!-- <!-- <!-- --> --> -->bar")).must_equal 'foo <!-- <!-- <!-- --> --> -->bar'
|
41
|
+
_(@s.fragment("foo <div <!-- comment -->>bar</div>")).must_equal 'foo <div>>bar</div>'
|
42
|
+
|
43
|
+
_(Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script']))
|
48
44
|
.must_equal '<script><!-- comment --></script>'
|
49
45
|
end
|
50
46
|
end
|
data/test/test_clean_css.rb
CHANGED
@@ -10,15 +10,15 @@ describe 'Sanitize::Transformers::CSS::CleanAttribute' do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
it 'should sanitize CSS properties in style attributes' do
|
13
|
-
@s.fragment(%[
|
13
|
+
_(@s.fragment(%[
|
14
14
|
<div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
|
15
|
-
].strip).must_equal %[
|
16
|
-
<div style="color: #fff; /*
|
15
|
+
].strip)).must_equal %[
|
16
|
+
<div style="color: #fff; /* <-- evil! */"></div>
|
17
17
|
].strip
|
18
18
|
end
|
19
19
|
|
20
20
|
it 'should remove the style attribute if the sanitized CSS is empty' do
|
21
|
-
@s.fragment('<div style="width: expression(alert(1))"></div>').
|
21
|
+
_(@s.fragment('<div style="width: expression(alert(1))"></div>')).
|
22
22
|
must_equal '<div></div>'
|
23
23
|
end
|
24
24
|
end
|
@@ -46,7 +46,7 @@ describe 'Sanitize::Transformers::CSS::CleanElement' do
|
|
46
46
|
</style>
|
47
47
|
].strip
|
48
48
|
|
49
|
-
@s.fragment(html).must_equal %[
|
49
|
+
_(@s.fragment(html)).must_equal %[
|
50
50
|
<style>
|
51
51
|
/* Yay CSS! */
|
52
52
|
.foo { color: #fff; }
|
@@ -62,6 +62,6 @@ describe 'Sanitize::Transformers::CSS::CleanElement' do
|
|
62
62
|
end
|
63
63
|
|
64
64
|
it 'should remove the <style> element if the sanitized CSS is empty' do
|
65
|
-
@s.fragment('<style></style>').must_equal ''
|
65
|
+
_(@s.fragment('<style></style>')).must_equal ''
|
66
66
|
end
|
67
67
|
end
|
data/test/test_clean_doctype.rb
CHANGED
@@ -11,18 +11,18 @@ describe 'Sanitize::Transformers::CleanDoctype' do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'should remove doctype declarations' do
|
14
|
-
@s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html
|
15
|
-
@s.fragment('<!DOCTYPE html>foo').must_equal 'foo'
|
14
|
+
_(@s.document('<!DOCTYPE html><html>foo</html>')).must_equal "<html>foo</html>"
|
15
|
+
_(@s.fragment('<!DOCTYPE html>foo')).must_equal 'foo'
|
16
16
|
end
|
17
17
|
|
18
18
|
it 'should not allow doctype definitions in fragments' do
|
19
|
-
@s.fragment('<!DOCTYPE html><html>foo</html>')
|
19
|
+
_(@s.fragment('<!DOCTYPE html><html>foo</html>'))
|
20
20
|
.must_equal "foo"
|
21
21
|
|
22
|
-
@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
22
|
+
_(@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
23
23
|
.must_equal "foo"
|
24
24
|
|
25
|
-
@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
25
|
+
_(@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
|
26
26
|
.must_equal "foo"
|
27
27
|
end
|
28
28
|
end
|
@@ -33,38 +33,38 @@ describe 'Sanitize::Transformers::CleanDoctype' do
|
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'should allow doctype declarations in documents' do
|
36
|
-
@s.document('<!DOCTYPE html><html>foo</html>')
|
37
|
-
.must_equal "<!DOCTYPE html
|
36
|
+
_(@s.document('<!DOCTYPE html><html>foo</html>'))
|
37
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
38
38
|
|
39
|
-
@s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
40
|
-
.must_equal "<!DOCTYPE html
|
39
|
+
_(@s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
40
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
41
41
|
|
42
|
-
@s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
43
|
-
.must_equal "<!DOCTYPE html
|
42
|
+
_(@s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
|
43
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
44
44
|
end
|
45
45
|
|
46
46
|
it 'should not allow obviously invalid doctype declarations in documents' do
|
47
|
-
@s.document('<!DOCTYPE blah blah blah><html>foo</html>')
|
48
|
-
.must_equal "<!DOCTYPE html
|
47
|
+
_(@s.document('<!DOCTYPE blah blah blah><html>foo</html>'))
|
48
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
49
49
|
|
50
|
-
@s.document('<!DOCTYPE blah><html>foo</html>')
|
51
|
-
.must_equal "<!DOCTYPE html
|
50
|
+
_(@s.document('<!DOCTYPE blah><html>foo</html>'))
|
51
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
52
52
|
|
53
|
-
@s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
54
|
-
.must_equal "<!DOCTYPE html
|
53
|
+
_(@s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
54
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
55
55
|
|
56
|
-
@s.document('<!whatever><html>foo</html>')
|
57
|
-
.must_equal "<html>foo</html
|
56
|
+
_(@s.document('<!whatever><html>foo</html>'))
|
57
|
+
.must_equal "<html>foo</html>"
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'should not allow doctype definitions in fragments' do
|
61
|
-
@s.fragment('<!DOCTYPE html><html>foo</html>')
|
61
|
+
_(@s.fragment('<!DOCTYPE html><html>foo</html>'))
|
62
62
|
.must_equal "foo"
|
63
63
|
|
64
|
-
@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
64
|
+
_(@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
65
65
|
.must_equal "foo"
|
66
66
|
|
67
|
-
@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
67
|
+
_(@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
|
68
68
|
.must_equal "foo"
|
69
69
|
end
|
70
70
|
end
|