sanitize 4.6.4 → 6.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +259 -16
- data/LICENSE +1 -1
- data/README.md +89 -76
- data/lib/sanitize/config/default.rb +15 -4
- data/lib/sanitize/config/relaxed.rb +1 -1
- data/lib/sanitize/css.rb +2 -2
- data/lib/sanitize/transformers/clean_comment.rb +1 -1
- data/lib/sanitize/transformers/clean_css.rb +4 -3
- data/lib/sanitize/transformers/clean_doctype.rb +1 -1
- data/lib/sanitize/transformers/clean_element.rb +105 -22
- data/lib/sanitize/version.rb +1 -3
- data/lib/sanitize.rb +56 -72
- data/test/common.rb +0 -31
- data/test/test_clean_comment.rb +16 -20
- data/test/test_clean_css.rb +6 -6
- data/test/test_clean_doctype.rb +22 -22
- data/test/test_clean_element.rb +200 -82
- data/test/test_config.rb +9 -9
- data/test/test_malicious_css.rb +20 -7
- data/test/test_malicious_html.rb +179 -32
- data/test/test_parser.rb +9 -38
- data/test/test_sanitize.rb +114 -29
- data/test/test_sanitize_css.rb +88 -61
- data/test/test_transformers.rb +52 -46
- metadata +17 -33
- data/test/test_unicode.rb +0 -95
data/lib/sanitize.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require 'nokogumbo'
|
3
|
+
require 'nokogiri'
|
4
4
|
require 'set'
|
5
5
|
|
6
6
|
require_relative 'sanitize/version'
|
@@ -19,6 +19,20 @@ require_relative 'sanitize/transformers/clean_element'
|
|
19
19
|
class Sanitize
|
20
20
|
attr_reader :config
|
21
21
|
|
22
|
+
# Matches one or more control characters that should be removed from HTML
|
23
|
+
# before parsing, as defined by the HTML living standard.
|
24
|
+
#
|
25
|
+
# - https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
26
|
+
# - https://infra.spec.whatwg.org/#control
|
27
|
+
REGEX_HTML_CONTROL_CHARACTERS = /[\u0001-\u0008\u000b\u000e-\u001f\u007f-\u009f]+/u
|
28
|
+
|
29
|
+
# Matches one or more non-characters that should be removed from HTML before
|
30
|
+
# parsing, as defined by the HTML living standard.
|
31
|
+
#
|
32
|
+
# - https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
33
|
+
# - https://infra.spec.whatwg.org/#noncharacter
|
34
|
+
REGEX_HTML_NON_CHARACTERS = /[\ufdd0-\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}]+/u
|
35
|
+
|
22
36
|
# Matches an attribute value that could be treated by a browser as a URL
|
23
37
|
# with a protocol prefix, such as "http:" or "javascript:". Any string of zero
|
24
38
|
# or more characters followed by a colon is considered a match, even if the
|
@@ -26,11 +40,12 @@ class Sanitize
|
|
26
40
|
# IE6 and Opera will still parse).
|
27
41
|
REGEX_PROTOCOL = /\A\s*([^\/#]*?)(?:\:|&#0*58|&#x0*3a)/i
|
28
42
|
|
29
|
-
# Matches
|
30
|
-
#
|
43
|
+
# Matches one or more characters that should be stripped from HTML before
|
44
|
+
# parsing. This is a combination of `REGEX_HTML_CONTROL_CHARACTERS` and
|
45
|
+
# `REGEX_HTML_NON_CHARACTERS`.
|
31
46
|
#
|
32
|
-
#
|
33
|
-
REGEX_UNSUITABLE_CHARS = /
|
47
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
48
|
+
REGEX_UNSUITABLE_CHARS = /(?:#{REGEX_HTML_CONTROL_CHARACTERS}|#{REGEX_HTML_NON_CHARACTERS})/u
|
34
49
|
|
35
50
|
#--
|
36
51
|
# Class Methods
|
@@ -39,7 +54,7 @@ class Sanitize
|
|
39
54
|
# Returns a sanitized copy of the given full _html_ document, using the
|
40
55
|
# settings in _config_ if specified.
|
41
56
|
#
|
42
|
-
# When sanitizing a document, the `<html>` element must be whitelisted or an
|
57
|
+
# When sanitizing a document, the `<html>` element must be allowlisted or an
|
43
58
|
# error will be raised. If this is undesirable, you should probably use
|
44
59
|
# {#fragment} instead.
|
45
60
|
def self.document(html, config = {})
|
@@ -81,6 +96,7 @@ class Sanitize
|
|
81
96
|
|
82
97
|
# Default transformers always run at the end of the chain, after any custom
|
83
98
|
# transformers.
|
99
|
+
@transformers << Transformers::CleanElement.new(@config)
|
84
100
|
@transformers << Transformers::CleanComment unless @config[:allow_comments]
|
85
101
|
|
86
102
|
if @config[:elements].include?('style')
|
@@ -93,21 +109,21 @@ class Sanitize
|
|
93
109
|
@transformers << Transformers::CSS::CleanAttribute.new(scss)
|
94
110
|
end
|
95
111
|
|
96
|
-
@transformers <<
|
97
|
-
|
98
|
-
|
99
|
-
|
112
|
+
@transformers << Transformers::CleanDoctype
|
113
|
+
@transformers << Transformers::CleanCDATA
|
114
|
+
|
115
|
+
@transformer_config = { config: @config }
|
100
116
|
end
|
101
117
|
|
102
118
|
# Returns a sanitized copy of the given _html_ document.
|
103
119
|
#
|
104
|
-
# When sanitizing a document, the `<html>` element must be whitelisted or an
|
120
|
+
# When sanitizing a document, the `<html>` element must be allowlisted or an
|
105
121
|
# error will be raised. If this is undesirable, you should probably use
|
106
122
|
# {#fragment} instead.
|
107
123
|
def document(html)
|
108
124
|
return '' unless html
|
109
125
|
|
110
|
-
doc = Nokogiri::HTML5.parse(preprocess(html))
|
126
|
+
doc = Nokogiri::HTML5.parse(preprocess(html), **@config[:parser_options])
|
111
127
|
node!(doc)
|
112
128
|
to_html(doc)
|
113
129
|
end
|
@@ -119,20 +135,7 @@ class Sanitize
|
|
119
135
|
def fragment(html)
|
120
136
|
return '' unless html
|
121
137
|
|
122
|
-
|
123
|
-
doc = Nokogiri::HTML5.parse("<html><body>#{html}")
|
124
|
-
|
125
|
-
# Hack to allow fragments containing <body>. Borrowed from
|
126
|
-
# Nokogiri::HTML::DocumentFragment.
|
127
|
-
if html =~ /\A<body(?:\s|>)/i
|
128
|
-
path = '/html/body'
|
129
|
-
else
|
130
|
-
path = '/html/body/node()'
|
131
|
-
end
|
132
|
-
|
133
|
-
frag = doc.fragment
|
134
|
-
frag << doc.xpath(path)
|
135
|
-
|
138
|
+
frag = Nokogiri::HTML5.fragment(preprocess(html), **@config[:parser_options])
|
136
139
|
node!(frag)
|
137
140
|
to_html(frag)
|
138
141
|
end
|
@@ -144,20 +147,20 @@ class Sanitize
|
|
144
147
|
# in place.
|
145
148
|
#
|
146
149
|
# If _node_ is a `Nokogiri::XML::Document`, the `<html>` element must be
|
147
|
-
#
|
150
|
+
# allowlisted or an error will be raised.
|
148
151
|
def node!(node)
|
149
152
|
raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)
|
150
153
|
|
151
154
|
if node.is_a?(Nokogiri::XML::Document)
|
152
155
|
unless @config[:elements].include?('html')
|
153
|
-
raise Error, 'When sanitizing a document, "<html>" must be whitelisted.'
|
156
|
+
raise Error, 'When sanitizing a document, "<html>" must be allowlisted.'
|
154
157
|
end
|
155
158
|
end
|
156
159
|
|
157
|
-
|
160
|
+
node_allowlist = Set.new
|
158
161
|
|
159
162
|
traverse(node) do |n|
|
160
|
-
transform_node!(n,
|
163
|
+
transform_node!(n, node_allowlist)
|
161
164
|
end
|
162
165
|
|
163
166
|
node
|
@@ -183,51 +186,32 @@ class Sanitize
|
|
183
186
|
end
|
184
187
|
|
185
188
|
def to_html(node)
|
186
|
-
|
187
|
-
|
188
|
-
# Hacky workaround for a libxml2 bug that adds an undesired Content-Type
|
189
|
-
# meta tag to all serialized HTML documents.
|
190
|
-
#
|
191
|
-
# https://github.com/sparklemotion/nokogiri/issues/1008
|
192
|
-
if node.type == Nokogiri::XML::Node::DOCUMENT_NODE ||
|
193
|
-
node.type == Nokogiri::XML::Node::HTML_DOCUMENT_NODE
|
194
|
-
|
195
|
-
regex_meta = %r|(<html[^>]*>\s*<head[^>]*>\s*)<meta http-equiv="Content-Type" content="text/html; charset=utf-8">|i
|
196
|
-
|
197
|
-
# Only replace the content-type meta tag if <meta> isn't whitelisted or
|
198
|
-
# the original document didn't actually include a content-type meta tag.
|
199
|
-
replace_meta = !@config[:elements].include?('meta') ||
|
200
|
-
node.xpath('/html/head/meta[@http-equiv]').none? do |meta|
|
201
|
-
meta['http-equiv'].casecmp('content-type').zero?
|
202
|
-
end
|
203
|
-
end
|
204
|
-
|
205
|
-
so = Nokogiri::XML::Node::SaveOptions
|
206
|
-
|
207
|
-
# Serialize to HTML without any formatting to prevent Nokogiri from adding
|
208
|
-
# newlines after certain tags.
|
209
|
-
html = node.to_html(
|
210
|
-
:encoding => 'utf-8',
|
211
|
-
:indent => 0,
|
212
|
-
:save_with => so::NO_DECLARATION | so::NO_EMPTY_TAGS | so::AS_HTML
|
213
|
-
)
|
214
|
-
|
215
|
-
html.gsub!(regex_meta, '\1') if replace_meta
|
216
|
-
html
|
189
|
+
node.to_html(preserve_newline: true)
|
217
190
|
end
|
218
191
|
|
219
|
-
def transform_node!(node,
|
192
|
+
def transform_node!(node, node_allowlist)
|
220
193
|
@transformers.each do |transformer|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
194
|
+
# Since transform_node! may be called in a tight loop to process thousands
|
195
|
+
# of items, we can optimize both memory and CPU performance by:
|
196
|
+
#
|
197
|
+
# 1. Reusing the same config hash for each transformer
|
198
|
+
# 2. Directly assigning values to hash instead of using merge!. Not only
|
199
|
+
# does merge! create a new hash, it is also 2.6x slower:
|
200
|
+
# https://github.com/JuanitoFatas/fast-ruby#hashmerge-vs-hashmerge-code
|
201
|
+
config = @transformer_config
|
202
|
+
config[:is_allowlisted] = config[:is_whitelisted] = node_allowlist.include?(node)
|
203
|
+
config[:node] = node
|
204
|
+
config[:node_name] = node.name.downcase
|
205
|
+
config[:node_allowlist] = config[:node_whitelist] = node_allowlist
|
206
|
+
|
207
|
+
result = transformer.call(**config)
|
208
|
+
|
209
|
+
if result.is_a?(Hash)
|
210
|
+
result_allowlist = result[:node_allowlist] || result[:node_whitelist]
|
211
|
+
|
212
|
+
if result_allowlist.respond_to?(:each)
|
213
|
+
node_allowlist.merge(result_allowlist)
|
214
|
+
end
|
231
215
|
end
|
232
216
|
end
|
233
217
|
|
data/test/common.rb
CHANGED
@@ -1,34 +1,3 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
gem 'minitest'
|
3
2
|
require 'minitest/autorun'
|
4
|
-
|
5
3
|
require_relative '../lib/sanitize'
|
6
|
-
|
7
|
-
# Helper to stub an instance method. Shamelessly stolen from
|
8
|
-
# https://github.com/codeodor/minitest-stub_any_instance/
|
9
|
-
class Object
|
10
|
-
def self.stub_instance(name, value, &block)
|
11
|
-
old_method = "__stubbed_method_#{name}__"
|
12
|
-
|
13
|
-
class_eval do
|
14
|
-
alias_method old_method, name
|
15
|
-
|
16
|
-
define_method(name) do |*args|
|
17
|
-
if value.respond_to?(:call) then
|
18
|
-
value.call(*args)
|
19
|
-
else
|
20
|
-
value
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
yield
|
26
|
-
|
27
|
-
ensure
|
28
|
-
class_eval do
|
29
|
-
undef_method name
|
30
|
-
alias_method name, old_method
|
31
|
-
undef_method old_method
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
data/test/test_clean_comment.rb
CHANGED
@@ -11,18 +11,18 @@ describe 'Sanitize::Transformers::CleanComment' do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'should remove comments' do
|
14
|
-
@s.fragment('foo <!-- comment --> bar').must_equal 'foo bar'
|
15
|
-
@s.fragment('foo <!-- ').must_equal 'foo '
|
16
|
-
@s.fragment('foo <!-- - -> bar').must_equal 'foo '
|
17
|
-
@s.fragment("foo <!--\n\n\n\n-->bar").must_equal 'foo bar'
|
18
|
-
@s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo --> -->bar'
|
19
|
-
@s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
|
14
|
+
_(@s.fragment('foo <!-- comment --> bar')).must_equal 'foo bar'
|
15
|
+
_(@s.fragment('foo <!-- ')).must_equal 'foo '
|
16
|
+
_(@s.fragment('foo <!-- - -> bar')).must_equal 'foo '
|
17
|
+
_(@s.fragment("foo <!--\n\n\n\n-->bar")).must_equal 'foo bar'
|
18
|
+
_(@s.fragment("foo <!-- <!-- <!-- --> --> -->bar")).must_equal 'foo --> -->bar'
|
19
|
+
_(@s.fragment("foo <div <!-- comment -->>bar</div>")).must_equal 'foo <div>>bar</div>'
|
20
20
|
|
21
21
|
# Special case: the comment markup is inside a <script>, which makes it
|
22
22
|
# text content and not an actual HTML comment.
|
23
|
-
@s.fragment("<script><!-- comment --></script>").must_equal '
|
23
|
+
_(@s.fragment("<script><!-- comment --></script>")).must_equal ''
|
24
24
|
|
25
|
-
Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script'])
|
25
|
+
_(Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script']))
|
26
26
|
.must_equal '<script><!-- comment --></script>'
|
27
27
|
end
|
28
28
|
end
|
@@ -33,18 +33,14 @@ describe 'Sanitize::Transformers::CleanComment' do
|
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'should allow comments' do
|
36
|
-
@s.fragment('foo <!-- comment --> bar').must_equal 'foo <!-- comment --> bar'
|
37
|
-
@s.fragment('foo <!-- ').must_equal 'foo <!-- -->'
|
38
|
-
@s.fragment('foo <!-- - -> bar').must_equal 'foo <!-- - -> bar-->'
|
39
|
-
@s.fragment("foo <!--\n\n\n\n-->bar").must_equal "foo <!--\n\n\n\n-->bar"
|
40
|
-
@s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo <!-- <!-- <!-- --> --> -->bar'
|
41
|
-
@s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
|
42
|
-
|
43
|
-
|
44
|
-
# text content and not an actual HTML comment.
|
45
|
-
@s.fragment("<script><!-- comment --></script>").must_equal '<!-- comment -->'
|
46
|
-
|
47
|
-
Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script'])
|
36
|
+
_(@s.fragment('foo <!-- comment --> bar')).must_equal 'foo <!-- comment --> bar'
|
37
|
+
_(@s.fragment('foo <!-- ')).must_equal 'foo <!-- -->'
|
38
|
+
_(@s.fragment('foo <!-- - -> bar')).must_equal 'foo <!-- - -> bar-->'
|
39
|
+
_(@s.fragment("foo <!--\n\n\n\n-->bar")).must_equal "foo <!--\n\n\n\n-->bar"
|
40
|
+
_(@s.fragment("foo <!-- <!-- <!-- --> --> -->bar")).must_equal 'foo <!-- <!-- <!-- --> --> -->bar'
|
41
|
+
_(@s.fragment("foo <div <!-- comment -->>bar</div>")).must_equal 'foo <div>>bar</div>'
|
42
|
+
|
43
|
+
_(Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script']))
|
48
44
|
.must_equal '<script><!-- comment --></script>'
|
49
45
|
end
|
50
46
|
end
|
data/test/test_clean_css.rb
CHANGED
@@ -10,15 +10,15 @@ describe 'Sanitize::Transformers::CSS::CleanAttribute' do
|
|
10
10
|
end
|
11
11
|
|
12
12
|
it 'should sanitize CSS properties in style attributes' do
|
13
|
-
@s.fragment(%[
|
13
|
+
_(@s.fragment(%[
|
14
14
|
<div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
|
15
|
-
].strip).must_equal %[
|
16
|
-
<div style="color: #fff; /*
|
15
|
+
].strip)).must_equal %[
|
16
|
+
<div style="color: #fff; /* <-- evil! */"></div>
|
17
17
|
].strip
|
18
18
|
end
|
19
19
|
|
20
20
|
it 'should remove the style attribute if the sanitized CSS is empty' do
|
21
|
-
@s.fragment('<div style="width: expression(alert(1))"></div>').
|
21
|
+
_(@s.fragment('<div style="width: expression(alert(1))"></div>')).
|
22
22
|
must_equal '<div></div>'
|
23
23
|
end
|
24
24
|
end
|
@@ -46,7 +46,7 @@ describe 'Sanitize::Transformers::CSS::CleanElement' do
|
|
46
46
|
</style>
|
47
47
|
].strip
|
48
48
|
|
49
|
-
@s.fragment(html).must_equal %[
|
49
|
+
_(@s.fragment(html)).must_equal %[
|
50
50
|
<style>
|
51
51
|
/* Yay CSS! */
|
52
52
|
.foo { color: #fff; }
|
@@ -62,6 +62,6 @@ describe 'Sanitize::Transformers::CSS::CleanElement' do
|
|
62
62
|
end
|
63
63
|
|
64
64
|
it 'should remove the <style> element if the sanitized CSS is empty' do
|
65
|
-
@s.fragment('<style></style>').must_equal ''
|
65
|
+
_(@s.fragment('<style></style>')).must_equal ''
|
66
66
|
end
|
67
67
|
end
|
data/test/test_clean_doctype.rb
CHANGED
@@ -11,18 +11,18 @@ describe 'Sanitize::Transformers::CleanDoctype' do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
it 'should remove doctype declarations' do
|
14
|
-
@s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html
|
15
|
-
@s.fragment('<!DOCTYPE html>foo').must_equal 'foo'
|
14
|
+
_(@s.document('<!DOCTYPE html><html>foo</html>')).must_equal "<html>foo</html>"
|
15
|
+
_(@s.fragment('<!DOCTYPE html>foo')).must_equal 'foo'
|
16
16
|
end
|
17
17
|
|
18
18
|
it 'should not allow doctype definitions in fragments' do
|
19
|
-
@s.fragment('<!DOCTYPE html><html>foo</html>')
|
19
|
+
_(@s.fragment('<!DOCTYPE html><html>foo</html>'))
|
20
20
|
.must_equal "foo"
|
21
21
|
|
22
|
-
@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
22
|
+
_(@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
23
23
|
.must_equal "foo"
|
24
24
|
|
25
|
-
@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
25
|
+
_(@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
|
26
26
|
.must_equal "foo"
|
27
27
|
end
|
28
28
|
end
|
@@ -33,38 +33,38 @@ describe 'Sanitize::Transformers::CleanDoctype' do
|
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'should allow doctype declarations in documents' do
|
36
|
-
@s.document('<!DOCTYPE html><html>foo</html>')
|
37
|
-
.must_equal "<!DOCTYPE html
|
36
|
+
_(@s.document('<!DOCTYPE html><html>foo</html>'))
|
37
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
38
38
|
|
39
|
-
@s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
40
|
-
.must_equal "<!DOCTYPE html
|
39
|
+
_(@s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
40
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
41
41
|
|
42
|
-
@s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
43
|
-
.must_equal "<!DOCTYPE html
|
42
|
+
_(@s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
|
43
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
44
44
|
end
|
45
45
|
|
46
46
|
it 'should not allow obviously invalid doctype declarations in documents' do
|
47
|
-
@s.document('<!DOCTYPE blah blah blah><html>foo</html>')
|
48
|
-
.must_equal "<!DOCTYPE html
|
47
|
+
_(@s.document('<!DOCTYPE blah blah blah><html>foo</html>'))
|
48
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
49
49
|
|
50
|
-
@s.document('<!DOCTYPE blah><html>foo</html>')
|
51
|
-
.must_equal "<!DOCTYPE html
|
50
|
+
_(@s.document('<!DOCTYPE blah><html>foo</html>'))
|
51
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
52
52
|
|
53
|
-
@s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
54
|
-
.must_equal "<!DOCTYPE html
|
53
|
+
_(@s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
54
|
+
.must_equal "<!DOCTYPE html><html>foo</html>"
|
55
55
|
|
56
|
-
@s.document('<!whatever><html>foo</html>')
|
57
|
-
.must_equal "<html>foo</html
|
56
|
+
_(@s.document('<!whatever><html>foo</html>'))
|
57
|
+
.must_equal "<html>foo</html>"
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'should not allow doctype definitions in fragments' do
|
61
|
-
@s.fragment('<!DOCTYPE html><html>foo</html>')
|
61
|
+
_(@s.fragment('<!DOCTYPE html><html>foo</html>'))
|
62
62
|
.must_equal "foo"
|
63
63
|
|
64
|
-
@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
|
64
|
+
_(@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
|
65
65
|
.must_equal "foo"
|
66
66
|
|
67
|
-
@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
|
67
|
+
_(@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
|
68
68
|
.must_equal "foo"
|
69
69
|
end
|
70
70
|
end
|