sanitize 5.0.0 → 5.1.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +19 -0
- data/README.md +11 -0
- data/lib/sanitize.rb +21 -7
- data/lib/sanitize/config/default.rb +4 -0
- data/lib/sanitize/version.rb +1 -1
- data/test/common.rb +0 -31
- data/test/test_malicious_html.rb +22 -7
- data/test/test_sanitize.rb +98 -13
- data/test/test_sanitize_css.rb +39 -12
- metadata +3 -5
- data/test/test_unicode.rb +0 -95
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8cf7bac25cea64ed464d106bdc57019388598ca9f1a4e7d8eddf3a98bab12267
|
4
|
+
data.tar.gz: e8b1f402b0d67a825b0ad4aad83829816fd9c78cd8445879636cba0a282e8ee5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 956edaca6569a5933223da0aa7dcac4880b5164aa59e37256ac896c9fefb271da71425defe7e09e241b1333b441f5a2629893abed6d5a2a47d0726bf03597614
|
7
|
+
data.tar.gz: e45a018b904bcf8cb996f8ed08427e80b8ce058c4fe414782460c5496e88bb6c2a4055304118057621a630e514b4f96bac11bdc686181a6f0097dc7bf912ab04
|
data/HISTORY.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
# Sanitize History
|
2
2
|
|
3
|
+
## 5.1.0 (2019-09-07)
|
4
|
+
|
5
|
+
### Features
|
6
|
+
|
7
|
+
* Added a `:parser_options` config hash, which makes it possible to pass custom
|
8
|
+
parsing options to Nokogumbo. [@austin-wang - #194][194]
|
9
|
+
|
10
|
+
### Bug Fixes
|
11
|
+
|
12
|
+
* Non-characters and non-whitespace control characters are now stripped from
|
13
|
+
HTML input before parsing to comply with the HTML Standard's [preprocessing
|
14
|
+
guidelines][html-preprocessing]. Prior to this Sanitize had adhered to [older
|
15
|
+
W3C guidelines][unicode-xml] that have since been withdrawn. [#179][179]
|
16
|
+
|
17
|
+
[179]:https://github.com/rgrove/sanitize/issues/179
|
18
|
+
[194]:https://github.com/rgrove/sanitize/pull/194
|
19
|
+
[html-preprocessing]:https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
20
|
+
[unicode-xml]:https://www.w3.org/TR/unicode-xml/
|
21
|
+
|
3
22
|
## 5.0.0 (2018-10-14)
|
4
23
|
|
5
24
|
For most users, upgrading from 4.x shouldn't require any changes. However, the
|
data/README.md
CHANGED
@@ -417,6 +417,17 @@ elements not in this array will be removed.
|
|
417
417
|
]
|
418
418
|
```
|
419
419
|
|
420
|
+
#### :parser_options (Hash)
|
421
|
+
|
422
|
+
[Parsing options](https://github.com/rubys/nokogumbo/tree/v2.0.1#parsing-options) supplied to `nokogumbo`.
|
423
|
+
|
424
|
+
```ruby
|
425
|
+
:parser_options => {
|
426
|
+
max_errors: -1,
|
427
|
+
max_tree_depth: -1
|
428
|
+
}
|
429
|
+
```
|
430
|
+
|
420
431
|
#### :protocols (Hash)
|
421
432
|
|
422
433
|
URL protocols to allow in specific attributes. If an attribute is listed here
|
data/lib/sanitize.rb
CHANGED
@@ -19,6 +19,20 @@ require_relative 'sanitize/transformers/clean_element'
|
|
19
19
|
class Sanitize
|
20
20
|
attr_reader :config
|
21
21
|
|
22
|
+
# Matches one or more control characters that should be removed from HTML
|
23
|
+
# before parsing, as defined by the HTML living standard.
|
24
|
+
#
|
25
|
+
# - https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
26
|
+
# - https://infra.spec.whatwg.org/#control
|
27
|
+
REGEX_HTML_CONTROL_CHARACTERS = /[\u0001-\u0008\u000b\u000e-\u001f\u007f-\u009f]+/u
|
28
|
+
|
29
|
+
# Matches one or more non-characters that should be removed from HTML before
|
30
|
+
# parsing, as defined by the HTML living standard.
|
31
|
+
#
|
32
|
+
# - https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
33
|
+
# - https://infra.spec.whatwg.org/#noncharacter
|
34
|
+
REGEX_HTML_NON_CHARACTERS = /[\ufdd0-\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}]+/u
|
35
|
+
|
22
36
|
# Matches an attribute value that could be treated by a browser as a URL
|
23
37
|
# with a protocol prefix, such as "http:" or "javascript:". Any string of zero
|
24
38
|
# or more characters followed by a colon is considered a match, even if the
|
@@ -26,11 +40,12 @@ class Sanitize
|
|
26
40
|
# IE6 and Opera will still parse).
|
27
41
|
REGEX_PROTOCOL = /\A\s*([^\/#]*?)(?:\:|&#0*58|&#x0*3a)/i
|
28
42
|
|
29
|
-
# Matches
|
30
|
-
#
|
43
|
+
# Matches one or more characters that should be stripped from HTML before
|
44
|
+
# parsing. This is a combination of `REGEX_HTML_CONTROL_CHARACTERS` and
|
45
|
+
# `REGEX_HTML_NON_CHARACTERS`.
|
31
46
|
#
|
32
|
-
#
|
33
|
-
REGEX_UNSUITABLE_CHARS = /
|
47
|
+
# https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
|
48
|
+
REGEX_UNSUITABLE_CHARS = /(?:#{REGEX_HTML_CONTROL_CHARACTERS}|#{REGEX_HTML_NON_CHARACTERS})/u
|
34
49
|
|
35
50
|
#--
|
36
51
|
# Class Methods
|
@@ -108,7 +123,7 @@ class Sanitize
|
|
108
123
|
def document(html)
|
109
124
|
return '' unless html
|
110
125
|
|
111
|
-
doc = Nokogiri::HTML5.parse(preprocess(html))
|
126
|
+
doc = Nokogiri::HTML5.parse(preprocess(html), **@config[:parser_options])
|
112
127
|
node!(doc)
|
113
128
|
to_html(doc)
|
114
129
|
end
|
@@ -120,8 +135,7 @@ class Sanitize
|
|
120
135
|
def fragment(html)
|
121
136
|
return '' unless html
|
122
137
|
|
123
|
-
|
124
|
-
frag = Nokogiri::HTML5.fragment(html)
|
138
|
+
frag = Nokogiri::HTML5.fragment(preprocess(html), **@config[:parser_options])
|
125
139
|
node!(frag)
|
126
140
|
to_html(frag)
|
127
141
|
end
|
data/lib/sanitize/config/default.rb
CHANGED
@@ -56,6 +56,10 @@ class Sanitize
|
|
56
56
|
# that all HTML will be stripped).
|
57
57
|
:elements => [],
|
58
58
|
|
59
|
+
# HTML parsing options to pass to Nokogumbo.
|
60
|
+
# https://github.com/rubys/nokogumbo/tree/v2.0.1#parsing-options
|
61
|
+
:parser_options => {},
|
62
|
+
|
59
63
|
# URL handling protocols to allow in specific attributes. By default, no
|
60
64
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
61
65
|
# to allow relative URLs sans protocol.
|
data/lib/sanitize/version.rb
CHANGED
data/test/common.rb
CHANGED
@@ -1,34 +1,3 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
gem 'minitest'
|
3
2
|
require 'minitest/autorun'
|
4
|
-
|
5
3
|
require_relative '../lib/sanitize'
|
6
|
-
|
7
|
-
# Helper to stub an instance method. Shamelessly stolen from
|
8
|
-
# https://github.com/codeodor/minitest-stub_any_instance/
|
9
|
-
class Object
|
10
|
-
def self.stub_instance(name, value, &block)
|
11
|
-
old_method = "__stubbed_method_#{name}__"
|
12
|
-
|
13
|
-
class_eval do
|
14
|
-
alias_method old_method, name
|
15
|
-
|
16
|
-
define_method(name) do |*args|
|
17
|
-
if value.respond_to?(:call) then
|
18
|
-
value.call(*args)
|
19
|
-
else
|
20
|
-
value
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
yield
|
26
|
-
|
27
|
-
ensure
|
28
|
-
class_eval do
|
29
|
-
undef_method name
|
30
|
-
alias_method name, old_method
|
31
|
-
undef_method old_method
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
data/test/test_malicious_html.rb
CHANGED
@@ -166,12 +166,19 @@ describe 'Malicious HTML' do
|
|
166
166
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
167
167
|
|
168
168
|
it 'should escape unsafe characters in attributes' do
|
169
|
-
|
170
|
-
|
171
|
-
|
169
|
+
# This uses Nokogumbo's HTML-compliant serializer rather than
|
170
|
+
# libxml2's.
|
171
|
+
@s.fragment(input).
|
172
|
+
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
173
|
+
|
174
|
+
# This uses the not-quite-standards-compliant libxml2 serializer via
|
175
|
+
# Nokogiri, so the output may be a little different as of Nokogiri
|
176
|
+
# 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
|
177
|
+
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
172
178
|
fragment = Nokogiri::HTML.fragment(input)
|
173
179
|
@s.node!(fragment)
|
174
|
-
fragment.to_html.
|
180
|
+
fragment.to_html.
|
181
|
+
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
175
182
|
end
|
176
183
|
|
177
184
|
it 'should round-trip to the same output' do
|
@@ -184,11 +191,19 @@ describe 'Malicious HTML' do
|
|
184
191
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
185
192
|
|
186
193
|
it 'should not escape characters unnecessarily' do
|
187
|
-
|
188
|
-
|
194
|
+
# This uses Nokogumbo's HTML-compliant serializer rather than
|
195
|
+
# libxml2's.
|
196
|
+
@s.fragment(input).
|
197
|
+
must_equal(%[<#{tag_name} #{attr_name}="examp<!--" onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
198
|
+
|
199
|
+
# This uses the not-quite-standards-compliant libxml2 serializer via
|
200
|
+
# Nokogiri, so the output may be a little different as of Nokogiri
|
201
|
+
# 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
|
202
|
+
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
189
203
|
fragment = Nokogiri::HTML.fragment(input)
|
190
204
|
@s.node!(fragment)
|
191
|
-
fragment.to_html.
|
205
|
+
fragment.to_html.
|
206
|
+
must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
|
192
207
|
end
|
193
208
|
|
194
209
|
it 'should round-trip to the same output' do
|
data/test/test_sanitize.rb
CHANGED
@@ -37,6 +37,44 @@ describe 'Sanitize' do
|
|
37
37
|
it 'should not choke on frozen documents' do
|
38
38
|
@s.document('<!doctype html><html><b>foo</b>'.freeze).must_equal "<html>foo</html>"
|
39
39
|
end
|
40
|
+
|
41
|
+
it 'should normalize newlines' do
|
42
|
+
@s.document("a\r\n\n\r\r\r\nz").must_equal "<html>a\n\n\n\n\nz</html>"
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'should strip control characters (except ASCII whitespace)' do
|
46
|
+
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
47
|
+
whitespace = "\t\n\f\u0020"
|
48
|
+
@s.document("a#{sample_control_chars}#{whitespace}z").must_equal "<html>a#{whitespace}z</html>"
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'should strip non-characters' do
|
52
|
+
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
53
|
+
@s.document("a#{sample_non_chars}z").must_equal "<html>az</html>"
|
54
|
+
end
|
55
|
+
|
56
|
+
describe 'when html body exceeds Nokogumbo::DEFAULT_MAX_TREE_DEPTH' do
|
57
|
+
let(:content) do
|
58
|
+
content = nest_html_content('<b>foo</b>', Nokogumbo::DEFAULT_MAX_TREE_DEPTH)
|
59
|
+
"<html>#{content}</html>"
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'raises an ArgumentError exception' do
|
63
|
+
assert_raises ArgumentError do
|
64
|
+
@s.document(content)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
describe 'and :max_tree_depth of -1 is supplied in :parser_options' do
|
69
|
+
before do
|
70
|
+
@s = Sanitize.new(elements: ['html'], parser_options: { max_tree_depth: -1 })
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'does not raise an ArgumentError exception' do
|
74
|
+
@s.document(content).must_equal '<html>foo</html>'
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
40
78
|
end
|
41
79
|
|
42
80
|
describe '#fragment' do
|
@@ -61,6 +99,44 @@ describe 'Sanitize' do
|
|
61
99
|
it 'should not choke on frozen fragments' do
|
62
100
|
@s.fragment('<b>foo</b>'.freeze).must_equal 'foo'
|
63
101
|
end
|
102
|
+
|
103
|
+
it 'should normalize newlines' do
|
104
|
+
@s.fragment("a\r\n\n\r\r\r\nz").must_equal "a\n\n\n\n\nz"
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'should strip control characters (except ASCII whitespace)' do
|
108
|
+
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
109
|
+
whitespace = "\t\n\f\u0020"
|
110
|
+
@s.fragment("a#{sample_control_chars}#{whitespace}z").must_equal "a#{whitespace}z"
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'should strip non-characters' do
|
114
|
+
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
115
|
+
@s.fragment("a#{sample_non_chars}z").must_equal "az"
|
116
|
+
end
|
117
|
+
|
118
|
+
describe 'when html body exceeds Nokogumbo::DEFAULT_MAX_TREE_DEPTH' do
|
119
|
+
let(:content) do
|
120
|
+
content = nest_html_content('<b>foo</b>', Nokogumbo::DEFAULT_MAX_TREE_DEPTH)
|
121
|
+
"<body>#{content}</body>"
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'raises an ArgumentError exception' do
|
125
|
+
assert_raises ArgumentError do
|
126
|
+
@s.fragment(content)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
describe 'and :max_tree_depth of -1 is supplied in :parser_options' do
|
131
|
+
before do
|
132
|
+
@s = Sanitize.new(parser_options: { max_tree_depth: -1 })
|
133
|
+
end
|
134
|
+
|
135
|
+
it 'does not raise an ArgumentError exception' do
|
136
|
+
@s.fragment(content).must_equal 'foo'
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
64
140
|
end
|
65
141
|
|
66
142
|
describe '#node!' do
|
@@ -85,28 +161,37 @@ describe 'Sanitize' do
|
|
85
161
|
|
86
162
|
describe 'class methods' do
|
87
163
|
describe '.document' do
|
88
|
-
it 'should
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
end
|
164
|
+
it 'should sanitize an HTML document with the given config' do
|
165
|
+
html = '<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'
|
166
|
+
Sanitize.document(html, :elements => ['html'])
|
167
|
+
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
93
168
|
end
|
94
169
|
end
|
95
170
|
|
96
171
|
describe '.fragment' do
|
97
|
-
it 'should
|
98
|
-
|
99
|
-
|
100
|
-
|
172
|
+
it 'should sanitize an HTML fragment with the given config' do
|
173
|
+
html = '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'
|
174
|
+
Sanitize.fragment(html, :elements => ['strong'])
|
175
|
+
.must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
|
101
176
|
end
|
102
177
|
end
|
103
178
|
|
104
179
|
describe '.node!' do
|
105
|
-
it 'should
|
106
|
-
|
107
|
-
|
108
|
-
|
180
|
+
it 'should sanitize a Nokogiri::XML::Node with the given config' do
|
181
|
+
doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
182
|
+
frag = doc.fragment
|
183
|
+
|
184
|
+
doc.xpath('/html/body/node()').each {|node| frag << node }
|
185
|
+
|
186
|
+
Sanitize.node!(frag, :elements => ['strong'])
|
187
|
+
frag.to_html.must_equal 'Lorem ipsum <strong>dolor</strong> sit amet '
|
109
188
|
end
|
110
189
|
end
|
111
190
|
end
|
191
|
+
|
192
|
+
private
|
193
|
+
|
194
|
+
def nest_html_content(html_content, depth)
|
195
|
+
"#{'<span>' * depth}#{html_content}#{'</span>' * depth}"
|
196
|
+
end
|
112
197
|
end
|
data/test/test_sanitize_css.rb
CHANGED
@@ -196,26 +196,53 @@ describe 'Sanitize::CSS' do
|
|
196
196
|
|
197
197
|
describe 'class methods' do
|
198
198
|
describe '.properties' do
|
199
|
-
it 'should
|
200
|
-
|
201
|
-
|
202
|
-
|
199
|
+
it 'should sanitize CSS properties with the given config' do
|
200
|
+
css = 'background: #fff; width: expression(alert("hi"));'
|
201
|
+
|
202
|
+
Sanitize::CSS.properties(css).must_equal ' '
|
203
|
+
Sanitize::CSS.properties(css, Sanitize::Config::RELAXED[:css]).must_equal 'background: #fff; '
|
204
|
+
Sanitize::CSS.properties(css, :properties => %w[background color width]).must_equal 'background: #fff; '
|
203
205
|
end
|
204
206
|
end
|
205
207
|
|
206
208
|
describe '.stylesheet' do
|
207
|
-
it 'should
|
208
|
-
|
209
|
-
|
210
|
-
|
209
|
+
it 'should sanitize a CSS stylesheet with the given config' do
|
210
|
+
css = %[
|
211
|
+
/* Yay CSS! */
|
212
|
+
.foo { color: #fff; }
|
213
|
+
#bar { background: url(yay.jpg); }
|
214
|
+
|
215
|
+
@media screen (max-width:480px) {
|
216
|
+
.foo { width: 400px; }
|
217
|
+
#bar:not(.baz) { height: 100px; }
|
218
|
+
}
|
219
|
+
].strip
|
220
|
+
|
221
|
+
Sanitize::CSS.stylesheet(css).strip.must_equal %[
|
222
|
+
.foo { }
|
223
|
+
#bar { }
|
224
|
+
].strip
|
225
|
+
|
226
|
+
Sanitize::CSS.stylesheet(css, Sanitize::Config::RELAXED[:css]).must_equal css
|
227
|
+
|
228
|
+
Sanitize::CSS.stylesheet(css, :properties => %w[background color width]).strip.must_equal %[
|
229
|
+
.foo { color: #fff; }
|
230
|
+
#bar { }
|
231
|
+
].strip
|
211
232
|
end
|
212
233
|
end
|
213
234
|
|
214
235
|
describe '.tree!' do
|
215
|
-
it 'should
|
216
|
-
|
217
|
-
|
218
|
-
|
236
|
+
it 'should sanitize a Crass CSS parse tree with the given config' do
|
237
|
+
tree = Crass.parse(String.new("@import url(foo.css);\n") <<
|
238
|
+
".foo { background: #fff; font: 16pt 'Comic Sans MS'; }\n" <<
|
239
|
+
"#bar { top: 125px; background: green; }")
|
240
|
+
|
241
|
+
Sanitize::CSS.tree!(tree, :properties => %w[background color width]).must_be_same_as tree
|
242
|
+
|
243
|
+
Crass::Parser.stringify(tree).must_equal String.new("\n") <<
|
244
|
+
".foo { background: #fff; }\n" <<
|
245
|
+
"#bar { background: green; }"
|
219
246
|
end
|
220
247
|
end
|
221
248
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.0.0
|
4
|
+
version: 5.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Grove
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-09-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: crass
|
@@ -116,7 +116,6 @@ files:
|
|
116
116
|
- test/test_sanitize.rb
|
117
117
|
- test/test_sanitize_css.rb
|
118
118
|
- test/test_transformers.rb
|
119
|
-
- test/test_unicode.rb
|
120
119
|
homepage: https://github.com/rgrove/sanitize/
|
121
120
|
licenses:
|
122
121
|
- MIT
|
@@ -136,8 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
136
135
|
- !ruby/object:Gem::Version
|
137
136
|
version: 1.2.0
|
138
137
|
requirements: []
|
139
|
-
|
140
|
-
rubygems_version: 2.7.6
|
138
|
+
rubygems_version: 3.0.3
|
141
139
|
signing_key:
|
142
140
|
specification_version: 4
|
143
141
|
summary: Whitelist-based HTML and CSS sanitizer.
|
data/test/test_unicode.rb
DELETED
@@ -1,95 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require_relative 'common'
|
3
|
-
|
4
|
-
describe 'Unicode' do
|
5
|
-
make_my_diffs_pretty!
|
6
|
-
parallelize_me!
|
7
|
-
|
8
|
-
# http://www.w3.org/TR/unicode-xml/#Charlist
|
9
|
-
describe 'Unsuitable characters' do
|
10
|
-
before do
|
11
|
-
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
12
|
-
end
|
13
|
-
|
14
|
-
it 'should not modify the input string' do
|
15
|
-
fragment = "a\u0340b\u0341c"
|
16
|
-
document = "a\u0340b\u0341c"
|
17
|
-
|
18
|
-
@s.document(document)
|
19
|
-
@s.fragment(fragment)
|
20
|
-
|
21
|
-
fragment.must_equal "a\u0340b\u0341c"
|
22
|
-
document.must_equal "a\u0340b\u0341c"
|
23
|
-
end
|
24
|
-
|
25
|
-
it 'should strip deprecated grave and acute clones' do
|
26
|
-
@s.document("a\u0340b\u0341c").must_equal "<html><head></head><body>abc</body></html>"
|
27
|
-
@s.fragment("a\u0340b\u0341c").must_equal 'abc'
|
28
|
-
end
|
29
|
-
|
30
|
-
it 'should strip deprecated Khmer characters' do
|
31
|
-
@s.document("a\u17a3b\u17d3c").must_equal "<html><head></head><body>abc</body></html>"
|
32
|
-
@s.fragment("a\u17a3b\u17d3c").must_equal 'abc'
|
33
|
-
end
|
34
|
-
|
35
|
-
it 'should strip line and paragraph separator punctuation' do
|
36
|
-
@s.document("a\u2028b\u2029c").must_equal "<html><head></head><body>abc</body></html>"
|
37
|
-
@s.fragment("a\u2028b\u2029c").must_equal 'abc'
|
38
|
-
end
|
39
|
-
|
40
|
-
it 'should strip bidi embedding control characters' do
|
41
|
-
@s.document("a\u202ab\u202bc\u202cd\u202de\u202e")
|
42
|
-
.must_equal "<html><head></head><body>abcde</body></html>"
|
43
|
-
|
44
|
-
@s.fragment("a\u202ab\u202bc\u202cd\u202de\u202e")
|
45
|
-
.must_equal 'abcde'
|
46
|
-
end
|
47
|
-
|
48
|
-
it 'should strip deprecated symmetric swapping characters' do
|
49
|
-
@s.document("a\u206ab\u206bc").must_equal "<html><head></head><body>abc</body></html>"
|
50
|
-
@s.fragment("a\u206ab\u206bc").must_equal 'abc'
|
51
|
-
end
|
52
|
-
|
53
|
-
it 'should strip deprecated Arabic form shaping characters' do
|
54
|
-
@s.document("a\u206cb\u206dc").must_equal "<html><head></head><body>abc</body></html>"
|
55
|
-
@s.fragment("a\u206cb\u206dc").must_equal 'abc'
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'should strip deprecated National digit shape characters' do
|
59
|
-
@s.document("a\u206eb\u206fc").must_equal "<html><head></head><body>abc</body></html>"
|
60
|
-
@s.fragment("a\u206eb\u206fc").must_equal 'abc'
|
61
|
-
end
|
62
|
-
|
63
|
-
it 'should strip interlinear annotation characters' do
|
64
|
-
@s.document("a\ufff9b\ufffac\ufffb").must_equal "<html><head></head><body>abc</body></html>"
|
65
|
-
@s.fragment("a\ufff9b\ufffac\ufffb").must_equal 'abc'
|
66
|
-
end
|
67
|
-
|
68
|
-
it 'should strip BOM/zero-width non-breaking space characters' do
|
69
|
-
@s.document("a\ufeffbc").must_equal "<html><head></head><body>abc</body></html>"
|
70
|
-
@s.fragment("a\ufeffbc").must_equal 'abc'
|
71
|
-
end
|
72
|
-
|
73
|
-
it 'should strip object replacement characters' do
|
74
|
-
@s.document("a\ufffcbc").must_equal "<html><head></head><body>abc</body></html>"
|
75
|
-
@s.fragment("a\ufffcbc").must_equal 'abc'
|
76
|
-
end
|
77
|
-
|
78
|
-
it 'should strip musical notation scoping characters' do
|
79
|
-
@s.document("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}")
|
80
|
-
.must_equal "<html><head></head><body>abcdefgh</body></html>"
|
81
|
-
|
82
|
-
@s.fragment("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}")
|
83
|
-
.must_equal 'abcdefgh'
|
84
|
-
end
|
85
|
-
|
86
|
-
it 'should strip language tag code point characters' do
|
87
|
-
str = String.new 'a'
|
88
|
-
(0xE0000..0xE007F).each {|n| str << [n].pack('U') }
|
89
|
-
str << 'b'
|
90
|
-
|
91
|
-
@s.document(str).must_equal "<html><head></head><body>ab</body></html>"
|
92
|
-
@s.fragment(str).must_equal 'ab'
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|