sanitize 6.1.2 → 7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +40 -14
- data/LICENSE +3 -1
- data/README.md +120 -238
- data/lib/sanitize/config/basic.rb +15 -15
- data/lib/sanitize/config/default.rb +45 -45
- data/lib/sanitize/config/relaxed.rb +136 -32
- data/lib/sanitize/config/restricted.rb +2 -2
- data/lib/sanitize/config.rb +12 -14
- data/lib/sanitize/css.rb +309 -303
- data/lib/sanitize/transformers/clean_cdata.rb +9 -9
- data/lib/sanitize/transformers/clean_comment.rb +9 -9
- data/lib/sanitize/transformers/clean_css.rb +59 -55
- data/lib/sanitize/transformers/clean_doctype.rb +15 -15
- data/lib/sanitize/transformers/clean_element.rb +220 -237
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +38 -38
- data/test/common.rb +4 -3
- data/test/test_clean_comment.rb +26 -25
- data/test/test_clean_css.rb +14 -13
- data/test/test_clean_doctype.rb +21 -20
- data/test/test_clean_element.rb +258 -273
- data/test/test_config.rb +22 -21
- data/test/test_malicious_css.rb +20 -19
- data/test/test_malicious_html.rb +100 -99
- data/test/test_parser.rb +26 -25
- data/test/test_sanitize.rb +70 -69
- data/test/test_sanitize_css.rb +152 -114
- data/test/test_transformers.rb +81 -83
- metadata +14 -43
data/test/test_sanitize.rb
CHANGED
@@ -1,190 +1,191 @@
|
|
1
|
-
#
|
2
|
-
require_relative 'common'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
require_relative "common"
|
4
|
+
|
5
|
+
describe "Sanitize" do
|
6
|
+
describe "initializer" do
|
7
|
+
it "should not modify a transformers array in the given config" do
|
7
8
|
transformers = [
|
8
9
|
lambda {}
|
9
10
|
]
|
10
11
|
|
11
|
-
Sanitize.new({
|
12
|
+
Sanitize.new({transformers: transformers})
|
12
13
|
_(transformers.length).must_equal(1)
|
13
14
|
end
|
14
15
|
end
|
15
16
|
|
16
|
-
describe
|
17
|
+
describe "instance methods" do
|
17
18
|
before do
|
18
19
|
@s = Sanitize.new
|
19
20
|
end
|
20
21
|
|
21
|
-
describe
|
22
|
+
describe "#document" do
|
22
23
|
before do
|
23
|
-
@s = Sanitize.new(:
|
24
|
+
@s = Sanitize.new(elements: ["html"])
|
24
25
|
end
|
25
26
|
|
26
|
-
it
|
27
|
+
it "should sanitize an HTML document" do
|
27
28
|
_(@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'))
|
28
29
|
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
29
30
|
end
|
30
31
|
|
31
|
-
it
|
32
|
-
input =
|
32
|
+
it "should not modify the input string" do
|
33
|
+
input = "<!DOCTYPE html><b>foo</b>"
|
33
34
|
@s.document(input)
|
34
|
-
_(input).must_equal(
|
35
|
+
_(input).must_equal("<!DOCTYPE html><b>foo</b>")
|
35
36
|
end
|
36
37
|
|
37
|
-
it
|
38
|
-
_(@s.document(
|
38
|
+
it "should not choke on frozen documents" do
|
39
|
+
_(@s.document("<!doctype html><html><b>foo</b>")).must_equal "<html>foo</html>"
|
39
40
|
end
|
40
41
|
|
41
|
-
it
|
42
|
+
it "should normalize newlines" do
|
42
43
|
_(@s.document("a\r\n\n\r\r\r\nz")).must_equal "<html>a\n\n\n\n\nz</html>"
|
43
44
|
end
|
44
45
|
|
45
|
-
it
|
46
|
+
it "should strip control characters (except ASCII whitespace)" do
|
46
47
|
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
47
48
|
whitespace = "\t\n\f\u0020"
|
48
49
|
_(@s.document("a#{sample_control_chars}#{whitespace}z")).must_equal "<html>a#{whitespace}z</html>"
|
49
50
|
end
|
50
51
|
|
51
|
-
it
|
52
|
+
it "should strip non-characters" do
|
52
53
|
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
53
54
|
_(@s.document("a#{sample_non_chars}z")).must_equal "<html>az</html>"
|
54
55
|
end
|
55
56
|
|
56
|
-
describe
|
57
|
+
describe "when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH" do
|
57
58
|
let(:content) do
|
58
|
-
content = nest_html_content(
|
59
|
+
content = nest_html_content("<b>foo</b>", Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH)
|
59
60
|
"<html>#{content}</html>"
|
60
61
|
end
|
61
62
|
|
62
|
-
it
|
63
|
+
it "raises an ArgumentError exception" do
|
63
64
|
assert_raises ArgumentError do
|
64
65
|
@s.document(content)
|
65
66
|
end
|
66
67
|
end
|
67
68
|
|
68
|
-
describe
|
69
|
+
describe "and :max_tree_depth of -1 is supplied in :parser_options" do
|
69
70
|
before do
|
70
|
-
@s = Sanitize.new(elements: [
|
71
|
+
@s = Sanitize.new(elements: ["html"], parser_options: {max_tree_depth: -1})
|
71
72
|
end
|
72
73
|
|
73
|
-
it
|
74
|
-
_(@s.document(content)).must_equal
|
74
|
+
it "does not raise an ArgumentError exception" do
|
75
|
+
_(@s.document(content)).must_equal "<html>foo</html>"
|
75
76
|
end
|
76
77
|
end
|
77
78
|
end
|
78
79
|
end
|
79
80
|
|
80
|
-
describe
|
81
|
-
it
|
81
|
+
describe "#fragment" do
|
82
|
+
it "should sanitize an HTML fragment" do
|
82
83
|
_(@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'))
|
83
|
-
.must_equal
|
84
|
+
.must_equal "Lorem ipsum dolor sit amet "
|
84
85
|
end
|
85
86
|
|
86
|
-
it
|
87
|
-
input =
|
87
|
+
it "should not modify the input string" do
|
88
|
+
input = "<b>foo</b>"
|
88
89
|
@s.fragment(input)
|
89
|
-
_(input).must_equal
|
90
|
+
_(input).must_equal "<b>foo</b>"
|
90
91
|
end
|
91
92
|
|
92
|
-
it
|
93
|
-
_(@s.fragment(
|
94
|
-
_(@s.fragment(
|
95
|
-
_(@s.fragment(
|
96
|
-
_(@s.fragment(
|
93
|
+
it "should not choke on fragments containing <html> or <body>" do
|
94
|
+
_(@s.fragment("<html><b>foo</b></html>")).must_equal "foo"
|
95
|
+
_(@s.fragment("<body><b>foo</b></body>")).must_equal "foo"
|
96
|
+
_(@s.fragment("<html><body><b>foo</b></body></html>")).must_equal "foo"
|
97
|
+
_(@s.fragment("<!DOCTYPE html><html><body><b>foo</b></body></html>")).must_equal "foo"
|
97
98
|
end
|
98
99
|
|
99
|
-
it
|
100
|
-
_(@s.fragment(
|
100
|
+
it "should not choke on frozen fragments" do
|
101
|
+
_(@s.fragment("<b>foo</b>")).must_equal "foo"
|
101
102
|
end
|
102
103
|
|
103
|
-
it
|
104
|
+
it "should normalize newlines" do
|
104
105
|
_(@s.fragment("a\r\n\n\r\r\r\nz")).must_equal "a\n\n\n\n\nz"
|
105
106
|
end
|
106
107
|
|
107
|
-
it
|
108
|
+
it "should strip control characters (except ASCII whitespace)" do
|
108
109
|
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
109
110
|
whitespace = "\t\n\f\u0020"
|
110
111
|
_(@s.fragment("a#{sample_control_chars}#{whitespace}z")).must_equal "a#{whitespace}z"
|
111
112
|
end
|
112
113
|
|
113
|
-
it
|
114
|
+
it "should strip non-characters" do
|
114
115
|
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
115
116
|
_(@s.fragment("a#{sample_non_chars}z")).must_equal "az"
|
116
117
|
end
|
117
118
|
|
118
|
-
describe
|
119
|
+
describe "when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH" do
|
119
120
|
let(:content) do
|
120
|
-
content = nest_html_content(
|
121
|
+
content = nest_html_content("<b>foo</b>", Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH)
|
121
122
|
"<body>#{content}</body>"
|
122
123
|
end
|
123
124
|
|
124
|
-
it
|
125
|
+
it "raises an ArgumentError exception" do
|
125
126
|
assert_raises ArgumentError do
|
126
127
|
@s.fragment(content)
|
127
128
|
end
|
128
129
|
end
|
129
130
|
|
130
|
-
describe
|
131
|
+
describe "and :max_tree_depth of -1 is supplied in :parser_options" do
|
131
132
|
before do
|
132
|
-
@s = Sanitize.new(parser_options: {
|
133
|
+
@s = Sanitize.new(parser_options: {max_tree_depth: -1})
|
133
134
|
end
|
134
135
|
|
135
|
-
it
|
136
|
-
_(@s.fragment(content)).must_equal
|
136
|
+
it "does not raise an ArgumentError exception" do
|
137
|
+
_(@s.fragment(content)).must_equal "foo"
|
137
138
|
end
|
138
139
|
end
|
139
140
|
end
|
140
141
|
end
|
141
142
|
|
142
|
-
describe
|
143
|
-
it
|
144
|
-
doc
|
143
|
+
describe "#node!" do
|
144
|
+
it "should sanitize a Nokogiri::XML::Node" do
|
145
|
+
doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
145
146
|
frag = doc.fragment
|
146
147
|
|
147
|
-
doc.xpath(
|
148
|
+
doc.xpath("/html/body/node()").each { |node| frag << node }
|
148
149
|
|
149
150
|
@s.node!(frag)
|
150
|
-
_(frag.to_html).must_equal
|
151
|
+
_(frag.to_html).must_equal "Lorem ipsum dolor sit amet "
|
151
152
|
end
|
152
153
|
|
153
154
|
describe "when the given node is a document and <html> isn't allowlisted" do
|
154
|
-
it
|
155
|
-
doc = Nokogiri::HTML5.parse(
|
155
|
+
it "should raise a Sanitize::Error" do
|
156
|
+
doc = Nokogiri::HTML5.parse("foo")
|
156
157
|
_(proc { @s.node!(doc) }).must_raise Sanitize::Error
|
157
158
|
end
|
158
159
|
end
|
159
160
|
end
|
160
161
|
end
|
161
162
|
|
162
|
-
describe
|
163
|
-
describe
|
164
|
-
it
|
163
|
+
describe "class methods" do
|
164
|
+
describe ".document" do
|
165
|
+
it "should sanitize an HTML document with the given config" do
|
165
166
|
html = '<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'
|
166
|
-
_(Sanitize.document(html, :
|
167
|
+
_(Sanitize.document(html, elements: ["html"]))
|
167
168
|
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
168
169
|
end
|
169
170
|
end
|
170
171
|
|
171
|
-
describe
|
172
|
-
it
|
172
|
+
describe ".fragment" do
|
173
|
+
it "should sanitize an HTML fragment with the given config" do
|
173
174
|
html = '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'
|
174
|
-
_(Sanitize.fragment(html, :
|
175
|
-
.must_equal
|
175
|
+
_(Sanitize.fragment(html, elements: ["strong"]))
|
176
|
+
.must_equal "Lorem ipsum <strong>dolor</strong> sit amet "
|
176
177
|
end
|
177
178
|
end
|
178
179
|
|
179
|
-
describe
|
180
|
-
it
|
180
|
+
describe ".node!" do
|
181
|
+
it "should sanitize a Nokogiri::XML::Node with the given config" do
|
181
182
|
doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
182
183
|
frag = doc.fragment
|
183
184
|
|
184
|
-
doc.xpath(
|
185
|
+
doc.xpath("/html/body/node()").each { |node| frag << node }
|
185
186
|
|
186
|
-
Sanitize.node!(frag, :
|
187
|
-
_(frag.to_html).must_equal
|
187
|
+
Sanitize.node!(frag, elements: ["strong"])
|
188
|
+
_(frag.to_html).must_equal "Lorem ipsum <strong>dolor</strong> sit amet "
|
188
189
|
end
|
189
190
|
end
|
190
191
|
end
|
@@ -192,6 +193,6 @@ describe 'Sanitize' do
|
|
192
193
|
private
|
193
194
|
|
194
195
|
def nest_html_content(html_content, depth)
|
195
|
-
"#{
|
196
|
+
"#{"<span>" * depth}#{html_content}#{"</span>" * depth}"
|
196
197
|
end
|
197
198
|
end
|