sanitize 6.1.2 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +40 -14
- data/LICENSE +3 -1
- data/README.md +120 -238
- data/lib/sanitize/config/basic.rb +15 -15
- data/lib/sanitize/config/default.rb +45 -45
- data/lib/sanitize/config/relaxed.rb +136 -32
- data/lib/sanitize/config/restricted.rb +2 -2
- data/lib/sanitize/config.rb +12 -14
- data/lib/sanitize/css.rb +309 -303
- data/lib/sanitize/transformers/clean_cdata.rb +9 -9
- data/lib/sanitize/transformers/clean_comment.rb +9 -9
- data/lib/sanitize/transformers/clean_css.rb +59 -55
- data/lib/sanitize/transformers/clean_doctype.rb +15 -15
- data/lib/sanitize/transformers/clean_element.rb +220 -237
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +38 -38
- data/test/common.rb +4 -3
- data/test/test_clean_comment.rb +26 -25
- data/test/test_clean_css.rb +14 -13
- data/test/test_clean_doctype.rb +21 -20
- data/test/test_clean_element.rb +258 -273
- data/test/test_config.rb +22 -21
- data/test/test_malicious_css.rb +20 -19
- data/test/test_malicious_html.rb +100 -99
- data/test/test_parser.rb +26 -25
- data/test/test_sanitize.rb +70 -69
- data/test/test_sanitize_css.rb +152 -114
- data/test/test_transformers.rb +81 -83
- metadata +14 -43
data/test/test_sanitize.rb
CHANGED
@@ -1,190 +1,191 @@
|
|
1
|
-
#
|
2
|
-
require_relative 'common'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
require_relative "common"
|
4
|
+
|
5
|
+
describe "Sanitize" do
|
6
|
+
describe "initializer" do
|
7
|
+
it "should not modify a transformers array in the given config" do
|
7
8
|
transformers = [
|
8
9
|
lambda {}
|
9
10
|
]
|
10
11
|
|
11
|
-
Sanitize.new({
|
12
|
+
Sanitize.new({transformers: transformers})
|
12
13
|
_(transformers.length).must_equal(1)
|
13
14
|
end
|
14
15
|
end
|
15
16
|
|
16
|
-
describe
|
17
|
+
describe "instance methods" do
|
17
18
|
before do
|
18
19
|
@s = Sanitize.new
|
19
20
|
end
|
20
21
|
|
21
|
-
describe
|
22
|
+
describe "#document" do
|
22
23
|
before do
|
23
|
-
@s = Sanitize.new(:
|
24
|
+
@s = Sanitize.new(elements: ["html"])
|
24
25
|
end
|
25
26
|
|
26
|
-
it
|
27
|
+
it "should sanitize an HTML document" do
|
27
28
|
_(@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'))
|
28
29
|
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
29
30
|
end
|
30
31
|
|
31
|
-
it
|
32
|
-
input =
|
32
|
+
it "should not modify the input string" do
|
33
|
+
input = "<!DOCTYPE html><b>foo</b>"
|
33
34
|
@s.document(input)
|
34
|
-
_(input).must_equal(
|
35
|
+
_(input).must_equal("<!DOCTYPE html><b>foo</b>")
|
35
36
|
end
|
36
37
|
|
37
|
-
it
|
38
|
-
_(@s.document(
|
38
|
+
it "should not choke on frozen documents" do
|
39
|
+
_(@s.document("<!doctype html><html><b>foo</b>")).must_equal "<html>foo</html>"
|
39
40
|
end
|
40
41
|
|
41
|
-
it
|
42
|
+
it "should normalize newlines" do
|
42
43
|
_(@s.document("a\r\n\n\r\r\r\nz")).must_equal "<html>a\n\n\n\n\nz</html>"
|
43
44
|
end
|
44
45
|
|
45
|
-
it
|
46
|
+
it "should strip control characters (except ASCII whitespace)" do
|
46
47
|
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
47
48
|
whitespace = "\t\n\f\u0020"
|
48
49
|
_(@s.document("a#{sample_control_chars}#{whitespace}z")).must_equal "<html>a#{whitespace}z</html>"
|
49
50
|
end
|
50
51
|
|
51
|
-
it
|
52
|
+
it "should strip non-characters" do
|
52
53
|
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
53
54
|
_(@s.document("a#{sample_non_chars}z")).must_equal "<html>az</html>"
|
54
55
|
end
|
55
56
|
|
56
|
-
describe
|
57
|
+
describe "when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH" do
|
57
58
|
let(:content) do
|
58
|
-
content = nest_html_content(
|
59
|
+
content = nest_html_content("<b>foo</b>", Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH)
|
59
60
|
"<html>#{content}</html>"
|
60
61
|
end
|
61
62
|
|
62
|
-
it
|
63
|
+
it "raises an ArgumentError exception" do
|
63
64
|
assert_raises ArgumentError do
|
64
65
|
@s.document(content)
|
65
66
|
end
|
66
67
|
end
|
67
68
|
|
68
|
-
describe
|
69
|
+
describe "and :max_tree_depth of -1 is supplied in :parser_options" do
|
69
70
|
before do
|
70
|
-
@s = Sanitize.new(elements: [
|
71
|
+
@s = Sanitize.new(elements: ["html"], parser_options: {max_tree_depth: -1})
|
71
72
|
end
|
72
73
|
|
73
|
-
it
|
74
|
-
_(@s.document(content)).must_equal
|
74
|
+
it "does not raise an ArgumentError exception" do
|
75
|
+
_(@s.document(content)).must_equal "<html>foo</html>"
|
75
76
|
end
|
76
77
|
end
|
77
78
|
end
|
78
79
|
end
|
79
80
|
|
80
|
-
describe
|
81
|
-
it
|
81
|
+
describe "#fragment" do
|
82
|
+
it "should sanitize an HTML fragment" do
|
82
83
|
_(@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'))
|
83
|
-
.must_equal
|
84
|
+
.must_equal "Lorem ipsum dolor sit amet "
|
84
85
|
end
|
85
86
|
|
86
|
-
it
|
87
|
-
input =
|
87
|
+
it "should not modify the input string" do
|
88
|
+
input = "<b>foo</b>"
|
88
89
|
@s.fragment(input)
|
89
|
-
_(input).must_equal
|
90
|
+
_(input).must_equal "<b>foo</b>"
|
90
91
|
end
|
91
92
|
|
92
|
-
it
|
93
|
-
_(@s.fragment(
|
94
|
-
_(@s.fragment(
|
95
|
-
_(@s.fragment(
|
96
|
-
_(@s.fragment(
|
93
|
+
it "should not choke on fragments containing <html> or <body>" do
|
94
|
+
_(@s.fragment("<html><b>foo</b></html>")).must_equal "foo"
|
95
|
+
_(@s.fragment("<body><b>foo</b></body>")).must_equal "foo"
|
96
|
+
_(@s.fragment("<html><body><b>foo</b></body></html>")).must_equal "foo"
|
97
|
+
_(@s.fragment("<!DOCTYPE html><html><body><b>foo</b></body></html>")).must_equal "foo"
|
97
98
|
end
|
98
99
|
|
99
|
-
it
|
100
|
-
_(@s.fragment(
|
100
|
+
it "should not choke on frozen fragments" do
|
101
|
+
_(@s.fragment("<b>foo</b>")).must_equal "foo"
|
101
102
|
end
|
102
103
|
|
103
|
-
it
|
104
|
+
it "should normalize newlines" do
|
104
105
|
_(@s.fragment("a\r\n\n\r\r\r\nz")).must_equal "a\n\n\n\n\nz"
|
105
106
|
end
|
106
107
|
|
107
|
-
it
|
108
|
+
it "should strip control characters (except ASCII whitespace)" do
|
108
109
|
sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f"
|
109
110
|
whitespace = "\t\n\f\u0020"
|
110
111
|
_(@s.fragment("a#{sample_control_chars}#{whitespace}z")).must_equal "a#{whitespace}z"
|
111
112
|
end
|
112
113
|
|
113
|
-
it
|
114
|
+
it "should strip non-characters" do
|
114
115
|
sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}"
|
115
116
|
_(@s.fragment("a#{sample_non_chars}z")).must_equal "az"
|
116
117
|
end
|
117
118
|
|
118
|
-
describe
|
119
|
+
describe "when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH" do
|
119
120
|
let(:content) do
|
120
|
-
content = nest_html_content(
|
121
|
+
content = nest_html_content("<b>foo</b>", Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH)
|
121
122
|
"<body>#{content}</body>"
|
122
123
|
end
|
123
124
|
|
124
|
-
it
|
125
|
+
it "raises an ArgumentError exception" do
|
125
126
|
assert_raises ArgumentError do
|
126
127
|
@s.fragment(content)
|
127
128
|
end
|
128
129
|
end
|
129
130
|
|
130
|
-
describe
|
131
|
+
describe "and :max_tree_depth of -1 is supplied in :parser_options" do
|
131
132
|
before do
|
132
|
-
@s = Sanitize.new(parser_options: {
|
133
|
+
@s = Sanitize.new(parser_options: {max_tree_depth: -1})
|
133
134
|
end
|
134
135
|
|
135
|
-
it
|
136
|
-
_(@s.fragment(content)).must_equal
|
136
|
+
it "does not raise an ArgumentError exception" do
|
137
|
+
_(@s.fragment(content)).must_equal "foo"
|
137
138
|
end
|
138
139
|
end
|
139
140
|
end
|
140
141
|
end
|
141
142
|
|
142
|
-
describe
|
143
|
-
it
|
144
|
-
doc
|
143
|
+
describe "#node!" do
|
144
|
+
it "should sanitize a Nokogiri::XML::Node" do
|
145
|
+
doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
145
146
|
frag = doc.fragment
|
146
147
|
|
147
|
-
doc.xpath(
|
148
|
+
doc.xpath("/html/body/node()").each { |node| frag << node }
|
148
149
|
|
149
150
|
@s.node!(frag)
|
150
|
-
_(frag.to_html).must_equal
|
151
|
+
_(frag.to_html).must_equal "Lorem ipsum dolor sit amet "
|
151
152
|
end
|
152
153
|
|
153
154
|
describe "when the given node is a document and <html> isn't allowlisted" do
|
154
|
-
it
|
155
|
-
doc = Nokogiri::HTML5.parse(
|
155
|
+
it "should raise a Sanitize::Error" do
|
156
|
+
doc = Nokogiri::HTML5.parse("foo")
|
156
157
|
_(proc { @s.node!(doc) }).must_raise Sanitize::Error
|
157
158
|
end
|
158
159
|
end
|
159
160
|
end
|
160
161
|
end
|
161
162
|
|
162
|
-
describe
|
163
|
-
describe
|
164
|
-
it
|
163
|
+
describe "class methods" do
|
164
|
+
describe ".document" do
|
165
|
+
it "should sanitize an HTML document with the given config" do
|
165
166
|
html = '<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>'
|
166
|
-
_(Sanitize.document(html, :
|
167
|
+
_(Sanitize.document(html, elements: ["html"]))
|
167
168
|
.must_equal "<html>Lorem ipsum dolor sit amet </html>"
|
168
169
|
end
|
169
170
|
end
|
170
171
|
|
171
|
-
describe
|
172
|
-
it
|
172
|
+
describe ".fragment" do
|
173
|
+
it "should sanitize an HTML fragment with the given config" do
|
173
174
|
html = '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>'
|
174
|
-
_(Sanitize.fragment(html, :
|
175
|
-
.must_equal
|
175
|
+
_(Sanitize.fragment(html, elements: ["strong"]))
|
176
|
+
.must_equal "Lorem ipsum <strong>dolor</strong> sit amet "
|
176
177
|
end
|
177
178
|
end
|
178
179
|
|
179
|
-
describe
|
180
|
-
it
|
180
|
+
describe ".node!" do
|
181
|
+
it "should sanitize a Nokogiri::XML::Node with the given config" do
|
181
182
|
doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
182
183
|
frag = doc.fragment
|
183
184
|
|
184
|
-
doc.xpath(
|
185
|
+
doc.xpath("/html/body/node()").each { |node| frag << node }
|
185
186
|
|
186
|
-
Sanitize.node!(frag, :
|
187
|
-
_(frag.to_html).must_equal
|
187
|
+
Sanitize.node!(frag, elements: ["strong"])
|
188
|
+
_(frag.to_html).must_equal "Lorem ipsum <strong>dolor</strong> sit amet "
|
188
189
|
end
|
189
190
|
end
|
190
191
|
end
|
@@ -192,6 +193,6 @@ describe 'Sanitize' do
|
|
192
193
|
private
|
193
194
|
|
194
195
|
def nest_html_content(html_content, depth)
|
195
|
-
"#{
|
196
|
+
"#{"<span>" * depth}#{html_content}#{"</span>" * depth}"
|
196
197
|
end
|
197
198
|
end
|