sanitize 6.1.3 → 7.0.0
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +32 -14
- data/LICENSE +3 -1
- data/README.md +120 -238
- data/lib/sanitize/config/basic.rb +15 -15
- data/lib/sanitize/config/default.rb +45 -45
- data/lib/sanitize/config/relaxed.rb +136 -32
- data/lib/sanitize/config/restricted.rb +2 -2
- data/lib/sanitize/config.rb +12 -14
- data/lib/sanitize/css.rb +308 -308
- data/lib/sanitize/transformers/clean_cdata.rb +9 -9
- data/lib/sanitize/transformers/clean_comment.rb +9 -9
- data/lib/sanitize/transformers/clean_css.rb +59 -55
- data/lib/sanitize/transformers/clean_doctype.rb +15 -15
- data/lib/sanitize/transformers/clean_element.rb +220 -237
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +38 -38
- data/test/common.rb +4 -3
- data/test/test_clean_comment.rb +26 -25
- data/test/test_clean_css.rb +14 -13
- data/test/test_clean_doctype.rb +21 -20
- data/test/test_clean_element.rb +258 -273
- data/test/test_config.rb +22 -21
- data/test/test_malicious_css.rb +20 -19
- data/test/test_malicious_html.rb +100 -99
- data/test/test_parser.rb +26 -25
- data/test/test_sanitize.rb +70 -69
- data/test/test_sanitize_css.rb +149 -114
- data/test/test_transformers.rb +81 -83
- metadata +14 -43
data/test/test_transformers.rb
CHANGED
@@ -1,123 +1,121 @@
|
|
1
|
-
#
|
2
|
-
require_relative 'common'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require_relative "common"
|
4
|
+
|
5
|
+
describe "Transformers" do
|
5
6
|
make_my_diffs_pretty!
|
6
7
|
parallelize_me!
|
7
8
|
|
8
|
-
it
|
9
|
-
Sanitize.fragment(
|
10
|
-
:
|
11
|
-
:
|
9
|
+
it "should receive a complete env Hash as input" do
|
10
|
+
Sanitize.fragment("<SPAN>foo</SPAN>",
|
11
|
+
foo: :bar,
|
12
|
+
transformers: lambda { |env|
|
12
13
|
return unless env[:node].element?
|
13
14
|
|
14
15
|
_(env[:config][:foo]).must_equal :bar
|
15
16
|
_(env[:is_allowlisted]).must_equal false
|
16
17
|
_(env[:is_whitelisted]).must_equal env[:is_allowlisted]
|
17
18
|
_(env[:node]).must_be_kind_of Nokogiri::XML::Node
|
18
|
-
_(env[:node_name]).must_equal
|
19
|
+
_(env[:node_name]).must_equal "span"
|
19
20
|
_(env[:node_allowlist]).must_be_kind_of Set
|
20
21
|
_(env[:node_allowlist]).must_be_empty
|
21
22
|
_(env[:node_whitelist]).must_equal env[:node_allowlist]
|
22
|
-
}
|
23
|
-
)
|
23
|
+
})
|
24
24
|
end
|
25
25
|
|
26
|
-
it
|
26
|
+
it "should traverse all node types, including the fragment itself" do
|
27
27
|
nodes = []
|
28
28
|
|
29
|
-
Sanitize.fragment(
|
30
|
-
:
|
31
|
-
)
|
29
|
+
Sanitize.fragment("<div>foo</div><!--bar--><script>cdata!</script>",
|
30
|
+
transformers: proc { |env| nodes << env[:node_name] })
|
32
31
|
|
33
32
|
_(nodes).must_equal %w[
|
34
33
|
#document-fragment div text text text comment script text
|
35
34
|
]
|
36
35
|
end
|
37
36
|
|
38
|
-
it
|
37
|
+
it "should perform top-down traversal" do
|
39
38
|
nodes = []
|
40
39
|
|
41
|
-
Sanitize.fragment(
|
42
|
-
:
|
43
|
-
)
|
40
|
+
Sanitize.fragment("<div><span><strong>foo</strong></span><b></b></div><p>bar</p>",
|
41
|
+
transformers: proc { |env| nodes << env[:node_name] if env[:node].element? })
|
44
42
|
|
45
43
|
_(nodes).must_equal %w[div span strong b p]
|
46
44
|
end
|
47
45
|
|
48
|
-
it
|
46
|
+
it "should allowlist nodes in the node allowlist" do
|
49
47
|
_(Sanitize.fragment('<div class="foo">foo</div><span>bar</span>',
|
50
|
-
:
|
51
|
-
proc {|env|
|
52
|
-
{:
|
48
|
+
transformers: [
|
49
|
+
proc { |env|
|
50
|
+
{node_allowlist: [env[:node]]} if env[:node_name] == "div"
|
53
51
|
},
|
54
52
|
|
55
|
-
proc {|env|
|
56
|
-
_(env[:is_allowlisted]).must_equal false unless env[:node_name] ==
|
57
|
-
_(env[:is_allowlisted]).must_equal true if env[:node_name] ==
|
58
|
-
_(env[:node_allowlist]).must_include env[:node] if env[:node_name] ==
|
53
|
+
proc { |env|
|
54
|
+
_(env[:is_allowlisted]).must_equal false unless env[:node_name] == "div"
|
55
|
+
_(env[:is_allowlisted]).must_equal true if env[:node_name] == "div"
|
56
|
+
_(env[:node_allowlist]).must_include env[:node] if env[:node_name] == "div"
|
59
57
|
_(env[:is_whitelisted]).must_equal env[:is_allowlisted]
|
60
58
|
_(env[:node_whitelist]).must_equal env[:node_allowlist]
|
61
59
|
}
|
62
|
-
]
|
63
|
-
)).must_equal '<div class="foo">foo</div>bar'
|
60
|
+
])).must_equal '<div class="foo">foo</div>bar'
|
64
61
|
end
|
65
62
|
|
66
|
-
it
|
63
|
+
it "should clear the node allowlist after each fragment" do
|
67
64
|
called = false
|
68
65
|
|
69
|
-
Sanitize.fragment(
|
70
|
-
:
|
71
|
-
)
|
66
|
+
Sanitize.fragment("<div>foo</div>",
|
67
|
+
transformers: proc { |env| {node_allowlist: [env[:node]]} })
|
72
68
|
|
73
|
-
Sanitize.fragment(
|
74
|
-
:
|
69
|
+
Sanitize.fragment("<div>foo</div>",
|
70
|
+
transformers: proc { |env|
|
75
71
|
called = true
|
76
72
|
_(env[:is_allowlisted]).must_equal false
|
77
73
|
_(env[:is_whitelisted]).must_equal env[:is_allowlisted]
|
78
74
|
_(env[:node_allowlist]).must_be_empty
|
79
75
|
_(env[:node_whitelist]).must_equal env[:node_allowlist]
|
80
|
-
}
|
81
|
-
)
|
76
|
+
})
|
82
77
|
|
83
78
|
_(called).must_equal true
|
84
79
|
end
|
85
80
|
|
86
|
-
it
|
87
|
-
def transformer(env)
|
88
|
-
|
89
|
-
|
81
|
+
it "should accept a method transformer" do
|
82
|
+
def transformer(env)
|
83
|
+
end
|
84
|
+
_(Sanitize.fragment("<div>foo</div>", transformers: method(:transformer)))
|
85
|
+
.must_equal(" foo ")
|
90
86
|
end
|
91
87
|
|
92
|
-
describe
|
93
|
-
require
|
88
|
+
describe "Image allowlist transformer" do
|
89
|
+
require "uri"
|
94
90
|
|
95
91
|
image_allowlist_transformer = lambda do |env|
|
96
92
|
# Ignore everything except <img> elements.
|
97
|
-
return unless env[:node_name] ==
|
93
|
+
return unless env[:node_name] == "img"
|
98
94
|
|
99
|
-
node
|
100
|
-
image_uri = URI.parse(node[
|
95
|
+
node = env[:node]
|
96
|
+
image_uri = URI.parse(node["src"])
|
101
97
|
|
102
98
|
# Only allow relative URLs or URLs with the example.com domain. The
|
103
99
|
# image_uri.host.nil? check ensures that protocol-relative URLs like
|
104
|
-
# "//evil.com/foo.jpg".
|
105
|
-
unless image_uri.host ==
|
106
|
-
|
100
|
+
# "//evil.com/foo.jpg" are not allowed.
|
101
|
+
unless image_uri.host == "example.com"
|
102
|
+
unless image_uri.host.nil? && image_uri.relative?
|
103
|
+
node.unlink # `Nokogiri::XML::Node#unlink` removes a node from the document
|
104
|
+
end
|
107
105
|
end
|
108
106
|
end
|
109
107
|
|
110
108
|
before do
|
111
109
|
@s = Sanitize.new(Sanitize::Config.merge(Sanitize::Config::RELAXED,
|
112
|
-
|
110
|
+
transformers: image_allowlist_transformer))
|
113
111
|
end
|
114
112
|
|
115
|
-
it
|
113
|
+
it "should allow images with relative URLs" do
|
116
114
|
input = '<img src="/foo/bar.jpg">'
|
117
115
|
_(@s.fragment(input)).must_equal(input)
|
118
116
|
end
|
119
117
|
|
120
|
-
it
|
118
|
+
it "should allow images at the example.com domain" do
|
121
119
|
input = '<img src="http://example.com/foo/bar.jpg">'
|
122
120
|
_(@s.fragment(input)).must_equal(input)
|
123
121
|
|
@@ -128,103 +126,103 @@ describe 'Transformers' do
|
|
128
126
|
_(@s.fragment(input)).must_equal(input)
|
129
127
|
end
|
130
128
|
|
131
|
-
it
|
129
|
+
it "should not allow images at other domains" do
|
132
130
|
input = '<img src="http://evil.com/foo/bar.jpg">'
|
133
|
-
_(@s.fragment(input)).must_equal(
|
131
|
+
_(@s.fragment(input)).must_equal("")
|
134
132
|
|
135
133
|
input = '<img src="https://evil.com/foo/bar.jpg">'
|
136
|
-
_(@s.fragment(input)).must_equal(
|
134
|
+
_(@s.fragment(input)).must_equal("")
|
137
135
|
|
138
136
|
input = '<img src="//evil.com/foo/bar.jpg">'
|
139
|
-
_(@s.fragment(input)).must_equal(
|
137
|
+
_(@s.fragment(input)).must_equal("")
|
140
138
|
|
141
139
|
input = '<img src="http://subdomain.example.com/foo/bar.jpg">'
|
142
|
-
_(@s.fragment(input)).must_equal(
|
140
|
+
_(@s.fragment(input)).must_equal("")
|
143
141
|
end
|
144
142
|
end
|
145
143
|
|
146
|
-
describe
|
144
|
+
describe "YouTube transformer" do
|
147
145
|
youtube_transformer = lambda do |env|
|
148
|
-
node
|
146
|
+
node = env[:node]
|
149
147
|
node_name = env[:node_name]
|
150
148
|
|
151
149
|
# Don't continue if this node is already allowlisted or is not an element.
|
152
150
|
return if env[:is_allowlisted] || !node.element?
|
153
151
|
|
154
152
|
# Don't continue unless the node is an iframe.
|
155
|
-
return unless node_name ==
|
153
|
+
return unless node_name == "iframe"
|
156
154
|
|
157
155
|
# Verify that the video URL is actually a valid YouTube video URL.
|
158
|
-
return unless
|
156
|
+
return unless %r{\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/}.match?(node["src"])
|
159
157
|
|
160
158
|
# We're now certain that this is a YouTube embed, but we still need to run
|
161
159
|
# it through a special Sanitize step to ensure that no unwanted elements or
|
162
160
|
# attributes that don't belong in a YouTube embed can sneak in.
|
163
161
|
Sanitize.node!(node, {
|
164
|
-
:
|
162
|
+
elements: %w[iframe],
|
165
163
|
|
166
|
-
:
|
167
|
-
|
164
|
+
attributes: {
|
165
|
+
"iframe" => %w[allowfullscreen frameborder height src width]
|
168
166
|
}
|
169
167
|
})
|
170
168
|
|
171
169
|
# Now that we're sure that this is a valid YouTube embed and that there are
|
172
170
|
# no unwanted elements or attributes hidden inside it, we can tell Sanitize
|
173
171
|
# to allowlist the current node.
|
174
|
-
{:
|
172
|
+
{node_allowlist: [node]}
|
175
173
|
end
|
176
174
|
|
177
|
-
it
|
175
|
+
it "should allow HTTP YouTube video embeds" do
|
178
176
|
input = '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
179
177
|
|
180
|
-
_(Sanitize.fragment(input, :
|
178
|
+
_(Sanitize.fragment(input, transformers: youtube_transformer))
|
181
179
|
.must_equal '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""></iframe>'
|
182
180
|
end
|
183
181
|
|
184
|
-
it
|
182
|
+
it "should allow HTTPS YouTube video embeds" do
|
185
183
|
input = '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
186
184
|
|
187
|
-
_(Sanitize.fragment(input, :
|
185
|
+
_(Sanitize.fragment(input, transformers: youtube_transformer))
|
188
186
|
.must_equal '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""></iframe>'
|
189
187
|
end
|
190
188
|
|
191
|
-
it
|
189
|
+
it "should allow protocol-relative YouTube video embeds" do
|
192
190
|
input = '<iframe width="420" height="315" src="//www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
193
191
|
|
194
|
-
_(Sanitize.fragment(input, :
|
192
|
+
_(Sanitize.fragment(input, transformers: youtube_transformer))
|
195
193
|
.must_equal '<iframe width="420" height="315" src="//www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""></iframe>'
|
196
194
|
end
|
197
195
|
|
198
|
-
it
|
196
|
+
it "should allow privacy-enhanced YouTube video embeds" do
|
199
197
|
input = '<iframe width="420" height="315" src="https://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
200
198
|
|
201
|
-
_(Sanitize.fragment(input, :
|
199
|
+
_(Sanitize.fragment(input, transformers: youtube_transformer))
|
202
200
|
.must_equal '<iframe width="420" height="315" src="https://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""></iframe>'
|
203
201
|
end
|
204
202
|
|
205
|
-
it
|
203
|
+
it "should not allow non-YouTube video embeds" do
|
206
204
|
input = '<iframe width="420" height="315" src="http://www.fake-youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen></iframe>'
|
207
205
|
|
208
|
-
_(Sanitize.fragment(input, :
|
209
|
-
.must_equal(
|
206
|
+
_(Sanitize.fragment(input, transformers: youtube_transformer))
|
207
|
+
.must_equal("")
|
210
208
|
end
|
211
209
|
end
|
212
210
|
|
213
|
-
describe
|
211
|
+
describe "DOM modification transformer" do
|
214
212
|
b_to_strong_tag_transformer = lambda do |env|
|
215
|
-
node
|
213
|
+
node = env[:node]
|
216
214
|
node_name = env[:node_name]
|
217
215
|
|
218
|
-
if node_name ==
|
219
|
-
node.name =
|
216
|
+
if node_name == "b"
|
217
|
+
node.name = "strong"
|
220
218
|
end
|
221
219
|
end
|
222
220
|
|
223
|
-
it
|
224
|
-
input =
|
221
|
+
it "should allow the <b> tag to be changed to a <strong> tag" do
|
222
|
+
input = "<b>text</b>"
|
225
223
|
|
226
|
-
_(Sanitize.fragment(input, :
|
227
|
-
.must_equal
|
224
|
+
_(Sanitize.fragment(input, elements: ["strong"], transformers: b_to_strong_tag_transformer))
|
225
|
+
.must_equal "<strong>text</strong>"
|
228
226
|
end
|
229
227
|
end
|
230
228
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 7.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Grove
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date: 2024-
|
10
|
+
date: 2024-12-30 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: crass
|
@@ -30,51 +29,24 @@ dependencies:
|
|
30
29
|
requirements:
|
31
30
|
- - ">="
|
32
31
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.
|
32
|
+
version: 1.16.8
|
34
33
|
type: :runtime
|
35
34
|
prerelease: false
|
36
35
|
version_requirements: !ruby/object:Gem::Requirement
|
37
36
|
requirements:
|
38
37
|
- - ">="
|
39
38
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '5.15'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '5.15'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rake
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: 13.0.6
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: 13.0.6
|
69
|
-
description: Sanitize is an allowlist-based HTML and CSS sanitizer. It removes all
|
70
|
-
HTML and/or CSS from a string except the elements, attributes, and properties you
|
71
|
-
choose to allow.
|
39
|
+
version: 1.16.8
|
40
|
+
description: |
|
41
|
+
Sanitize is an allowlist-based HTML and CSS sanitizer. It removes all HTML
|
42
|
+
and/or CSS from a string except the elements, attributes, and properties you
|
43
|
+
choose to allow.'
|
72
44
|
email: ryan@wonko.com
|
73
45
|
executables: []
|
74
46
|
extensions: []
|
75
47
|
extra_rdoc_files: []
|
76
48
|
files:
|
77
|
-
-
|
49
|
+
- CHANGELOG.md
|
78
50
|
- LICENSE
|
79
51
|
- README.md
|
80
52
|
- lib/sanitize.rb
|
@@ -106,9 +78,9 @@ homepage: https://github.com/rgrove/sanitize/
|
|
106
78
|
licenses:
|
107
79
|
- MIT
|
108
80
|
metadata:
|
109
|
-
changelog_uri: https://github.com/rgrove/sanitize/blob/main/
|
81
|
+
changelog_uri: https://github.com/rgrove/sanitize/blob/main/CHANGELOG.md
|
110
82
|
documentation_uri: https://rubydoc.info/github/rgrove/sanitize
|
111
|
-
|
83
|
+
rubygems_mfa_required: 'true'
|
112
84
|
rdoc_options: []
|
113
85
|
require_paths:
|
114
86
|
- lib
|
@@ -116,15 +88,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
116
88
|
requirements:
|
117
89
|
- - ">="
|
118
90
|
- !ruby/object:Gem::Version
|
119
|
-
version:
|
91
|
+
version: 3.1.0
|
120
92
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
121
93
|
requirements:
|
122
94
|
- - ">="
|
123
95
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
96
|
+
version: '0'
|
125
97
|
requirements: []
|
126
|
-
rubygems_version: 3.
|
127
|
-
signing_key:
|
98
|
+
rubygems_version: 3.6.2
|
128
99
|
specification_version: 4
|
129
100
|
summary: Allowlist-based HTML and CSS sanitizer.
|
130
101
|
test_files: []
|