sanitize 3.0.0 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +12 -2
- data/README.md +27 -0
- data/lib/sanitize/version.rb +1 -1
- data/test/test_transformers.rb +109 -53
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8446f68552c50a6a2145f1e9898a75541099c430
|
4
|
+
data.tar.gz: b7bed4246c97064a0ba6551eeacf9e5a972b5f6d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 17403cd6e16d7d760e74c9549cde13d3f8c6fca545ae06812810128b561d1a04f58ac432847d58b90daa4231aff97bd5cbd87d03ac6b7c6d3f07b8710d788ebc
|
7
|
+
data.tar.gz: 54b94108167ccfbd31bf502aeb7c3dc270263e4d8d5cf149161f0a18c53324c6d84e4dc7251416f4e91824b8124433d6abc1f203e13c3627ea53cb8a3152533a
|
data/HISTORY.md
CHANGED
@@ -1,8 +1,18 @@
|
|
1
1
|
Sanitize History
|
2
2
|
================================================================================
|
3
3
|
|
4
|
-
Version 3.0.0 (2014-06-21)
|
5
|
-
|
4
|
+
Version 3.0.1 (2014-09-02)
|
5
|
+
--------------------------
|
6
|
+
|
7
|
+
* Updated Nokogumbo to 1.1.11 to pick up a fix for a Gumbo bug in which certain
|
8
|
+
HTML character entities, such as `&Ouml;`, were parsed incorrectly, leaving
|
9
|
+
the semicolon behind in the output. [#114][114]
|
10
|
+
|
11
|
+
[114]:https://github.com/rgrove/sanitize/issues/114
|
12
|
+
|
13
|
+
|
14
|
+
Version 3.0.0 (2014-06-21)
|
15
|
+
--------------------------
|
6
16
|
|
7
17
|
As of this version, Sanitize adheres strictly to the [SemVer 2.0.0][semver]
|
8
18
|
versioning standard. This release contains API and output changes that are
|
data/README.md
CHANGED
@@ -30,6 +30,8 @@ Links
|
|
30
30
|
* [Home](https://github.com/rgrove/sanitize/)
|
31
31
|
* [API Docs](http://rubydoc.info/github/rgrove/sanitize/master)
|
32
32
|
* [Issues](https://github.com/rgrove/sanitize/issues)
|
33
|
+
* [Release History](https://github.com/rgrove/sanitize/blob/master/HISTORY.md#sanitize-history)
|
34
|
+
* [Online Demo](https://sanitize.herokuapp.com/)
|
33
35
|
* [Biased comparison of Ruby HTML sanitization libraries](https://github.com/rgrove/sanitize/blob/master/COMPARISON.md)
|
34
36
|
|
35
37
|
Installation
|
@@ -541,6 +543,31 @@ Transformers have a tremendous amount of power, including the power to
|
|
541
543
|
completely bypass Sanitize's built-in filtering. Be careful! Your safety is in
|
542
544
|
your own hands.
|
543
545
|
|
546
|
+
### Example: Transformer to whitelist image URLs by domain
|
547
|
+
|
548
|
+
The following example demonstrates how to remove image elements unless they use
|
549
|
+
a relative URL or are hosted on a specific domain. It assumes that the `<img>`
|
550
|
+
element and its `src` attribute are already whitelisted.
|
551
|
+
|
552
|
+
```ruby
|
553
|
+
require 'uri'
|
554
|
+
|
555
|
+
image_whitelist_transformer = lambda do |env|
|
556
|
+
# Ignore everything except <img> elements.
|
557
|
+
return unless env[:node_name] == 'img'
|
558
|
+
|
559
|
+
node = env[:node]
|
560
|
+
image_uri = URI.parse(node['src'])
|
561
|
+
|
562
|
+
# Only allow relative URLs or URLs with the example.com domain. The
|
563
|
+
# image_uri.host.nil? check ensures that protocol-relative URLs like
|
564
|
+
# "//evil.com/foo.jpg" are rejected rather than treated as relative URLs.
|
565
|
+
unless image_uri.host == 'example.com' || (image_uri.host.nil? && image_uri.relative?)
|
566
|
+
node.unlink # `Nokogiri::XML::Node#unlink` removes a node from the document
|
567
|
+
end
|
568
|
+
end
|
569
|
+
```
|
570
|
+
|
544
571
|
### Example: Transformer to whitelist YouTube video embeds
|
545
572
|
|
546
573
|
The following example demonstrates how to create a transformer that will safely
|
data/lib/sanitize/version.rb
CHANGED
data/test/test_transformers.rb
CHANGED
@@ -5,36 +5,6 @@ describe 'Transformers' do
|
|
5
5
|
make_my_diffs_pretty!
|
6
6
|
parallelize_me!
|
7
7
|
|
8
|
-
youtube_transformer = lambda do |env|
|
9
|
-
node = env[:node]
|
10
|
-
node_name = env[:node_name]
|
11
|
-
|
12
|
-
# Don't continue if this node is already whitelisted or is not an element.
|
13
|
-
return if env[:is_whitelisted] || !node.element?
|
14
|
-
|
15
|
-
# Don't continue unless the node is an iframe.
|
16
|
-
return unless node_name == 'iframe'
|
17
|
-
|
18
|
-
# Verify that the video URL is actually a valid YouTube video URL.
|
19
|
-
return unless node['src'] =~ %r|\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/|
|
20
|
-
|
21
|
-
# We're now certain that this is a YouTube embed, but we still need to run
|
22
|
-
# it through a special Sanitize step to ensure that no unwanted elements or
|
23
|
-
# attributes that don't belong in a YouTube embed can sneak in.
|
24
|
-
Sanitize.node!(node, {
|
25
|
-
:elements => %w[iframe],
|
26
|
-
|
27
|
-
:attributes => {
|
28
|
-
'iframe' => %w[allowfullscreen frameborder height src width]
|
29
|
-
}
|
30
|
-
})
|
31
|
-
|
32
|
-
# Now that we're sure that this is a valid YouTube embed and that there are
|
33
|
-
# no unwanted elements or attributes hidden inside it, we can tell Sanitize
|
34
|
-
# to whitelist the current node.
|
35
|
-
{:node_whitelist => [node]}
|
36
|
-
end
|
37
|
-
|
38
8
|
it 'should receive a complete env Hash as input' do
|
39
9
|
Sanitize.fragment('<SPAN>foo</SPAN>',
|
40
10
|
:foo => :bar,
|
@@ -107,38 +77,124 @@ describe 'Transformers' do
|
|
107
77
|
called.must_equal true
|
108
78
|
end
|
109
79
|
|
110
|
-
|
111
|
-
|
80
|
+
describe 'Image whitelist transformer' do
|
81
|
+
require 'uri'
|
112
82
|
|
113
|
-
|
114
|
-
|
115
|
-
|
83
|
+
image_whitelist_transformer = lambda do |env|
|
84
|
+
# Ignore everything except <img> elements.
|
85
|
+
return unless env[:node_name] == 'img'
|
116
86
|
|
117
|
-
|
118
|
-
|
87
|
+
node = env[:node]
|
88
|
+
image_uri = URI.parse(node['src'])
|
119
89
|
|
120
|
-
|
121
|
-
.
|
122
|
-
|
90
|
+
# Only allow relative URLs or URLs with the example.com domain. The
|
91
|
+
# image_uri.host.nil? check ensures that protocol-relative URLs like
|
92
|
+
# "//evil.com/foo.jpg" are rejected rather than treated as relative URLs.
|
93
|
+
unless image_uri.host == 'example.com' || (image_uri.host.nil? && image_uri.relative?)
|
94
|
+
node.unlink # `Nokogiri::XML::Node#unlink` removes a node from the document
|
95
|
+
end
|
96
|
+
end
|
123
97
|
|
124
|
-
|
125
|
-
|
98
|
+
before do
|
99
|
+
@s = Sanitize.new(Sanitize::Config.merge(Sanitize::Config::RELAXED,
|
100
|
+
:transformers => image_whitelist_transformer))
|
101
|
+
end
|
126
102
|
|
127
|
-
|
128
|
-
|
129
|
-
|
103
|
+
it 'should allow images with relative URLs' do
|
104
|
+
input = '<img src="/foo/bar.jpg">'
|
105
|
+
@s.fragment(input).must_equal(input)
|
106
|
+
end
|
107
|
+
|
108
|
+
it 'should allow images at the example.com domain' do
|
109
|
+
input = '<img src="http://example.com/foo/bar.jpg">'
|
110
|
+
@s.fragment(input).must_equal(input)
|
111
|
+
|
112
|
+
input = '<img src="https://example.com/foo/bar.jpg">'
|
113
|
+
@s.fragment(input).must_equal(input)
|
114
|
+
|
115
|
+
input = '<img src="//example.com/foo/bar.jpg">'
|
116
|
+
@s.fragment(input).must_equal(input)
|
117
|
+
end
|
118
|
+
|
119
|
+
it 'should not allow images at other domains' do
|
120
|
+
input = '<img src="http://evil.com/foo/bar.jpg">'
|
121
|
+
@s.fragment(input).must_equal('')
|
122
|
+
|
123
|
+
input = '<img src="https://evil.com/foo/bar.jpg">'
|
124
|
+
@s.fragment(input).must_equal('')
|
130
125
|
|
131
|
-
|
132
|
-
|
126
|
+
input = '<img src="//evil.com/foo/bar.jpg">'
|
127
|
+
@s.fragment(input).must_equal('')
|
133
128
|
|
134
|
-
|
135
|
-
.must_equal
|
129
|
+
input = '<img src="http://subdomain.example.com/foo/bar.jpg">'
|
130
|
+
@s.fragment(input).must_equal('')
|
131
|
+
end
|
136
132
|
end
|
137
133
|
|
138
|
-
|
139
|
-
|
134
|
+
describe 'YouTube transformer' do
|
135
|
+
youtube_transformer = lambda do |env|
|
136
|
+
node = env[:node]
|
137
|
+
node_name = env[:node_name]
|
138
|
+
|
139
|
+
# Don't continue if this node is already whitelisted or is not an element.
|
140
|
+
return if env[:is_whitelisted] || !node.element?
|
141
|
+
|
142
|
+
# Don't continue unless the node is an iframe.
|
143
|
+
return unless node_name == 'iframe'
|
144
|
+
|
145
|
+
# Verify that the video URL is actually a valid YouTube video URL.
|
146
|
+
return unless node['src'] =~ %r|\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/|
|
147
|
+
|
148
|
+
# We're now certain that this is a YouTube embed, but we still need to run
|
149
|
+
# it through a special Sanitize step to ensure that no unwanted elements or
|
150
|
+
# attributes that don't belong in a YouTube embed can sneak in.
|
151
|
+
Sanitize.node!(node, {
|
152
|
+
:elements => %w[iframe],
|
153
|
+
|
154
|
+
:attributes => {
|
155
|
+
'iframe' => %w[allowfullscreen frameborder height src width]
|
156
|
+
}
|
157
|
+
})
|
158
|
+
|
159
|
+
# Now that we're sure that this is a valid YouTube embed and that there are
|
160
|
+
# no unwanted elements or attributes hidden inside it, we can tell Sanitize
|
161
|
+
# to whitelist the current node.
|
162
|
+
{:node_whitelist => [node]}
|
163
|
+
end
|
164
|
+
|
165
|
+
it 'should allow HTTP YouTube video embeds' do
|
166
|
+
input = '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
167
|
+
|
168
|
+
Sanitize.fragment(input, :transformers => youtube_transformer)
|
169
|
+
.must_equal '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""><script>alert()</script></iframe>'
|
170
|
+
end
|
171
|
+
|
172
|
+
it 'should allow HTTPS YouTube video embeds' do
|
173
|
+
input = '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
174
|
+
|
175
|
+
Sanitize.fragment(input, :transformers => youtube_transformer)
|
176
|
+
.must_equal '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""><script>alert()</script></iframe>'
|
177
|
+
end
|
178
|
+
|
179
|
+
it 'should allow protocol-relative YouTube video embeds' do
|
180
|
+
input = '<iframe width="420" height="315" src="//www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
181
|
+
|
182
|
+
Sanitize.fragment(input, :transformers => youtube_transformer)
|
183
|
+
.must_equal '<iframe width="420" height="315" src="//www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""><script>alert()</script></iframe>'
|
184
|
+
end
|
185
|
+
|
186
|
+
it 'should allow privacy-enhanced YouTube video embeds' do
|
187
|
+
input = '<iframe width="420" height="315" src="https://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
188
|
+
|
189
|
+
Sanitize.fragment(input, :transformers => youtube_transformer)
|
190
|
+
.must_equal '<iframe width="420" height="315" src="https://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""><script>alert()</script></iframe>'
|
191
|
+
end
|
192
|
+
|
193
|
+
it 'should not allow non-YouTube video embeds' do
|
194
|
+
input = '<iframe width="420" height="315" src="http://www.fake-youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen></iframe>'
|
140
195
|
|
141
|
-
|
142
|
-
|
196
|
+
Sanitize.fragment(input, :transformers => youtube_transformer)
|
197
|
+
.must_equal('')
|
198
|
+
end
|
143
199
|
end
|
144
200
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Grove
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: crass
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - '='
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.1.
|
47
|
+
version: 1.1.11
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - '='
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 1.1.
|
54
|
+
version: 1.1.11
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: minitest
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|