rack-utf8_sanitizer 1.1.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +4 -6
- data/README.md +2 -2
- data/lib/rack/utf8_sanitizer.rb +90 -23
- data/rack-utf8_sanitizer.gemspec +1 -1
- data/test/test_utf8_sanitizer.rb +95 -0
- metadata +15 -23
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: be92627f205eac2e80ab554ad5add8ca2a084026
|
4
|
+
data.tar.gz: 5325281ed65e1e17d74cabbb75df877e0387e2e8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a3cc3842501b3c7e7bd4da92ae81432a6b6d014ab4af8a828a4269d781554fdb774e6c7fe3dd19ea582d959ea8d3093abc1984882587aede7084bf6e3381c10c
|
7
|
+
data.tar.gz: 108c557d20ce9c4716ae5d89fcd2878b8eb6ef3cd8a359d3e240a2cef575ef9a69a9cd129f8ab00bbde5628badb5f4d7f3f374badcaa04693162fa2f40531ac0
|
data/.travis.yml
CHANGED
@@ -4,12 +4,10 @@ rvm:
|
|
4
4
|
- 1.9.2
|
5
5
|
- 1.9.3
|
6
6
|
- 2.0.0
|
7
|
-
|
8
|
-
-
|
7
|
+
# 2.1, not 2.1.0 until fixed https://github.com/travis-ci/travis-ci/issues/2220
|
8
|
+
- 2.1
|
9
|
+
- jruby
|
10
|
+
- rbx-2
|
9
11
|
|
10
12
|
script:
|
11
13
|
- rake spec
|
12
|
-
|
13
|
-
matrix:
|
14
|
-
allow_failures:
|
15
|
-
- rvm: jruby-19mode
|
data/README.md
CHANGED
@@ -19,7 +19,7 @@ Or install it yourself as:
|
|
19
19
|
For Rails, add this to your `application.rb`:
|
20
20
|
|
21
21
|
``` ruby
|
22
|
-
config.middleware.insert_before "Rack::
|
22
|
+
config.middleware.insert_before "Rack::Runtime", Rack::UTF8Sanitizer
|
23
23
|
```
|
24
24
|
|
25
25
|
For Rack apps, add this to `config.ru`:
|
@@ -30,7 +30,7 @@ use Rack::UTF8Sanitizer
|
|
30
30
|
|
31
31
|
## Usage
|
32
32
|
|
33
|
-
Rack::UTF8Sanitizer divides all keys in the [Rack environment](rack.rubyforge.org/doc/SPEC.html) in two distinct groups: keys which contain raw data and the ones with percent-encoded data. The fields which are treated as percent-encoded are: `SCRIPT_NAME`, `REQUEST_PATH`, `REQUEST_URI`, `PATH_INFO`, `QUERY_STRING`, `HTTP_REFERER`.
|
33
|
+
Rack::UTF8Sanitizer divides all keys in the [Rack environment](http://rack.rubyforge.org/doc/SPEC.html) in two distinct groups: keys which contain raw data and the ones with percent-encoded data. The fields which are treated as percent-encoded are: `SCRIPT_NAME`, `REQUEST_PATH`, `REQUEST_URI`, `PATH_INFO`, `QUERY_STRING`, `HTTP_REFERER`.
|
34
34
|
|
35
35
|
The generic sanitization algorithm is as follows:
|
36
36
|
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# encoding: ascii-8bit
|
2
2
|
|
3
3
|
require 'uri'
|
4
|
+
require 'stringio'
|
4
5
|
|
5
6
|
module Rack
|
6
7
|
class UTF8Sanitizer
|
8
|
+
StringIO = ::StringIO
|
9
|
+
|
7
10
|
def initialize(app)
|
8
11
|
@app = app
|
9
12
|
end
|
@@ -20,30 +23,27 @@ module Rack
|
|
20
23
|
HTTP_REFERER
|
21
24
|
)
|
22
25
|
|
26
|
+
SANITIZABLE_CONTENT_TYPES = %w(
|
27
|
+
text/plain
|
28
|
+
application/x-www-form-urlencoded
|
29
|
+
)
|
30
|
+
|
31
|
+
# MRI-optimization
|
32
|
+
POST = 'POST'
|
33
|
+
PUT = 'PUT'
|
34
|
+
|
23
35
|
def sanitize(env)
|
36
|
+
request_method = env['REQUEST_METHOD']
|
37
|
+
if request_method == POST || request_method == PUT
|
38
|
+
sanitize_rack_input(env)
|
39
|
+
end
|
24
40
|
env.each do |key, value|
|
25
41
|
if URI_FIELDS.include?(key)
|
26
|
-
# URI.encode/decode expect the input to be in ASCII-8BIT.
|
27
|
-
# However, there could be invalid UTF-8 characters both in
|
28
|
-
# raw and percent-encoded form.
|
29
|
-
#
|
30
|
-
# So, first sanitize the value, then percent-decode it while
|
31
|
-
# treating as UTF-8, then sanitize the result and encode it back.
|
32
|
-
#
|
33
|
-
# The result is guaranteed to be UTF-8-safe.
|
34
|
-
|
35
|
-
decoded_value = unescape_unreserved(
|
36
|
-
sanitize_string(value).
|
37
|
-
force_encoding('ASCII-8BIT'))
|
38
|
-
|
39
42
|
env[key] = transfer_frozen(value,
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
elsif key =~ /^HTTP_/
|
43
|
+
sanitize_uri_encoded_string(value))
|
44
|
+
elsif key.start_with?("HTTP_")
|
44
45
|
# Just sanitize the headers and leave them in UTF-8. There is
|
45
46
|
# no reason to have UTF-8 in headers, but if it's valid, let it be.
|
46
|
-
|
47
47
|
env[key] = transfer_frozen(value,
|
48
48
|
sanitize_string(value))
|
49
49
|
end
|
@@ -52,6 +52,73 @@ module Rack
|
|
52
52
|
|
53
53
|
protected
|
54
54
|
|
55
|
+
def sanitize_rack_input(env)
|
56
|
+
# https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
|
57
|
+
# Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
|
58
|
+
# Ignoring charset in content type.
|
59
|
+
content_type = env['CONTENT_TYPE']
|
60
|
+
content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
|
61
|
+
content_type &&= content_type.downcase
|
62
|
+
return unless SANITIZABLE_CONTENT_TYPES.any? {|type| content_type == type }
|
63
|
+
env['rack.input'] &&= sanitize_io(env['rack.input'])
|
64
|
+
end
|
65
|
+
|
66
|
+
# Modeled after Rack::RewindableInput
|
67
|
+
# TODO: Should this delegate any methods to the original io?
|
68
|
+
class SanitizedRackInput
|
69
|
+
def initialize(original_io, sanitized_io)
|
70
|
+
@original_io = original_io
|
71
|
+
@sanitized_io = sanitized_io
|
72
|
+
end
|
73
|
+
def gets
|
74
|
+
@sanitized_io.gets
|
75
|
+
end
|
76
|
+
def read(*args)
|
77
|
+
@sanitized_io.read(*args)
|
78
|
+
end
|
79
|
+
def each(&block)
|
80
|
+
@sanitized_io.each(&block)
|
81
|
+
end
|
82
|
+
def rewind
|
83
|
+
@sanitized_io.rewind
|
84
|
+
end
|
85
|
+
def close
|
86
|
+
@sanitized_io.close
|
87
|
+
@original_io.close if @original_io.respond_to?(:close)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def sanitize_io(io)
|
92
|
+
input = io.read
|
93
|
+
sanitized_io = transfer_frozen(input,
|
94
|
+
sanitize_string(input))
|
95
|
+
SanitizedRackInput.new(io, StringIO.new(sanitized_io))
|
96
|
+
end
|
97
|
+
|
98
|
+
# URI.encode/decode expect the input to be in ASCII-8BIT.
|
99
|
+
# However, there could be invalid UTF-8 characters both in
|
100
|
+
# raw and percent-encoded form.
|
101
|
+
#
|
102
|
+
# So, first sanitize the value, then percent-decode it while
|
103
|
+
# treating as UTF-8, then sanitize the result and encode it back.
|
104
|
+
#
|
105
|
+
# The result is guaranteed to be UTF-8-safe.
|
106
|
+
def sanitize_uri_encoded_string(input)
|
107
|
+
decoded_value = decode_string(input)
|
108
|
+
reencode_string(decoded_value)
|
109
|
+
end
|
110
|
+
|
111
|
+
def reencode_string(decoded_value)
|
112
|
+
escape_unreserved(
|
113
|
+
sanitize_string(decoded_value))
|
114
|
+
end
|
115
|
+
|
116
|
+
def decode_string(input)
|
117
|
+
unescape_unreserved(
|
118
|
+
sanitize_string(input).
|
119
|
+
force_encoding(Encoding::ASCII_8BIT))
|
120
|
+
end
|
121
|
+
|
55
122
|
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
56
123
|
# plus all multibyte UTF-8 characters.
|
57
124
|
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
@@ -82,21 +149,21 @@ module Rack
|
|
82
149
|
UNSAFE = /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]%]/
|
83
150
|
|
84
151
|
# Performs the reverse function of `unescape_unreserved`. Unlike
|
85
|
-
# the previous function, we can reuse the logic in URI#
|
152
|
+
# the previous function, we can reuse the logic in URI#encode
|
86
153
|
def escape_unreserved(input)
|
87
|
-
URI.
|
154
|
+
URI.encode(input, UNSAFE)
|
88
155
|
end
|
89
156
|
|
90
157
|
def sanitize_string(input)
|
91
158
|
if input.is_a? String
|
92
|
-
input = input.dup.force_encoding(
|
159
|
+
input = input.dup.force_encoding(Encoding::UTF_8)
|
93
160
|
|
94
161
|
if input.valid_encoding?
|
95
162
|
input
|
96
163
|
else
|
97
164
|
input.
|
98
|
-
force_encoding(
|
99
|
-
encode!(
|
165
|
+
force_encoding(Encoding::ASCII_8BIT).
|
166
|
+
encode!(Encoding::UTF_8,
|
100
167
|
invalid: :replace,
|
101
168
|
undef: :replace)
|
102
169
|
end
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.1.0'
|
5
|
+
gem.version = '1.2.1'
|
6
6
|
gem.authors = ["Peter Zotov"]
|
7
7
|
gem.email = ["whitequark@whitequark.org"]
|
8
8
|
gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -151,4 +151,99 @@ describe Rack::UTF8Sanitizer do
|
|
151
151
|
env["REQUEST_PATH"].should.be.frozen
|
152
152
|
end
|
153
153
|
end
|
154
|
+
|
155
|
+
describe "with form data" do
|
156
|
+
def request_env
|
157
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
158
|
+
{
|
159
|
+
"REQUEST_METHOD" => "POST",
|
160
|
+
"CONTENT_TYPE" => "application/x-www-form-urlencoded;foo=bar",
|
161
|
+
"HTTP_USER_AGENT" => @plain_input,
|
162
|
+
"rack.input" => @rack_input,
|
163
|
+
}
|
164
|
+
end
|
165
|
+
def sanitize_form_data(request_env = request_env)
|
166
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
167
|
+
env = @app.(request_env)
|
168
|
+
sanitized_input = env['rack.input'].read
|
169
|
+
yield sanitized_input if block_given?
|
170
|
+
env['rack.input'].rewind
|
171
|
+
behaves_like :does_sanitize_plain
|
172
|
+
behaves_like :does_sanitize_uri
|
173
|
+
behaves_like :identity_plain
|
174
|
+
behaves_like :identity_uri
|
175
|
+
env['rack.input'].close
|
176
|
+
end
|
177
|
+
|
178
|
+
it "sanitizes StringIO rack.input" do
|
179
|
+
input = "foo=bla&quux=bar"
|
180
|
+
@rack_input = StringIO.new input
|
181
|
+
|
182
|
+
sanitize_form_data do |sanitized_input|
|
183
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
184
|
+
sanitized_input.should.be.valid_encoding
|
185
|
+
sanitized_input.should == input
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
it "sanitizes StringIO rack.input with bad encoding" do
|
190
|
+
input = "foo=bla&quux=bar\xED"
|
191
|
+
@rack_input = StringIO.new input
|
192
|
+
|
193
|
+
sanitize_form_data do |sanitized_input|
|
194
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
195
|
+
sanitized_input.should.be.valid_encoding
|
196
|
+
sanitized_input.should != input
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
it "sanitizes non-StringIO rack.input" do
|
201
|
+
require 'rack/rewindable_input'
|
202
|
+
input = "foo=bla&quux=bar"
|
203
|
+
@rack_input = Rack::RewindableInput.new(StringIO.new(input))
|
204
|
+
|
205
|
+
sanitize_form_data do |sanitized_input|
|
206
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
207
|
+
sanitized_input.should.be.valid_encoding
|
208
|
+
sanitized_input.should == input
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
it "sanitizes non-StringIO rack.input with bad encoding" do
|
213
|
+
require 'rack/rewindable_input'
|
214
|
+
input = "foo=bla&quux=bar\xED"
|
215
|
+
@rack_input = Rack::RewindableInput.new(StringIO.new(input))
|
216
|
+
|
217
|
+
sanitize_form_data do |sanitized_input|
|
218
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
219
|
+
sanitized_input.should.be.valid_encoding
|
220
|
+
sanitized_input.should != input
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
it "does not sanitize the rack body if there is no CONTENT_TYPE" do
|
225
|
+
input = "foo=bla&quux=bar\xED"
|
226
|
+
@rack_input = StringIO.new input
|
227
|
+
|
228
|
+
env = request_env.update('CONTENT_TYPE' => nil)
|
229
|
+
sanitize_form_data(env) do |sanitized_input|
|
230
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
231
|
+
sanitized_input.should.be.valid_encoding
|
232
|
+
sanitized_input.should == input
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
it "does not sanitize the rack body if there is empty CONTENT_TYPE" do
|
237
|
+
input = "foo=bla&quux=bar\xED"
|
238
|
+
@rack_input = StringIO.new input
|
239
|
+
|
240
|
+
env = request_env.update('CONTENT_TYPE' => '')
|
241
|
+
sanitize_form_data(env) do |sanitized_input|
|
242
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
243
|
+
sanitized_input.should.be.valid_encoding
|
244
|
+
sanitized_input.should == input
|
245
|
+
end
|
246
|
+
end
|
247
|
+
|
248
|
+
end
|
154
249
|
end
|
metadata
CHANGED
@@ -1,62 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Peter Zotov
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-05-27 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rack
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- - ~>
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '1.0'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: bacon
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - ">="
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - ">="
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: bacon-colored_output
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - ">="
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - ">="
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
description: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8
|
@@ -67,8 +60,8 @@ executables: []
|
|
67
60
|
extensions: []
|
68
61
|
extra_rdoc_files: []
|
69
62
|
files:
|
70
|
-
- .gitignore
|
71
|
-
- .travis.yml
|
63
|
+
- ".gitignore"
|
64
|
+
- ".travis.yml"
|
72
65
|
- Gemfile
|
73
66
|
- LICENSE.txt
|
74
67
|
- README.md
|
@@ -78,27 +71,26 @@ files:
|
|
78
71
|
- test/test_utf8_sanitizer.rb
|
79
72
|
homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
80
73
|
licenses: []
|
74
|
+
metadata: {}
|
81
75
|
post_install_message:
|
82
76
|
rdoc_options: []
|
83
77
|
require_paths:
|
84
78
|
- lib
|
85
79
|
required_ruby_version: !ruby/object:Gem::Requirement
|
86
|
-
none: false
|
87
80
|
requirements:
|
88
|
-
- -
|
81
|
+
- - ">="
|
89
82
|
- !ruby/object:Gem::Version
|
90
83
|
version: '1.9'
|
91
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
-
none: false
|
93
85
|
requirements:
|
94
|
-
- -
|
86
|
+
- - ">="
|
95
87
|
- !ruby/object:Gem::Version
|
96
88
|
version: '0'
|
97
89
|
requirements: []
|
98
90
|
rubyforge_project:
|
99
|
-
rubygems_version:
|
91
|
+
rubygems_version: 2.2.2
|
100
92
|
signing_key:
|
101
|
-
specification_version:
|
93
|
+
specification_version: 4
|
102
94
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
103
95
|
in request URI and headers.
|
104
96
|
test_files:
|