rack-utf8_sanitizer 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +4 -6
- data/README.md +2 -2
- data/lib/rack/utf8_sanitizer.rb +90 -23
- data/rack-utf8_sanitizer.gemspec +1 -1
- data/test/test_utf8_sanitizer.rb +95 -0
- metadata +15 -23
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: be92627f205eac2e80ab554ad5add8ca2a084026
|
4
|
+
data.tar.gz: 5325281ed65e1e17d74cabbb75df877e0387e2e8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a3cc3842501b3c7e7bd4da92ae81432a6b6d014ab4af8a828a4269d781554fdb774e6c7fe3dd19ea582d959ea8d3093abc1984882587aede7084bf6e3381c10c
|
7
|
+
data.tar.gz: 108c557d20ce9c4716ae5d89fcd2878b8eb6ef3cd8a359d3e240a2cef575ef9a69a9cd129f8ab00bbde5628badb5f4d7f3f374badcaa04693162fa2f40531ac0
|
data/.travis.yml
CHANGED
@@ -4,12 +4,10 @@ rvm:
|
|
4
4
|
- 1.9.2
|
5
5
|
- 1.9.3
|
6
6
|
- 2.0.0
|
7
|
-
|
8
|
-
-
|
7
|
+
# 2.1, not 2.1.0 until fixed https://github.com/travis-ci/travis-ci/issues/2220
|
8
|
+
- 2.1
|
9
|
+
- jruby
|
10
|
+
- rbx-2
|
9
11
|
|
10
12
|
script:
|
11
13
|
- rake spec
|
12
|
-
|
13
|
-
matrix:
|
14
|
-
allow_failures:
|
15
|
-
- rvm: jruby-19mode
|
data/README.md
CHANGED
@@ -19,7 +19,7 @@ Or install it yourself as:
|
|
19
19
|
For Rails, add this to your `application.rb`:
|
20
20
|
|
21
21
|
``` ruby
|
22
|
-
config.middleware.insert_before "Rack::
|
22
|
+
config.middleware.insert_before "Rack::Runtime", Rack::UTF8Sanitizer
|
23
23
|
```
|
24
24
|
|
25
25
|
For Rack apps, add this to `config.ru`:
|
@@ -30,7 +30,7 @@ use Rack::UTF8Sanitizer
|
|
30
30
|
|
31
31
|
## Usage
|
32
32
|
|
33
|
-
Rack::UTF8Sanitizer divides all keys in the [Rack environment](rack.rubyforge.org/doc/SPEC.html) in two distinct groups: keys which contain raw data and the ones with percent-encoded data. The fields which are treated as percent-encoded are: `SCRIPT_NAME`, `REQUEST_PATH`, `REQUEST_URI`, `PATH_INFO`, `QUERY_STRING`, `HTTP_REFERER`.
|
33
|
+
Rack::UTF8Sanitizer divides all keys in the [Rack environment](http://rack.rubyforge.org/doc/SPEC.html) in two distinct groups: keys which contain raw data and the ones with percent-encoded data. The fields which are treated as percent-encoded are: `SCRIPT_NAME`, `REQUEST_PATH`, `REQUEST_URI`, `PATH_INFO`, `QUERY_STRING`, `HTTP_REFERER`.
|
34
34
|
|
35
35
|
The generic sanitization algorithm is as follows:
|
36
36
|
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# encoding: ascii-8bit
|
2
2
|
|
3
3
|
require 'uri'
|
4
|
+
require 'stringio'
|
4
5
|
|
5
6
|
module Rack
|
6
7
|
class UTF8Sanitizer
|
8
|
+
StringIO = ::StringIO
|
9
|
+
|
7
10
|
def initialize(app)
|
8
11
|
@app = app
|
9
12
|
end
|
@@ -20,30 +23,27 @@ module Rack
|
|
20
23
|
HTTP_REFERER
|
21
24
|
)
|
22
25
|
|
26
|
+
SANITIZABLE_CONTENT_TYPES = %w(
|
27
|
+
text/plain
|
28
|
+
application/x-www-form-urlencoded
|
29
|
+
)
|
30
|
+
|
31
|
+
# MRI-optimization
|
32
|
+
POST = 'POST'
|
33
|
+
PUT = 'PUT'
|
34
|
+
|
23
35
|
def sanitize(env)
|
36
|
+
request_method = env['REQUEST_METHOD']
|
37
|
+
if request_method == POST || request_method == PUT
|
38
|
+
sanitize_rack_input(env)
|
39
|
+
end
|
24
40
|
env.each do |key, value|
|
25
41
|
if URI_FIELDS.include?(key)
|
26
|
-
# URI.encode/decode expect the input to be in ASCII-8BIT.
|
27
|
-
# However, there could be invalid UTF-8 characters both in
|
28
|
-
# raw and percent-encoded form.
|
29
|
-
#
|
30
|
-
# So, first sanitize the value, then percent-decode it while
|
31
|
-
# treating as UTF-8, then sanitize the result and encode it back.
|
32
|
-
#
|
33
|
-
# The result is guaranteed to be UTF-8-safe.
|
34
|
-
|
35
|
-
decoded_value = unescape_unreserved(
|
36
|
-
sanitize_string(value).
|
37
|
-
force_encoding('ASCII-8BIT'))
|
38
|
-
|
39
42
|
env[key] = transfer_frozen(value,
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
elsif key =~ /^HTTP_/
|
43
|
+
sanitize_uri_encoded_string(value))
|
44
|
+
elsif key.start_with?("HTTP_")
|
44
45
|
# Just sanitize the headers and leave them in UTF-8. There is
|
45
46
|
# no reason to have UTF-8 in headers, but if it's valid, let it be.
|
46
|
-
|
47
47
|
env[key] = transfer_frozen(value,
|
48
48
|
sanitize_string(value))
|
49
49
|
end
|
@@ -52,6 +52,73 @@ module Rack
|
|
52
52
|
|
53
53
|
protected
|
54
54
|
|
55
|
+
def sanitize_rack_input(env)
|
56
|
+
# https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
|
57
|
+
# Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
|
58
|
+
# Ignoring charset in content type.
|
59
|
+
content_type = env['CONTENT_TYPE']
|
60
|
+
content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
|
61
|
+
content_type &&= content_type.downcase
|
62
|
+
return unless SANITIZABLE_CONTENT_TYPES.any? {|type| content_type == type }
|
63
|
+
env['rack.input'] &&= sanitize_io(env['rack.input'])
|
64
|
+
end
|
65
|
+
|
66
|
+
# Modeled after Rack::RewindableInput
|
67
|
+
# TODO: Should this delegate any methods to the original io?
|
68
|
+
class SanitizedRackInput
|
69
|
+
def initialize(original_io, sanitized_io)
|
70
|
+
@original_io = original_io
|
71
|
+
@sanitized_io = sanitized_io
|
72
|
+
end
|
73
|
+
def gets
|
74
|
+
@sanitized_io.gets
|
75
|
+
end
|
76
|
+
def read(*args)
|
77
|
+
@sanitized_io.read(*args)
|
78
|
+
end
|
79
|
+
def each(&block)
|
80
|
+
@sanitized_io.each(&block)
|
81
|
+
end
|
82
|
+
def rewind
|
83
|
+
@sanitized_io.rewind
|
84
|
+
end
|
85
|
+
def close
|
86
|
+
@sanitized_io.close
|
87
|
+
@original_io.close if @original_io.respond_to?(:close)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def sanitize_io(io)
|
92
|
+
input = io.read
|
93
|
+
sanitized_io = transfer_frozen(input,
|
94
|
+
sanitize_string(input))
|
95
|
+
SanitizedRackInput.new(io, StringIO.new(sanitized_io))
|
96
|
+
end
|
97
|
+
|
98
|
+
# URI.encode/decode expect the input to be in ASCII-8BIT.
|
99
|
+
# However, there could be invalid UTF-8 characters both in
|
100
|
+
# raw and percent-encoded form.
|
101
|
+
#
|
102
|
+
# So, first sanitize the value, then percent-decode it while
|
103
|
+
# treating as UTF-8, then sanitize the result and encode it back.
|
104
|
+
#
|
105
|
+
# The result is guaranteed to be UTF-8-safe.
|
106
|
+
def sanitize_uri_encoded_string(input)
|
107
|
+
decoded_value = decode_string(input)
|
108
|
+
reencode_string(decoded_value)
|
109
|
+
end
|
110
|
+
|
111
|
+
def reencode_string(decoded_value)
|
112
|
+
escape_unreserved(
|
113
|
+
sanitize_string(decoded_value))
|
114
|
+
end
|
115
|
+
|
116
|
+
def decode_string(input)
|
117
|
+
unescape_unreserved(
|
118
|
+
sanitize_string(input).
|
119
|
+
force_encoding(Encoding::ASCII_8BIT))
|
120
|
+
end
|
121
|
+
|
55
122
|
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
56
123
|
# plus all multibyte UTF-8 characters.
|
57
124
|
UNRESERVED_OR_UTF8 = /[A-Za-z0-9\-._~\x80-\xFF]/
|
@@ -82,21 +149,21 @@ module Rack
|
|
82
149
|
UNSAFE = /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]%]/
|
83
150
|
|
84
151
|
# Performs the reverse function of `unescape_unreserved`. Unlike
|
85
|
-
# the previous function, we can reuse the logic in URI#
|
152
|
+
# the previous function, we can reuse the logic in URI#encode
|
86
153
|
def escape_unreserved(input)
|
87
|
-
URI.
|
154
|
+
URI.encode(input, UNSAFE)
|
88
155
|
end
|
89
156
|
|
90
157
|
def sanitize_string(input)
|
91
158
|
if input.is_a? String
|
92
|
-
input = input.dup.force_encoding(
|
159
|
+
input = input.dup.force_encoding(Encoding::UTF_8)
|
93
160
|
|
94
161
|
if input.valid_encoding?
|
95
162
|
input
|
96
163
|
else
|
97
164
|
input.
|
98
|
-
force_encoding(
|
99
|
-
encode!(
|
165
|
+
force_encoding(Encoding::ASCII_8BIT).
|
166
|
+
encode!(Encoding::UTF_8,
|
100
167
|
invalid: :replace,
|
101
168
|
undef: :replace)
|
102
169
|
end
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.1.0'
|
5
|
+
gem.version = '1.2.1'
|
6
6
|
gem.authors = ["Peter Zotov"]
|
7
7
|
gem.email = ["whitequark@whitequark.org"]
|
8
8
|
gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -151,4 +151,99 @@ describe Rack::UTF8Sanitizer do
|
|
151
151
|
env["REQUEST_PATH"].should.be.frozen
|
152
152
|
end
|
153
153
|
end
|
154
|
+
|
155
|
+
describe "with form data" do
|
156
|
+
def request_env
|
157
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
158
|
+
{
|
159
|
+
"REQUEST_METHOD" => "POST",
|
160
|
+
"CONTENT_TYPE" => "application/x-www-form-urlencoded;foo=bar",
|
161
|
+
"HTTP_USER_AGENT" => @plain_input,
|
162
|
+
"rack.input" => @rack_input,
|
163
|
+
}
|
164
|
+
end
|
165
|
+
def sanitize_form_data(request_env = request_env)
|
166
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
167
|
+
env = @app.(request_env)
|
168
|
+
sanitized_input = env['rack.input'].read
|
169
|
+
yield sanitized_input if block_given?
|
170
|
+
env['rack.input'].rewind
|
171
|
+
behaves_like :does_sanitize_plain
|
172
|
+
behaves_like :does_sanitize_uri
|
173
|
+
behaves_like :identity_plain
|
174
|
+
behaves_like :identity_uri
|
175
|
+
env['rack.input'].close
|
176
|
+
end
|
177
|
+
|
178
|
+
it "sanitizes StringIO rack.input" do
|
179
|
+
input = "foo=bla&quux=bar"
|
180
|
+
@rack_input = StringIO.new input
|
181
|
+
|
182
|
+
sanitize_form_data do |sanitized_input|
|
183
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
184
|
+
sanitized_input.should.be.valid_encoding
|
185
|
+
sanitized_input.should == input
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
it "sanitizes StringIO rack.input with bad encoding" do
|
190
|
+
input = "foo=bla&quux=bar\xED"
|
191
|
+
@rack_input = StringIO.new input
|
192
|
+
|
193
|
+
sanitize_form_data do |sanitized_input|
|
194
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
195
|
+
sanitized_input.should.be.valid_encoding
|
196
|
+
sanitized_input.should != input
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
it "sanitizes non-StringIO rack.input" do
|
201
|
+
require 'rack/rewindable_input'
|
202
|
+
input = "foo=bla&quux=bar"
|
203
|
+
@rack_input = Rack::RewindableInput.new(StringIO.new(input))
|
204
|
+
|
205
|
+
sanitize_form_data do |sanitized_input|
|
206
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
207
|
+
sanitized_input.should.be.valid_encoding
|
208
|
+
sanitized_input.should == input
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
it "sanitizes non-StringIO rack.input with bad encoding" do
|
213
|
+
require 'rack/rewindable_input'
|
214
|
+
input = "foo=bla&quux=bar\xED"
|
215
|
+
@rack_input = Rack::RewindableInput.new(StringIO.new(input))
|
216
|
+
|
217
|
+
sanitize_form_data do |sanitized_input|
|
218
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
219
|
+
sanitized_input.should.be.valid_encoding
|
220
|
+
sanitized_input.should != input
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
it "does not sanitize the rack body if there is no CONTENT_TYPE" do
|
225
|
+
input = "foo=bla&quux=bar\xED"
|
226
|
+
@rack_input = StringIO.new input
|
227
|
+
|
228
|
+
env = request_env.update('CONTENT_TYPE' => nil)
|
229
|
+
sanitize_form_data(env) do |sanitized_input|
|
230
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
231
|
+
sanitized_input.should.be.valid_encoding
|
232
|
+
sanitized_input.should == input
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
it "does not sanitize the rack body if there is empty CONTENT_TYPE" do
|
237
|
+
input = "foo=bla&quux=bar\xED"
|
238
|
+
@rack_input = StringIO.new input
|
239
|
+
|
240
|
+
env = request_env.update('CONTENT_TYPE' => '')
|
241
|
+
sanitize_form_data(env) do |sanitized_input|
|
242
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
243
|
+
sanitized_input.should.be.valid_encoding
|
244
|
+
sanitized_input.should == input
|
245
|
+
end
|
246
|
+
end
|
247
|
+
|
248
|
+
end
|
154
249
|
end
|
metadata
CHANGED
@@ -1,62 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Peter Zotov
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-05-27 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rack
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- - ~>
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '1.0'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: bacon
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - ">="
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - ">="
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: bacon-colored_output
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - ">="
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - ">="
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
description: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8
|
@@ -67,8 +60,8 @@ executables: []
|
|
67
60
|
extensions: []
|
68
61
|
extra_rdoc_files: []
|
69
62
|
files:
|
70
|
-
- .gitignore
|
71
|
-
- .travis.yml
|
63
|
+
- ".gitignore"
|
64
|
+
- ".travis.yml"
|
72
65
|
- Gemfile
|
73
66
|
- LICENSE.txt
|
74
67
|
- README.md
|
@@ -78,27 +71,26 @@ files:
|
|
78
71
|
- test/test_utf8_sanitizer.rb
|
79
72
|
homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
80
73
|
licenses: []
|
74
|
+
metadata: {}
|
81
75
|
post_install_message:
|
82
76
|
rdoc_options: []
|
83
77
|
require_paths:
|
84
78
|
- lib
|
85
79
|
required_ruby_version: !ruby/object:Gem::Requirement
|
86
|
-
none: false
|
87
80
|
requirements:
|
88
|
-
- -
|
81
|
+
- - ">="
|
89
82
|
- !ruby/object:Gem::Version
|
90
83
|
version: '1.9'
|
91
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
-
none: false
|
93
85
|
requirements:
|
94
|
-
- -
|
86
|
+
- - ">="
|
95
87
|
- !ruby/object:Gem::Version
|
96
88
|
version: '0'
|
97
89
|
requirements: []
|
98
90
|
rubyforge_project:
|
99
|
-
rubygems_version:
|
91
|
+
rubygems_version: 2.2.2
|
100
92
|
signing_key:
|
101
|
-
specification_version:
|
93
|
+
specification_version: 4
|
102
94
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
103
95
|
in request URI and headers.
|
104
96
|
test_files:
|