rack-sanitizer 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.editorconfig +17 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/ci.yml +23 -0
- data/.gitignore +17 -0
- data/CHANGELOG.md +39 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +119 -0
- data/Rakefile +8 -0
- data/lib/rack/sanitizer.rb +273 -0
- data/rack-sanitizer.gemspec +27 -0
- data/test/test_sanitizer.rb +526 -0
- metadata +121 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f8c301a4677ff19611734f3460a73bb4600cbdc01e0a60104aab3aff8e67e15c
|
4
|
+
data.tar.gz: 338dee798f354fc9ff31785dc47495c57ffc86f33c0090d4cf615a8b693f5a8b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: af51bf36db0f9e02320fe38ba99c30cbe08222f7e1ab051eecd87314e67ba128f72cbca8e08862a96f9b5cec6bfb19089bbadf336438d9828369386e74a5e8e5
|
7
|
+
data.tar.gz: 4b22ddc4c638da994926ccfd9f77ea2a3961e32bd2d66bad35bfa7d7064c38d8696e688b18f0b3e39d56f9650fd1a5f899bb081c8baf6dcc3e5b2cc2e9c10bc3
|
data/.editorconfig
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
root = true
|
2
|
+
|
3
|
+
[*]
|
4
|
+
indent_style = space
|
5
|
+
indent_size = 2
|
6
|
+
end_of_line = lf
|
7
|
+
charset = utf-8
|
8
|
+
trim_trailing_whitespace = true
|
9
|
+
insert_final_newline = true
|
10
|
+
|
11
|
+
[*.md]
|
12
|
+
indent_style = space
|
13
|
+
indent_size = 2
|
14
|
+
|
15
|
+
[*.y{a,}ml]
|
16
|
+
indent_style = space
|
17
|
+
indent_size = 2
|
@@ -0,0 +1,23 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
|
10
|
+
strategy:
|
11
|
+
fail-fast: false
|
12
|
+
matrix:
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v4
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
bundler-cache: true # 'bundle install' and cache gems
|
21
|
+
ruby-version: ${{ matrix.ruby }}
|
22
|
+
- name: Run tests
|
23
|
+
run: bundle exec rake
|
data/.gitignore
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
Changelog
|
2
|
+
=========
|
3
|
+
|
4
|
+
Master
|
5
|
+
-------------------------
|
6
|
+
|
7
|
+
API modifications:
|
8
|
+
|
9
|
+
Features implemented:
|
10
|
+
|
11
|
+
Bugs fixed:
|
12
|
+
|
13
|
+
v1.3.1 (2015-07-09)
|
14
|
+
-------------------------
|
15
|
+
|
16
|
+
Bugs fixed:
|
17
|
+
* Make sure Content-Length is adjusted. (Samuel Cochran, #26)
|
18
|
+
|
19
|
+
v1.3.0 (2015-01-26)
|
20
|
+
-------------------------
|
21
|
+
|
22
|
+
v1.2.4 (2014-11-29)
|
23
|
+
-------------------------
|
24
|
+
|
25
|
+
v1.2.3 (2014-10-08)
|
26
|
+
-------------------------
|
27
|
+
|
28
|
+
v1.2.2 (2014-07-10)
|
29
|
+
-------------------------
|
30
|
+
|
31
|
+
Features implemented:
|
32
|
+
* Sanitize request body for all HTTP verbs. (Nathaniel Talbott, #15)
|
33
|
+
* Add `application/json` and `text/javascript` as sanitizable content types. (Benjamin Fleischer, #12)
|
34
|
+
|
35
|
+
Bugs fixed:
|
36
|
+
* Ensure Rack::UTF8Sanitizer is first middleware. (Aaron Renner, #13)
|
37
|
+
|
38
|
+
v1.2.1 (2014-05-27)
|
39
|
+
-------------------------
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Peter Zotov
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
# Rack::Sanitizer
|
2
|
+
|
3
|
+
Rack::Sanitizer is a Rack middleware which cleans up invalid UTF8 characters in request URI and headers. Additionally,
|
4
|
+
it cleans up invalid UTF8 characters in the request body (depending on the configurable content type filters) by reading
|
5
|
+
the input into a string, sanitizing the string, then replacing the Rack input stream with a rewindable input stream backed
|
6
|
+
by the sanitized string.
|
7
|
+
|
8
|
+
It is a modernized and optimized fork of rack-utf8_sanitizer.
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
gem 'rack-sanitizer'
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install rack-sanitizer
|
23
|
+
|
24
|
+
For Rails, add this to your `application.rb`:
|
25
|
+
|
26
|
+
``` ruby
|
27
|
+
config.middleware.insert 0, Rack::Sanitizer
|
28
|
+
```
|
29
|
+
|
30
|
+
For Rack apps, add this to `config.ru`:
|
31
|
+
|
32
|
+
``` ruby
|
33
|
+
use Rack::Sanitizer
|
34
|
+
```
|
35
|
+
|
36
|
+
## Usage
|
37
|
+
|
38
|
+
Rack::Sanitizer divides all keys in the [Rack environment](https://github.com/rack/rack/blob/main/SPEC.rdoc) into two distinct groups: keys which contain raw data and the ones with percent-encoded data. The fields which are treated as percent-encoded are: `SCRIPT_NAME`, `REQUEST_PATH`, `REQUEST_URI`, `PATH_INFO`, `QUERY_STRING`, `HTTP_REFERER`, `ORIGINAL_FULLPATH`, `ORIGINAL_SCRIPT_NAME`, `SERVER_NAME`.
|
39
|
+
|
40
|
+
The generic sanitization algorithm is as follows:
|
41
|
+
|
42
|
+
1. Force the encoding to UTF-8.
|
43
|
+
2. If the result contains invalid characters:
|
44
|
+
1. Force the encoding to ASCII-8BIT.
|
45
|
+
2. Re-encode it as UTF-8, replacing invalid and undefined characters as U+FFFD.
|
46
|
+
|
47
|
+
For fields with "raw data", the algorithm is applied once and the (UTF-8 encoded) result is left in the environment.
|
48
|
+
|
49
|
+
For fields with "percent-encoded data", the algorithm is applied twice to catch both invalid characters appearing as-is and invalid characters appearing in the percent encoding. The percent encoded, ASCII-8BIT encoded result is left in the environment.
|
50
|
+
|
51
|
+
### Sanitizable content types
|
52
|
+
|
53
|
+
The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::Sanitizer.
|
54
|
+
|
55
|
+
To add sanitizable content types to the list of defaults, pass the `additional_content_types` options when using Rack::Sanitizer, e.g.
|
56
|
+
|
57
|
+
config.middleware.insert 0, Rack::Sanitizer, additional_content_types: ['application/vnd.api+json']
|
58
|
+
|
59
|
+
To explicitly set sanitizable content types and override the defaults, use the `sanitizable_content_types` option:
|
60
|
+
|
61
|
+
config.middleware.insert 0, Rack::Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
|
62
|
+
|
63
|
+
### Strategies
|
64
|
+
|
65
|
+
There are two built-in strategies for handling invalid characters. The default strategy is `:replace`, which will cause any invalid characters to be replaced with the Unicode replacement character (�). The second built-in strategy is `:exception`, which will cause an `EncodingError` exception to be raised if invalid characters are found (the exception can then be handled by another Rack middleware).
|
66
|
+
|
67
|
+
This is an example of handling the `:exception` strategy with additional middleware:
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
require "./your/middleware/directory/rack_sanitizer_exception_handler.rb"
|
71
|
+
|
72
|
+
config.middleware.insert 0, Rack::SanitizerExceptionHandler
|
73
|
+
config.middleware.insert_after Rack::SanitizerExceptionHandler, Rack::Sanitizer, strategy: :exception
|
74
|
+
```
|
75
|
+
|
76
|
+
Note: The exception handling middleware must be inserted before `Rack::Sanitizer`
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
module Rack
|
80
|
+
class SanitizerExceptionHandler
|
81
|
+
def initialize(app)
|
82
|
+
@app = app
|
83
|
+
end
|
84
|
+
|
85
|
+
def call(env)
|
86
|
+
@app.call(env)
|
87
|
+
rescue EncodingError => exception
|
88
|
+
# OPTIONAL: Add error logging service of your choice here
|
89
|
+
return [400, {}, ["Bad Request"]]
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
```
|
94
|
+
|
95
|
+
An object that responds to `#call` and accepts the offending string with invalid characters as an argument can also be passed as a `:strategy`. This is how you can define custom strategies.
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
config.middleware.insert 0, Rack::Sanitizer, strategy: :exception
|
99
|
+
```
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
replace_string = lambda do |_invalid|
|
103
|
+
Rails.logger.warn('Replacing invalid string')
|
104
|
+
|
105
|
+
'<Bad Encoding>'.freeze
|
106
|
+
end
|
107
|
+
|
108
|
+
config.middleware.insert 0, Rack::Sanitizer, strategy: replace_string
|
109
|
+
```
|
110
|
+
|
111
|
+
## Contributing
|
112
|
+
|
113
|
+
1. Fork it
|
114
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
115
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
116
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
117
|
+
5. Create new Pull Request
|
118
|
+
|
119
|
+
To run the tests, run `rake spec` in the project directory.
|
data/Rakefile
ADDED
@@ -0,0 +1,273 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "uri"
|
4
|
+
require "stringio"
|
5
|
+
|
6
|
+
module Rack
  # Rack middleware that sanitizes invalid UTF-8 in the request environment:
  # URI-like fields, `HTTP_*` headers, cookies and (for configured content
  # types) the request body exposed through `rack.input`.
  class Sanitizer
    # Template for the response returned when the request body cannot be read.
    # `#call` hands out a copy so that callers mutating the response do not
    # alter this constant.
    BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]

    # app     - the next Rack application in the stack.
    # options - Hash of settings:
    #   options[:sanitizable_content_types] Array, replaces the default list
    #   options[:additional_content_types]  Array, appended to the default list
    #   options[:strategy]                  :replace (default), :exception, or
    #                                       any object responding to #call
    def initialize(app, options={})
      @app = app
      @strategy = build_strategy(options)
      @sanitizable_content_types = options[:sanitizable_content_types]
      @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
    end

    # Sanitizes `env`, then calls the wrapped app with it.
    #
    # Returns the app's response, or a 400 "Bad Request" response when reading
    # the (lazily sanitized) request body raised FailedToReadBody.
    def call(env)
      env = sanitize(env)
      begin
        @app.call(env)
      rescue SanitizedRackInput::FailedToReadBody
        # Return a fresh response rather than the shared constant, so a caller
        # that mutates the headers or body cannot affect later responses.
        status, headers, body = BAD_REQUEST
        [status, headers.dup, body.dup]
      end
    end

    # Built-in strategies, keyed by the symbol accepted in options[:strategy].
    # Each receives the offending string, transcodes it in place from
    # ASCII-8BIT to UTF-8 and returns it.
    DEFAULT_STRATEGIES = {
      # Invalid and undefined bytes are substituted by the replacement character.
      replace: lambda do |input|
        input.
          force_encoding(Encoding::ASCII_8BIT).
          encode!(Encoding::UTF_8,
                  invalid: :replace,
                  undef: :replace)
        input
      end,
      # No replacement options are given, so `encode!` raises on bytes it
      # cannot convert.
      exception: lambda do |input|
        input.
          force_encoding(Encoding::ASCII_8BIT).
          encode!(Encoding::UTF_8)
        input
      end
    }.freeze

    # Environment keys whose values are treated as percent-encoded.
    # https://github.com/rack/rack/blob/main/SPEC.rdoc
    URI_FIELDS = %w(
      SCRIPT_NAME
      REQUEST_PATH REQUEST_URI PATH_INFO
      QUERY_STRING
      HTTP_REFERER
      ORIGINAL_FULLPATH
      ORIGINAL_SCRIPT_NAME
      SERVER_NAME
    ).freeze

    # Content types whose request body is sanitized by default.
    SANITIZABLE_CONTENT_TYPES = %w(
      text/plain
      application/x-www-form-urlencoded
      application/json
      text/javascript
    ).freeze

    # Content types whose body is additionally treated as percent-encoded.
    URI_ENCODED_CONTENT_TYPES = %w(
      application/x-www-form-urlencoded
    ).freeze

    # Sanitizes `env` in place: wraps `rack.input`, rewrites `HTTP_COOKIE`,
    # then sanitizes every URI field and every `HTTP_*` header. Frozen values
    # are duplicated before sanitizing and the result is frozen again.
    #
    # Returns the value of `env.each`, which for a Hash is `env` itself.
    def sanitize(env)
      sanitize_rack_input(env)
      sanitize_cookies(env)
      env.each do |key, value|
        if URI_FIELDS.include?(key)
          if value.frozen?
            env[key] = sanitize_uri_encoded_string(value.dup).freeze
          else
            env[key] = sanitize_uri_encoded_string(value)
          end
        elsif key.to_s.start_with?("HTTP_")
          # Just sanitize the headers and leave them in UTF-8. There is
          # no reason to have UTF-8 in headers, but if it's valid, let it be.
          if value.frozen?
            env[key] = sanitize_string(value.dup).freeze
          else
            env[key] = sanitize_string(value)
          end
        end
      end
    end

    private

    # Resolves options[:strategy] (default :replace). A key of
    # DEFAULT_STRATEGIES maps to the built-in lambda; anything else is
    # returned as given and is expected to respond to #call.
    def build_strategy(options)
      strategy = options.fetch(:strategy) { :replace }

      return strategy unless DEFAULT_STRATEGIES.key?(strategy)

      DEFAULT_STRATEGIES[strategy]
    end

    # Replaces env['rack.input'] with a SanitizedRackInput wrapper when the
    # request's media type is one of the sanitizable content types.
    def sanitize_rack_input(env)
      # https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
      # Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
      # Ignoring charset in content type.
      content_type = env['CONTENT_TYPE']
      content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
      content_type &&= content_type.downcase
      return unless @sanitizable_content_types.include?(content_type)
      uri_encoded = URI_ENCODED_CONTENT_TYPES.include?(content_type)

      if env['rack.input']
        env['rack.input'] = SanitizedRackInput.new(
          env['rack.input'],
          env,
          uri_encoded,
          @strategy
        )
      end
    end

    # Cookies need to be split and then sanitized as url encoded strings
    # since the cookie string itself is not url encoded (separated by `;`),
    # and the normal method of `sanitize_uri_encoded_string` would break
    # later cookie parsing in the case that a cookie value contained an
    # encoded `;`.
    def sanitize_cookies(env)
      return unless env['HTTP_COOKIE']

      env['HTTP_COOKIE'] = env['HTTP_COOKIE']
        .split(/[;,] */n)
        .map { |cookie| sanitize_uri_encoded_string(cookie) }
        .join('; ')
    end

    # String sanitizing helpers shared by Sanitizer and SanitizedRackInput.
    # The including object must set `@strategy` to an object responding to
    # #call.
    module Sanitizers
      private

      # URI.encode/decode expect the input to be in ASCII-8BIT.
      # However, there could be invalid UTF-8 characters both in
      # raw and percent-encoded form.
      #
      # So, first sanitize the value, then percent-decode it while
      # treating as UTF-8, then sanitize the result and encode it back.
      #
      # The result is guaranteed to be UTF-8-safe.
      def sanitize_uri_encoded_string(input)
        return input if input.nil?
        decoded_value = decode_string(input)
        reencode_string(decoded_value)
      end

      # Sanitizes an already decoded value and percent-encodes it again.
      def reencode_string(decoded_value)
        escape_unreserved(
          sanitize_string(decoded_value))
      end

      # Sanitizes `input`, switches it to ASCII-8BIT and percent-decodes the
      # unreserved and high-bit bytes.
      def decode_string(input)
        unescape_unreserved(
          sanitize_string(input).
            force_encoding(Encoding::ASCII_8BIT))
      end

      # RFC3986, 2.2 states that the characters from 'reserved' group must be
      # protected during normalization (which is what Rack::Sanitizer does).
      #
      # However, the regexp approach used by URI.unescape is not sophisticated
      # enough for our task.
      def unescape_unreserved(input)
        input.gsub(/%([a-f\d]{2})/i) do |encoded|
          decoded = $1.hex.chr

          # This regexp matches all 'unreserved' characters from RFC3986 (2.3),
          # plus all multibyte UTF-8 characters.
          if decoded.match?(/[A-Za-z0-9\-._~\x80-\xFF]/n)
            decoded
          else
            encoded
          end
        end
      end

      # Performs the reverse function of `unescape_unreserved`. Unlike
      # the previous function, we can reuse the logic in URI#encode
      def escape_unreserved(input)
        # This regexp matches unsafe characters, i.e. everything except 'reserved'
        # and 'unreserved' characters from RFC3986 (2.3), and additionally '%',
        # as percent-encoded unreserved characters could be left over from the
        # `unescape_unreserved` invocation.
        #
        # See also URI::REGEXP::PATTERN::{UNRESERVED,RESERVED}.
        URI::DEFAULT_PARSER.escape(input, /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]%]/)
      end

      # Tags a String as UTF-8 (in place) and returns it when valid; otherwise
      # returns whatever the configured strategy produces for it. Non-String
      # values are returned untouched.
      def sanitize_string(input)
        if input.is_a? String
          input = input.force_encoding(Encoding::UTF_8)

          if input.valid_encoding?
            input
          else
            @strategy.call(input)
          end
        else
          input
        end
      end
    end

    include Sanitizers

    # Stand-in for `rack.input` that reads the original stream on first use,
    # sanitizes the whole body and serves it from a StringIO.
    class SanitizedRackInput
      # Raised when reading the original body hits EOFError.
      # NOTE(review): inherits from Exception rather than StandardError, so a
      # bare `rescue` in the app will not catch it — presumably intentional;
      # confirm before changing.
      FailedToReadBody = Class.new(Exception)

      include Sanitizers

      # original_io - the `rack.input` stream being wrapped.
      # env         - the Rack env; its CONTENT_LENGTH is read and rewritten.
      # uri_encoded - whether the body must also be sanitized as
      #               percent-encoded data.
      # strategy    - object responding to #call, used for invalid strings.
      def initialize(original_io, env, uri_encoded, strategy)
        @original_io = original_io
        @uri_encoded = uri_encoded
        @env = env
        @strategy = strategy
        @sanitized_io = nil
      end

      def gets
        sanitized_io.gets
      end

      def read(*args)
        sanitized_io.read(*args)
      end

      def each(&block)
        sanitized_io.each(&block)
      end

      def rewind
        sanitized_io.rewind
      end

      def size
        # StringIO#size is bytesize
        sanitized_io.size
      end

      # Closes the sanitized buffer (if it was ever built) and the original
      # stream when it supports #close.
      def close
        @sanitized_io&.close
        @original_io.close if @original_io.respond_to?(:close)
      end

      private

      UTF8_BOM = "\xef\xbb\xbf".b.freeze
      UTF8_BOM_SIZE = UTF8_BOM.bytesize

      # Lazily builds, and memoizes, the StringIO holding the sanitized body.
      #
      # Reads CONTENT_LENGTH bytes when that header is present (the whole
      # stream otherwise), drops a leading UTF-8 BOM, sanitizes the data and,
      # if CONTENT_LENGTH was set, rewrites it to the sanitized byte size.
      #
      # Raises FailedToReadBody when the underlying read raises EOFError.
      def sanitized_io
        @sanitized_io ||= begin
          content_length = @env['CONTENT_LENGTH']&.to_i
          input = content_length && content_length >= 0 ? @original_io.read(content_length) : @original_io.read
          # IO#read(length) returns nil at EOF (e.g. CONTENT_LENGTH announced
          # but the body is empty); treat that as an empty body. String.new
          # yields a mutable string even under frozen_string_literal.
          input = String.new if input.nil?

          # Compare raw bytes: start_with? raises Encoding::CompatibilityError
          # when `input` is a non-ASCII string in an encoding other than binary.
          if input.byteslice(0, UTF8_BOM_SIZE).b == UTF8_BOM
            input = input.byteslice(UTF8_BOM_SIZE..-1)
          end

          input = sanitize_string(input)
          if @uri_encoded
            input = sanitize_uri_encoded_string(input).force_encoding(Encoding::UTF_8)
          end
          @env['CONTENT_LENGTH'] &&= input.bytesize.to_s
          StringIO.new(input)
        end
      rescue ::EOFError => error
        raise FailedToReadBody, error.message
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for rack-sanitizer.
Gem::Specification.new do |gem|
  gem.name = "rack-sanitizer"
  gem.version = '2.0.0'
  gem.authors = ["Jean Boussier", "whitequark"]
  gem.license = "MIT"
  # NOTE(review): "gmail.org" looks like a typo for "gmail.com" — confirm with
  # the maintainer before changing.
  gem.email = ["jean.boussier@gmail.org"]
  gem.description = %{Rack::Sanitizer is a Rack middleware which cleans up } <<
                    %{invalid UTF8 characters in request URI and headers.}
  gem.summary = "It is a modernized and optimized fork of rack-utf8_sanitizer"
  gem.homepage = "https://github.com/Shopify/rack-sanitizer"

  # Package every file tracked by git; files under test/, spec/ or features/
  # are additionally listed as test files.
  gem.files = `git ls-files`.split($/)
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.metadata["allowed_push_host"] = "https://rubygems.org/"

  gem.required_ruby_version = '>= 2.5'

  gem.add_dependency "rack", '>= 1.0', '< 4.0'

  gem.add_development_dependency "bacon"
  gem.add_development_dependency "bacon-colored_output"
  gem.add_development_dependency "rake"
end
|
@@ -0,0 +1,526 @@
|
|
1
|
+
# encoding:ascii-8bit
|
2
|
+
|
3
|
+
require 'bacon/colored_output'
|
4
|
+
require 'cgi'
|
5
|
+
require 'rack/sanitizer'
|
6
|
+
|
7
|
+
describe Rack::Sanitizer do
|
8
|
+
before do
|
9
|
+
@app = Rack::Sanitizer.new(-> env { env["rack.input"]&.size; env })
|
10
|
+
end
|
11
|
+
|
12
|
+
shared :does_sanitize_plain do
|
13
|
+
it "sanitizes plaintext entity (HTTP_USER_AGENT)" do
|
14
|
+
env = @app.({ "HTTP_USER_AGENT" => @plain_input })
|
15
|
+
result = env["HTTP_USER_AGENT"]
|
16
|
+
|
17
|
+
result.encoding.should == Encoding::UTF_8
|
18
|
+
result.should.be.valid_encoding
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
shared :does_sanitize_uri do
|
23
|
+
it "sanitizes URI-like entity (REQUEST_PATH)" do
|
24
|
+
env = @app.({ "REQUEST_PATH" => @uri_input })
|
25
|
+
result = env["REQUEST_PATH"]
|
26
|
+
|
27
|
+
result.encoding.should == Encoding::US_ASCII
|
28
|
+
result.should.be.valid_encoding
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "with invalid host input" do
|
33
|
+
it "sanitizes host entity (SERVER_NAME)" do
|
34
|
+
host = "host\xD0".force_encoding('UTF-8')
|
35
|
+
env = @app.({ "SERVER_NAME" => host })
|
36
|
+
result = env["SERVER_NAME"]
|
37
|
+
|
38
|
+
result.encoding.should == Encoding::US_ASCII
|
39
|
+
result.should.be.valid_encoding
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe "with invalid UTF-8 input" do
|
44
|
+
before do
|
45
|
+
@plain_input = "foo\xe0".force_encoding('UTF-8')
|
46
|
+
@uri_input = "http://bar/foo%E0".force_encoding('UTF-8')
|
47
|
+
end
|
48
|
+
|
49
|
+
behaves_like :does_sanitize_plain
|
50
|
+
behaves_like :does_sanitize_uri
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "with invalid, incorrectly percent-encoded UTF-8 URI input" do
|
54
|
+
before do
|
55
|
+
@uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
|
56
|
+
end
|
57
|
+
|
58
|
+
behaves_like :does_sanitize_uri
|
59
|
+
end
|
60
|
+
|
61
|
+
describe "with invalid ASCII-8BIT input" do
|
62
|
+
before do
|
63
|
+
@plain_input = "foo\xe0"
|
64
|
+
@uri_input = "http://bar/foo%E0"
|
65
|
+
end
|
66
|
+
|
67
|
+
behaves_like :does_sanitize_plain
|
68
|
+
behaves_like :does_sanitize_uri
|
69
|
+
end
|
70
|
+
|
71
|
+
describe "with invalid, incorrectly percent-encoded ASCII-8BIT URI input" do
|
72
|
+
before do
|
73
|
+
@uri_input = "http://bar/foo%E0\xe0"
|
74
|
+
end
|
75
|
+
|
76
|
+
behaves_like :does_sanitize_uri
|
77
|
+
end
|
78
|
+
|
79
|
+
shared :identity_plain do
|
80
|
+
it "does not change plaintext entity (HTTP_USER_AGENT)" do
|
81
|
+
env = @app.({ "HTTP_USER_AGENT" => @plain_input })
|
82
|
+
result = env["HTTP_USER_AGENT"]
|
83
|
+
|
84
|
+
result.encoding.should == Encoding::UTF_8
|
85
|
+
result.should.be.valid_encoding
|
86
|
+
result.should == @plain_input
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
shared :identity_uri do
|
91
|
+
it "does not change URI-like entity (REQUEST_PATH)" do
|
92
|
+
env = @app.({ "REQUEST_PATH" => @uri_input })
|
93
|
+
result = env["REQUEST_PATH"]
|
94
|
+
|
95
|
+
result.encoding.should == Encoding::US_ASCII
|
96
|
+
result.should.be.valid_encoding
|
97
|
+
result.should == @uri_input
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe "with valid UTF-8 input" do
|
102
|
+
before do
|
103
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
104
|
+
@uri_input = "http://bar/foo+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
105
|
+
end
|
106
|
+
|
107
|
+
behaves_like :identity_plain
|
108
|
+
behaves_like :identity_uri
|
109
|
+
|
110
|
+
describe "with URI characters from reserved range" do
|
111
|
+
before do
|
112
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
113
|
+
end
|
114
|
+
|
115
|
+
behaves_like :identity_uri
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
describe "with valid, not percent-encoded UTF-8 URI input" do
|
120
|
+
before do
|
121
|
+
@uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
|
122
|
+
@encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
|
123
|
+
end
|
124
|
+
|
125
|
+
it "does not change URI-like entity (REQUEST_PATH)" do
|
126
|
+
env = @app.({ "REQUEST_PATH" => @uri_input })
|
127
|
+
result = env["REQUEST_PATH"]
|
128
|
+
|
129
|
+
result.encoding.should == Encoding::US_ASCII
|
130
|
+
result.should.be.valid_encoding
|
131
|
+
result.should == @encoded
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
describe "with valid ASCII-8BIT input" do
|
136
|
+
before do
|
137
|
+
@plain_input = "bar baz"
|
138
|
+
@uri_input = "http://bar/bar+baz"
|
139
|
+
end
|
140
|
+
|
141
|
+
behaves_like :identity_plain
|
142
|
+
behaves_like :identity_uri
|
143
|
+
|
144
|
+
describe "with URI characters from reserved range" do
|
145
|
+
before do
|
146
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB"
|
147
|
+
end
|
148
|
+
|
149
|
+
behaves_like :identity_uri
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
describe "with frozen strings" do
|
154
|
+
before do
|
155
|
+
@plain_input = "bar baz".freeze
|
156
|
+
@uri_input = "http://bar/bar+baz".freeze
|
157
|
+
end
|
158
|
+
|
159
|
+
it "preserves the frozen? status of input" do
|
160
|
+
env = @app.({ "HTTP_USER_AGENT" => @plain_input,
|
161
|
+
"REQUEST_PATH" => @uri_input })
|
162
|
+
|
163
|
+
env["HTTP_USER_AGENT"].should.be.frozen
|
164
|
+
env["REQUEST_PATH"].should.be.frozen
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
describe "with symbols in the env" do
|
169
|
+
before do
|
170
|
+
@uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
|
171
|
+
end
|
172
|
+
|
173
|
+
it "sanitizes REQUEST_PATH with invalid UTF-8 URI input" do
|
174
|
+
env = @app.({ :requested_at => "2014-07-22",
|
175
|
+
"REQUEST_PATH" => @uri_input })
|
176
|
+
|
177
|
+
result = env["REQUEST_PATH"]
|
178
|
+
|
179
|
+
result.encoding.should == Encoding::US_ASCII
|
180
|
+
result.should.be.valid_encoding
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
describe "with form data" do
|
185
|
+
def request_env
|
186
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
187
|
+
{
|
188
|
+
"REQUEST_METHOD" => "POST",
|
189
|
+
"CONTENT_TYPE" => "application/x-www-form-urlencoded;foo=bar",
|
190
|
+
"HTTP_USER_AGENT" => @plain_input,
|
191
|
+
"rack.input" => @rack_input,
|
192
|
+
}
|
193
|
+
end
|
194
|
+
|
195
|
+
def sanitize_form_data(request_env = request_env())
|
196
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
197
|
+
@response_env = @app.(request_env)
|
198
|
+
sanitized_input = @response_env['rack.input'].read
|
199
|
+
|
200
|
+
yield sanitized_input if block_given?
|
201
|
+
|
202
|
+
@response_env['rack.input'].rewind
|
203
|
+
behaves_like :does_sanitize_plain
|
204
|
+
behaves_like :does_sanitize_uri
|
205
|
+
behaves_like :identity_plain
|
206
|
+
behaves_like :identity_uri
|
207
|
+
@response_env['rack.input'].close
|
208
|
+
end
|
209
|
+
|
210
|
+
class BrokenIO < StringIO
|
211
|
+
def read(_length = nil)
|
212
|
+
raise EOFError
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
it "returns HTTP 400 on EOF" do
|
217
|
+
@rack_input = BrokenIO.new
|
218
|
+
@response_env = @app.(request_env)
|
219
|
+
@response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
|
220
|
+
end
|
221
|
+
|
222
|
+
it "sanitizes StringIO rack.input" do
|
223
|
+
input = "foo=bla&quux=bar"
|
224
|
+
@rack_input = StringIO.new input
|
225
|
+
|
226
|
+
sanitize_form_data do |sanitized_input|
|
227
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
228
|
+
sanitized_input.should.be.valid_encoding
|
229
|
+
sanitized_input.should == input
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
it "sanitizes StringIO rack.input on GET" do
|
234
|
+
input = "foo=bla&quux=bar"
|
235
|
+
@rack_input = StringIO.new input
|
236
|
+
|
237
|
+
sanitize_form_data(request_env.merge("REQUEST_METHOD" => "GET")) do |sanitized_input|
|
238
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
239
|
+
sanitized_input.should.be.valid_encoding
|
240
|
+
sanitized_input.should == input
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
it "sanitizes StringIO rack.input with bad encoding" do
|
245
|
+
input = "foo=bla&quux=bar\xED"
|
246
|
+
@rack_input = StringIO.new input
|
247
|
+
|
248
|
+
sanitize_form_data do |sanitized_input|
|
249
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
250
|
+
sanitized_input.should.be.valid_encoding
|
251
|
+
sanitized_input.should != input
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
255
|
+
it "strip UTF-8 BOM from StringIO rack.input" do
|
256
|
+
input = %(\xef\xbb\xbf{"Hello": "World"})
|
257
|
+
@rack_input = StringIO.new input
|
258
|
+
|
259
|
+
sanitize_form_data(request_env.merge("CONTENT_TYPE" => "application/json")) do |sanitized_input|
|
260
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
261
|
+
sanitized_input.should.be.valid_encoding
|
262
|
+
sanitized_input.should == '{"Hello": "World"}'
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
it "sanitizes StringIO rack.input with form encoded bad encoding" do
|
267
|
+
input = "foo=bla&foo=baz&quux%ED=bar%ED"
|
268
|
+
@rack_input = StringIO.new input
|
269
|
+
|
270
|
+
sanitize_form_data do |sanitized_input|
|
271
|
+
# URI.decode_www_form does some encoding magic
|
272
|
+
sanitized_input.split("&").each do |pair|
|
273
|
+
pair.split("=", 2).each do |component|
|
274
|
+
decoded = URI.decode_www_form_component(component)
|
275
|
+
decoded.should.be.valid_encoding
|
276
|
+
end
|
277
|
+
end
|
278
|
+
sanitized_input.should != input
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
it "sanitizes non-StringIO rack.input" do
|
283
|
+
require 'rack/rewindable_input'
|
284
|
+
input = "foo=bla&quux=bar"
|
285
|
+
@rack_input = Rack::RewindableInput.new(StringIO.new(input))
|
286
|
+
|
287
|
+
sanitize_form_data do |sanitized_input|
|
288
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
289
|
+
sanitized_input.should.be.valid_encoding
|
290
|
+
sanitized_input.should == input
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
it "sanitizes non-StringIO rack.input with bad encoding" do
|
295
|
+
require 'rack/rewindable_input'
|
296
|
+
input = "foo=bla&quux=bar\xED"
|
297
|
+
@rack_input = Rack::RewindableInput.new(StringIO.new(input))
|
298
|
+
|
299
|
+
sanitize_form_data do |sanitized_input|
|
300
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
301
|
+
sanitized_input.should.be.valid_encoding
|
302
|
+
sanitized_input.should != input
|
303
|
+
end
|
304
|
+
end
|
305
|
+
|
306
|
+
it "does not sanitize the rack body if there is no CONTENT_TYPE" do
|
307
|
+
input = "foo=bla&quux=bar\xED"
|
308
|
+
@rack_input = StringIO.new input
|
309
|
+
|
310
|
+
env = request_env.update('CONTENT_TYPE' => nil)
|
311
|
+
sanitize_form_data(env) do |sanitized_input|
|
312
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
313
|
+
sanitized_input.should.be.valid_encoding
|
314
|
+
sanitized_input.should == input
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
318
|
+
it "does not sanitize the rack body if there is empty CONTENT_TYPE" do
|
319
|
+
input = "foo=bla&quux=bar\xED"
|
320
|
+
@rack_input = StringIO.new input
|
321
|
+
|
322
|
+
env = request_env.update('CONTENT_TYPE' => '')
|
323
|
+
sanitize_form_data(env) do |sanitized_input|
|
324
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
325
|
+
sanitized_input.should.be.valid_encoding
|
326
|
+
sanitized_input.should == input
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
330
|
+
it "adjusts content-length when replacing input" do
|
331
|
+
input = "foo=bla&quux=bar\xED"
|
332
|
+
@rack_input = StringIO.new input
|
333
|
+
|
334
|
+
env = request_env.update("CONTENT_LENGTH" => input.bytesize)
|
335
|
+
sanitize_form_data(env) do |sanitized_input|
|
336
|
+
sanitized_input.bytesize.should != input.bytesize
|
337
|
+
@response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
|
338
|
+
end
|
339
|
+
end
|
340
|
+
|
341
|
+
it "does not sanitize null bytes by default" do
|
342
|
+
input = "foo=bla&quux=bar%00"
|
343
|
+
@rack_input = StringIO.new input
|
344
|
+
|
345
|
+
sanitize_form_data do |sanitized_input|
|
346
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
347
|
+
sanitized_input.should.be.valid_encoding
|
348
|
+
sanitized_input.should == input
|
349
|
+
end
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
describe "with custom content-type" do
|
354
|
+
def request_env
|
355
|
+
{
|
356
|
+
"REQUEST_METHOD" => "GET",
|
357
|
+
"CONTENT_TYPE" => "application/json",
|
358
|
+
"HTTP_COOKIE" => @cookie,
|
359
|
+
"rack.input" => StringIO.new,
|
360
|
+
}
|
361
|
+
end
|
362
|
+
|
363
|
+
it "sanitizes bad http cookie" do
|
364
|
+
@cookie = "foo=bla; quux=bar\xED"
|
365
|
+
response_env = @app.(request_env)
|
366
|
+
response_env['HTTP_COOKIE'].should != @cookie
|
367
|
+
response_env['HTTP_COOKIE'].should == 'foo=bla; quux=bar%EF%BF%BD'
|
368
|
+
end
|
369
|
+
|
370
|
+
it "does not change ok http cookie" do
|
371
|
+
@cookie = "foo=bla; quux=bar"
|
372
|
+
response_env = @app.(request_env)
|
373
|
+
response_env['HTTP_COOKIE'].should == @cookie
|
374
|
+
|
375
|
+
@cookie = "foo=b%3bla; quux=b%20a%20r"
|
376
|
+
response_env = @app.(request_env)
|
377
|
+
response_env['HTTP_COOKIE'].should == @cookie
|
378
|
+
end
|
379
|
+
end
|
380
|
+
|
381
|
+
describe "with custom content-type" do
|
382
|
+
def request_env
|
383
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
384
|
+
{
|
385
|
+
"REQUEST_METHOD" => "POST",
|
386
|
+
"CONTENT_TYPE" => "application/vnd.api+json",
|
387
|
+
"HTTP_USER_AGENT" => @plain_input,
|
388
|
+
"rack.input" => @rack_input,
|
389
|
+
}
|
390
|
+
end
|
391
|
+
|
392
|
+
def sanitize_data(request_env = request_env())
|
393
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
394
|
+
@response_env = @app.(request_env)
|
395
|
+
sanitized_input = @response_env['rack.input'].read
|
396
|
+
|
397
|
+
yield sanitized_input if block_given?
|
398
|
+
end
|
399
|
+
|
400
|
+
it "does not sanitize custom content-type by default" do
|
401
|
+
input = "foo=bla&quux=bar\xED"
|
402
|
+
@rack_input = StringIO.new input
|
403
|
+
|
404
|
+
env = request_env
|
405
|
+
sanitize_data(env) do |sanitized_input|
|
406
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
407
|
+
sanitized_input.should.be.valid_encoding
|
408
|
+
sanitized_input.should == input
|
409
|
+
end
|
410
|
+
end
|
411
|
+
|
412
|
+
it "sanitizes custom content-type if additional_content_types given" do
|
413
|
+
@app = Rack::Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
|
414
|
+
input = "foo=bla&quux=bar\xED"
|
415
|
+
@rack_input = StringIO.new input
|
416
|
+
|
417
|
+
env = request_env
|
418
|
+
sanitize_data(env) do |sanitized_input|
|
419
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
420
|
+
sanitized_input.should.be.valid_encoding
|
421
|
+
sanitized_input.should != input
|
422
|
+
end
|
423
|
+
end
|
424
|
+
|
425
|
+
it "sanitizes default content-type if additional_content_types given" do
|
426
|
+
@app = Rack::Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
|
427
|
+
input = "foo=bla&quux=bar\xED"
|
428
|
+
@rack_input = StringIO.new input
|
429
|
+
|
430
|
+
env = request_env.update('CONTENT_TYPE' => 'application/json')
|
431
|
+
sanitize_data(env) do |sanitized_input|
|
432
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
433
|
+
sanitized_input.should.be.valid_encoding
|
434
|
+
sanitized_input.should != input
|
435
|
+
end
|
436
|
+
end
|
437
|
+
|
438
|
+
it "sanitizes custom content-type if sanitizable_content_types given" do
|
439
|
+
@app = Rack::Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
|
440
|
+
input = "foo=bla&quux=bar\xED"
|
441
|
+
@rack_input = StringIO.new input
|
442
|
+
|
443
|
+
env = request_env
|
444
|
+
sanitize_data(env) do |sanitized_input|
|
445
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
446
|
+
sanitized_input.should.be.valid_encoding
|
447
|
+
sanitized_input.should != input
|
448
|
+
end
|
449
|
+
end
|
450
|
+
|
451
|
+
it "does not sanitize default content-type if sanitizable_content_types does not include it" do
|
452
|
+
@app = Rack::Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
|
453
|
+
input = "foo=bla&quux=bar\xED"
|
454
|
+
@rack_input = StringIO.new input
|
455
|
+
|
456
|
+
env = request_env.update('CONTENT_TYPE' => 'application/json')
|
457
|
+
sanitize_data(env) do |sanitized_input|
|
458
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
459
|
+
sanitized_input.should.be.valid_encoding
|
460
|
+
sanitized_input.should == input
|
461
|
+
end
|
462
|
+
end
|
463
|
+
end
|
464
|
+
|
465
|
+
describe "with custom strategy" do
|
466
|
+
def request_env
|
467
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
468
|
+
{
|
469
|
+
"REQUEST_METHOD" => "POST",
|
470
|
+
"CONTENT_TYPE" => "application/json",
|
471
|
+
"HTTP_USER_AGENT" => @plain_input,
|
472
|
+
"rack.input" => @rack_input,
|
473
|
+
}
|
474
|
+
end
|
475
|
+
|
476
|
+
def sanitize_data(request_env = request_env())
|
477
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
478
|
+
@response_env = @app.(request_env)
|
479
|
+
sanitized_input = @response_env['rack.input'].read
|
480
|
+
|
481
|
+
yield sanitized_input if block_given?
|
482
|
+
end
|
483
|
+
|
484
|
+
it "calls a default strategy (replace)" do
|
485
|
+
@app = Rack::Sanitizer.new(-> env { env })
|
486
|
+
|
487
|
+
input = "foo=bla&quux=bar\xED"
|
488
|
+
@rack_input = StringIO.new input
|
489
|
+
|
490
|
+
env = request_env
|
491
|
+
sanitize_data(env) do |sanitized_input|
|
492
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
493
|
+
sanitized_input.should.be.valid_encoding
|
494
|
+
sanitized_input.should != input
|
495
|
+
end
|
496
|
+
end
|
497
|
+
|
498
|
+
it "calls the exception strategy" do
|
499
|
+
@app = Rack::Sanitizer.new(-> env { env }, strategy: :exception)
|
500
|
+
|
501
|
+
input = "foo=bla&quux=bar\xED"
|
502
|
+
@rack_input = StringIO.new input
|
503
|
+
|
504
|
+
env = request_env
|
505
|
+
should.raise(EncodingError) { sanitize_data(env) }
|
506
|
+
end
|
507
|
+
|
508
|
+
it "accepts a proc as a strategy" do
|
509
|
+
truncate = -> (input) do
|
510
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
511
|
+
end
|
512
|
+
|
513
|
+
@app = Rack::Sanitizer.new(-> env { env }, strategy: truncate)
|
514
|
+
|
515
|
+
input = "foo=bla&quux=bar\xED"
|
516
|
+
@rack_input = StringIO.new input
|
517
|
+
|
518
|
+
env = request_env
|
519
|
+
sanitize_data(env) do |sanitized_input|
|
520
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
521
|
+
sanitized_input.should.be.valid_encoding
|
522
|
+
sanitized_input.should == 'replace'
|
523
|
+
end
|
524
|
+
end
|
525
|
+
end
|
526
|
+
end
|
metadata
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rack-sanitizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jean Boussier
|
8
|
+
- whitequark
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2023-11-09 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rack
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.0'
|
21
|
+
- - "<"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: '4.0'
|
24
|
+
type: :runtime
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
requirements:
|
28
|
+
- - ">="
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: '1.0'
|
31
|
+
- - "<"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '4.0'
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: bacon
|
36
|
+
requirement: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
type: :development
|
42
|
+
prerelease: false
|
43
|
+
version_requirements: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: bacon-colored_output
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rake
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
type: :development
|
70
|
+
prerelease: false
|
71
|
+
version_requirements: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
description: Rack::Sanitizer is a Rack middleware which cleans up invalid UTF-8 characters
|
77
|
+
in request URI and headers.
|
78
|
+
email:
|
79
|
+
- jean.boussier@gmail.org
|
80
|
+
executables: []
|
81
|
+
extensions: []
|
82
|
+
extra_rdoc_files: []
|
83
|
+
files:
|
84
|
+
- ".editorconfig"
|
85
|
+
- ".github/dependabot.yml"
|
86
|
+
- ".github/workflows/ci.yml"
|
87
|
+
- ".gitignore"
|
88
|
+
- CHANGELOG.md
|
89
|
+
- Gemfile
|
90
|
+
- LICENSE.txt
|
91
|
+
- README.md
|
92
|
+
- Rakefile
|
93
|
+
- lib/rack/sanitizer.rb
|
94
|
+
- rack-sanitizer.gemspec
|
95
|
+
- test/test_sanitizer.rb
|
96
|
+
homepage: http://github.com/Shopify/rack-sanitizer
|
97
|
+
licenses:
|
98
|
+
- MIT
|
99
|
+
metadata:
|
100
|
+
allowed_push_host: https://rubygems.org/
|
101
|
+
post_install_message:
|
102
|
+
rdoc_options: []
|
103
|
+
require_paths:
|
104
|
+
- lib
|
105
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '2.5'
|
110
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
requirements: []
|
116
|
+
rubygems_version: 3.4.21
|
117
|
+
signing_key:
|
118
|
+
specification_version: 4
|
119
|
+
summary: It is a modernized and optimized fork of rack-utf8_sanitizer
|
120
|
+
test_files:
|
121
|
+
- test/test_sanitizer.rb
|