rack-sanitizer 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +17 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/ci.yml +23 -0
- data/.gitignore +17 -0
- data/CHANGELOG.md +39 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +119 -0
- data/Rakefile +8 -0
- data/lib/rack/sanitizer.rb +273 -0
- data/rack-sanitizer.gemspec +27 -0
- data/test/test_sanitizer.rb +526 -0
- metadata +121 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f8c301a4677ff19611734f3460a73bb4600cbdc01e0a60104aab3aff8e67e15c
|
4
|
+
data.tar.gz: 338dee798f354fc9ff31785dc47495c57ffc86f33c0090d4cf615a8b693f5a8b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: af51bf36db0f9e02320fe38ba99c30cbe08222f7e1ab051eecd87314e67ba128f72cbca8e08862a96f9b5cec6bfb19089bbadf336438d9828369386e74a5e8e5
|
7
|
+
data.tar.gz: 4b22ddc4c638da994926ccfd9f77ea2a3961e32bd2d66bad35bfa7d7064c38d8696e688b18f0b3e39d56f9650fd1a5f899bb081c8baf6dcc3e5b2cc2e9c10bc3
|
data/.editorconfig
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
root = true
|
2
|
+
|
3
|
+
[*]
|
4
|
+
indent_style = space
|
5
|
+
indent_size = 2
|
6
|
+
end_of_line = lf
|
7
|
+
charset = utf-8
|
8
|
+
trim_trailing_whitespace = true
|
9
|
+
insert_final_newline = true
|
10
|
+
|
11
|
+
[*.md]
|
12
|
+
indent_style = space
|
13
|
+
indent_size = 2
|
14
|
+
|
15
|
+
[*.y{a,}ml]
|
16
|
+
indent_style = space
|
17
|
+
indent_size = 2
|
@@ -0,0 +1,23 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
|
10
|
+
strategy:
|
11
|
+
fail-fast: false
|
12
|
+
matrix:
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v4
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
bundler-cache: true # 'bundle install' and cache gems
|
21
|
+
ruby-version: ${{ matrix.ruby }}
|
22
|
+
- name: Run tests
|
23
|
+
run: bundle exec rake
|
data/.gitignore
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
Changelog
|
2
|
+
=========
|
3
|
+
|
4
|
+
Master
|
5
|
+
-------------------------
|
6
|
+
|
7
|
+
API modifications:
|
8
|
+
|
9
|
+
Features implemented:
|
10
|
+
|
11
|
+
Bugs fixed:
|
12
|
+
|
13
|
+
v1.3.1 (2015-07-09)
|
14
|
+
-------------------------
|
15
|
+
|
16
|
+
Bugs fixed:
|
17
|
+
* Make sure Content-Length is adjusted. (Samuel Cochran, #26)
|
18
|
+
|
19
|
+
v1.3.0 (2015-01-26)
|
20
|
+
-------------------------
|
21
|
+
|
22
|
+
v1.2.4 (2014-11-29)
|
23
|
+
-------------------------
|
24
|
+
|
25
|
+
v1.2.3 (2014-10-08)
|
26
|
+
-------------------------
|
27
|
+
|
28
|
+
v1.2.2 (2014-07-10)
|
29
|
+
-------------------------
|
30
|
+
|
31
|
+
Features implemented:
|
32
|
+
* Sanitize request body for all HTTP verbs. (Nathaniel Talbott, #15)
|
33
|
+
* Add `application/json` and `text/javascript` as sanitizable content types. (Benjamin Fleischer, #12)
|
34
|
+
|
35
|
+
Bugs fixed:
|
36
|
+
* Ensure Rack::UTF8 Sanitizer is first middleware. (Aaron Renner, #13)
|
37
|
+
|
38
|
+
v1.2.1 (2014-05-27)
|
39
|
+
-------------------------
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Peter Zotov
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
# Rack::Sanitizer
|
2
|
+
|
3
|
+
Rack::Sanitizer is a Rack middleware which cleans up invalid UTF8 characters in request URI and headers. Additionally,
|
4
|
+
it cleans up invalid UTF8 characters in the request body (depending on the configurable content type filters) by reading
|
5
|
+
the input into a string, sanitizing the string, then replacing the Rack input stream with a rewindable input stream backed
|
6
|
+
by the sanitized string.
|
7
|
+
|
8
|
+
It is a modernized and optimized fork of rack-utf8_sanitizer.
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
gem 'rack-sanitizer'
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install rack-sanitizer
|
23
|
+
|
24
|
+
For Rails, add this to your `application.rb`:
|
25
|
+
|
26
|
+
``` ruby
|
27
|
+
config.middleware.insert 0, Rack::Sanitizer
|
28
|
+
```
|
29
|
+
|
30
|
+
For Rack apps, add this to `config.ru`:
|
31
|
+
|
32
|
+
``` ruby
|
33
|
+
use Rack::Sanitizer
|
34
|
+
```
|
35
|
+
|
36
|
+
## Usage
|
37
|
+
|
38
|
+
Rack::Sanitizer divides all keys in the [Rack environment](https://github.com/rack/rack/blob/main/SPEC.rdoc) into two distinct groups: keys which contain raw data and the ones with percent-encoded data. The fields which are treated as percent-encoded are: `SCRIPT_NAME`, `REQUEST_PATH`, `REQUEST_URI`, `PATH_INFO`, `QUERY_STRING`, `HTTP_REFERER`, `ORIGINAL_FULLPATH`, `ORIGINAL_SCRIPT_NAME`, `SERVER_NAME`.
|
39
|
+
|
40
|
+
The generic sanitization algorithm is as follows:
|
41
|
+
|
42
|
+
1. Force the encoding to UTF-8.
|
43
|
+
2. If the result contains invalid characters:
|
44
|
+
1. Force the encoding to ASCII-8BIT.
|
45
|
+
2. Re-encode it as UTF-8, replacing invalid and undefined characters as U+FFFD.
|
46
|
+
|
47
|
+
For fields with "raw data", the algorithm is applied once and the (UTF-8 encoded) result is left in the environment.
|
48
|
+
|
49
|
+
For fields with "percent-encoded data", the algorithm is applied twice to catch both invalid characters appearing as-is and invalid characters appearing in the percent encoding. The percent-encoded, US-ASCII encoded result is left in the environment.
|
50
|
+
|
51
|
+
### Sanitizable content types
|
52
|
+
|
53
|
+
The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::Sanitizer.
|
54
|
+
|
55
|
+
To add sanitizable content types to the list of defaults, pass the `additional_content_types` option when using Rack::Sanitizer, e.g.
|
56
|
+
|
57
|
+
config.middleware.insert 0, Rack::Sanitizer, additional_content_types: ['application/vnd.api+json']
|
58
|
+
|
59
|
+
To explicitly set sanitizable content types and override the defaults, use the `sanitizable_content_types` option:
|
60
|
+
|
61
|
+
config.middleware.insert 0, Rack::Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
|
62
|
+
|
63
|
+
### Strategies
|
64
|
+
|
65
|
+
There are two built-in strategies for handling invalid characters. The default strategy is `:replace`, which will cause any invalid characters to be replaced with the Unicode replacement character (�). The second built-in strategy is `:exception`, which will cause an `EncodingError` exception to be raised if invalid characters are found (the exception can then be handled by another Rack middleware).
|
66
|
+
|
67
|
+
This is an example of handling the `:exception` strategy with additional middleware:
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
require "./your/middleware/directory/rack_sanitizer_exception_handler.rb"
|
71
|
+
|
72
|
+
config.middleware.insert 0, Rack::SanitizerExceptionHandler
|
73
|
+
config.middleware.insert_after Rack::SanitizerExceptionHandler, Rack::Sanitizer, strategy: :exception
|
74
|
+
```
|
75
|
+
|
76
|
+
Note: The exception handling middleware must be inserted before `Rack::Sanitizer`.
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
module Rack
|
80
|
+
class SanitizerExceptionHandler
|
81
|
+
def initialize(app)
|
82
|
+
@app = app
|
83
|
+
end
|
84
|
+
|
85
|
+
def call(env)
|
86
|
+
@app.call(env)
|
87
|
+
rescue EncodingError => exception
|
88
|
+
# OPTIONAL: Add error logging service of your choice here
|
89
|
+
return [400, {}, ["Bad Request"]]
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
```
|
94
|
+
|
95
|
+
An object that responds to `#call` and accepts the offending string with invalid characters as an argument can also be passed as a `:strategy`. This is how you can define custom strategies.
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
config.middleware.insert 0, Rack::Sanitizer, strategy: :exception
|
99
|
+
```
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
replace_string = lambda do |_invalid|
|
103
|
+
Rails.logger.warn('Replacing invalid string')
|
104
|
+
|
105
|
+
'<Bad Encoding>'.freeze
|
106
|
+
end
|
107
|
+
|
108
|
+
config.middleware.insert 0, Rack::Sanitizer, strategy: replace_string
|
109
|
+
```
|
110
|
+
|
111
|
+
## Contributing
|
112
|
+
|
113
|
+
1. Fork it
|
114
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
115
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
116
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
117
|
+
5. Create new Pull Request
|
118
|
+
|
119
|
+
To run the tests, run `rake spec` in the project directory.
|
data/Rakefile
ADDED
@@ -0,0 +1,273 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "uri"
|
4
|
+
require "stringio"
|
5
|
+
|
6
|
+
module Rack
|
7
|
+
class Sanitizer
|
8
|
+
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
9
|
+
|
10
|
+
# options[:sanitizable_content_types] Array
|
11
|
+
# options[:additional_content_types] Array
|
12
|
+
def initialize(app, options={})
|
13
|
+
@app = app
|
14
|
+
@strategy = build_strategy(options)
|
15
|
+
@sanitizable_content_types = options[:sanitizable_content_types]
|
16
|
+
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
17
|
+
end
|
18
|
+
|
19
|
+
def call(env)
|
20
|
+
env = sanitize(env)
|
21
|
+
begin
|
22
|
+
@app.call(env)
|
23
|
+
rescue SanitizedRackInput::FailedToReadBody
|
24
|
+
return BAD_REQUEST
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
DEFAULT_STRATEGIES = {
|
29
|
+
replace: lambda do |input|
|
30
|
+
input.
|
31
|
+
force_encoding(Encoding::ASCII_8BIT).
|
32
|
+
encode!(Encoding::UTF_8,
|
33
|
+
invalid: :replace,
|
34
|
+
undef: :replace)
|
35
|
+
input
|
36
|
+
end,
|
37
|
+
exception: lambda do |input|
|
38
|
+
input.
|
39
|
+
force_encoding(Encoding::ASCII_8BIT).
|
40
|
+
encode!(Encoding::UTF_8)
|
41
|
+
input
|
42
|
+
end
|
43
|
+
}.freeze
|
44
|
+
|
45
|
+
# https://github.com/rack/rack/blob/main/SPEC.rdoc
|
46
|
+
URI_FIELDS = %w(
|
47
|
+
SCRIPT_NAME
|
48
|
+
REQUEST_PATH REQUEST_URI PATH_INFO
|
49
|
+
QUERY_STRING
|
50
|
+
HTTP_REFERER
|
51
|
+
ORIGINAL_FULLPATH
|
52
|
+
ORIGINAL_SCRIPT_NAME
|
53
|
+
SERVER_NAME
|
54
|
+
).freeze
|
55
|
+
|
56
|
+
SANITIZABLE_CONTENT_TYPES = %w(
|
57
|
+
text/plain
|
58
|
+
application/x-www-form-urlencoded
|
59
|
+
application/json
|
60
|
+
text/javascript
|
61
|
+
).freeze
|
62
|
+
|
63
|
+
URI_ENCODED_CONTENT_TYPES = %w(
|
64
|
+
application/x-www-form-urlencoded
|
65
|
+
).freeze
|
66
|
+
|
67
|
+
def sanitize(env)
|
68
|
+
sanitize_rack_input(env)
|
69
|
+
sanitize_cookies(env)
|
70
|
+
env.each do |key, value|
|
71
|
+
if URI_FIELDS.include?(key)
|
72
|
+
if value.frozen?
|
73
|
+
env[key] = sanitize_uri_encoded_string(value.dup).freeze
|
74
|
+
else
|
75
|
+
env[key] = sanitize_uri_encoded_string(value)
|
76
|
+
end
|
77
|
+
elsif key.to_s.start_with?("HTTP_")
|
78
|
+
# Just sanitize the headers and leave them in UTF-8. There is
|
79
|
+
# no reason to have UTF-8 in headers, but if it's valid, let it be.
|
80
|
+
if value.frozen?
|
81
|
+
env[key] = sanitize_string(value.dup).freeze
|
82
|
+
else
|
83
|
+
env[key] = sanitize_string(value)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
private
|
90
|
+
|
91
|
+
def build_strategy(options)
|
92
|
+
strategy = options.fetch(:strategy) { :replace }
|
93
|
+
|
94
|
+
return strategy unless DEFAULT_STRATEGIES.key?(strategy)
|
95
|
+
|
96
|
+
DEFAULT_STRATEGIES[strategy]
|
97
|
+
end
|
98
|
+
|
99
|
+
def sanitize_rack_input(env)
|
100
|
+
# https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
|
101
|
+
# Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
|
102
|
+
# Ignoring charset in content type.
|
103
|
+
content_type = env['CONTENT_TYPE']
|
104
|
+
content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
|
105
|
+
content_type &&= content_type.downcase
|
106
|
+
return unless @sanitizable_content_types.include?(content_type)
|
107
|
+
uri_encoded = URI_ENCODED_CONTENT_TYPES.include?(content_type)
|
108
|
+
|
109
|
+
if env['rack.input']
|
110
|
+
env['rack.input'] = SanitizedRackInput.new(
|
111
|
+
env['rack.input'],
|
112
|
+
env,
|
113
|
+
uri_encoded,
|
114
|
+
@strategy
|
115
|
+
)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
# Cookies need to be split and then sanitized as url encoded strings
|
120
|
+
# since the cookie string itself is not url encoded (separated by `;`),
|
121
|
+
# and the normal method of `sanitize_uri_encoded_string` would break
|
122
|
+
# later cookie parsing in the case that a cookie value contained an
|
123
|
+
# encoded `;`.
|
124
|
+
def sanitize_cookies(env)
|
125
|
+
return unless env['HTTP_COOKIE']
|
126
|
+
|
127
|
+
env['HTTP_COOKIE'] = env['HTTP_COOKIE']
|
128
|
+
.split(/[;,] */n)
|
129
|
+
.map { |cookie| sanitize_uri_encoded_string(cookie) }
|
130
|
+
.join('; ')
|
131
|
+
end
|
132
|
+
|
133
|
+
module Sanitizers
|
134
|
+
private
|
135
|
+
|
136
|
+
# URI.encode/decode expect the input to be in ASCII-8BIT.
|
137
|
+
# However, there could be invalid UTF-8 characters both in
|
138
|
+
# raw and percent-encoded form.
|
139
|
+
#
|
140
|
+
# So, first sanitize the value, then percent-decode it while
|
141
|
+
# treating as UTF-8, then sanitize the result and encode it back.
|
142
|
+
#
|
143
|
+
# The result is guaranteed to be UTF-8-safe.
|
144
|
+
def sanitize_uri_encoded_string(input)
|
145
|
+
return input if input.nil?
|
146
|
+
decoded_value = decode_string(input)
|
147
|
+
reencode_string(decoded_value)
|
148
|
+
end
|
149
|
+
|
150
|
+
def reencode_string(decoded_value)
|
151
|
+
escape_unreserved(
|
152
|
+
sanitize_string(decoded_value))
|
153
|
+
end
|
154
|
+
|
155
|
+
def decode_string(input)
|
156
|
+
unescape_unreserved(
|
157
|
+
sanitize_string(input).
|
158
|
+
force_encoding(Encoding::ASCII_8BIT))
|
159
|
+
end
|
160
|
+
|
161
|
+
# RFC3986, 2.2 states that the characters from 'reserved' group must be
|
162
|
+
# protected during normalization (which is what Rack::Sanitizer does).
|
163
|
+
#
|
164
|
+
# However, the regexp approach used by URI.unescape is not sophisticated
|
165
|
+
# enough for our task.
|
166
|
+
def unescape_unreserved(input)
|
167
|
+
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
168
|
+
decoded = $1.hex.chr
|
169
|
+
|
170
|
+
# This regexp matches all 'unreserved' characters from RFC3986 (2.3),
|
171
|
+
# plus all multibyte UTF-8 characters.
|
172
|
+
if decoded.match?(/[A-Za-z0-9\-._~\x80-\xFF]/n)
|
173
|
+
decoded
|
174
|
+
else
|
175
|
+
encoded
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
# Performs the reverse function of `unescape_unreserved`. Unlike
|
181
|
+
# the previous function, we can reuse the logic in URI#encode
|
182
|
+
def escape_unreserved(input)
|
183
|
+
# This regexp matches unsafe characters, i.e. everything except 'reserved'
|
184
|
+
# and 'unreserved' characters from RFC3986 (2.3), and additionally '%',
|
185
|
+
# as percent-encoded unreserved characters could be left over from the
|
186
|
+
# `unescape_unreserved` invocation.
|
187
|
+
#
|
188
|
+
# See also URI::REGEXP::PATTERN::{UNRESERVED,RESERVED}.
|
189
|
+
URI::DEFAULT_PARSER.escape(input, /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]%]/)
|
190
|
+
end
|
191
|
+
|
192
|
+
def sanitize_string(input)
|
193
|
+
if input.is_a? String
|
194
|
+
input = input.force_encoding(Encoding::UTF_8)
|
195
|
+
|
196
|
+
if input.valid_encoding?
|
197
|
+
input
|
198
|
+
else
|
199
|
+
@strategy.call(input)
|
200
|
+
end
|
201
|
+
else
|
202
|
+
input
|
203
|
+
end
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
include Sanitizers
|
208
|
+
|
209
|
+
class SanitizedRackInput
|
210
|
+
FailedToReadBody = Class.new(Exception)
|
211
|
+
|
212
|
+
include Sanitizers
|
213
|
+
|
214
|
+
def initialize(original_io, env, uri_encoded, strategy)
|
215
|
+
@original_io = original_io
|
216
|
+
@uri_encoded = uri_encoded
|
217
|
+
@env = env
|
218
|
+
@strategy = strategy
|
219
|
+
@sanitized_io = nil
|
220
|
+
end
|
221
|
+
|
222
|
+
def gets
|
223
|
+
sanitized_io.gets
|
224
|
+
end
|
225
|
+
|
226
|
+
def read(*args)
|
227
|
+
sanitized_io.read(*args)
|
228
|
+
end
|
229
|
+
|
230
|
+
def each(&block)
|
231
|
+
sanitized_io.each(&block)
|
232
|
+
end
|
233
|
+
|
234
|
+
def rewind
|
235
|
+
sanitized_io.rewind
|
236
|
+
end
|
237
|
+
|
238
|
+
def size
|
239
|
+
# StringIO#size is bytesize
|
240
|
+
sanitized_io.size
|
241
|
+
end
|
242
|
+
|
243
|
+
def close
|
244
|
+
@sanitized_io&.close
|
245
|
+
@original_io.close if @original_io.respond_to?(:close)
|
246
|
+
end
|
247
|
+
|
248
|
+
private
|
249
|
+
|
250
|
+
UTF8_BOM = "\xef\xbb\xbf".b.freeze
|
251
|
+
UTF8_BOM_SIZE = UTF8_BOM.bytesize
|
252
|
+
|
253
|
+
def sanitized_io
|
254
|
+
@sanitized_io ||= begin
|
255
|
+
content_length = @env['CONTENT_LENGTH']&.to_i
|
256
|
+
input = content_length && content_length >= 0 ? @original_io.read(content_length) : @original_io.read
|
257
|
+
if input.start_with?(UTF8_BOM)
|
258
|
+
input = input.byteslice(UTF8_BOM_SIZE..-1)
|
259
|
+
end
|
260
|
+
|
261
|
+
input = sanitize_string(input)
|
262
|
+
if @uri_encoded
|
263
|
+
input = sanitize_uri_encoded_string(input).force_encoding(Encoding::UTF_8)
|
264
|
+
end
|
265
|
+
@env['CONTENT_LENGTH'] &&= input.bytesize.to_s
|
266
|
+
StringIO.new(input)
|
267
|
+
end
|
268
|
+
rescue ::EOFError => error
|
269
|
+
raise FailedToReadBody, error.message
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|
273
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |gem|
|
4
|
+
gem.name = "rack-sanitizer"
|
5
|
+
gem.version = '2.0.0'
|
6
|
+
gem.authors = ["Jean Boussier", "whitequark"]
|
7
|
+
gem.license = "MIT"
|
8
|
+
gem.email = ["jean.boussier@gmail.org"]
|
9
|
+
gem.description = %{Rack::Sanitizer is a Rack middleware which cleans up } <<
|
10
|
+
%{invalid UTF8 characters in request URI and headers.}
|
11
|
+
gem.summary = "It is a mordernized and optimized fork of rack-utf8_sanitizer"
|
12
|
+
gem.homepage = "http://github.com/Shopify/rack-sanitizer"
|
13
|
+
|
14
|
+
gem.files = `git ls-files`.split($/)
|
15
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
16
|
+
gem.require_paths = ["lib"]
|
17
|
+
|
18
|
+
gem.metadata["allowed_push_host"] = "https://rubygems.org/"
|
19
|
+
|
20
|
+
gem.required_ruby_version = '>= 2.5'
|
21
|
+
|
22
|
+
gem.add_dependency "rack", '>= 1.0', '< 4.0'
|
23
|
+
|
24
|
+
gem.add_development_dependency "bacon"
|
25
|
+
gem.add_development_dependency "bacon-colored_output"
|
26
|
+
gem.add_development_dependency "rake"
|
27
|
+
end
|
@@ -0,0 +1,526 @@
|
|
1
|
+
# encoding:ascii-8bit
|
2
|
+
|
3
|
+
require 'bacon/colored_output'
|
4
|
+
require 'cgi'
|
5
|
+
require 'rack/sanitizer'
|
6
|
+
|
7
|
+
describe Rack::Sanitizer do
|
8
|
+
before do
|
9
|
+
@app = Rack::Sanitizer.new(-> env { env["rack.input"]&.size; env })
|
10
|
+
end
|
11
|
+
|
12
|
+
shared :does_sanitize_plain do
|
13
|
+
it "sanitizes plaintext entity (HTTP_USER_AGENT)" do
|
14
|
+
env = @app.({ "HTTP_USER_AGENT" => @plain_input })
|
15
|
+
result = env["HTTP_USER_AGENT"]
|
16
|
+
|
17
|
+
result.encoding.should == Encoding::UTF_8
|
18
|
+
result.should.be.valid_encoding
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
shared :does_sanitize_uri do
|
23
|
+
it "sanitizes URI-like entity (REQUEST_PATH)" do
|
24
|
+
env = @app.({ "REQUEST_PATH" => @uri_input })
|
25
|
+
result = env["REQUEST_PATH"]
|
26
|
+
|
27
|
+
result.encoding.should == Encoding::US_ASCII
|
28
|
+
result.should.be.valid_encoding
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "with invalid host input" do
|
33
|
+
it "sanitizes host entity (SERVER_NAME)" do
|
34
|
+
host = "host\xD0".force_encoding('UTF-8')
|
35
|
+
env = @app.({ "SERVER_NAME" => host })
|
36
|
+
result = env["SERVER_NAME"]
|
37
|
+
|
38
|
+
result.encoding.should == Encoding::US_ASCII
|
39
|
+
result.should.be.valid_encoding
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe "with invalid UTF-8 input" do
|
44
|
+
before do
|
45
|
+
@plain_input = "foo\xe0".force_encoding('UTF-8')
|
46
|
+
@uri_input = "http://bar/foo%E0".force_encoding('UTF-8')
|
47
|
+
end
|
48
|
+
|
49
|
+
behaves_like :does_sanitize_plain
|
50
|
+
behaves_like :does_sanitize_uri
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "with invalid, incorrectly percent-encoded UTF-8 URI input" do
|
54
|
+
before do
|
55
|
+
@uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
|
56
|
+
end
|
57
|
+
|
58
|
+
behaves_like :does_sanitize_uri
|
59
|
+
end
|
60
|
+
|
61
|
+
describe "with invalid ASCII-8BIT input" do
|
62
|
+
before do
|
63
|
+
@plain_input = "foo\xe0"
|
64
|
+
@uri_input = "http://bar/foo%E0"
|
65
|
+
end
|
66
|
+
|
67
|
+
behaves_like :does_sanitize_plain
|
68
|
+
behaves_like :does_sanitize_uri
|
69
|
+
end
|
70
|
+
|
71
|
+
describe "with invalid, incorrectly percent-encoded ASCII-8BIT URI input" do
|
72
|
+
before do
|
73
|
+
@uri_input = "http://bar/foo%E0\xe0"
|
74
|
+
end
|
75
|
+
|
76
|
+
behaves_like :does_sanitize_uri
|
77
|
+
end
|
78
|
+
|
79
|
+
shared :identity_plain do
|
80
|
+
it "does not change plaintext entity (HTTP_USER_AGENT)" do
|
81
|
+
env = @app.({ "HTTP_USER_AGENT" => @plain_input })
|
82
|
+
result = env["HTTP_USER_AGENT"]
|
83
|
+
|
84
|
+
result.encoding.should == Encoding::UTF_8
|
85
|
+
result.should.be.valid_encoding
|
86
|
+
result.should == @plain_input
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
shared :identity_uri do
|
91
|
+
it "does not change URI-like entity (REQUEST_PATH)" do
|
92
|
+
env = @app.({ "REQUEST_PATH" => @uri_input })
|
93
|
+
result = env["REQUEST_PATH"]
|
94
|
+
|
95
|
+
result.encoding.should == Encoding::US_ASCII
|
96
|
+
result.should.be.valid_encoding
|
97
|
+
result.should == @uri_input
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe "with valid UTF-8 input" do
|
102
|
+
before do
|
103
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
104
|
+
@uri_input = "http://bar/foo+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
105
|
+
end
|
106
|
+
|
107
|
+
behaves_like :identity_plain
|
108
|
+
behaves_like :identity_uri
|
109
|
+
|
110
|
+
describe "with URI characters from reserved range" do
|
111
|
+
before do
|
112
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
113
|
+
end
|
114
|
+
|
115
|
+
behaves_like :identity_uri
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
describe "with valid, not percent-encoded UTF-8 URI input" do
|
120
|
+
before do
|
121
|
+
@uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
|
122
|
+
@encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
|
123
|
+
end
|
124
|
+
|
125
|
+
it "does not change URI-like entity (REQUEST_PATH)" do
|
126
|
+
env = @app.({ "REQUEST_PATH" => @uri_input })
|
127
|
+
result = env["REQUEST_PATH"]
|
128
|
+
|
129
|
+
result.encoding.should == Encoding::US_ASCII
|
130
|
+
result.should.be.valid_encoding
|
131
|
+
result.should == @encoded
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
describe "with valid ASCII-8BIT input" do
|
136
|
+
before do
|
137
|
+
@plain_input = "bar baz"
|
138
|
+
@uri_input = "http://bar/bar+baz"
|
139
|
+
end
|
140
|
+
|
141
|
+
behaves_like :identity_plain
|
142
|
+
behaves_like :identity_uri
|
143
|
+
|
144
|
+
describe "with URI characters from reserved range" do
|
145
|
+
before do
|
146
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB"
|
147
|
+
end
|
148
|
+
|
149
|
+
behaves_like :identity_uri
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
describe "with frozen strings" do
|
154
|
+
before do
|
155
|
+
@plain_input = "bar baz".freeze
|
156
|
+
@uri_input = "http://bar/bar+baz".freeze
|
157
|
+
end
|
158
|
+
|
159
|
+
it "preserves the frozen? status of input" do
|
160
|
+
env = @app.({ "HTTP_USER_AGENT" => @plain_input,
|
161
|
+
"REQUEST_PATH" => @uri_input })
|
162
|
+
|
163
|
+
env["HTTP_USER_AGENT"].should.be.frozen
|
164
|
+
env["REQUEST_PATH"].should.be.frozen
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
describe "with symbols in the env" do
|
169
|
+
before do
|
170
|
+
@uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
|
171
|
+
end
|
172
|
+
|
173
|
+
it "sanitizes REQUEST_PATH with invalid UTF-8 URI input" do
|
174
|
+
env = @app.({ :requested_at => "2014-07-22",
|
175
|
+
"REQUEST_PATH" => @uri_input })
|
176
|
+
|
177
|
+
result = env["REQUEST_PATH"]
|
178
|
+
|
179
|
+
result.encoding.should == Encoding::US_ASCII
|
180
|
+
result.should.be.valid_encoding
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
describe "with form data" do
|
185
|
+
def request_env
|
186
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
187
|
+
{
|
188
|
+
"REQUEST_METHOD" => "POST",
|
189
|
+
"CONTENT_TYPE" => "application/x-www-form-urlencoded;foo=bar",
|
190
|
+
"HTTP_USER_AGENT" => @plain_input,
|
191
|
+
"rack.input" => @rack_input,
|
192
|
+
}
|
193
|
+
end
|
194
|
+
|
195
|
+
def sanitize_form_data(request_env = request_env())
|
196
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
197
|
+
@response_env = @app.(request_env)
|
198
|
+
sanitized_input = @response_env['rack.input'].read
|
199
|
+
|
200
|
+
yield sanitized_input if block_given?
|
201
|
+
|
202
|
+
@response_env['rack.input'].rewind
|
203
|
+
behaves_like :does_sanitize_plain
|
204
|
+
behaves_like :does_sanitize_uri
|
205
|
+
behaves_like :identity_plain
|
206
|
+
behaves_like :identity_uri
|
207
|
+
@response_env['rack.input'].close
|
208
|
+
end
|
209
|
+
|
210
|
+
class BrokenIO < StringIO
|
211
|
+
def read(_length = nil)
|
212
|
+
raise EOFError
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
it "returns HTTP 400 on EOF" do
|
217
|
+
@rack_input = BrokenIO.new
|
218
|
+
@response_env = @app.(request_env)
|
219
|
+
@response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
|
220
|
+
end
|
221
|
+
|
222
|
+
it "sanitizes StringIO rack.input" do
|
223
|
+
input = "foo=bla&quux=bar"
|
224
|
+
@rack_input = StringIO.new input
|
225
|
+
|
226
|
+
sanitize_form_data do |sanitized_input|
|
227
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
228
|
+
sanitized_input.should.be.valid_encoding
|
229
|
+
sanitized_input.should == input
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
it "sanitizes StringIO rack.input on GET" do
|
234
|
+
input = "foo=bla&quux=bar"
|
235
|
+
@rack_input = StringIO.new input
|
236
|
+
|
237
|
+
sanitize_form_data(request_env.merge("REQUEST_METHOD" => "GET")) do |sanitized_input|
|
238
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
239
|
+
sanitized_input.should.be.valid_encoding
|
240
|
+
sanitized_input.should == input
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
it "sanitizes StringIO rack.input with bad encoding" do
|
245
|
+
input = "foo=bla&quux=bar\xED"
|
246
|
+
@rack_input = StringIO.new input
|
247
|
+
|
248
|
+
sanitize_form_data do |sanitized_input|
|
249
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
250
|
+
sanitized_input.should.be.valid_encoding
|
251
|
+
sanitized_input.should != input
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
255
|
+
it "strip UTF-8 BOM from StringIO rack.input" do
|
256
|
+
input = %(\xef\xbb\xbf{"Hello": "World"})
|
257
|
+
@rack_input = StringIO.new input
|
258
|
+
|
259
|
+
sanitize_form_data(request_env.merge("CONTENT_TYPE" => "application/json")) do |sanitized_input|
|
260
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
261
|
+
sanitized_input.should.be.valid_encoding
|
262
|
+
sanitized_input.should == '{"Hello": "World"}'
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
it "sanitizes StringIO rack.input with form encoded bad encoding" do
|
267
|
+
input = "foo=bla&foo=baz&quux%ED=bar%ED"
|
268
|
+
@rack_input = StringIO.new input
|
269
|
+
|
270
|
+
sanitize_form_data do |sanitized_input|
|
271
|
+
# URI.decode_www_form does some encoding magic
|
272
|
+
sanitized_input.split("&").each do |pair|
|
273
|
+
pair.split("=", 2).each do |component|
|
274
|
+
decoded = URI.decode_www_form_component(component)
|
275
|
+
decoded.should.be.valid_encoding
|
276
|
+
end
|
277
|
+
end
|
278
|
+
sanitized_input.should != input
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
it "sanitizes non-StringIO rack.input" do
|
283
|
+
require 'rack/rewindable_input'
|
284
|
+
input = "foo=bla&quux=bar"
|
285
|
+
@rack_input = Rack::RewindableInput.new(StringIO.new(input))
|
286
|
+
|
287
|
+
sanitize_form_data do |sanitized_input|
|
288
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
289
|
+
sanitized_input.should.be.valid_encoding
|
290
|
+
sanitized_input.should == input
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
it "sanitizes non-StringIO rack.input with bad encoding" do
|
295
|
+
require 'rack/rewindable_input'
|
296
|
+
input = "foo=bla&quux=bar\xED"
|
297
|
+
@rack_input = Rack::RewindableInput.new(StringIO.new(input))
|
298
|
+
|
299
|
+
sanitize_form_data do |sanitized_input|
|
300
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
301
|
+
sanitized_input.should.be.valid_encoding
|
302
|
+
sanitized_input.should != input
|
303
|
+
end
|
304
|
+
end
|
305
|
+
|
306
|
+
it "does not sanitize the rack body if there is no CONTENT_TYPE" do
|
307
|
+
input = "foo=bla&quux=bar\xED"
|
308
|
+
@rack_input = StringIO.new input
|
309
|
+
|
310
|
+
env = request_env.update('CONTENT_TYPE' => nil)
|
311
|
+
sanitize_form_data(env) do |sanitized_input|
|
312
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
313
|
+
sanitized_input.should.be.valid_encoding
|
314
|
+
sanitized_input.should == input
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
318
|
+
it "does not sanitize the rack body if there is empty CONTENT_TYPE" do
|
319
|
+
input = "foo=bla&quux=bar\xED"
|
320
|
+
@rack_input = StringIO.new input
|
321
|
+
|
322
|
+
env = request_env.update('CONTENT_TYPE' => '')
|
323
|
+
sanitize_form_data(env) do |sanitized_input|
|
324
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
325
|
+
sanitized_input.should.be.valid_encoding
|
326
|
+
sanitized_input.should == input
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
330
|
+
it "adjusts content-length when replacing input" do
|
331
|
+
input = "foo=bla&quux=bar\xED"
|
332
|
+
@rack_input = StringIO.new input
|
333
|
+
|
334
|
+
env = request_env.update("CONTENT_LENGTH" => input.bytesize)
|
335
|
+
sanitize_form_data(env) do |sanitized_input|
|
336
|
+
sanitized_input.bytesize.should != input.bytesize
|
337
|
+
@response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
|
338
|
+
end
|
339
|
+
end
|
340
|
+
|
341
|
+
it "does not sanitize null bytes by default" do
|
342
|
+
input = "foo=bla&quux=bar%00"
|
343
|
+
@rack_input = StringIO.new input
|
344
|
+
|
345
|
+
sanitize_form_data do |sanitized_input|
|
346
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
347
|
+
sanitized_input.should.be.valid_encoding
|
348
|
+
sanitized_input.should == input
|
349
|
+
end
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
describe "with custom content-type" do
|
354
|
+
def request_env
|
355
|
+
{
|
356
|
+
"REQUEST_METHOD" => "GET",
|
357
|
+
"CONTENT_TYPE" => "application/json",
|
358
|
+
"HTTP_COOKIE" => @cookie,
|
359
|
+
"rack.input" => StringIO.new,
|
360
|
+
}
|
361
|
+
end
|
362
|
+
|
363
|
+
it "sanitizes bad http cookie" do
|
364
|
+
@cookie = "foo=bla; quux=bar\xED"
|
365
|
+
response_env = @app.(request_env)
|
366
|
+
response_env['HTTP_COOKIE'].should != @cookie
|
367
|
+
response_env['HTTP_COOKIE'].should == 'foo=bla; quux=bar%EF%BF%BD'
|
368
|
+
end
|
369
|
+
|
370
|
+
it "does not change ok http cookie" do
|
371
|
+
@cookie = "foo=bla; quux=bar"
|
372
|
+
response_env = @app.(request_env)
|
373
|
+
response_env['HTTP_COOKIE'].should == @cookie
|
374
|
+
|
375
|
+
@cookie = "foo=b%3bla; quux=b%20a%20r"
|
376
|
+
response_env = @app.(request_env)
|
377
|
+
response_env['HTTP_COOKIE'].should == @cookie
|
378
|
+
end
|
379
|
+
end
|
380
|
+
|
381
|
+
describe "with custom content-type" do
|
382
|
+
def request_env
|
383
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
384
|
+
{
|
385
|
+
"REQUEST_METHOD" => "POST",
|
386
|
+
"CONTENT_TYPE" => "application/vnd.api+json",
|
387
|
+
"HTTP_USER_AGENT" => @plain_input,
|
388
|
+
"rack.input" => @rack_input,
|
389
|
+
}
|
390
|
+
end
|
391
|
+
|
392
|
+
def sanitize_data(request_env = request_env())
|
393
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
394
|
+
@response_env = @app.(request_env)
|
395
|
+
sanitized_input = @response_env['rack.input'].read
|
396
|
+
|
397
|
+
yield sanitized_input if block_given?
|
398
|
+
end
|
399
|
+
|
400
|
+
it "does not sanitize custom content-type by default" do
|
401
|
+
input = "foo=bla&quux=bar\xED"
|
402
|
+
@rack_input = StringIO.new input
|
403
|
+
|
404
|
+
env = request_env
|
405
|
+
sanitize_data(env) do |sanitized_input|
|
406
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
407
|
+
sanitized_input.should.be.valid_encoding
|
408
|
+
sanitized_input.should == input
|
409
|
+
end
|
410
|
+
end
|
411
|
+
|
412
|
+
it "sanitizes custom content-type if additional_content_types given" do
|
413
|
+
@app = Rack::Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
|
414
|
+
input = "foo=bla&quux=bar\xED"
|
415
|
+
@rack_input = StringIO.new input
|
416
|
+
|
417
|
+
env = request_env
|
418
|
+
sanitize_data(env) do |sanitized_input|
|
419
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
420
|
+
sanitized_input.should.be.valid_encoding
|
421
|
+
sanitized_input.should != input
|
422
|
+
end
|
423
|
+
end
|
424
|
+
|
425
|
+
it "sanitizes default content-type if additional_content_types given" do
|
426
|
+
@app = Rack::Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
|
427
|
+
input = "foo=bla&quux=bar\xED"
|
428
|
+
@rack_input = StringIO.new input
|
429
|
+
|
430
|
+
env = request_env.update('CONTENT_TYPE' => 'application/json')
|
431
|
+
sanitize_data(env) do |sanitized_input|
|
432
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
433
|
+
sanitized_input.should.be.valid_encoding
|
434
|
+
sanitized_input.should != input
|
435
|
+
end
|
436
|
+
end
|
437
|
+
|
438
|
+
it "sanitizes custom content-type if sanitizable_content_types given" do
|
439
|
+
@app = Rack::Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
|
440
|
+
input = "foo=bla&quux=bar\xED"
|
441
|
+
@rack_input = StringIO.new input
|
442
|
+
|
443
|
+
env = request_env
|
444
|
+
sanitize_data(env) do |sanitized_input|
|
445
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
446
|
+
sanitized_input.should.be.valid_encoding
|
447
|
+
sanitized_input.should != input
|
448
|
+
end
|
449
|
+
end
|
450
|
+
|
451
|
+
it "does not sanitize default content-type if sanitizable_content_types does not include it" do
|
452
|
+
@app = Rack::Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
|
453
|
+
input = "foo=bla&quux=bar\xED"
|
454
|
+
@rack_input = StringIO.new input
|
455
|
+
|
456
|
+
env = request_env.update('CONTENT_TYPE' => 'application/json')
|
457
|
+
sanitize_data(env) do |sanitized_input|
|
458
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
459
|
+
sanitized_input.should.be.valid_encoding
|
460
|
+
sanitized_input.should == input
|
461
|
+
end
|
462
|
+
end
|
463
|
+
end
|
464
|
+
|
465
|
+
describe "with custom strategy" do
|
466
|
+
def request_env
|
467
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
468
|
+
{
|
469
|
+
"REQUEST_METHOD" => "POST",
|
470
|
+
"CONTENT_TYPE" => "application/json",
|
471
|
+
"HTTP_USER_AGENT" => @plain_input,
|
472
|
+
"rack.input" => @rack_input,
|
473
|
+
}
|
474
|
+
end
|
475
|
+
|
476
|
+
def sanitize_data(request_env = request_env())
|
477
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
478
|
+
@response_env = @app.(request_env)
|
479
|
+
sanitized_input = @response_env['rack.input'].read
|
480
|
+
|
481
|
+
yield sanitized_input if block_given?
|
482
|
+
end
|
483
|
+
|
484
|
+
it "calls a default strategy (replace)" do
|
485
|
+
@app = Rack::Sanitizer.new(-> env { env })
|
486
|
+
|
487
|
+
input = "foo=bla&quux=bar\xED"
|
488
|
+
@rack_input = StringIO.new input
|
489
|
+
|
490
|
+
env = request_env
|
491
|
+
sanitize_data(env) do |sanitized_input|
|
492
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
493
|
+
sanitized_input.should.be.valid_encoding
|
494
|
+
sanitized_input.should != input
|
495
|
+
end
|
496
|
+
end
|
497
|
+
|
498
|
+
it "calls the exception strategy" do
|
499
|
+
@app = Rack::Sanitizer.new(-> env { env }, strategy: :exception)
|
500
|
+
|
501
|
+
input = "foo=bla&quux=bar\xED"
|
502
|
+
@rack_input = StringIO.new input
|
503
|
+
|
504
|
+
env = request_env
|
505
|
+
should.raise(EncodingError) { sanitize_data(env) }
|
506
|
+
end
|
507
|
+
|
508
|
+
it "accepts a proc as a strategy" do
|
509
|
+
truncate = -> (input) do
|
510
|
+
'replace'.force_encoding(Encoding::UTF_8)
|
511
|
+
end
|
512
|
+
|
513
|
+
@app = Rack::Sanitizer.new(-> env { env }, strategy: truncate)
|
514
|
+
|
515
|
+
input = "foo=bla&quux=bar\xED"
|
516
|
+
@rack_input = StringIO.new input
|
517
|
+
|
518
|
+
env = request_env
|
519
|
+
sanitize_data(env) do |sanitized_input|
|
520
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
521
|
+
sanitized_input.should.be.valid_encoding
|
522
|
+
sanitized_input.should == 'replace'
|
523
|
+
end
|
524
|
+
end
|
525
|
+
end
|
526
|
+
end
|
metadata
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rack-sanitizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jean Boussier
|
8
|
+
- whitequark
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2023-11-09 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rack
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.0'
|
21
|
+
- - "<"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: '4.0'
|
24
|
+
type: :runtime
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
requirements:
|
28
|
+
- - ">="
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: '1.0'
|
31
|
+
- - "<"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '4.0'
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: bacon
|
36
|
+
requirement: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
type: :development
|
42
|
+
prerelease: false
|
43
|
+
version_requirements: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: bacon-colored_output
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rake
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
type: :development
|
70
|
+
prerelease: false
|
71
|
+
version_requirements: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
description: Rack::Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|
77
|
+
in request URI and headers.
|
78
|
+
email:
|
79
|
+
- jean.boussier@gmail.org
|
80
|
+
executables: []
|
81
|
+
extensions: []
|
82
|
+
extra_rdoc_files: []
|
83
|
+
files:
|
84
|
+
- ".editorconfig"
|
85
|
+
- ".github/dependabot.yml"
|
86
|
+
- ".github/workflows/ci.yml"
|
87
|
+
- ".gitignore"
|
88
|
+
- CHANGELOG.md
|
89
|
+
- Gemfile
|
90
|
+
- LICENSE.txt
|
91
|
+
- README.md
|
92
|
+
- Rakefile
|
93
|
+
- lib/rack/sanitizer.rb
|
94
|
+
- rack-sanitizer.gemspec
|
95
|
+
- test/test_sanitizer.rb
|
96
|
+
homepage: http://github.com/Shopify/rack-sanitizer
|
97
|
+
licenses:
|
98
|
+
- MIT
|
99
|
+
metadata:
|
100
|
+
allowed_push_host: https://rubygems.org/
|
101
|
+
post_install_message:
|
102
|
+
rdoc_options: []
|
103
|
+
require_paths:
|
104
|
+
- lib
|
105
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '2.5'
|
110
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
requirements: []
|
116
|
+
rubygems_version: 3.4.21
|
117
|
+
signing_key:
|
118
|
+
specification_version: 4
|
119
|
+
summary: It is a modernized and optimized fork of rack-utf8_sanitizer
|
120
|
+
test_files:
|
121
|
+
- test/test_sanitizer.rb
|