rack-sanitizer 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f8c301a4677ff19611734f3460a73bb4600cbdc01e0a60104aab3aff8e67e15c
4
+ data.tar.gz: 338dee798f354fc9ff31785dc47495c57ffc86f33c0090d4cf615a8b693f5a8b
5
+ SHA512:
6
+ metadata.gz: af51bf36db0f9e02320fe38ba99c30cbe08222f7e1ab051eecd87314e67ba128f72cbca8e08862a96f9b5cec6bfb19089bbadf336438d9828369386e74a5e8e5
7
+ data.tar.gz: 4b22ddc4c638da994926ccfd9f77ea2a3961e32bd2d66bad35bfa7d7064c38d8696e688b18f0b3e39d56f9650fd1a5f899bb081c8baf6dcc3e5b2cc2e9c10bc3
data/.editorconfig ADDED
@@ -0,0 +1,17 @@
1
+ root = true
2
+
3
+ [*]
4
+ indent_style = space
5
+ indent_size = 2
6
+ end_of_line = lf
7
+ charset = utf-8
8
+ trim_trailing_whitespace = true
9
+ insert_final_newline = true
10
+
11
+ [*.md]
12
+ indent_style = space
13
+ indent_size = 2
14
+
15
+ [*.y{a,}ml]
16
+ indent_style = space
17
+ indent_size = 2
@@ -0,0 +1,6 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "github-actions"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
@@ -0,0 +1,23 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ test:
7
+
8
+ runs-on: ubuntu-latest
9
+
10
+ strategy:
11
+ fail-fast: false
12
+ matrix:
13
+ ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", "3.2", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - name: Set up Ruby
18
+ uses: ruby/setup-ruby@v1
19
+ with:
20
+ bundler-cache: true # 'bundle install' and cache gems
21
+ ruby-version: ${{ matrix.ruby }}
22
+ - name: Run tests
23
+ run: bundle exec rake
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/CHANGELOG.md ADDED
@@ -0,0 +1,39 @@
1
+ Changelog
2
+ =========
3
+
4
+ Master
5
+ -------------------------
6
+
7
+ API modifications:
8
+
9
+ Features implemented:
10
+
11
+ Bugs fixed:
12
+
13
+ v1.3.1 (2015-07-09)
14
+ -------------------------
15
+
16
+ Bugs fixed:
17
+ * Make sure Content-Length is adjusted. (Samuel Cochran, #26)
18
+
19
+ v1.3.0 (2015-01-26)
20
+ -------------------------
21
+
22
+ v1.2.4 (2014-11-29)
23
+ -------------------------
24
+
25
+ v1.2.3 (2014-10-08)
26
+ -------------------------
27
+
28
+ v1.2.2 (2014-07-10)
29
+ -------------------------
30
+
31
+ Features implemented:
32
+ * Sanitize request body for all HTTP verbs. (Nathaniel Talbott, #15)
33
+ * Add `application/json` and `text/javascript` as sanitizable content types. (Benjamin Fleischer, #12)
34
+
35
+ Bugs fixed:
36
+ * Ensure Rack::UTF8 Sanitizer is first middleware. (Aaron Renner, #13)
37
+
38
+ v1.2.1 (2014-05-27)
39
+ -------------------------
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rack-sanitizer.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Peter Zotov
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,119 @@
1
+ # Rack::Sanitizer
2
+
3
+ Rack::Sanitizer is a Rack middleware which cleans up invalid UTF8 characters in request URI and headers. Additionally,
4
+ it cleans up invalid UTF8 characters in the request body (depending on the configurable content type filters) by reading
5
+ the input into a string, sanitizing the string, then replacing the Rack input stream with a rewindable input stream backed
6
+ by the sanitized string.
7
+
8
+ It is a modernized and optimized fork of rack-utf8_sanitizer.
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'rack-sanitizer'
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install rack-sanitizer
23
+
24
+ For Rails, add this to your `application.rb`:
25
+
26
+ ``` ruby
27
+ config.middleware.insert 0, Rack::Sanitizer
28
+ ```
29
+
30
+ For Rack apps, add this to `config.ru`:
31
+
32
+ ``` ruby
33
+ use Rack::Sanitizer
34
+ ```
35
+
36
+ ## Usage
37
+
38
+ Rack::Sanitizer divides all keys in the [Rack environment](https://github.com/rack/rack/blob/main/SPEC.rdoc) into two distinct groups: keys which contain raw data and the ones with percent-encoded data. The fields which are treated as percent-encoded are: `SCRIPT_NAME`, `REQUEST_PATH`, `REQUEST_URI`, `PATH_INFO`, `QUERY_STRING`, `HTTP_REFERER`, `ORIGINAL_FULLPATH`, `ORIGINAL_SCRIPT_NAME`, `SERVER_NAME`.
39
+
40
+ The generic sanitization algorithm is as follows:
41
+
42
+ 1. Force the encoding to UTF-8.
43
+ 2. If the result contains invalid characters:
44
+ 1. Force the encoding to ASCII-8BIT.
45
+ 2. Re-encode it as UTF-8, replacing invalid and undefined characters as U+FFFD.
46
+
47
+ For fields with "raw data", the algorithm is applied once and the (UTF-8 encoded) result is left in the environment.
48
+
49
+ For fields with "percent-encoded data", the algorithm is applied twice to catch both invalid characters appearing as-is and invalid characters appearing in the percent encoding. The percent-encoded, US-ASCII encoded result is left in the environment.
50
+
51
+ ### Sanitizable content types
52
+
53
+ The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::Sanitizer.
54
+
55
+ To add sanitizable content types to the list of defaults, pass the `additional_content_types` options when using Rack::Sanitizer, e.g.
56
+
57
+ config.middleware.insert 0, Rack::Sanitizer, additional_content_types: ['application/vnd.api+json']
58
+
59
+ To explicitly set sanitizable content types and override the defaults, use the `sanitizable_content_types` option:
60
+
61
+ config.middleware.insert 0, Rack::Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
62
+
63
+ ### Strategies
64
+
65
+ There are two built-in strategies for handling invalid characters. The default strategy is `:replace`, which will cause any invalid characters to be replaced with the Unicode replacement character (�). The second built-in strategy is `:exception`, which will cause an `EncodingError` exception to be raised if invalid characters are found (the exception can then be handled by another Rack middleware).
66
+
67
+ This is an example of handling the `:exception` strategy with additional middleware:
68
+
69
+ ```ruby
70
+ require "./your/middleware/directory/rack_sanitizer_exception_handler.rb"
71
+
72
+ config.middleware.insert 0, Rack::SanitizerExceptionHandler
73
+ config.middleware.insert_after Rack::SanitizerExceptionHandler, Rack::Sanitizer, strategy: :exception
74
+ ```
75
+
76
+ Note: The exception handling middleware must be inserted before `Rack::Sanitizer`
77
+
78
+ ```ruby
79
+ module Rack
80
+ class SanitizerExceptionHandler
81
+ def initialize(app)
82
+ @app = app
83
+ end
84
+
85
+ def call(env)
86
+ @app.call(env)
87
+ rescue EncodingError => exception
88
+ # OPTIONAL: Add error logging service of your choice here
89
+ return [400, {}, ["Bad Request"]]
90
+ end
91
+ end
92
+ end
93
+ ```
94
+
95
+ An object that responds to `#call` and accepts the offending string with invalid characters as an argument can also be passed as a `:strategy`. This is how you can define custom strategies.
96
+
97
+ ```ruby
98
+ config.middleware.insert 0, Rack::Sanitizer, strategy: :exception
99
+ ```
100
+
101
+ ```ruby
102
+ replace_string = lambda do |_invalid|
103
+ Rails.logger.warn('Replacing invalid string')
104
+
105
+ '<Bad Encoding>'.freeze
106
+ end
107
+
108
+ config.middleware.insert 0, Rack::Sanitizer, strategy: replace_string
109
+ ```
110
+
111
+ ## Contributing
112
+
113
+ 1. Fork it
114
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
115
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
116
+ 4. Push to the branch (`git push origin my-new-feature`)
117
+ 5. Create new Pull Request
118
+
119
+ To run the tests, run `rake spec` in the project directory.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task :default => :spec
4
+
5
+ desc "Run tests"
6
+ task :spec do
7
+ sh 'bacon -a'
8
+ end
@@ -0,0 +1,273 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+ require "stringio"
5
+
6
+ module Rack
7
+ class Sanitizer
8
+ BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
9
+
10
+ # options[:sanitizable_content_types] Array
11
+ # options[:additional_content_types] Array
12
+ def initialize(app, options={})
13
+ @app = app
14
+ @strategy = build_strategy(options)
15
+ @sanitizable_content_types = options[:sanitizable_content_types]
16
+ @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
17
+ end
18
+
19
+ def call(env)
20
+ env = sanitize(env)
21
+ begin
22
+ @app.call(env)
23
+ rescue SanitizedRackInput::FailedToReadBody
24
+ return BAD_REQUEST
25
+ end
26
+ end
27
+
28
+ DEFAULT_STRATEGIES = {
29
+ replace: lambda do |input|
30
+ input.
31
+ force_encoding(Encoding::ASCII_8BIT).
32
+ encode!(Encoding::UTF_8,
33
+ invalid: :replace,
34
+ undef: :replace)
35
+ input
36
+ end,
37
+ exception: lambda do |input|
38
+ input.
39
+ force_encoding(Encoding::ASCII_8BIT).
40
+ encode!(Encoding::UTF_8)
41
+ input
42
+ end
43
+ }.freeze
44
+
45
+ # https://github.com/rack/rack/blob/main/SPEC.rdoc
46
+ URI_FIELDS = %w(
47
+ SCRIPT_NAME
48
+ REQUEST_PATH REQUEST_URI PATH_INFO
49
+ QUERY_STRING
50
+ HTTP_REFERER
51
+ ORIGINAL_FULLPATH
52
+ ORIGINAL_SCRIPT_NAME
53
+ SERVER_NAME
54
+ ).freeze
55
+
56
+ SANITIZABLE_CONTENT_TYPES = %w(
57
+ text/plain
58
+ application/x-www-form-urlencoded
59
+ application/json
60
+ text/javascript
61
+ ).freeze
62
+
63
+ URI_ENCODED_CONTENT_TYPES = %w(
64
+ application/x-www-form-urlencoded
65
+ ).freeze
66
+
67
+ def sanitize(env)
68
+ sanitize_rack_input(env)
69
+ sanitize_cookies(env)
70
+ env.each do |key, value|
71
+ if URI_FIELDS.include?(key)
72
+ if value.frozen?
73
+ env[key] = sanitize_uri_encoded_string(value.dup).freeze
74
+ else
75
+ env[key] = sanitize_uri_encoded_string(value)
76
+ end
77
+ elsif key.to_s.start_with?("HTTP_")
78
+ # Just sanitize the headers and leave them in UTF-8. There is
79
+ # no reason to have UTF-8 in headers, but if it's valid, let it be.
80
+ if value.frozen?
81
+ env[key] = sanitize_string(value.dup).freeze
82
+ else
83
+ env[key] = sanitize_string(value)
84
+ end
85
+ end
86
+ end
87
+ end
88
+
89
+ private
90
+
91
+ def build_strategy(options)
92
+ strategy = options.fetch(:strategy) { :replace }
93
+
94
+ return strategy unless DEFAULT_STRATEGIES.key?(strategy)
95
+
96
+ DEFAULT_STRATEGIES[strategy]
97
+ end
98
+
99
+ def sanitize_rack_input(env)
100
+ # https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
101
+ # Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
102
+ # Ignoring charset in content type.
103
+ content_type = env['CONTENT_TYPE']
104
+ content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
105
+ content_type &&= content_type.downcase
106
+ return unless @sanitizable_content_types.include?(content_type)
107
+ uri_encoded = URI_ENCODED_CONTENT_TYPES.include?(content_type)
108
+
109
+ if env['rack.input']
110
+ env['rack.input'] = SanitizedRackInput.new(
111
+ env['rack.input'],
112
+ env,
113
+ uri_encoded,
114
+ @strategy
115
+ )
116
+ end
117
+ end
118
+
119
+ # Cookies need to be split and then sanitized as url encoded strings
120
+ # since the cookie string itself is not url encoded (separated by `;`),
121
+ # and the normal method of `sanitize_uri_encoded_string` would break
122
+ # later cookie parsing in the case that a cookie value contained an
123
+ # encoded `;`.
124
+ def sanitize_cookies(env)
125
+ return unless env['HTTP_COOKIE']
126
+
127
+ env['HTTP_COOKIE'] = env['HTTP_COOKIE']
128
+ .split(/[;,] */n)
129
+ .map { |cookie| sanitize_uri_encoded_string(cookie) }
130
+ .join('; ')
131
+ end
132
+
133
+ module Sanitizers
134
+ private
135
+
136
+ # URI.encode/decode expect the input to be in ASCII-8BIT.
137
+ # However, there could be invalid UTF-8 characters both in
138
+ # raw and percent-encoded form.
139
+ #
140
+ # So, first sanitize the value, then percent-decode it while
141
+ # treating as UTF-8, then sanitize the result and encode it back.
142
+ #
143
+ # The result is guaranteed to be UTF-8-safe.
144
+ def sanitize_uri_encoded_string(input)
145
+ return input if input.nil?
146
+ decoded_value = decode_string(input)
147
+ reencode_string(decoded_value)
148
+ end
149
+
150
+ def reencode_string(decoded_value)
151
+ escape_unreserved(
152
+ sanitize_string(decoded_value))
153
+ end
154
+
155
+ def decode_string(input)
156
+ unescape_unreserved(
157
+ sanitize_string(input).
158
+ force_encoding(Encoding::ASCII_8BIT))
159
+ end
160
+
161
+ # RFC3986, 2.2 states that the characters from 'reserved' group must be
162
+ # protected during normalization (which is what Rack::Sanitizer does).
163
+ #
164
+ # However, the regexp approach used by URI.unescape is not sophisticated
165
+ # enough for our task.
166
+ def unescape_unreserved(input)
167
+ input.gsub(/%([a-f\d]{2})/i) do |encoded|
168
+ decoded = $1.hex.chr
169
+
170
+ # This regexp matches all 'unreserved' characters from RFC3986 (2.3),
171
+ # plus all multibyte UTF-8 characters.
172
+ if decoded.match?(/[A-Za-z0-9\-._~\x80-\xFF]/n)
173
+ decoded
174
+ else
175
+ encoded
176
+ end
177
+ end
178
+ end
179
+
180
+ # Performs the reverse function of `unescape_unreserved`. Unlike
181
+ # the previous function, we can reuse the logic in URI#encode
182
+ def escape_unreserved(input)
183
+ # This regexp matches unsafe characters, i.e. everything except 'reserved'
184
+ # and 'unreserved' characters from RFC3986 (2.3), and additionally '%',
185
+ # as percent-encoded unreserved characters could be left over from the
186
+ # `unescape_unreserved` invocation.
187
+ #
188
+ # See also URI::REGEXP::PATTERN::{UNRESERVED,RESERVED}.
189
+ URI::DEFAULT_PARSER.escape(input, /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]%]/)
190
+ end
191
+
192
+ def sanitize_string(input)
193
+ if input.is_a? String
194
+ input = input.force_encoding(Encoding::UTF_8)
195
+
196
+ if input.valid_encoding?
197
+ input
198
+ else
199
+ @strategy.call(input)
200
+ end
201
+ else
202
+ input
203
+ end
204
+ end
205
+ end
206
+
207
+ include Sanitizers
208
+
209
+ class SanitizedRackInput
210
+ FailedToReadBody = Class.new(Exception)
211
+
212
+ include Sanitizers
213
+
214
+ def initialize(original_io, env, uri_encoded, strategy)
215
+ @original_io = original_io
216
+ @uri_encoded = uri_encoded
217
+ @env = env
218
+ @strategy = strategy
219
+ @sanitized_io = nil
220
+ end
221
+
222
+ def gets
223
+ sanitized_io.gets
224
+ end
225
+
226
+ def read(*args)
227
+ sanitized_io.read(*args)
228
+ end
229
+
230
+ def each(&block)
231
+ sanitized_io.each(&block)
232
+ end
233
+
234
+ def rewind
235
+ sanitized_io.rewind
236
+ end
237
+
238
+ def size
239
+ # StringIO#size is bytesize
240
+ sanitized_io.size
241
+ end
242
+
243
+ def close
244
+ @sanitized_io&.close
245
+ @original_io.close if @original_io.respond_to?(:close)
246
+ end
247
+
248
+ private
249
+
250
+ UTF8_BOM = "\xef\xbb\xbf".b.freeze
251
+ UTF8_BOM_SIZE = UTF8_BOM.bytesize
252
+
253
+ # Memoized StringIO over the sanitized request body; EOFError from the read becomes FailedToReadBody.
254
+ def sanitized_io
255
+ @sanitized_io ||= begin
256
+ content_length = @env['CONTENT_LENGTH']&.to_i
257
+ # IO#read(length) returns nil at EOF for a positive length; fall back to an empty, mutable string.
258
+ input = (content_length && content_length >= 0 ? @original_io.read(content_length) : @original_io.read) || String.new
259
+ input = input.byteslice(UTF8_BOM_SIZE..-1) if input.start_with?(UTF8_BOM)
260
+ input = sanitize_string(input)
261
+ if @uri_encoded
262
+ input = sanitize_uri_encoded_string(input).force_encoding(Encoding::UTF_8)
263
+ end
264
+ # Only rewrite CONTENT_LENGTH when it was already set, so it matches the sanitized byte size.
265
+ @env['CONTENT_LENGTH'] &&= input.bytesize.to_s
266
+ StringIO.new(input)
267
+ end
268
+ rescue ::EOFError => error
269
+ raise FailedToReadBody, error.message
270
+ end
271
+ end
272
+ end
273
+ end
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |gem|
4
+ gem.name = "rack-sanitizer"
5
+ gem.version = '2.0.0'
6
+ gem.authors = ["Jean Boussier", "whitequark"]
7
+ gem.license = "MIT"
8
+ gem.email = ["jean.boussier@gmail.org"]
9
+ gem.description = %{Rack::Sanitizer is a Rack middleware which cleans up } <<
10
+ %{invalid UTF8 characters in request URI and headers.}
11
+ gem.summary = "It is a mordernized and optimized fork of rack-utf8_sanitizer"
12
+ gem.homepage = "http://github.com/Shopify/rack-sanitizer"
13
+
14
+ gem.files = `git ls-files`.split($/)
15
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
16
+ gem.require_paths = ["lib"]
17
+
18
+ gem.metadata["allowed_push_host"] = "https://rubygems.org/"
19
+
20
+ gem.required_ruby_version = '>= 2.5'
21
+
22
+ gem.add_dependency "rack", '>= 1.0', '< 4.0'
23
+
24
+ gem.add_development_dependency "bacon"
25
+ gem.add_development_dependency "bacon-colored_output"
26
+ gem.add_development_dependency "rake"
27
+ end
@@ -0,0 +1,526 @@
1
+ # encoding:ascii-8bit
2
+
3
+ require 'bacon/colored_output'
4
+ require 'cgi'
5
+ require 'rack/sanitizer'
6
+
7
+ describe Rack::Sanitizer do
8
+ before do
9
+ @app = Rack::Sanitizer.new(-> env { env["rack.input"]&.size; env })
10
+ end
11
+
12
+ shared :does_sanitize_plain do
13
+ it "sanitizes plaintext entity (HTTP_USER_AGENT)" do
14
+ env = @app.({ "HTTP_USER_AGENT" => @plain_input })
15
+ result = env["HTTP_USER_AGENT"]
16
+
17
+ result.encoding.should == Encoding::UTF_8
18
+ result.should.be.valid_encoding
19
+ end
20
+ end
21
+
22
+ shared :does_sanitize_uri do
23
+ it "sanitizes URI-like entity (REQUEST_PATH)" do
24
+ env = @app.({ "REQUEST_PATH" => @uri_input })
25
+ result = env["REQUEST_PATH"]
26
+
27
+ result.encoding.should == Encoding::US_ASCII
28
+ result.should.be.valid_encoding
29
+ end
30
+ end
31
+
32
+ describe "with invalid host input" do
33
+ it "sanitizes host entity (SERVER_NAME)" do
34
+ host = "host\xD0".force_encoding('UTF-8')
35
+ env = @app.({ "SERVER_NAME" => host })
36
+ result = env["SERVER_NAME"]
37
+
38
+ result.encoding.should == Encoding::US_ASCII
39
+ result.should.be.valid_encoding
40
+ end
41
+ end
42
+
43
+ describe "with invalid UTF-8 input" do
44
+ before do
45
+ @plain_input = "foo\xe0".force_encoding('UTF-8')
46
+ @uri_input = "http://bar/foo%E0".force_encoding('UTF-8')
47
+ end
48
+
49
+ behaves_like :does_sanitize_plain
50
+ behaves_like :does_sanitize_uri
51
+ end
52
+
53
+ describe "with invalid, incorrectly percent-encoded UTF-8 URI input" do
54
+ before do
55
+ @uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
56
+ end
57
+
58
+ behaves_like :does_sanitize_uri
59
+ end
60
+
61
+ describe "with invalid ASCII-8BIT input" do
62
+ before do
63
+ @plain_input = "foo\xe0"
64
+ @uri_input = "http://bar/foo%E0"
65
+ end
66
+
67
+ behaves_like :does_sanitize_plain
68
+ behaves_like :does_sanitize_uri
69
+ end
70
+
71
+ describe "with invalid, incorrectly percent-encoded ASCII-8BIT URI input" do
72
+ before do
73
+ @uri_input = "http://bar/foo%E0\xe0"
74
+ end
75
+
76
+ behaves_like :does_sanitize_uri
77
+ end
78
+
79
+ shared :identity_plain do
80
+ it "does not change plaintext entity (HTTP_USER_AGENT)" do
81
+ env = @app.({ "HTTP_USER_AGENT" => @plain_input })
82
+ result = env["HTTP_USER_AGENT"]
83
+
84
+ result.encoding.should == Encoding::UTF_8
85
+ result.should.be.valid_encoding
86
+ result.should == @plain_input
87
+ end
88
+ end
89
+
90
+ shared :identity_uri do
91
+ it "does not change URI-like entity (REQUEST_PATH)" do
92
+ env = @app.({ "REQUEST_PATH" => @uri_input })
93
+ result = env["REQUEST_PATH"]
94
+
95
+ result.encoding.should == Encoding::US_ASCII
96
+ result.should.be.valid_encoding
97
+ result.should == @uri_input
98
+ end
99
+ end
100
+
101
+ describe "with valid UTF-8 input" do
102
+ before do
103
+ @plain_input = "foo bar лол".force_encoding('UTF-8')
104
+ @uri_input = "http://bar/foo+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
105
+ end
106
+
107
+ behaves_like :identity_plain
108
+ behaves_like :identity_uri
109
+
110
+ describe "with URI characters from reserved range" do
111
+ before do
112
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
113
+ end
114
+
115
+ behaves_like :identity_uri
116
+ end
117
+ end
118
+
119
+ describe "with valid, not percent-encoded UTF-8 URI input" do
120
+ before do
121
+ @uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
122
+ @encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
123
+ end
124
+
125
+ it "does not change URI-like entity (REQUEST_PATH)" do
126
+ env = @app.({ "REQUEST_PATH" => @uri_input })
127
+ result = env["REQUEST_PATH"]
128
+
129
+ result.encoding.should == Encoding::US_ASCII
130
+ result.should.be.valid_encoding
131
+ result.should == @encoded
132
+ end
133
+ end
134
+
135
+ describe "with valid ASCII-8BIT input" do
136
+ before do
137
+ @plain_input = "bar baz"
138
+ @uri_input = "http://bar/bar+baz"
139
+ end
140
+
141
+ behaves_like :identity_plain
142
+ behaves_like :identity_uri
143
+
144
+ describe "with URI characters from reserved range" do
145
+ before do
146
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB"
147
+ end
148
+
149
+ behaves_like :identity_uri
150
+ end
151
+ end
152
+
153
+ describe "with frozen strings" do
154
+ before do
155
+ @plain_input = "bar baz".freeze
156
+ @uri_input = "http://bar/bar+baz".freeze
157
+ end
158
+
159
+ it "preserves the frozen? status of input" do
160
+ env = @app.({ "HTTP_USER_AGENT" => @plain_input,
161
+ "REQUEST_PATH" => @uri_input })
162
+
163
+ env["HTTP_USER_AGENT"].should.be.frozen
164
+ env["REQUEST_PATH"].should.be.frozen
165
+ end
166
+ end
167
+
168
+ describe "with symbols in the env" do
169
+ before do
170
+ @uri_input = "http://bar/foo%E0\xe0".force_encoding('UTF-8')
171
+ end
172
+
173
+ it "sanitizes REQUEST_PATH with invalid UTF-8 URI input" do
174
+ env = @app.({ :requested_at => "2014-07-22",
175
+ "REQUEST_PATH" => @uri_input })
176
+
177
+ result = env["REQUEST_PATH"]
178
+
179
+ result.encoding.should == Encoding::US_ASCII
180
+ result.should.be.valid_encoding
181
+ end
182
+ end
183
+
184
+ describe "with form data" do
185
+ def request_env
186
+ @plain_input = "foo bar лол".force_encoding('UTF-8')
187
+ {
188
+ "REQUEST_METHOD" => "POST",
189
+ "CONTENT_TYPE" => "application/x-www-form-urlencoded;foo=bar",
190
+ "HTTP_USER_AGENT" => @plain_input,
191
+ "rack.input" => @rack_input,
192
+ }
193
+ end
194
+
195
+ def sanitize_form_data(request_env = request_env())
196
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
197
+ @response_env = @app.(request_env)
198
+ sanitized_input = @response_env['rack.input'].read
199
+
200
+ yield sanitized_input if block_given?
201
+
202
+ @response_env['rack.input'].rewind
203
+ behaves_like :does_sanitize_plain
204
+ behaves_like :does_sanitize_uri
205
+ behaves_like :identity_plain
206
+ behaves_like :identity_uri
207
+ @response_env['rack.input'].close
208
+ end
209
+
210
+ class BrokenIO < StringIO
211
+ def read(_length = nil)
212
+ raise EOFError
213
+ end
214
+ end
215
+
216
+ it "returns HTTP 400 on EOF" do
217
+ @rack_input = BrokenIO.new
218
+ @response_env = @app.(request_env)
219
+ @response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
220
+ end
221
+
222
+ it "sanitizes StringIO rack.input" do
223
+ input = "foo=bla&quux=bar"
224
+ @rack_input = StringIO.new input
225
+
226
+ sanitize_form_data do |sanitized_input|
227
+ sanitized_input.encoding.should == Encoding::UTF_8
228
+ sanitized_input.should.be.valid_encoding
229
+ sanitized_input.should == input
230
+ end
231
+ end
232
+
233
+ it "sanitizes StringIO rack.input on GET" do
234
+ input = "foo=bla&quux=bar"
235
+ @rack_input = StringIO.new input
236
+
237
+ sanitize_form_data(request_env.merge("REQUEST_METHOD" => "GET")) do |sanitized_input|
238
+ sanitized_input.encoding.should == Encoding::UTF_8
239
+ sanitized_input.should.be.valid_encoding
240
+ sanitized_input.should == input
241
+ end
242
+ end
243
+
244
+ it "sanitizes StringIO rack.input with bad encoding" do
245
+ input = "foo=bla&quux=bar\xED"
246
+ @rack_input = StringIO.new input
247
+
248
+ sanitize_form_data do |sanitized_input|
249
+ sanitized_input.encoding.should == Encoding::UTF_8
250
+ sanitized_input.should.be.valid_encoding
251
+ sanitized_input.should != input
252
+ end
253
+ end
254
+
255
+ it "strip UTF-8 BOM from StringIO rack.input" do
256
+ input = %(\xef\xbb\xbf{"Hello": "World"})
257
+ @rack_input = StringIO.new input
258
+
259
+ sanitize_form_data(request_env.merge("CONTENT_TYPE" => "application/json")) do |sanitized_input|
260
+ sanitized_input.encoding.should == Encoding::UTF_8
261
+ sanitized_input.should.be.valid_encoding
262
+ sanitized_input.should == '{"Hello": "World"}'
263
+ end
264
+ end
265
+
266
+ it "sanitizes StringIO rack.input with form encoded bad encoding" do
267
+ input = "foo=bla&foo=baz&quux%ED=bar%ED"
268
+ @rack_input = StringIO.new input
269
+
270
+ sanitize_form_data do |sanitized_input|
271
+ # URI.decode_www_form does some encoding magic
272
+ sanitized_input.split("&").each do |pair|
273
+ pair.split("=", 2).each do |component|
274
+ decoded = URI.decode_www_form_component(component)
275
+ decoded.should.be.valid_encoding
276
+ end
277
+ end
278
+ sanitized_input.should != input
279
+ end
280
+ end
281
+
282
+ it "sanitizes non-StringIO rack.input" do
283
+ require 'rack/rewindable_input'
284
+ input = "foo=bla&quux=bar"
285
+ @rack_input = Rack::RewindableInput.new(StringIO.new(input))
286
+
287
+ sanitize_form_data do |sanitized_input|
288
+ sanitized_input.encoding.should == Encoding::UTF_8
289
+ sanitized_input.should.be.valid_encoding
290
+ sanitized_input.should == input
291
+ end
292
+ end
293
+
294
+ it "sanitizes non-StringIO rack.input with bad encoding" do
295
+ require 'rack/rewindable_input'
296
+ input = "foo=bla&quux=bar\xED"
297
+ @rack_input = Rack::RewindableInput.new(StringIO.new(input))
298
+
299
+ sanitize_form_data do |sanitized_input|
300
+ sanitized_input.encoding.should == Encoding::UTF_8
301
+ sanitized_input.should.be.valid_encoding
302
+ sanitized_input.should != input
303
+ end
304
+ end
305
+
306
+ it "does not sanitize the rack body if there is no CONTENT_TYPE" do
307
+ input = "foo=bla&quux=bar\xED"
308
+ @rack_input = StringIO.new input
309
+
310
+ env = request_env.update('CONTENT_TYPE' => nil)
311
+ sanitize_form_data(env) do |sanitized_input|
312
+ sanitized_input.encoding.should == Encoding::ASCII_8BIT
313
+ sanitized_input.should.be.valid_encoding
314
+ sanitized_input.should == input
315
+ end
316
+ end
317
+
318
+ it "does not sanitize the rack body if there is empty CONTENT_TYPE" do
319
+ input = "foo=bla&quux=bar\xED"
320
+ @rack_input = StringIO.new input
321
+
322
+ env = request_env.update('CONTENT_TYPE' => '')
323
+ sanitize_form_data(env) do |sanitized_input|
324
+ sanitized_input.encoding.should == Encoding::ASCII_8BIT
325
+ sanitized_input.should.be.valid_encoding
326
+ sanitized_input.should == input
327
+ end
328
+ end
329
+
330
+ it "adjusts content-length when replacing input" do
331
+ input = "foo=bla&quux=bar\xED"
332
+ @rack_input = StringIO.new input
333
+
334
+ env = request_env.update("CONTENT_LENGTH" => input.bytesize)
335
+ sanitize_form_data(env) do |sanitized_input|
336
+ sanitized_input.bytesize.should != input.bytesize
337
+ @response_env["CONTENT_LENGTH"].should == sanitized_input.bytesize.to_s
338
+ end
339
+ end
340
+
341
+ it "does not sanitize null bytes by default" do
342
+ input = "foo=bla&quux=bar%00"
343
+ @rack_input = StringIO.new input
344
+
345
+ sanitize_form_data do |sanitized_input|
346
+ sanitized_input.encoding.should == Encoding::UTF_8
347
+ sanitized_input.should.be.valid_encoding
348
+ sanitized_input.should == input
349
+ end
350
+ end
351
+ end
352
+
353
+ describe "with custom content-type" do
354
+ def request_env
355
+ {
356
+ "REQUEST_METHOD" => "GET",
357
+ "CONTENT_TYPE" => "application/json",
358
+ "HTTP_COOKIE" => @cookie,
359
+ "rack.input" => StringIO.new,
360
+ }
361
+ end
362
+
363
+ it "sanitizes bad http cookie" do
364
+ @cookie = "foo=bla; quux=bar\xED"
365
+ response_env = @app.(request_env)
366
+ response_env['HTTP_COOKIE'].should != @cookie
367
+ response_env['HTTP_COOKIE'].should == 'foo=bla; quux=bar%EF%BF%BD'
368
+ end
369
+
370
+ it "does not change ok http cookie" do
371
+ @cookie = "foo=bla; quux=bar"
372
+ response_env = @app.(request_env)
373
+ response_env['HTTP_COOKIE'].should == @cookie
374
+
375
+ @cookie = "foo=b%3bla; quux=b%20a%20r"
376
+ response_env = @app.(request_env)
377
+ response_env['HTTP_COOKIE'].should == @cookie
378
+ end
379
+ end
380
+
381
+ describe "with custom content-type" do
382
+ def request_env
383
+ @plain_input = "foo bar лол".force_encoding('UTF-8')
384
+ {
385
+ "REQUEST_METHOD" => "POST",
386
+ "CONTENT_TYPE" => "application/vnd.api+json",
387
+ "HTTP_USER_AGENT" => @plain_input,
388
+ "rack.input" => @rack_input,
389
+ }
390
+ end
391
+
392
+ def sanitize_data(request_env = request_env())
393
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
394
+ @response_env = @app.(request_env)
395
+ sanitized_input = @response_env['rack.input'].read
396
+
397
+ yield sanitized_input if block_given?
398
+ end
399
+
400
+ it "does not sanitize custom content-type by default" do
401
+ input = "foo=bla&quux=bar\xED"
402
+ @rack_input = StringIO.new input
403
+
404
+ env = request_env
405
+ sanitize_data(env) do |sanitized_input|
406
+ sanitized_input.encoding.should == Encoding::ASCII_8BIT
407
+ sanitized_input.should.be.valid_encoding
408
+ sanitized_input.should == input
409
+ end
410
+ end
411
+
412
+ it "sanitizes custom content-type if additional_content_types given" do
413
+ @app = Rack::Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
414
+ input = "foo=bla&quux=bar\xED"
415
+ @rack_input = StringIO.new input
416
+
417
+ env = request_env
418
+ sanitize_data(env) do |sanitized_input|
419
+ sanitized_input.encoding.should == Encoding::UTF_8
420
+ sanitized_input.should.be.valid_encoding
421
+ sanitized_input.should != input
422
+ end
423
+ end
424
+
425
+ it "sanitizes default content-type if additional_content_types given" do
426
+ @app = Rack::Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
427
+ input = "foo=bla&quux=bar\xED"
428
+ @rack_input = StringIO.new input
429
+
430
+ env = request_env.update('CONTENT_TYPE' => 'application/json')
431
+ sanitize_data(env) do |sanitized_input|
432
+ sanitized_input.encoding.should == Encoding::UTF_8
433
+ sanitized_input.should.be.valid_encoding
434
+ sanitized_input.should != input
435
+ end
436
+ end
437
+
438
+ it "sanitizes custom content-type if sanitizable_content_types given" do
439
+ @app = Rack::Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
440
+ input = "foo=bla&quux=bar\xED"
441
+ @rack_input = StringIO.new input
442
+
443
+ env = request_env
444
+ sanitize_data(env) do |sanitized_input|
445
+ sanitized_input.encoding.should == Encoding::UTF_8
446
+ sanitized_input.should.be.valid_encoding
447
+ sanitized_input.should != input
448
+ end
449
+ end
450
+
451
+ it "does not sanitize default content-type if sanitizable_content_types does not include it" do
452
+ @app = Rack::Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
453
+ input = "foo=bla&quux=bar\xED"
454
+ @rack_input = StringIO.new input
455
+
456
+ env = request_env.update('CONTENT_TYPE' => 'application/json')
457
+ sanitize_data(env) do |sanitized_input|
458
+ sanitized_input.encoding.should == Encoding::ASCII_8BIT
459
+ sanitized_input.should.be.valid_encoding
460
+ sanitized_input.should == input
461
+ end
462
+ end
463
+ end
464
+
465
+ describe "with custom strategy" do
466
+ def request_env
467
+ @plain_input = "foo bar лол".force_encoding('UTF-8')
468
+ {
469
+ "REQUEST_METHOD" => "POST",
470
+ "CONTENT_TYPE" => "application/json",
471
+ "HTTP_USER_AGENT" => @plain_input,
472
+ "rack.input" => @rack_input,
473
+ }
474
+ end
475
+
476
+ def sanitize_data(request_env = request_env())
477
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
478
+ @response_env = @app.(request_env)
479
+ sanitized_input = @response_env['rack.input'].read
480
+
481
+ yield sanitized_input if block_given?
482
+ end
483
+
484
+ it "calls a default strategy (replace)" do
485
+ @app = Rack::Sanitizer.new(-> env { env })
486
+
487
+ input = "foo=bla&quux=bar\xED"
488
+ @rack_input = StringIO.new input
489
+
490
+ env = request_env
491
+ sanitize_data(env) do |sanitized_input|
492
+ sanitized_input.encoding.should == Encoding::UTF_8
493
+ sanitized_input.should.be.valid_encoding
494
+ sanitized_input.should != input
495
+ end
496
+ end
497
+
498
+ it "calls the exception strategy" do
499
+ @app = Rack::Sanitizer.new(-> env { env }, strategy: :exception)
500
+
501
+ input = "foo=bla&quux=bar\xED"
502
+ @rack_input = StringIO.new input
503
+
504
+ env = request_env
505
+ should.raise(EncodingError) { sanitize_data(env) }
506
+ end
507
+
508
+ it "accepts a proc as a strategy" do
509
+ truncate = -> (input) do
510
+ 'replace'.force_encoding(Encoding::UTF_8)
511
+ end
512
+
513
+ @app = Rack::Sanitizer.new(-> env { env }, strategy: truncate)
514
+
515
+ input = "foo=bla&quux=bar\xED"
516
+ @rack_input = StringIO.new input
517
+
518
+ env = request_env
519
+ sanitize_data(env) do |sanitized_input|
520
+ sanitized_input.encoding.should == Encoding::UTF_8
521
+ sanitized_input.should.be.valid_encoding
522
+ sanitized_input.should == 'replace'
523
+ end
524
+ end
525
+ end
526
+ end
metadata ADDED
@@ -0,0 +1,121 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rack-sanitizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Jean Boussier
8
+ - whitequark
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2023-11-09 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rack
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '1.0'
21
+ - - "<"
22
+ - !ruby/object:Gem::Version
23
+ version: '4.0'
24
+ type: :runtime
25
+ prerelease: false
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ version: '1.0'
31
+ - - "<"
32
+ - !ruby/object:Gem::Version
33
+ version: '4.0'
34
+ - !ruby/object:Gem::Dependency
35
+ name: bacon
36
+ requirement: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ type: :development
42
+ prerelease: false
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ - !ruby/object:Gem::Dependency
49
+ name: bacon-colored_output
50
+ requirement: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
64
+ requirement: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ type: :development
70
+ prerelease: false
71
+ version_requirements: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ description: Rack::Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
77
+ in request URI and headers.
78
+ email:
79
+ - jean.boussier@gmail.org
80
+ executables: []
81
+ extensions: []
82
+ extra_rdoc_files: []
83
+ files:
84
+ - ".editorconfig"
85
+ - ".github/dependabot.yml"
86
+ - ".github/workflows/ci.yml"
87
+ - ".gitignore"
88
+ - CHANGELOG.md
89
+ - Gemfile
90
+ - LICENSE.txt
91
+ - README.md
92
+ - Rakefile
93
+ - lib/rack/sanitizer.rb
94
+ - rack-sanitizer.gemspec
95
+ - test/test_sanitizer.rb
96
+ homepage: http://github.com/Shopify/rack-sanitizer
97
+ licenses:
98
+ - MIT
99
+ metadata:
100
+ allowed_push_host: https://rubygems.org/
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '2.5'
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ requirements: []
116
+ rubygems_version: 3.4.21
117
+ signing_key:
118
+ specification_version: 4
119
+ summary: It is a modernized and optimized fork of rack-utf8_sanitizer
120
+ test_files:
121
+ - test/test_sanitizer.rb