rack-utf8_sanitizer 1.6.0 → 1.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.editorconfig +17 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/ci.yml +23 -0
- data/.travis.yml +4 -7
- data/README.md +34 -3
- data/lib/rack/utf8_sanitizer.rb +8 -2
- data/rack-utf8_sanitizer.gemspec +2 -3
- data/test/test_utf8_sanitizer.rb +16 -2
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 045740a7d869543a26c071de26ee6585d00e6193eaa2a5a02bfe09142cfe11c1
|
4
|
+
data.tar.gz: '050977cbbb72a835dea65e4df6bd75d6837c216b2a2f68eecd83701f1153e7ff'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 611f078bdbe5f0247eac8ba3258a174eb70a1d3263b49ca0d4d261a8fd3de1b260da9defc36bc644eb2e6805211038adddf4396a9f7b0bb48fbd932e41991f97
|
7
|
+
data.tar.gz: 1bc7f43fbd004ac010a7829cb8077b2bdb245500670d2fc4d22372ccc44c959db47a3853aa68f1eb4c1533b5202ee6fdb44d368d722be7fa9439dc6049ddbef2
|
data/.editorconfig
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
root = true
|
2
|
+
|
3
|
+
[*]
|
4
|
+
indent_style = space
|
5
|
+
indent_size = 2
|
6
|
+
end_of_line = lf
|
7
|
+
charset = utf-8
|
8
|
+
trim_trailing_whitespace = true
|
9
|
+
insert_final_newline = true
|
10
|
+
|
11
|
+
[*.md]
|
12
|
+
indent_style = space
|
13
|
+
indent_size = 2
|
14
|
+
|
15
|
+
[*.y{a,}ml]
|
16
|
+
indent_style = space
|
17
|
+
indent_size = 2
|
@@ -0,0 +1,23 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
|
10
|
+
strategy:
|
11
|
+
fail-fast: false
|
12
|
+
matrix:
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v3
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
bundler-cache: true # 'bundle install' and cache gems
|
21
|
+
ruby-version: ${{ matrix.ruby }}
|
22
|
+
- name: Run tests
|
23
|
+
run: bundle exec rake
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# Rack::UTF8Sanitizer
|
2
2
|
|
3
|
-
Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters in request URI and headers.
|
3
|
+
Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters in request URI and headers. Additionally,
|
4
|
+
it cleans up invalid UTF8 characters in the request body (depending on the configurable content type filters) by reading
|
5
|
+
the input into a string, sanitizing the string, then replacing the Rack input stream with a rewindable input stream backed
|
6
|
+
by the sanitized string.
|
4
7
|
|
5
8
|
## Installation
|
6
9
|
|
@@ -45,7 +48,7 @@ For fields with "percent-encoded data", the algorithm is applied twice to catch
|
|
45
48
|
|
46
49
|
### Sanitizable content types
|
47
50
|
|
48
|
-
The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::UTF8Sanitizer.
|
51
|
+
The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::UTF8Sanitizer.
|
49
52
|
|
50
53
|
To add sanitizable content types to the list of defaults, pass the `additional_content_types` options when using Rack::UTF8Sanitizer, e.g.
|
51
54
|
|
@@ -75,7 +78,35 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, except: [/HTTP_.+/]
|
|
75
78
|
|
76
79
|
There are two built in strategies for handling invalid characters. The default strategy is `:replace`, which will cause any invalid characters to be replaced with the Unicode replacement character (�). The second built in strategy is `:exception` which will cause an `EncodingError` exception to be raised if invalid characters are found (the exception can then be handled by another Rack middleware).
|
77
80
|
|
78
|
-
|
81
|
+
This is an example of handling the `:exception` strategy with additional middleware:
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
require "./your/middleware/directory/utf8_sanitizer_exception_handler.rb"
|
85
|
+
|
86
|
+
config.middleware.insert 0, Rack::UTF8SanitizerExceptionHandler
|
87
|
+
config.middleware.insert_after Rack::UTF8SanitizerExceptionHandler, Rack::UTF8Sanitizer, strategy: :exception
|
88
|
+
```
|
89
|
+
|
90
|
+
Note: The exception handling middleware must be inserted before `Rack::UTF8Sanitizer`, so that it wraps the sanitizer and can rescue the `EncodingError` it raises.
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
module Rack
|
94
|
+
class UTF8SanitizerExceptionHandler
|
95
|
+
def initialize(app)
|
96
|
+
@app = app
|
97
|
+
end
|
98
|
+
|
99
|
+
def call(env)
|
100
|
+
@app.call(env)
|
101
|
+
rescue EncodingError => exception
|
102
|
+
# OPTIONAL: Add error logging service of your choice here
|
103
|
+
return [400, {}, ["Bad Request"]]
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
```
|
108
|
+
|
109
|
+
An object that responds to `#call` and accepts the offending string with invalid characters as an argument can also be passed as a `:strategy`. This is how you can define custom strategies.
|
79
110
|
|
80
111
|
```ruby
|
81
112
|
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -6,6 +6,7 @@ require 'stringio'
|
|
6
6
|
module Rack
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
|
+
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
9
10
|
|
10
11
|
# options[:sanitizable_content_types] Array
|
11
12
|
# options[:additional_content_types] Array
|
@@ -19,7 +20,12 @@ module Rack
|
|
19
20
|
end
|
20
21
|
|
21
22
|
def call(env)
|
22
|
-
|
23
|
+
begin
|
24
|
+
env = sanitize(env)
|
25
|
+
rescue EOFError
|
26
|
+
return BAD_REQUEST
|
27
|
+
end
|
28
|
+
@app.call(env)
|
23
29
|
end
|
24
30
|
|
25
31
|
DEFAULT_STRATEGIES = {
|
@@ -231,7 +237,7 @@ module Rack
|
|
231
237
|
# Performs the reverse function of `unescape_unreserved`. Unlike
|
232
238
|
# the previous function, we can reuse the logic in URI#encode
|
233
239
|
def escape_unreserved(input)
|
234
|
-
URI.
|
240
|
+
URI::DEFAULT_PARSER.escape(input, UNSAFE)
|
235
241
|
end
|
236
242
|
|
237
243
|
def sanitize_string(input)
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.6.0'
|
5
|
+
gem.version = '1.8.0'
|
6
6
|
gem.authors = ["whitequark"]
|
7
7
|
gem.license = "MIT"
|
8
8
|
gem.email = ["whitequark@whitequark.org"]
|
@@ -12,13 +12,12 @@ Gem::Specification.new do |gem|
|
|
12
12
|
gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
|
13
13
|
|
14
14
|
gem.files = `git ls-files`.split($/)
|
15
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
16
15
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
17
16
|
gem.require_paths = ["lib"]
|
18
17
|
|
19
18
|
gem.required_ruby_version = '>= 1.9.3'
|
20
19
|
|
21
|
-
gem.add_dependency "rack", '>= 1.0', '<
|
20
|
+
gem.add_dependency "rack", '>= 1.0', '< 4.0'
|
22
21
|
|
23
22
|
gem.add_development_dependency "bacon"
|
24
23
|
gem.add_development_dependency "bacon-colored_output"
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding:ascii-8bit
|
2
2
|
|
3
3
|
require 'bacon/colored_output'
|
4
|
+
require 'cgi'
|
4
5
|
require 'rack/utf8_sanitizer'
|
5
6
|
|
6
7
|
describe Rack::UTF8Sanitizer do
|
@@ -118,6 +119,7 @@ describe Rack::UTF8Sanitizer do
|
|
118
119
|
describe "with valid, not percent-encoded UTF-8 URI input" do
|
119
120
|
before do
|
120
121
|
@uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
|
122
|
+
@encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
|
121
123
|
end
|
122
124
|
|
123
125
|
it "does not change URI-like entity (REQUEST_PATH)" do
|
@@ -126,7 +128,7 @@ describe Rack::UTF8Sanitizer do
|
|
126
128
|
|
127
129
|
result.encoding.should == Encoding::US_ASCII
|
128
130
|
result.should.be.valid_encoding
|
129
|
-
result.should ==
|
131
|
+
result.should == @encoded
|
130
132
|
end
|
131
133
|
end
|
132
134
|
|
@@ -205,6 +207,18 @@ describe Rack::UTF8Sanitizer do
|
|
205
207
|
@response_env['rack.input'].close
|
206
208
|
end
|
207
209
|
|
210
|
+
class BrokenIO < StringIO
|
211
|
+
def read
|
212
|
+
raise EOFError
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
it "returns HTTP 400 on EOF" do
|
217
|
+
@rack_input = BrokenIO.new
|
218
|
+
@response_env = @app.(request_env)
|
219
|
+
@response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
|
220
|
+
end
|
221
|
+
|
208
222
|
it "sanitizes StringIO rack.input" do
|
209
223
|
input = "foo=bla&quux=bar"
|
210
224
|
@rack_input = StringIO.new input
|
@@ -549,7 +563,7 @@ describe Rack::UTF8Sanitizer do
|
|
549
563
|
sanitize_data(env) do |sanitized_input|
|
550
564
|
sanitized_input.encoding.should == Encoding::UTF_8
|
551
565
|
sanitized_input.should.be.valid_encoding
|
552
|
-
sanitized_input.should == 'replace'
|
566
|
+
sanitized_input.should == 'replace'
|
553
567
|
end
|
554
568
|
end
|
555
569
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.0
|
4
|
+
version: 1.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: '1.0'
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '
|
22
|
+
version: '4.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: '1.0'
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '
|
32
|
+
version: '4.0'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: bacon
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +80,9 @@ executables: []
|
|
80
80
|
extensions: []
|
81
81
|
extra_rdoc_files: []
|
82
82
|
files:
|
83
|
+
- ".editorconfig"
|
84
|
+
- ".github/dependabot.yml"
|
85
|
+
- ".github/workflows/ci.yml"
|
83
86
|
- ".gitignore"
|
84
87
|
- ".travis.yml"
|
85
88
|
- CHANGELOG.md
|
@@ -109,8 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
112
|
- !ruby/object:Gem::Version
|
110
113
|
version: '0'
|
111
114
|
requirements: []
|
112
|
-
|
113
|
-
rubygems_version: 2.7.6
|
115
|
+
rubygems_version: 3.2.5
|
114
116
|
signing_key:
|
115
117
|
specification_version: 4
|
116
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|