rack-utf8_sanitizer 1.6.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +17 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/ci.yml +23 -0
- data/.travis.yml +4 -7
- data/README.md +34 -3
- data/lib/rack/utf8_sanitizer.rb +8 -2
- data/rack-utf8_sanitizer.gemspec +2 -3
- data/test/test_utf8_sanitizer.rb +16 -2
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 045740a7d869543a26c071de26ee6585d00e6193eaa2a5a02bfe09142cfe11c1
|
4
|
+
data.tar.gz: '050977cbbb72a835dea65e4df6bd75d6837c216b2a2f68eecd83701f1153e7ff'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 611f078bdbe5f0247eac8ba3258a174eb70a1d3263b49ca0d4d261a8fd3de1b260da9defc36bc644eb2e6805211038adddf4396a9f7b0bb48fbd932e41991f97
|
7
|
+
data.tar.gz: 1bc7f43fbd004ac010a7829cb8077b2bdb245500670d2fc4d22372ccc44c959db47a3853aa68f1eb4c1533b5202ee6fdb44d368d722be7fa9439dc6049ddbef2
|
data/.editorconfig
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
root = true
|
2
|
+
|
3
|
+
[*]
|
4
|
+
indent_style = space
|
5
|
+
indent_size = 2
|
6
|
+
end_of_line = lf
|
7
|
+
charset = utf-8
|
8
|
+
trim_trailing_whitespace = true
|
9
|
+
insert_final_newline = true
|
10
|
+
|
11
|
+
[*.md]
|
12
|
+
indent_style = space
|
13
|
+
indent_size = 2
|
14
|
+
|
15
|
+
[*.y{a,}ml]
|
16
|
+
indent_style = space
|
17
|
+
indent_size = 2
|
@@ -0,0 +1,23 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
|
10
|
+
strategy:
|
11
|
+
fail-fast: false
|
12
|
+
matrix:
|
13
|
+
ruby: ["2.5", "2.6", "2.7", "3.0", "3.1", ruby-head, jruby-9.2, jruby-9.3, jruby-head]
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v3
|
17
|
+
- name: Set up Ruby
|
18
|
+
uses: ruby/setup-ruby@v1
|
19
|
+
with:
|
20
|
+
bundler-cache: true # 'bundle install' and cache gems
|
21
|
+
ruby-version: ${{ matrix.ruby }}
|
22
|
+
- name: Run tests
|
23
|
+
run: bundle exec rake
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# Rack::UTF8Sanitizer
|
2
2
|
|
3
|
-
Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters in request URI and headers.
|
3
|
+
Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters in request URI and headers. Additionally,
|
4
|
+
it cleans up invalid UTF8 characters in the request body (depending on the configurable content type filters) by reading
|
5
|
+
the input into a string, sanitizing the string, then replacing the Rack input stream with a rewindable input stream backed
|
6
|
+
by the sanitized string.
|
4
7
|
|
5
8
|
## Installation
|
6
9
|
|
@@ -45,7 +48,7 @@ For fields with "percent-encoded data", the algorithm is applied twice to catch
|
|
45
48
|
|
46
49
|
### Sanitizable content types
|
47
50
|
|
48
|
-
The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::UTF8Sanitizer.
|
51
|
+
The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::UTF8Sanitizer.
|
49
52
|
|
50
53
|
To add sanitizable content types to the list of defaults, pass the `additional_content_types` option when using Rack::UTF8Sanitizer, e.g.
|
51
54
|
|
@@ -75,7 +78,35 @@ config.middleware.insert 0, Rack::UTF8Sanitizer, except: [/HTTP_.+/]
|
|
75
78
|
|
76
79
|
There are two built-in strategies for handling invalid characters. The default strategy is `:replace`, which will cause any invalid characters to be replaced with the Unicode replacement character (�). The second built-in strategy is `:exception`, which will cause an `EncodingError` exception to be raised if invalid characters are found (the exception can then be handled by another Rack middleware).
|
77
80
|
|
78
|
-
|
81
|
+
This is an example of handling the `:exception` strategy with additional middleware:
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
require "./your/middleware/directory/utf8_sanitizer_exception_handler.rb"
|
85
|
+
|
86
|
+
config.middleware.insert 0, Rack::UTF8SanitizerExceptionHandler
|
87
|
+
config.middleware.insert_after Rack::UTF8SanitizerExceptionHandler, Rack::UTF8Sanitizer, strategy: :exception
|
88
|
+
```
|
89
|
+
|
90
|
+
Note: The exception handling middleware must be inserted before `Rack::UTF8Sanitizer`
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
module Rack
|
94
|
+
class UTF8SanitizerExceptionHandler
|
95
|
+
def initialize(app)
|
96
|
+
@app = app
|
97
|
+
end
|
98
|
+
|
99
|
+
def call(env)
|
100
|
+
@app.call(env)
|
101
|
+
rescue EncodingError => exception
|
102
|
+
# OPTIONAL: Add error logging service of your choice here
|
103
|
+
return [400, {}, ["Bad Request"]]
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
```
|
108
|
+
|
109
|
+
An object that responds to `#call` and accepts the offending string with invalid characters as an argument can also be passed as a `:strategy`. This is how you can define custom strategies.
|
79
110
|
|
80
111
|
```ruby
|
81
112
|
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -6,6 +6,7 @@ require 'stringio'
|
|
6
6
|
module Rack
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
|
+
BAD_REQUEST = [400, { "Content-Type" => "text/plain" }, ["Bad Request"]]
|
9
10
|
|
10
11
|
# options[:sanitizable_content_types] Array
|
11
12
|
# options[:additional_content_types] Array
|
@@ -19,7 +20,12 @@ module Rack
|
|
19
20
|
end
|
20
21
|
|
21
22
|
def call(env)
|
22
|
-
|
23
|
+
begin
|
24
|
+
env = sanitize(env)
|
25
|
+
rescue EOFError
|
26
|
+
return BAD_REQUEST
|
27
|
+
end
|
28
|
+
@app.call(env)
|
23
29
|
end
|
24
30
|
|
25
31
|
DEFAULT_STRATEGIES = {
|
@@ -231,7 +237,7 @@ module Rack
|
|
231
237
|
# Performs the reverse function of `unescape_unreserved`. Unlike
|
232
238
|
# the previous function, we can reuse the logic in URI#encode
|
233
239
|
def escape_unreserved(input)
|
234
|
-
URI.
|
240
|
+
URI::DEFAULT_PARSER.escape(input, UNSAFE)
|
235
241
|
end
|
236
242
|
|
237
243
|
def sanitize_string(input)
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.6.0'
|
5
|
+
gem.version = '1.8.0'
|
6
6
|
gem.authors = ["whitequark"]
|
7
7
|
gem.license = "MIT"
|
8
8
|
gem.email = ["whitequark@whitequark.org"]
|
@@ -12,13 +12,12 @@ Gem::Specification.new do |gem|
|
|
12
12
|
gem.homepage = "http://github.com/whitequark/rack-utf8_sanitizer"
|
13
13
|
|
14
14
|
gem.files = `git ls-files`.split($/)
|
15
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
16
15
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
17
16
|
gem.require_paths = ["lib"]
|
18
17
|
|
19
18
|
gem.required_ruby_version = '>= 1.9.3'
|
20
19
|
|
21
|
-
gem.add_dependency "rack", '>= 1.0', '<
|
20
|
+
gem.add_dependency "rack", '>= 1.0', '< 4.0'
|
22
21
|
|
23
22
|
gem.add_development_dependency "bacon"
|
24
23
|
gem.add_development_dependency "bacon-colored_output"
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding:ascii-8bit
|
2
2
|
|
3
3
|
require 'bacon/colored_output'
|
4
|
+
require 'cgi'
|
4
5
|
require 'rack/utf8_sanitizer'
|
5
6
|
|
6
7
|
describe Rack::UTF8Sanitizer do
|
@@ -118,6 +119,7 @@ describe Rack::UTF8Sanitizer do
|
|
118
119
|
describe "with valid, not percent-encoded UTF-8 URI input" do
|
119
120
|
before do
|
120
121
|
@uri_input = "http://bar/foo+bar+лол".force_encoding('UTF-8')
|
122
|
+
@encoded = "http://bar/foo+bar+#{CGI.escape("лол")}"
|
121
123
|
end
|
122
124
|
|
123
125
|
it "does not change URI-like entity (REQUEST_PATH)" do
|
@@ -126,7 +128,7 @@ describe Rack::UTF8Sanitizer do
|
|
126
128
|
|
127
129
|
result.encoding.should == Encoding::US_ASCII
|
128
130
|
result.should.be.valid_encoding
|
129
|
-
result.should ==
|
131
|
+
result.should == @encoded
|
130
132
|
end
|
131
133
|
end
|
132
134
|
|
@@ -205,6 +207,18 @@ describe Rack::UTF8Sanitizer do
|
|
205
207
|
@response_env['rack.input'].close
|
206
208
|
end
|
207
209
|
|
210
|
+
class BrokenIO < StringIO
|
211
|
+
def read
|
212
|
+
raise EOFError
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
it "returns HTTP 400 on EOF" do
|
217
|
+
@rack_input = BrokenIO.new
|
218
|
+
@response_env = @app.(request_env)
|
219
|
+
@response_env.should == [400, {"Content-Type"=>"text/plain"}, ["Bad Request"]]
|
220
|
+
end
|
221
|
+
|
208
222
|
it "sanitizes StringIO rack.input" do
|
209
223
|
input = "foo=bla&quux=bar"
|
210
224
|
@rack_input = StringIO.new input
|
@@ -549,7 +563,7 @@ describe Rack::UTF8Sanitizer do
|
|
549
563
|
sanitize_data(env) do |sanitized_input|
|
550
564
|
sanitized_input.encoding.should == Encoding::UTF_8
|
551
565
|
sanitized_input.should.be.valid_encoding
|
552
|
-
sanitized_input.should == 'replace'
|
566
|
+
sanitized_input.should == 'replace'
|
553
567
|
end
|
554
568
|
end
|
555
569
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.0
|
4
|
+
version: 1.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: '1.0'
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '
|
22
|
+
version: '4.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: '1.0'
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '
|
32
|
+
version: '4.0'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: bacon
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +80,9 @@ executables: []
|
|
80
80
|
extensions: []
|
81
81
|
extra_rdoc_files: []
|
82
82
|
files:
|
83
|
+
- ".editorconfig"
|
84
|
+
- ".github/dependabot.yml"
|
85
|
+
- ".github/workflows/ci.yml"
|
83
86
|
- ".gitignore"
|
84
87
|
- ".travis.yml"
|
85
88
|
- CHANGELOG.md
|
@@ -109,8 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
112
|
- !ruby/object:Gem::Version
|
110
113
|
version: '0'
|
111
114
|
requirements: []
|
112
|
-
|
113
|
-
rubygems_version: 2.7.6
|
115
|
+
rubygems_version: 3.2.5
|
114
116
|
signing_key:
|
115
117
|
specification_version: 4
|
116
118
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|