rack-utf8_sanitizer 1.3.2 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -1
- data/README.md +21 -1
- data/lib/rack/utf8_sanitizer.rb +30 -7
- data/rack-utf8_sanitizer.gemspec +2 -1
- data/test/test_utf8_sanitizer.rb +71 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 69f0fb52847e155e4c7afd57565531b8bf506149
|
4
|
+
data.tar.gz: 0502a652822279ce9fd6b22e2db54126b9247b49
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '01215648794d9d0b47ef805f40116644b0afab096a00d88ebe0d84df4aed839d025641ba1a40c032b62d525ff9467ec89498e396b6c3e69483535cd71b43e997'
|
7
|
+
data.tar.gz: 95d5d7cfe3a6ec0564a566fe95aaed96f48605265a101483e98dd82a46eaf5a7aa1ff3a29106105fba5aa03f1ef6f108eb29c80ccd11a053ad6b91642e8a3003
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -55,6 +55,26 @@ To explicitly set sanitizable content types and override the defaults, use the `
|
|
55
55
|
|
56
56
|
config.middleware.insert 0, Rack::UTF8Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
|
57
57
|
|
58
|
+
### Strategies
|
59
|
+
|
60
|
+
There are two built-in strategies for handling invalid characters. The default strategy is `:replace`, which will cause any invalid characters to be replaced with the Unicode replacement character (�). The second built-in strategy is `:exception`, which will cause an `EncodingError` exception to be raised if invalid characters are found (the exception can then be handled by another Rack middleware).
|
61
|
+
|
62
|
+
An object that responds to `#call` and accepts the offending string with invalid characters as an argument can also be passed as a `:strategy`. This is how you can define custom strategies.
|
63
|
+
|
64
|
+
```ruby
|
65
|
+
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
|
66
|
+
```
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
replace_string = lambda do |_invalid|
|
70
|
+
Rails.logger.warn('Replacing invalid string')
|
71
|
+
|
72
|
+
'<Bad Encoding>'.freeze
|
73
|
+
end
|
74
|
+
|
75
|
+
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
|
76
|
+
```
|
77
|
+
|
58
78
|
## Contributing
|
59
79
|
|
60
80
|
1. Fork it
|
@@ -63,4 +83,4 @@ To explicitly set sanitizable content types and override the defaults, use the `
|
|
63
83
|
4. Push to the branch (`git push origin my-new-feature`)
|
64
84
|
5. Create new Pull Request
|
65
85
|
|
66
|
-
To run the tests, run `rake spec` in the project directory.
|
86
|
+
To run the tests, run `rake spec` in the project directory.
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -11,6 +11,7 @@ module Rack
|
|
11
11
|
# options[:additional_content_types] Array
|
12
12
|
def initialize(app, options={})
|
13
13
|
@app = app
|
14
|
+
@strategy = build_strategy(options)
|
14
15
|
@sanitizable_content_types = options[:sanitizable_content_types]
|
15
16
|
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
16
17
|
end
|
@@ -19,6 +20,21 @@ module Rack
|
|
19
20
|
@app.call(sanitize(env))
|
20
21
|
end
|
21
22
|
|
23
|
+
DEFAULT_STRATEGIES = {
|
24
|
+
replace: lambda do |input|
|
25
|
+
input.
|
26
|
+
force_encoding(Encoding::ASCII_8BIT).
|
27
|
+
encode!(Encoding::UTF_8,
|
28
|
+
invalid: :replace,
|
29
|
+
undef: :replace)
|
30
|
+
end,
|
31
|
+
exception: lambda do |input|
|
32
|
+
input.
|
33
|
+
force_encoding(Encoding::ASCII_8BIT).
|
34
|
+
encode!(Encoding::UTF_8)
|
35
|
+
end
|
36
|
+
}.freeze
|
37
|
+
|
22
38
|
# http://rack.rubyforge.org/doc/SPEC.html
|
23
39
|
URI_FIELDS = %w(
|
24
40
|
SCRIPT_NAME
|
@@ -27,6 +43,7 @@ module Rack
|
|
27
43
|
HTTP_REFERER
|
28
44
|
ORIGINAL_FULLPATH
|
29
45
|
ORIGINAL_SCRIPT_NAME
|
46
|
+
SERVER_NAME
|
30
47
|
).map(&:freeze).freeze
|
31
48
|
|
32
49
|
SANITIZABLE_CONTENT_TYPES = %w(
|
@@ -40,13 +57,15 @@ module Rack
|
|
40
57
|
application/x-www-form-urlencoded
|
41
58
|
).map(&:freeze).freeze
|
42
59
|
|
60
|
+
HTTP_ = 'HTTP_'.freeze
|
61
|
+
|
43
62
|
def sanitize(env)
|
44
63
|
sanitize_rack_input(env)
|
45
64
|
env.each do |key, value|
|
46
65
|
if URI_FIELDS.include?(key)
|
47
66
|
env[key] = transfer_frozen(value,
|
48
67
|
sanitize_uri_encoded_string(value))
|
49
|
-
elsif key.to_s.start_with?(
|
68
|
+
elsif key.to_s.start_with?(HTTP_)
|
50
69
|
# Just sanitize the headers and leave them in UTF-8. There is
|
51
70
|
# no reason to have UTF-8 in headers, but if it's valid, let it be.
|
52
71
|
env[key] = transfer_frozen(value,
|
@@ -57,6 +76,14 @@ module Rack
|
|
57
76
|
|
58
77
|
protected
|
59
78
|
|
79
|
+
def build_strategy(options)
|
80
|
+
strategy = options.fetch(:strategy) { :replace }
|
81
|
+
|
82
|
+
return strategy unless DEFAULT_STRATEGIES.key?(strategy)
|
83
|
+
|
84
|
+
DEFAULT_STRATEGIES[strategy]
|
85
|
+
end
|
86
|
+
|
60
87
|
def sanitize_rack_input(env)
|
61
88
|
# https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
|
62
89
|
# Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
|
@@ -156,7 +183,7 @@ module Rack
|
|
156
183
|
# enough for our task.
|
157
184
|
def unescape_unreserved(input)
|
158
185
|
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
159
|
-
decoded =
|
186
|
+
decoded = $1.hex.chr
|
160
187
|
|
161
188
|
if decoded =~ UNRESERVED_OR_UTF8
|
162
189
|
decoded
|
@@ -187,11 +214,7 @@ module Rack
|
|
187
214
|
if input.valid_encoding?
|
188
215
|
input
|
189
216
|
else
|
190
|
-
input
|
191
|
-
force_encoding(Encoding::ASCII_8BIT).
|
192
|
-
encode!(Encoding::UTF_8,
|
193
|
-
invalid: :replace,
|
194
|
-
undef: :replace)
|
217
|
+
@strategy.call(input)
|
195
218
|
end
|
196
219
|
else
|
197
220
|
input
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.
|
5
|
+
gem.version = '1.4.0'
|
6
6
|
gem.authors = ["whitequark"]
|
7
|
+
gem.license = "MIT"
|
7
8
|
gem.email = ["whitequark@whitequark.org"]
|
8
9
|
gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
|
9
10
|
%{invalid UTF8 characters in request URI and headers.}
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -28,6 +28,17 @@ describe Rack::UTF8Sanitizer do
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
+
describe "with invalid host input" do
|
32
|
+
it "sanitizes host entity (SERVER_NAME)" do
|
33
|
+
host = "host\xD0".force_encoding('UTF-8')
|
34
|
+
env = @app.({ "SERVER_NAME" => host })
|
35
|
+
result = env["SERVER_NAME"]
|
36
|
+
|
37
|
+
result.encoding.should == Encoding::US_ASCII
|
38
|
+
result.should.be.valid_encoding
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
31
42
|
describe "with invalid UTF-8 input" do
|
32
43
|
before do
|
33
44
|
@plain_input = "foo\xe0".force_encoding('UTF-8')
|
@@ -397,4 +408,64 @@ describe Rack::UTF8Sanitizer do
|
|
397
408
|
end
|
398
409
|
end
|
399
410
|
end
|
411
|
+
|
412
|
+
describe "with custom strategy" do
|
413
|
+
def request_env
|
414
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
415
|
+
{
|
416
|
+
"REQUEST_METHOD" => "POST",
|
417
|
+
"CONTENT_TYPE" => "application/json",
|
418
|
+
"HTTP_USER_AGENT" => @plain_input,
|
419
|
+
"rack.input" => @rack_input,
|
420
|
+
}
|
421
|
+
end
|
422
|
+
|
423
|
+
def sanitize_data(request_env = request_env())
|
424
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
425
|
+
@response_env = @app.(request_env)
|
426
|
+
sanitized_input = @response_env['rack.input'].read
|
427
|
+
|
428
|
+
yield sanitized_input if block_given?
|
429
|
+
end
|
430
|
+
|
431
|
+
it "calls a default strategy (replace)" do
|
432
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env })
|
433
|
+
|
434
|
+
input = "foo=bla&quux=bar\xED"
|
435
|
+
@rack_input = StringIO.new input
|
436
|
+
|
437
|
+
env = request_env
|
438
|
+
sanitize_data(env) do |sanitized_input|
|
439
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
440
|
+
sanitized_input.should.be.valid_encoding
|
441
|
+
sanitized_input.should != input
|
442
|
+
end
|
443
|
+
end
|
444
|
+
|
445
|
+
it "calls the exception strategy" do
|
446
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: :exception)
|
447
|
+
|
448
|
+
input = "foo=bla&quux=bar\xED"
|
449
|
+
@rack_input = StringIO.new input
|
450
|
+
|
451
|
+
env = request_env
|
452
|
+
should.raise(EncodingError) { sanitize_data(env) }
|
453
|
+
end
|
454
|
+
|
455
|
+
it "accepts a proc as a strategy" do
|
456
|
+
truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
|
457
|
+
|
458
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
|
459
|
+
|
460
|
+
input = "foo=bla&quux=bar\xED"
|
461
|
+
@rack_input = StringIO.new input
|
462
|
+
|
463
|
+
env = request_env
|
464
|
+
sanitize_data(env) do |sanitized_input|
|
465
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
466
|
+
sanitized_input.should.be.valid_encoding
|
467
|
+
sanitized_input.should == 'replace'
|
468
|
+
end
|
469
|
+
end
|
470
|
+
end
|
400
471
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -91,7 +91,8 @@ files:
|
|
91
91
|
- rack-utf8_sanitizer.gemspec
|
92
92
|
- test/test_utf8_sanitizer.rb
|
93
93
|
homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
94
|
-
licenses:
|
94
|
+
licenses:
|
95
|
+
- MIT
|
95
96
|
metadata: {}
|
96
97
|
post_install_message:
|
97
98
|
rdoc_options: []
|
@@ -109,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
110
|
version: '0'
|
110
111
|
requirements: []
|
111
112
|
rubyforge_project:
|
112
|
-
rubygems_version: 2.
|
113
|
+
rubygems_version: 2.5.2.1
|
113
114
|
signing_key:
|
114
115
|
specification_version: 4
|
115
116
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|