rack-utf8_sanitizer 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -1
- data/README.md +21 -1
- data/lib/rack/utf8_sanitizer.rb +30 -7
- data/rack-utf8_sanitizer.gemspec +2 -1
- data/test/test_utf8_sanitizer.rb +71 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 69f0fb52847e155e4c7afd57565531b8bf506149
|
4
|
+
data.tar.gz: 0502a652822279ce9fd6b22e2db54126b9247b49
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '01215648794d9d0b47ef805f40116644b0afab096a00d88ebe0d84df4aed839d025641ba1a40c032b62d525ff9467ec89498e396b6c3e69483535cd71b43e997'
|
7
|
+
data.tar.gz: 95d5d7cfe3a6ec0564a566fe95aaed96f48605265a101483e98dd82a46eaf5a7aa1ff3a29106105fba5aa03f1ef6f108eb29c80ccd11a053ad6b91642e8a3003
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -55,6 +55,26 @@ To explicitly set sanitizable content types and override the defaults, use the `
|
|
55
55
|
|
56
56
|
config.middleware.insert 0, Rack::UTF8Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
|
57
57
|
|
58
|
+
### Strategies
|
59
|
+
|
60
|
+
There are two built-in strategies for handling invalid characters. The default strategy is `:replace`, which will cause any invalid characters to be replaced with the Unicode replacement character (�). The second built-in strategy is `:exception`, which will cause an `EncodingError` exception to be raised if invalid characters are found (the exception can then be handled by another Rack middleware).
|
61
|
+
|
62
|
+
An object that responds to `#call` and accepts the offending string with invalid characters as an argument can also be passed as a `:strategy`. This is how you can define custom strategies.
|
63
|
+
|
64
|
+
```ruby
|
65
|
+
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
|
66
|
+
```
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
replace_string = lambda do |_invalid|
|
70
|
+
Rails.logger.warn('Replacing invalid string')
|
71
|
+
|
72
|
+
'<Bad Encoding>'.freeze
|
73
|
+
end
|
74
|
+
|
75
|
+
config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
|
76
|
+
```
|
77
|
+
|
58
78
|
## Contributing
|
59
79
|
|
60
80
|
1. Fork it
|
@@ -63,4 +83,4 @@ To explicitly set sanitizable content types and override the defaults, use the `
|
|
63
83
|
4. Push to the branch (`git push origin my-new-feature`)
|
64
84
|
5. Create new Pull Request
|
65
85
|
|
66
|
-
To run the tests, run `rake spec` in the project directory.
|
86
|
+
To run the tests, run `rake spec` in the project directory.
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -11,6 +11,7 @@ module Rack
|
|
11
11
|
# options[:additional_content_types] Array
|
12
12
|
def initialize(app, options={})
|
13
13
|
@app = app
|
14
|
+
@strategy = build_strategy(options)
|
14
15
|
@sanitizable_content_types = options[:sanitizable_content_types]
|
15
16
|
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
16
17
|
end
|
@@ -19,6 +20,21 @@ module Rack
|
|
19
20
|
@app.call(sanitize(env))
|
20
21
|
end
|
21
22
|
|
23
|
+
DEFAULT_STRATEGIES = {
|
24
|
+
replace: lambda do |input|
|
25
|
+
input.
|
26
|
+
force_encoding(Encoding::ASCII_8BIT).
|
27
|
+
encode!(Encoding::UTF_8,
|
28
|
+
invalid: :replace,
|
29
|
+
undef: :replace)
|
30
|
+
end,
|
31
|
+
exception: lambda do |input|
|
32
|
+
input.
|
33
|
+
force_encoding(Encoding::ASCII_8BIT).
|
34
|
+
encode!(Encoding::UTF_8)
|
35
|
+
end
|
36
|
+
}.freeze
|
37
|
+
|
22
38
|
# http://rack.rubyforge.org/doc/SPEC.html
|
23
39
|
URI_FIELDS = %w(
|
24
40
|
SCRIPT_NAME
|
@@ -27,6 +43,7 @@ module Rack
|
|
27
43
|
HTTP_REFERER
|
28
44
|
ORIGINAL_FULLPATH
|
29
45
|
ORIGINAL_SCRIPT_NAME
|
46
|
+
SERVER_NAME
|
30
47
|
).map(&:freeze).freeze
|
31
48
|
|
32
49
|
SANITIZABLE_CONTENT_TYPES = %w(
|
@@ -40,13 +57,15 @@ module Rack
|
|
40
57
|
application/x-www-form-urlencoded
|
41
58
|
).map(&:freeze).freeze
|
42
59
|
|
60
|
+
HTTP_ = 'HTTP_'.freeze
|
61
|
+
|
43
62
|
def sanitize(env)
|
44
63
|
sanitize_rack_input(env)
|
45
64
|
env.each do |key, value|
|
46
65
|
if URI_FIELDS.include?(key)
|
47
66
|
env[key] = transfer_frozen(value,
|
48
67
|
sanitize_uri_encoded_string(value))
|
49
|
-
elsif key.to_s.start_with?(
|
68
|
+
elsif key.to_s.start_with?(HTTP_)
|
50
69
|
# Just sanitize the headers and leave them in UTF-8. There is
|
51
70
|
# no reason to have UTF-8 in headers, but if it's valid, let it be.
|
52
71
|
env[key] = transfer_frozen(value,
|
@@ -57,6 +76,14 @@ module Rack
|
|
57
76
|
|
58
77
|
protected
|
59
78
|
|
79
|
+
def build_strategy(options)
|
80
|
+
strategy = options.fetch(:strategy) { :replace }
|
81
|
+
|
82
|
+
return strategy unless DEFAULT_STRATEGIES.key?(strategy)
|
83
|
+
|
84
|
+
DEFAULT_STRATEGIES[strategy]
|
85
|
+
end
|
86
|
+
|
60
87
|
def sanitize_rack_input(env)
|
61
88
|
# https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
|
62
89
|
# Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
|
@@ -156,7 +183,7 @@ module Rack
|
|
156
183
|
# enough for our task.
|
157
184
|
def unescape_unreserved(input)
|
158
185
|
input.gsub(/%([a-f\d]{2})/i) do |encoded|
|
159
|
-
decoded =
|
186
|
+
decoded = $1.hex.chr
|
160
187
|
|
161
188
|
if decoded =~ UNRESERVED_OR_UTF8
|
162
189
|
decoded
|
@@ -187,11 +214,7 @@ module Rack
|
|
187
214
|
if input.valid_encoding?
|
188
215
|
input
|
189
216
|
else
|
190
|
-
input
|
191
|
-
force_encoding(Encoding::ASCII_8BIT).
|
192
|
-
encode!(Encoding::UTF_8,
|
193
|
-
invalid: :replace,
|
194
|
-
undef: :replace)
|
217
|
+
@strategy.call(input)
|
195
218
|
end
|
196
219
|
else
|
197
220
|
input
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.
|
5
|
+
gem.version = '1.4.0'
|
6
6
|
gem.authors = ["whitequark"]
|
7
|
+
gem.license = "MIT"
|
7
8
|
gem.email = ["whitequark@whitequark.org"]
|
8
9
|
gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
|
9
10
|
%{invalid UTF8 characters in request URI and headers.}
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -28,6 +28,17 @@ describe Rack::UTF8Sanitizer do
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
+
describe "with invalid host input" do
|
32
|
+
it "sanitizes host entity (SERVER_NAME)" do
|
33
|
+
host = "host\xD0".force_encoding('UTF-8')
|
34
|
+
env = @app.({ "SERVER_NAME" => host })
|
35
|
+
result = env["SERVER_NAME"]
|
36
|
+
|
37
|
+
result.encoding.should == Encoding::US_ASCII
|
38
|
+
result.should.be.valid_encoding
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
31
42
|
describe "with invalid UTF-8 input" do
|
32
43
|
before do
|
33
44
|
@plain_input = "foo\xe0".force_encoding('UTF-8')
|
@@ -397,4 +408,64 @@ describe Rack::UTF8Sanitizer do
|
|
397
408
|
end
|
398
409
|
end
|
399
410
|
end
|
411
|
+
|
412
|
+
describe "with custom strategy" do
|
413
|
+
def request_env
|
414
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
415
|
+
{
|
416
|
+
"REQUEST_METHOD" => "POST",
|
417
|
+
"CONTENT_TYPE" => "application/json",
|
418
|
+
"HTTP_USER_AGENT" => @plain_input,
|
419
|
+
"rack.input" => @rack_input,
|
420
|
+
}
|
421
|
+
end
|
422
|
+
|
423
|
+
def sanitize_data(request_env = request_env())
|
424
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
425
|
+
@response_env = @app.(request_env)
|
426
|
+
sanitized_input = @response_env['rack.input'].read
|
427
|
+
|
428
|
+
yield sanitized_input if block_given?
|
429
|
+
end
|
430
|
+
|
431
|
+
it "calls a default strategy (replace)" do
|
432
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env })
|
433
|
+
|
434
|
+
input = "foo=bla&quux=bar\xED"
|
435
|
+
@rack_input = StringIO.new input
|
436
|
+
|
437
|
+
env = request_env
|
438
|
+
sanitize_data(env) do |sanitized_input|
|
439
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
440
|
+
sanitized_input.should.be.valid_encoding
|
441
|
+
sanitized_input.should != input
|
442
|
+
end
|
443
|
+
end
|
444
|
+
|
445
|
+
it "calls the exception strategy" do
|
446
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: :exception)
|
447
|
+
|
448
|
+
input = "foo=bla&quux=bar\xED"
|
449
|
+
@rack_input = StringIO.new input
|
450
|
+
|
451
|
+
env = request_env
|
452
|
+
should.raise(EncodingError) { sanitize_data(env) }
|
453
|
+
end
|
454
|
+
|
455
|
+
it "accepts a proc as a strategy" do
|
456
|
+
truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
|
457
|
+
|
458
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
|
459
|
+
|
460
|
+
input = "foo=bla&quux=bar\xED"
|
461
|
+
@rack_input = StringIO.new input
|
462
|
+
|
463
|
+
env = request_env
|
464
|
+
sanitize_data(env) do |sanitized_input|
|
465
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
466
|
+
sanitized_input.should.be.valid_encoding
|
467
|
+
sanitized_input.should == 'replace'
|
468
|
+
end
|
469
|
+
end
|
470
|
+
end
|
400
471
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- whitequark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -91,7 +91,8 @@ files:
|
|
91
91
|
- rack-utf8_sanitizer.gemspec
|
92
92
|
- test/test_utf8_sanitizer.rb
|
93
93
|
homepage: http://github.com/whitequark/rack-utf8_sanitizer
|
94
|
-
licenses:
|
94
|
+
licenses:
|
95
|
+
- MIT
|
95
96
|
metadata: {}
|
96
97
|
post_install_message:
|
97
98
|
rdoc_options: []
|
@@ -109,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
110
|
version: '0'
|
110
111
|
requirements: []
|
111
112
|
rubyforge_project:
|
112
|
-
rubygems_version: 2.
|
113
|
+
rubygems_version: 2.5.2.1
|
113
114
|
signing_key:
|
114
115
|
specification_version: 4
|
115
116
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|