rack-utf8_sanitizer 1.3.2 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ef9e02d0857dfe41f06d2422727019029dc9d50a
4
- data.tar.gz: 6946827dd89a0a13cf0d8b4f0325c321d3c4db71
3
+ metadata.gz: 69f0fb52847e155e4c7afd57565531b8bf506149
4
+ data.tar.gz: 0502a652822279ce9fd6b22e2db54126b9247b49
5
5
  SHA512:
6
- metadata.gz: 2b3f4ad2011e98e573803d751419474ac4bb1cd402dc8dd1dec8063d34a043fbd1ff7866ccc0aa84d9e8456c685c7b2b8f4addae4108d41825d816e521aa129f
7
- data.tar.gz: 2aed120d37855fbacb7400d82fdd5da9273c8ea5eaefccc16f297006e3e8cd70f6cc88c7b4b2db296ef61f5424b14a111a36ba0da60047206adc47bf83399d7c
6
+ metadata.gz: '01215648794d9d0b47ef805f40116644b0afab096a00d88ebe0d84df4aed839d025641ba1a40c032b62d525ff9467ec89498e396b6c3e69483535cd71b43e997'
7
+ data.tar.gz: 95d5d7cfe3a6ec0564a566fe95aaed96f48605265a101483e98dd82a46eaf5a7aa1ff3a29106105fba5aa03f1ef6f108eb29c80ccd11a053ad6b91642e8a3003
@@ -3,11 +3,13 @@ language: ruby
3
3
  rvm:
4
4
  - 1.9.3
5
5
  - 2.0.0
6
- # 2.1, not 2.1.0 until fixed https://github.com/travis-ci/travis-ci/issues/2220
7
6
  - 2.1
8
7
  - 2.2
9
8
  - jruby
10
9
  - rbx-2
11
10
 
11
+ before_install:
12
+ - gem install bundler
13
+
12
14
  script:
13
15
  - rake spec
data/README.md CHANGED
@@ -55,6 +55,26 @@ To explicitly set sanitizable content types and override the defaults, use the `
55
55
 
56
56
  config.middleware.insert 0, Rack::UTF8Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
57
57
 
58
+ ### Strategies
59
+
60
+ There are two built-in strategies for handling invalid characters. The default strategy is `:replace`, which will cause any invalid characters to be replaced with the Unicode replacement character (�). The second built-in strategy is `:exception`, which will cause an `EncodingError` exception to be raised if invalid characters are found (the exception can then be handled by another Rack middleware).
61
+
62
+ An object that responds to `#call` and accepts the offending string with invalid characters as an argument can also be passed as a `:strategy`. This is how you can define custom strategies.
63
+
64
+ ```ruby
65
+ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
66
+ ```
67
+
68
+ ```ruby
69
+ replace_string = lambda do |_invalid|
70
+ Rails.logger.warn('Replacing invalid string')
71
+
72
+ '<Bad Encoding>'.freeze
73
+ end
74
+
75
+ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
76
+ ```
77
+
58
78
  ## Contributing
59
79
 
60
80
  1. Fork it
@@ -63,4 +83,4 @@ To explicitly set sanitizable content types and override the defaults, use the `
63
83
  4. Push to the branch (`git push origin my-new-feature`)
64
84
  5. Create new Pull Request
65
85
 
66
- To run the tests, run `rake spec` in the project directory.
86
+ To run the tests, run `rake spec` in the project directory.
@@ -11,6 +11,7 @@ module Rack
11
11
  # options[:additional_content_types] Array
12
12
  def initialize(app, options={})
13
13
  @app = app
14
+ @strategy = build_strategy(options)
14
15
  @sanitizable_content_types = options[:sanitizable_content_types]
15
16
  @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
16
17
  end
@@ -19,6 +20,21 @@ module Rack
19
20
  @app.call(sanitize(env))
20
21
  end
21
22
 
23
+ DEFAULT_STRATEGIES = {
24
+ replace: lambda do |input|
25
+ input.
26
+ force_encoding(Encoding::ASCII_8BIT).
27
+ encode!(Encoding::UTF_8,
28
+ invalid: :replace,
29
+ undef: :replace)
30
+ end,
31
+ exception: lambda do |input|
32
+ input.
33
+ force_encoding(Encoding::ASCII_8BIT).
34
+ encode!(Encoding::UTF_8)
35
+ end
36
+ }.freeze
37
+
22
38
  # http://rack.rubyforge.org/doc/SPEC.html
23
39
  URI_FIELDS = %w(
24
40
  SCRIPT_NAME
@@ -27,6 +43,7 @@ module Rack
27
43
  HTTP_REFERER
28
44
  ORIGINAL_FULLPATH
29
45
  ORIGINAL_SCRIPT_NAME
46
+ SERVER_NAME
30
47
  ).map(&:freeze).freeze
31
48
 
32
49
  SANITIZABLE_CONTENT_TYPES = %w(
@@ -40,13 +57,15 @@ module Rack
40
57
  application/x-www-form-urlencoded
41
58
  ).map(&:freeze).freeze
42
59
 
60
+ HTTP_ = 'HTTP_'.freeze
61
+
43
62
  def sanitize(env)
44
63
  sanitize_rack_input(env)
45
64
  env.each do |key, value|
46
65
  if URI_FIELDS.include?(key)
47
66
  env[key] = transfer_frozen(value,
48
67
  sanitize_uri_encoded_string(value))
49
- elsif key.to_s.start_with?("HTTP_")
68
+ elsif key.to_s.start_with?(HTTP_)
50
69
  # Just sanitize the headers and leave them in UTF-8. There is
51
70
  # no reason to have UTF-8 in headers, but if it's valid, let it be.
52
71
  env[key] = transfer_frozen(value,
@@ -57,6 +76,14 @@ module Rack
57
76
 
58
77
  protected
59
78
 
79
+ def build_strategy(options)
80
+ strategy = options.fetch(:strategy) { :replace }
81
+
82
+ return strategy unless DEFAULT_STRATEGIES.key?(strategy)
83
+
84
+ DEFAULT_STRATEGIES[strategy]
85
+ end
86
+
60
87
  def sanitize_rack_input(env)
61
88
  # https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
62
89
  # Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
@@ -156,7 +183,7 @@ module Rack
156
183
  # enough for our task.
157
184
  def unescape_unreserved(input)
158
185
  input.gsub(/%([a-f\d]{2})/i) do |encoded|
159
- decoded = [$1.hex].pack('C')
186
+ decoded = $1.hex.chr
160
187
 
161
188
  if decoded =~ UNRESERVED_OR_UTF8
162
189
  decoded
@@ -187,11 +214,7 @@ module Rack
187
214
  if input.valid_encoding?
188
215
  input
189
216
  else
190
- input.
191
- force_encoding(Encoding::ASCII_8BIT).
192
- encode!(Encoding::UTF_8,
193
- invalid: :replace,
194
- undef: :replace)
217
+ @strategy.call(input)
195
218
  end
196
219
  else
197
220
  input
@@ -2,8 +2,9 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.3.2'
5
+ gem.version = '1.4.0'
6
6
  gem.authors = ["whitequark"]
7
+ gem.license = "MIT"
7
8
  gem.email = ["whitequark@whitequark.org"]
8
9
  gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
9
10
  %{invalid UTF8 characters in request URI and headers.}
@@ -28,6 +28,17 @@ describe Rack::UTF8Sanitizer do
28
28
  end
29
29
  end
30
30
 
31
+ describe "with invalid host input" do
32
+ it "sanitizes host entity (SERVER_NAME)" do
33
+ host = "host\xD0".force_encoding('UTF-8')
34
+ env = @app.({ "SERVER_NAME" => host })
35
+ result = env["SERVER_NAME"]
36
+
37
+ result.encoding.should == Encoding::US_ASCII
38
+ result.should.be.valid_encoding
39
+ end
40
+ end
41
+
31
42
  describe "with invalid UTF-8 input" do
32
43
  before do
33
44
  @plain_input = "foo\xe0".force_encoding('UTF-8')
@@ -397,4 +408,64 @@ describe Rack::UTF8Sanitizer do
397
408
  end
398
409
  end
399
410
  end
411
+
412
+ describe "with custom strategy" do
413
+ def request_env
414
+ @plain_input = "foo bar лол".force_encoding('UTF-8')
415
+ {
416
+ "REQUEST_METHOD" => "POST",
417
+ "CONTENT_TYPE" => "application/json",
418
+ "HTTP_USER_AGENT" => @plain_input,
419
+ "rack.input" => @rack_input,
420
+ }
421
+ end
422
+
423
+ def sanitize_data(request_env = request_env())
424
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
425
+ @response_env = @app.(request_env)
426
+ sanitized_input = @response_env['rack.input'].read
427
+
428
+ yield sanitized_input if block_given?
429
+ end
430
+
431
+ it "calls a default strategy (replace)" do
432
+ @app = Rack::UTF8Sanitizer.new(-> env { env })
433
+
434
+ input = "foo=bla&quux=bar\xED"
435
+ @rack_input = StringIO.new input
436
+
437
+ env = request_env
438
+ sanitize_data(env) do |sanitized_input|
439
+ sanitized_input.encoding.should == Encoding::UTF_8
440
+ sanitized_input.should.be.valid_encoding
441
+ sanitized_input.should != input
442
+ end
443
+ end
444
+
445
+ it "calls the exception strategy" do
446
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: :exception)
447
+
448
+ input = "foo=bla&quux=bar\xED"
449
+ @rack_input = StringIO.new input
450
+
451
+ env = request_env
452
+ should.raise(EncodingError) { sanitize_data(env) }
453
+ end
454
+
455
+ it "accepts a proc as a strategy" do
456
+ truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
457
+
458
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
459
+
460
+ input = "foo=bla&quux=bar\xED"
461
+ @rack_input = StringIO.new input
462
+
463
+ env = request_env
464
+ sanitize_data(env) do |sanitized_input|
465
+ sanitized_input.encoding.should == Encoding::UTF_8
466
+ sanitized_input.should.be.valid_encoding
467
+ sanitized_input.should == 'replace'
468
+ end
469
+ end
470
+ end
400
471
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.2
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-23 00:00:00.000000000 Z
11
+ date: 2017-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -91,7 +91,8 @@ files:
91
91
  - rack-utf8_sanitizer.gemspec
92
92
  - test/test_utf8_sanitizer.rb
93
93
  homepage: http://github.com/whitequark/rack-utf8_sanitizer
94
- licenses: []
94
+ licenses:
95
+ - MIT
95
96
  metadata: {}
96
97
  post_install_message:
97
98
  rdoc_options: []
@@ -109,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
110
  version: '0'
110
111
  requirements: []
111
112
  rubyforge_project:
112
- rubygems_version: 2.4.5.1
113
+ rubygems_version: 2.5.2.1
113
114
  signing_key:
114
115
  specification_version: 4
115
116
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters