rack-utf8_sanitizer 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ef9e02d0857dfe41f06d2422727019029dc9d50a
4
- data.tar.gz: 6946827dd89a0a13cf0d8b4f0325c321d3c4db71
3
+ metadata.gz: 69f0fb52847e155e4c7afd57565531b8bf506149
4
+ data.tar.gz: 0502a652822279ce9fd6b22e2db54126b9247b49
5
5
  SHA512:
6
- metadata.gz: 2b3f4ad2011e98e573803d751419474ac4bb1cd402dc8dd1dec8063d34a043fbd1ff7866ccc0aa84d9e8456c685c7b2b8f4addae4108d41825d816e521aa129f
7
- data.tar.gz: 2aed120d37855fbacb7400d82fdd5da9273c8ea5eaefccc16f297006e3e8cd70f6cc88c7b4b2db296ef61f5424b14a111a36ba0da60047206adc47bf83399d7c
6
+ metadata.gz: '01215648794d9d0b47ef805f40116644b0afab096a00d88ebe0d84df4aed839d025641ba1a40c032b62d525ff9467ec89498e396b6c3e69483535cd71b43e997'
7
+ data.tar.gz: 95d5d7cfe3a6ec0564a566fe95aaed96f48605265a101483e98dd82a46eaf5a7aa1ff3a29106105fba5aa03f1ef6f108eb29c80ccd11a053ad6b91642e8a3003
@@ -3,11 +3,13 @@ language: ruby
3
3
  rvm:
4
4
  - 1.9.3
5
5
  - 2.0.0
6
- # 2.1, not 2.1.0 until fixed https://github.com/travis-ci/travis-ci/issues/2220
7
6
  - 2.1
8
7
  - 2.2
9
8
  - jruby
10
9
  - rbx-2
11
10
 
11
+ before_install:
12
+ - gem install bundler
13
+
12
14
  script:
13
15
  - rake spec
data/README.md CHANGED
@@ -55,6 +55,26 @@ To explicitly set sanitizable content types and override the defaults, use the `
55
55
 
56
56
  config.middleware.insert 0, Rack::UTF8Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
57
57
 
58
+ ### Strategies
59
+
60
+ There are two built-in strategies for handling invalid characters. The default strategy is `:replace`, which will cause any invalid characters to be replaced with the Unicode replacement character (�). The second built-in strategy is `:exception`, which will cause an `EncodingError` exception to be raised if invalid characters are found (the exception can then be handled by another Rack middleware).
61
+
62
+ An object that responds to `#call` and accepts the offending string with invalid characters as an argument can also be passed as a `:strategy`. This is how you can define custom strategies.
63
+
64
+ ```ruby
65
+ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: :exception
66
+ ```
67
+
68
+ ```ruby
69
+ replace_string = lambda do |_invalid|
70
+ Rails.logger.warn('Replacing invalid string')
71
+
72
+ '<Bad Encoding>'.freeze
73
+ end
74
+
75
+ config.middleware.insert 0, Rack::UTF8Sanitizer, strategy: replace_string
76
+ ```
77
+
58
78
  ## Contributing
59
79
 
60
80
  1. Fork it
@@ -63,4 +83,4 @@ To explicitly set sanitizable content types and override the defaults, use the `
63
83
  4. Push to the branch (`git push origin my-new-feature`)
64
84
  5. Create new Pull Request
65
85
 
66
- To run the tests, run `rake spec` in the project directory.
86
+ To run the tests, run `rake spec` in the project directory.
@@ -11,6 +11,7 @@ module Rack
11
11
  # options[:additional_content_types] Array
12
12
  def initialize(app, options={})
13
13
  @app = app
14
+ @strategy = build_strategy(options)
14
15
  @sanitizable_content_types = options[:sanitizable_content_types]
15
16
  @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
16
17
  end
@@ -19,6 +20,21 @@ module Rack
19
20
  @app.call(sanitize(env))
20
21
  end
21
22
 
23
+ DEFAULT_STRATEGIES = {
24
+ replace: lambda do |input|
25
+ input.
26
+ force_encoding(Encoding::ASCII_8BIT).
27
+ encode!(Encoding::UTF_8,
28
+ invalid: :replace,
29
+ undef: :replace)
30
+ end,
31
+ exception: lambda do |input|
32
+ input.
33
+ force_encoding(Encoding::ASCII_8BIT).
34
+ encode!(Encoding::UTF_8)
35
+ end
36
+ }.freeze
37
+
22
38
  # http://rack.rubyforge.org/doc/SPEC.html
23
39
  URI_FIELDS = %w(
24
40
  SCRIPT_NAME
@@ -27,6 +43,7 @@ module Rack
27
43
  HTTP_REFERER
28
44
  ORIGINAL_FULLPATH
29
45
  ORIGINAL_SCRIPT_NAME
46
+ SERVER_NAME
30
47
  ).map(&:freeze).freeze
31
48
 
32
49
  SANITIZABLE_CONTENT_TYPES = %w(
@@ -40,13 +57,15 @@ module Rack
40
57
  application/x-www-form-urlencoded
41
58
  ).map(&:freeze).freeze
42
59
 
60
+ HTTP_ = 'HTTP_'.freeze
61
+
43
62
  def sanitize(env)
44
63
  sanitize_rack_input(env)
45
64
  env.each do |key, value|
46
65
  if URI_FIELDS.include?(key)
47
66
  env[key] = transfer_frozen(value,
48
67
  sanitize_uri_encoded_string(value))
49
- elsif key.to_s.start_with?("HTTP_")
68
+ elsif key.to_s.start_with?(HTTP_)
50
69
  # Just sanitize the headers and leave them in UTF-8. There is
51
70
  # no reason to have UTF-8 in headers, but if it's valid, let it be.
52
71
  env[key] = transfer_frozen(value,
@@ -57,6 +76,14 @@ module Rack
57
76
 
58
77
  protected
59
78
 
79
+ def build_strategy(options)
80
+ strategy = options.fetch(:strategy) { :replace }
81
+
82
+ return strategy unless DEFAULT_STRATEGIES.key?(strategy)
83
+
84
+ DEFAULT_STRATEGIES[strategy]
85
+ end
86
+
60
87
  def sanitize_rack_input(env)
61
88
  # https://github.com/rack/rack/blob/master/lib/rack/request.rb#L42
62
89
  # Logic borrowed from Rack::Request#media_type,#media_type_params,#content_charset
@@ -156,7 +183,7 @@ module Rack
156
183
  # enough for our task.
157
184
  def unescape_unreserved(input)
158
185
  input.gsub(/%([a-f\d]{2})/i) do |encoded|
159
- decoded = [$1.hex].pack('C')
186
+ decoded = $1.hex.chr
160
187
 
161
188
  if decoded =~ UNRESERVED_OR_UTF8
162
189
  decoded
@@ -187,11 +214,7 @@ module Rack
187
214
  if input.valid_encoding?
188
215
  input
189
216
  else
190
- input.
191
- force_encoding(Encoding::ASCII_8BIT).
192
- encode!(Encoding::UTF_8,
193
- invalid: :replace,
194
- undef: :replace)
217
+ @strategy.call(input)
195
218
  end
196
219
  else
197
220
  input
@@ -2,8 +2,9 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.3.2'
5
+ gem.version = '1.4.0'
6
6
  gem.authors = ["whitequark"]
7
+ gem.license = "MIT"
7
8
  gem.email = ["whitequark@whitequark.org"]
8
9
  gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
9
10
  %{invalid UTF8 characters in request URI and headers.}
@@ -28,6 +28,17 @@ describe Rack::UTF8Sanitizer do
28
28
  end
29
29
  end
30
30
 
31
+ describe "with invalid host input" do
32
+ it "sanitizes host entity (SERVER_NAME)" do
33
+ host = "host\xD0".force_encoding('UTF-8')
34
+ env = @app.({ "SERVER_NAME" => host })
35
+ result = env["SERVER_NAME"]
36
+
37
+ result.encoding.should == Encoding::US_ASCII
38
+ result.should.be.valid_encoding
39
+ end
40
+ end
41
+
31
42
  describe "with invalid UTF-8 input" do
32
43
  before do
33
44
  @plain_input = "foo\xe0".force_encoding('UTF-8')
@@ -397,4 +408,64 @@ describe Rack::UTF8Sanitizer do
397
408
  end
398
409
  end
399
410
  end
411
+
412
+ describe "with custom strategy" do
413
+ def request_env
414
+ @plain_input = "foo bar лол".force_encoding('UTF-8')
415
+ {
416
+ "REQUEST_METHOD" => "POST",
417
+ "CONTENT_TYPE" => "application/json",
418
+ "HTTP_USER_AGENT" => @plain_input,
419
+ "rack.input" => @rack_input,
420
+ }
421
+ end
422
+
423
+ def sanitize_data(request_env = request_env())
424
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
425
+ @response_env = @app.(request_env)
426
+ sanitized_input = @response_env['rack.input'].read
427
+
428
+ yield sanitized_input if block_given?
429
+ end
430
+
431
+ it "calls a default strategy (replace)" do
432
+ @app = Rack::UTF8Sanitizer.new(-> env { env })
433
+
434
+ input = "foo=bla&quux=bar\xED"
435
+ @rack_input = StringIO.new input
436
+
437
+ env = request_env
438
+ sanitize_data(env) do |sanitized_input|
439
+ sanitized_input.encoding.should == Encoding::UTF_8
440
+ sanitized_input.should.be.valid_encoding
441
+ sanitized_input.should != input
442
+ end
443
+ end
444
+
445
+ it "calls the exception strategy" do
446
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: :exception)
447
+
448
+ input = "foo=bla&quux=bar\xED"
449
+ @rack_input = StringIO.new input
450
+
451
+ env = request_env
452
+ should.raise(EncodingError) { sanitize_data(env) }
453
+ end
454
+
455
+ it "accepts a proc as a strategy" do
456
+ truncate = -> input { 'replace'.force_encoding(Encoding::UTF_8) }
457
+
458
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, strategy: truncate)
459
+
460
+ input = "foo=bla&quux=bar\xED"
461
+ @rack_input = StringIO.new input
462
+
463
+ env = request_env
464
+ sanitize_data(env) do |sanitized_input|
465
+ sanitized_input.encoding.should == Encoding::UTF_8
466
+ sanitized_input.should.be.valid_encoding
467
+ sanitized_input.should == 'replace'
468
+ end
469
+ end
470
+ end
400
471
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.2
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - whitequark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-23 00:00:00.000000000 Z
11
+ date: 2017-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -91,7 +91,8 @@ files:
91
91
  - rack-utf8_sanitizer.gemspec
92
92
  - test/test_utf8_sanitizer.rb
93
93
  homepage: http://github.com/whitequark/rack-utf8_sanitizer
94
- licenses: []
94
+ licenses:
95
+ - MIT
95
96
  metadata: {}
96
97
  post_install_message:
97
98
  rdoc_options: []
@@ -109,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
109
110
  version: '0'
110
111
  requirements: []
111
112
  rubyforge_project:
112
- rubygems_version: 2.4.5.1
113
+ rubygems_version: 2.5.2.1
113
114
  signing_key:
114
115
  specification_version: 4
115
116
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters