rack-utf8_sanitizer 1.3.1 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a21608edab6e71fde026d8c9b5fdf6afaeb831ab
4
- data.tar.gz: d3648a488dd74a3ea6357d39cc87b1d486af2c1c
3
+ metadata.gz: ef9e02d0857dfe41f06d2422727019029dc9d50a
4
+ data.tar.gz: 6946827dd89a0a13cf0d8b4f0325c321d3c4db71
5
5
  SHA512:
6
- metadata.gz: 8c8c7e485b0ca9584951b2689b090feaf4441eb96c1fb5ee3fd8f7c79bddcb4bf4d005b69e56a1fd0675b1e96de07dca21906ccca4b255e3893007278e819afe
7
- data.tar.gz: b8dcdd43af94a1277b978184ba8e5b26c782f269f646805839d102cf216f7e8d1e375a4104ace50357c23c3761afd00acaef441ac13bcc05ae1555e719e71079
6
+ metadata.gz: 2b3f4ad2011e98e573803d751419474ac4bb1cd402dc8dd1dec8063d34a043fbd1ff7866ccc0aa84d9e8456c685c7b2b8f4addae4108d41825d816e521aa129f
7
+ data.tar.gz: 2aed120d37855fbacb7400d82fdd5da9273c8ea5eaefccc16f297006e3e8cd70f6cc88c7b4b2db296ef61f5424b14a111a36ba0da60047206adc47bf83399d7c
@@ -1,11 +1,11 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - 1.9.2
5
4
  - 1.9.3
6
5
  - 2.0.0
7
6
  # 2.1, not 2.1.0 until fixed https://github.com/travis-ci/travis-ci/issues/2220
8
7
  - 2.1
8
+ - 2.2
9
9
  - jruby
10
10
  - rbx-2
11
11
 
data/README.md CHANGED
@@ -43,6 +43,18 @@ For fields with "raw data", the algorithm is applied once and the (UTF-8 encoded
43
43
 
44
44
  For fields with "percent-encoded data", the algorithm is applied twice to catch both invalid characters appearing as-is and invalid characters appearing in the percent encoding. The percent encoded, ASCII-8BIT encoded result is left in the environment.
45
45
 
46
+ ### Sanitizable content types
47
+
48
+ The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::UTF8Sanitizer.
49
+
50
+ To add sanitizable content types to the list of defaults, pass the `additional_content_types` option when using Rack::UTF8Sanitizer, e.g.
51
+
52
+ config.middleware.insert 0, Rack::UTF8Sanitizer, additional_content_types: ['application/vnd.api+json']
53
+
54
+ To explicitly set sanitizable content types and override the defaults, use the `sanitizable_content_types` option:
55
+
56
+ config.middleware.insert 0, Rack::UTF8Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
57
+
46
58
  ## Contributing
47
59
 
48
60
  1. Fork it
@@ -50,3 +62,5 @@ For fields with "percent-encoded data", the algorithm is applied twice to catch
50
62
  3. Commit your changes (`git commit -am 'Add some feature'`)
51
63
  4. Push to the branch (`git push origin my-new-feature`)
52
64
  5. Create new Pull Request
65
+
66
+ To run the tests, run `rake spec` in the project directory.
@@ -7,8 +7,12 @@ module Rack
7
7
  class UTF8Sanitizer
8
8
  StringIO = ::StringIO
9
9
 
10
- def initialize(app)
10
+ # options[:sanitizable_content_types] Array
11
+ # options[:additional_content_types] Array
12
+ def initialize(app, options={})
11
13
  @app = app
14
+ @sanitizable_content_types = options[:sanitizable_content_types]
15
+ @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
12
16
  end
13
17
 
14
18
  def call(env)
@@ -23,18 +27,18 @@ module Rack
23
27
  HTTP_REFERER
24
28
  ORIGINAL_FULLPATH
25
29
  ORIGINAL_SCRIPT_NAME
26
- )
30
+ ).map(&:freeze).freeze
27
31
 
28
32
  SANITIZABLE_CONTENT_TYPES = %w(
29
33
  text/plain
30
34
  application/x-www-form-urlencoded
31
35
  application/json
32
36
  text/javascript
33
- )
37
+ ).map(&:freeze).freeze
34
38
 
35
39
  URI_ENCODED_CONTENT_TYPES = %w(
36
40
  application/x-www-form-urlencoded
37
- )
41
+ ).map(&:freeze).freeze
38
42
 
39
43
  def sanitize(env)
40
44
  sanitize_rack_input(env)
@@ -60,7 +64,7 @@ module Rack
60
64
  content_type = env['CONTENT_TYPE']
61
65
  content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
62
66
  content_type &&= content_type.downcase
63
- return unless SANITIZABLE_CONTENT_TYPES.any? {|type| content_type == type }
67
+ return unless @sanitizable_content_types.any? {|type| content_type == type }
64
68
  uri_encoded = URI_ENCODED_CONTENT_TYPES.any? {|type| content_type == type}
65
69
 
66
70
  if env["rack.input"]
@@ -108,7 +112,7 @@ module Rack
108
112
 
109
113
  def sanitize_io(io, uri_encoded = false)
110
114
  input = io.read
111
- sanitized_input = sanitize_string(input)
115
+ sanitized_input = sanitize_string(strip_byte_order_mark(input))
112
116
  if uri_encoded
113
117
  sanitized_input = sanitize_uri_encoded_string(sanitized_input).
114
118
  force_encoding(Encoding::UTF_8)
@@ -201,5 +205,13 @@ module Rack
201
205
  to
202
206
  end
203
207
  end
208
+
209
+ UTF8_BOM = "\xef\xbb\xbf".force_encoding(Encoding::BINARY).freeze
210
+ UTF8_BOM_SIZE = UTF8_BOM.bytesize
211
+
212
+ def strip_byte_order_mark(input)
213
+ return input unless input.start_with?(UTF8_BOM)
214
+ input.byteslice(UTF8_BOM_SIZE..-1)
215
+ end
204
216
  end
205
217
  end
@@ -2,8 +2,8 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.3.1'
6
- gem.authors = ["Peter Zotov"]
5
+ gem.version = '1.3.2'
6
+ gem.authors = ["whitequark"]
7
7
  gem.email = ["whitequark@whitequark.org"]
8
8
  gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
9
9
  %{invalid UTF8 characters in request URI and headers.}
@@ -15,9 +15,9 @@ Gem::Specification.new do |gem|
15
15
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
16
16
  gem.require_paths = ["lib"]
17
17
 
18
- gem.required_ruby_version = '>= 1.9'
18
+ gem.required_ruby_version = '>= 1.9.3'
19
19
 
20
- gem.add_dependency "rack", '~> 1.0'
20
+ gem.add_dependency "rack", '>= 1.0', '< 3.0'
21
21
 
22
22
  gem.add_development_dependency "bacon"
23
23
  gem.add_development_dependency "bacon-colored_output"
@@ -179,7 +179,7 @@ describe Rack::UTF8Sanitizer do
179
179
  }
180
180
  end
181
181
 
182
- def sanitize_form_data(request_env = request_env)
182
+ def sanitize_form_data(request_env = request_env())
183
183
  @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
184
184
  @response_env = @app.(request_env)
185
185
  sanitized_input = @response_env['rack.input'].read
@@ -227,6 +227,17 @@ describe Rack::UTF8Sanitizer do
227
227
  end
228
228
  end
229
229
 
230
+ it "strip UTF-8 BOM from StringIO rack.input" do
231
+ input = %(\xef\xbb\xbf{"Hello": "World"})
232
+ @rack_input = StringIO.new input
233
+
234
+ sanitize_form_data(request_env.merge("CONTENT_TYPE" => "application/json")) do |sanitized_input|
235
+ sanitized_input.encoding.should == Encoding::UTF_8
236
+ sanitized_input.should.be.valid_encoding
237
+ sanitized_input.should == '{"Hello": "World"}'
238
+ end
239
+ end
240
+
230
241
  it "sanitizes StringIO rack.input with form encoded bad encoding" do
231
242
  input = "foo=bla&foo=baz&quux%ED=bar%ED"
232
243
  @rack_input = StringIO.new input
@@ -302,4 +313,88 @@ describe Rack::UTF8Sanitizer do
302
313
  end
303
314
  end
304
315
  end
316
+
317
+ describe "with custom content-type" do
318
+ def request_env
319
+ @plain_input = "foo bar лол".force_encoding('UTF-8')
320
+ {
321
+ "REQUEST_METHOD" => "POST",
322
+ "CONTENT_TYPE" => "application/vnd.api+json",
323
+ "HTTP_USER_AGENT" => @plain_input,
324
+ "rack.input" => @rack_input,
325
+ }
326
+ end
327
+
328
+ def sanitize_data(request_env = request_env())
329
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
330
+ @response_env = @app.(request_env)
331
+ sanitized_input = @response_env['rack.input'].read
332
+
333
+ yield sanitized_input if block_given?
334
+ end
335
+
336
+ it "does not sanitize custom content-type by default" do
337
+ input = "foo=bla&quux=bar\xED"
338
+ @rack_input = StringIO.new input
339
+
340
+ env = request_env
341
+ sanitize_data(env) do |sanitized_input|
342
+ sanitized_input.encoding.should == Encoding::ASCII_8BIT
343
+ sanitized_input.should.be.valid_encoding
344
+ sanitized_input.should == input
345
+ end
346
+ end
347
+
348
+ it "sanitizes custom content-type if additional_content_types given" do
349
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
350
+ input = "foo=bla&quux=bar\xED"
351
+ @rack_input = StringIO.new input
352
+
353
+ env = request_env
354
+ sanitize_data(env) do |sanitized_input|
355
+ sanitized_input.encoding.should == Encoding::UTF_8
356
+ sanitized_input.should.be.valid_encoding
357
+ sanitized_input.should != input
358
+ end
359
+ end
360
+
361
+ it "sanitizes default content-type if additional_content_types given" do
362
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
363
+ input = "foo=bla&quux=bar\xED"
364
+ @rack_input = StringIO.new input
365
+
366
+ env = request_env.update('CONTENT_TYPE' => 'application/json')
367
+ sanitize_data(env) do |sanitized_input|
368
+ sanitized_input.encoding.should == Encoding::UTF_8
369
+ sanitized_input.should.be.valid_encoding
370
+ sanitized_input.should != input
371
+ end
372
+ end
373
+
374
+ it "sanitizes custom content-type if sanitizable_content_types given" do
375
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
376
+ input = "foo=bla&quux=bar\xED"
377
+ @rack_input = StringIO.new input
378
+
379
+ env = request_env
380
+ sanitize_data(env) do |sanitized_input|
381
+ sanitized_input.encoding.should == Encoding::UTF_8
382
+ sanitized_input.should.be.valid_encoding
383
+ sanitized_input.should != input
384
+ end
385
+ end
386
+
387
+ it "does not sanitize default content-type if sanitizable_content_types does not include it" do
388
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
389
+ input = "foo=bla&quux=bar\xED"
390
+ @rack_input = StringIO.new input
391
+
392
+ env = request_env.update('CONTENT_TYPE' => 'application/json')
393
+ sanitize_data(env) do |sanitized_input|
394
+ sanitized_input.encoding.should == Encoding::ASCII_8BIT
395
+ sanitized_input.should.be.valid_encoding
396
+ sanitized_input.should == input
397
+ end
398
+ end
399
+ end
305
400
  end
metadata CHANGED
@@ -1,29 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.2
5
5
  platform: ruby
6
6
  authors:
7
- - Peter Zotov
7
+ - whitequark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-10 00:00:00.000000000 Z
11
+ date: 2015-12-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.0'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '3.0'
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - "~>"
27
+ - - ">="
25
28
  - !ruby/object:Gem::Version
26
29
  version: '1.0'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '3.0'
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: bacon
29
35
  requirement: !ruby/object:Gem::Requirement
@@ -95,7 +101,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
95
101
  requirements:
96
102
  - - ">="
97
103
  - !ruby/object:Gem::Version
98
- version: '1.9'
104
+ version: 1.9.3
99
105
  required_rubygems_version: !ruby/object:Gem::Requirement
100
106
  requirements:
101
107
  - - ">="
@@ -103,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
103
109
  version: '0'
104
110
  requirements: []
105
111
  rubyforge_project:
106
- rubygems_version: 2.4.6
112
+ rubygems_version: 2.4.5.1
107
113
  signing_key:
108
114
  specification_version: 4
109
115
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters