rack-utf8_sanitizer 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a21608edab6e71fde026d8c9b5fdf6afaeb831ab
4
- data.tar.gz: d3648a488dd74a3ea6357d39cc87b1d486af2c1c
3
+ metadata.gz: ef9e02d0857dfe41f06d2422727019029dc9d50a
4
+ data.tar.gz: 6946827dd89a0a13cf0d8b4f0325c321d3c4db71
5
5
  SHA512:
6
- metadata.gz: 8c8c7e485b0ca9584951b2689b090feaf4441eb96c1fb5ee3fd8f7c79bddcb4bf4d005b69e56a1fd0675b1e96de07dca21906ccca4b255e3893007278e819afe
7
- data.tar.gz: b8dcdd43af94a1277b978184ba8e5b26c782f269f646805839d102cf216f7e8d1e375a4104ace50357c23c3761afd00acaef441ac13bcc05ae1555e719e71079
6
+ metadata.gz: 2b3f4ad2011e98e573803d751419474ac4bb1cd402dc8dd1dec8063d34a043fbd1ff7866ccc0aa84d9e8456c685c7b2b8f4addae4108d41825d816e521aa129f
7
+ data.tar.gz: 2aed120d37855fbacb7400d82fdd5da9273c8ea5eaefccc16f297006e3e8cd70f6cc88c7b4b2db296ef61f5424b14a111a36ba0da60047206adc47bf83399d7c
@@ -1,11 +1,11 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - 1.9.2
5
4
  - 1.9.3
6
5
  - 2.0.0
7
6
  # 2.1, not 2.1.0 until fixed https://github.com/travis-ci/travis-ci/issues/2220
8
7
  - 2.1
8
+ - 2.2
9
9
  - jruby
10
10
  - rbx-2
11
11
 
data/README.md CHANGED
@@ -43,6 +43,18 @@ For fields with "raw data", the algorithm is applied once and the (UTF-8 encoded
43
43
 
44
44
  For fields with "percent-encoded data", the algorithm is applied twice to catch both invalid characters appearing as-is and invalid characters appearing in the percent encoding. The percent encoded, ASCII-8BIT encoded result is left in the environment.
45
45
 
46
+ ### Sanitizable content types
47
+
48
+ The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', and 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::UTF8Sanitizer.
49
+
50
+ To add sanitizable content types to the list of defaults, pass the `additional_content_types` option when using Rack::UTF8Sanitizer, e.g.
51
+
52
+ config.middleware.insert 0, Rack::UTF8Sanitizer, additional_content_types: ['application/vnd.api+json']
53
+
54
+ To explicitly set sanitizable content types and override the defaults, use the `sanitizable_content_types` option:
55
+
56
+ config.middleware.insert 0, Rack::UTF8Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
57
+
46
58
  ## Contributing
47
59
 
48
60
  1. Fork it
@@ -50,3 +62,5 @@ For fields with "percent-encoded data", the algorithm is applied twice to catch
50
62
  3. Commit your changes (`git commit -am 'Add some feature'`)
51
63
  4. Push to the branch (`git push origin my-new-feature`)
52
64
  5. Create new Pull Request
65
+
66
+ To run the tests, run `rake spec` in the project directory.
@@ -7,8 +7,12 @@ module Rack
7
7
  class UTF8Sanitizer
8
8
  StringIO = ::StringIO
9
9
 
10
- def initialize(app)
10
+ # options[:sanitizable_content_types] Array
11
+ # options[:additional_content_types] Array
12
+ def initialize(app, options={})
11
13
  @app = app
14
+ @sanitizable_content_types = options[:sanitizable_content_types]
15
+ @sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
12
16
  end
13
17
 
14
18
  def call(env)
@@ -23,18 +27,18 @@ module Rack
23
27
  HTTP_REFERER
24
28
  ORIGINAL_FULLPATH
25
29
  ORIGINAL_SCRIPT_NAME
26
- )
30
+ ).map(&:freeze).freeze
27
31
 
28
32
  SANITIZABLE_CONTENT_TYPES = %w(
29
33
  text/plain
30
34
  application/x-www-form-urlencoded
31
35
  application/json
32
36
  text/javascript
33
- )
37
+ ).map(&:freeze).freeze
34
38
 
35
39
  URI_ENCODED_CONTENT_TYPES = %w(
36
40
  application/x-www-form-urlencoded
37
- )
41
+ ).map(&:freeze).freeze
38
42
 
39
43
  def sanitize(env)
40
44
  sanitize_rack_input(env)
@@ -60,7 +64,7 @@ module Rack
60
64
  content_type = env['CONTENT_TYPE']
61
65
  content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
62
66
  content_type &&= content_type.downcase
63
- return unless SANITIZABLE_CONTENT_TYPES.any? {|type| content_type == type }
67
+ return unless @sanitizable_content_types.any? {|type| content_type == type }
64
68
  uri_encoded = URI_ENCODED_CONTENT_TYPES.any? {|type| content_type == type}
65
69
 
66
70
  if env["rack.input"]
@@ -108,7 +112,7 @@ module Rack
108
112
 
109
113
  def sanitize_io(io, uri_encoded = false)
110
114
  input = io.read
111
- sanitized_input = sanitize_string(input)
115
+ sanitized_input = sanitize_string(strip_byte_order_mark(input))
112
116
  if uri_encoded
113
117
  sanitized_input = sanitize_uri_encoded_string(sanitized_input).
114
118
  force_encoding(Encoding::UTF_8)
@@ -201,5 +205,13 @@ module Rack
201
205
  to
202
206
  end
203
207
  end
208
+
209
+ UTF8_BOM = "\xef\xbb\xbf".force_encoding(Encoding::BINARY).freeze
210
+ UTF8_BOM_SIZE = UTF8_BOM.bytesize
211
+
212
+ def strip_byte_order_mark(input)
213
+ return input unless input.start_with?(UTF8_BOM)
214
+ input.byteslice(UTF8_BOM_SIZE..-1)
215
+ end
204
216
  end
205
217
  end
@@ -2,8 +2,8 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "rack-utf8_sanitizer"
5
- gem.version = '1.3.1'
6
- gem.authors = ["Peter Zotov"]
5
+ gem.version = '1.3.2'
6
+ gem.authors = ["whitequark"]
7
7
  gem.email = ["whitequark@whitequark.org"]
8
8
  gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
9
9
  %{invalid UTF8 characters in request URI and headers.}
@@ -15,9 +15,9 @@ Gem::Specification.new do |gem|
15
15
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
16
16
  gem.require_paths = ["lib"]
17
17
 
18
- gem.required_ruby_version = '>= 1.9'
18
+ gem.required_ruby_version = '>= 1.9.3'
19
19
 
20
- gem.add_dependency "rack", '~> 1.0'
20
+ gem.add_dependency "rack", '>= 1.0', '< 3.0'
21
21
 
22
22
  gem.add_development_dependency "bacon"
23
23
  gem.add_development_dependency "bacon-colored_output"
@@ -179,7 +179,7 @@ describe Rack::UTF8Sanitizer do
179
179
  }
180
180
  end
181
181
 
182
- def sanitize_form_data(request_env = request_env)
182
+ def sanitize_form_data(request_env = request_env())
183
183
  @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
184
184
  @response_env = @app.(request_env)
185
185
  sanitized_input = @response_env['rack.input'].read
@@ -227,6 +227,17 @@ describe Rack::UTF8Sanitizer do
227
227
  end
228
228
  end
229
229
 
230
+ it "strip UTF-8 BOM from StringIO rack.input" do
231
+ input = %(\xef\xbb\xbf{"Hello": "World"})
232
+ @rack_input = StringIO.new input
233
+
234
+ sanitize_form_data(request_env.merge("CONTENT_TYPE" => "application/json")) do |sanitized_input|
235
+ sanitized_input.encoding.should == Encoding::UTF_8
236
+ sanitized_input.should.be.valid_encoding
237
+ sanitized_input.should == '{"Hello": "World"}'
238
+ end
239
+ end
240
+
230
241
  it "sanitizes StringIO rack.input with form encoded bad encoding" do
231
242
  input = "foo=bla&foo=baz&quux%ED=bar%ED"
232
243
  @rack_input = StringIO.new input
@@ -302,4 +313,88 @@ describe Rack::UTF8Sanitizer do
302
313
  end
303
314
  end
304
315
  end
316
+
317
+ describe "with custom content-type" do
318
+ def request_env
319
+ @plain_input = "foo bar лол".force_encoding('UTF-8')
320
+ {
321
+ "REQUEST_METHOD" => "POST",
322
+ "CONTENT_TYPE" => "application/vnd.api+json",
323
+ "HTTP_USER_AGENT" => @plain_input,
324
+ "rack.input" => @rack_input,
325
+ }
326
+ end
327
+
328
+ def sanitize_data(request_env = request_env())
329
+ @uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
330
+ @response_env = @app.(request_env)
331
+ sanitized_input = @response_env['rack.input'].read
332
+
333
+ yield sanitized_input if block_given?
334
+ end
335
+
336
+ it "does not sanitize custom content-type by default" do
337
+ input = "foo=bla&quux=bar\xED"
338
+ @rack_input = StringIO.new input
339
+
340
+ env = request_env
341
+ sanitize_data(env) do |sanitized_input|
342
+ sanitized_input.encoding.should == Encoding::ASCII_8BIT
343
+ sanitized_input.should.be.valid_encoding
344
+ sanitized_input.should == input
345
+ end
346
+ end
347
+
348
+ it "sanitizes custom content-type if additional_content_types given" do
349
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
350
+ input = "foo=bla&quux=bar\xED"
351
+ @rack_input = StringIO.new input
352
+
353
+ env = request_env
354
+ sanitize_data(env) do |sanitized_input|
355
+ sanitized_input.encoding.should == Encoding::UTF_8
356
+ sanitized_input.should.be.valid_encoding
357
+ sanitized_input.should != input
358
+ end
359
+ end
360
+
361
+ it "sanitizes default content-type if additional_content_types given" do
362
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
363
+ input = "foo=bla&quux=bar\xED"
364
+ @rack_input = StringIO.new input
365
+
366
+ env = request_env.update('CONTENT_TYPE' => 'application/json')
367
+ sanitize_data(env) do |sanitized_input|
368
+ sanitized_input.encoding.should == Encoding::UTF_8
369
+ sanitized_input.should.be.valid_encoding
370
+ sanitized_input.should != input
371
+ end
372
+ end
373
+
374
+ it "sanitizes custom content-type if sanitizable_content_types given" do
375
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
376
+ input = "foo=bla&quux=bar\xED"
377
+ @rack_input = StringIO.new input
378
+
379
+ env = request_env
380
+ sanitize_data(env) do |sanitized_input|
381
+ sanitized_input.encoding.should == Encoding::UTF_8
382
+ sanitized_input.should.be.valid_encoding
383
+ sanitized_input.should != input
384
+ end
385
+ end
386
+
387
+ it "does not sanitize default content-type if sanitizable_content_types does not include it" do
388
+ @app = Rack::UTF8Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
389
+ input = "foo=bla&quux=bar\xED"
390
+ @rack_input = StringIO.new input
391
+
392
+ env = request_env.update('CONTENT_TYPE' => 'application/json')
393
+ sanitize_data(env) do |sanitized_input|
394
+ sanitized_input.encoding.should == Encoding::ASCII_8BIT
395
+ sanitized_input.should.be.valid_encoding
396
+ sanitized_input.should == input
397
+ end
398
+ end
399
+ end
305
400
  end
metadata CHANGED
@@ -1,29 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rack-utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.2
5
5
  platform: ruby
6
6
  authors:
7
- - Peter Zotov
7
+ - whitequark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-10 00:00:00.000000000 Z
11
+ date: 2015-12-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.0'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '3.0'
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - "~>"
27
+ - - ">="
25
28
  - !ruby/object:Gem::Version
26
29
  version: '1.0'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '3.0'
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: bacon
29
35
  requirement: !ruby/object:Gem::Requirement
@@ -95,7 +101,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
95
101
  requirements:
96
102
  - - ">="
97
103
  - !ruby/object:Gem::Version
98
- version: '1.9'
104
+ version: 1.9.3
99
105
  required_rubygems_version: !ruby/object:Gem::Requirement
100
106
  requirements:
101
107
  - - ">="
@@ -103,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
103
109
  version: '0'
104
110
  requirements: []
105
111
  rubyforge_project:
106
- rubygems_version: 2.4.6
112
+ rubygems_version: 2.4.5.1
107
113
  signing_key:
108
114
  specification_version: 4
109
115
  summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters