rack-utf8_sanitizer 1.3.1 → 1.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +14 -0
- data/lib/rack/utf8_sanitizer.rb +18 -6
- data/rack-utf8_sanitizer.gemspec +4 -4
- data/test/test_utf8_sanitizer.rb +96 -1
- metadata +13 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef9e02d0857dfe41f06d2422727019029dc9d50a
|
4
|
+
data.tar.gz: 6946827dd89a0a13cf0d8b4f0325c321d3c4db71
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2b3f4ad2011e98e573803d751419474ac4bb1cd402dc8dd1dec8063d34a043fbd1ff7866ccc0aa84d9e8456c685c7b2b8f4addae4108d41825d816e521aa129f
|
7
|
+
data.tar.gz: 2aed120d37855fbacb7400d82fdd5da9273c8ea5eaefccc16f297006e3e8cd70f6cc88c7b4b2db296ef61f5424b14a111a36ba0da60047206adc47bf83399d7c
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -43,6 +43,18 @@ For fields with "raw data", the algorithm is applied once and the (UTF-8 encoded
|
|
43
43
|
|
44
44
|
For fields with "percent-encoded data", the algorithm is applied twice to catch both invalid characters appearing as-is and invalid characters appearing in the percent encoding. The percent encoded, ASCII-8BIT encoded result is left in the environment.
|
45
45
|
|
46
|
+
### Sanitizable content types
|
47
|
+
|
48
|
+
The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::UTF8Sanitizer.
|
49
|
+
|
50
|
+
To add sanitizable content types to the list of defaults, pass the `additional_content_types` option when using Rack::UTF8Sanitizer, e.g.
|
51
|
+
|
52
|
+
config.middleware.insert 0, Rack::UTF8Sanitizer, additional_content_types: ['application/vnd.api+json']
|
53
|
+
|
54
|
+
To explicitly set sanitizable content types and override the defaults, use the `sanitizable_content_types` option:
|
55
|
+
|
56
|
+
config.middleware.insert 0, Rack::UTF8Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
|
57
|
+
|
46
58
|
## Contributing
|
47
59
|
|
48
60
|
1. Fork it
|
@@ -50,3 +62,5 @@ For fields with "percent-encoded data", the algorithm is applied twice to catch
|
|
50
62
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
51
63
|
4. Push to the branch (`git push origin my-new-feature`)
|
52
64
|
5. Create new Pull Request
|
65
|
+
|
66
|
+
To run the tests, run `rake spec` in the project directory.
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -7,8 +7,12 @@ module Rack
|
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
9
|
|
10
|
-
|
10
|
+
# options[:sanitizable_content_types] Array
|
11
|
+
# options[:additional_content_types] Array
|
12
|
+
def initialize(app, options={})
|
11
13
|
@app = app
|
14
|
+
@sanitizable_content_types = options[:sanitizable_content_types]
|
15
|
+
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
12
16
|
end
|
13
17
|
|
14
18
|
def call(env)
|
@@ -23,18 +27,18 @@ module Rack
|
|
23
27
|
HTTP_REFERER
|
24
28
|
ORIGINAL_FULLPATH
|
25
29
|
ORIGINAL_SCRIPT_NAME
|
26
|
-
)
|
30
|
+
).map(&:freeze).freeze
|
27
31
|
|
28
32
|
SANITIZABLE_CONTENT_TYPES = %w(
|
29
33
|
text/plain
|
30
34
|
application/x-www-form-urlencoded
|
31
35
|
application/json
|
32
36
|
text/javascript
|
33
|
-
)
|
37
|
+
).map(&:freeze).freeze
|
34
38
|
|
35
39
|
URI_ENCODED_CONTENT_TYPES = %w(
|
36
40
|
application/x-www-form-urlencoded
|
37
|
-
)
|
41
|
+
).map(&:freeze).freeze
|
38
42
|
|
39
43
|
def sanitize(env)
|
40
44
|
sanitize_rack_input(env)
|
@@ -60,7 +64,7 @@ module Rack
|
|
60
64
|
content_type = env['CONTENT_TYPE']
|
61
65
|
content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
|
62
66
|
content_type &&= content_type.downcase
|
63
|
-
return unless
|
67
|
+
return unless @sanitizable_content_types.any? {|type| content_type == type }
|
64
68
|
uri_encoded = URI_ENCODED_CONTENT_TYPES.any? {|type| content_type == type}
|
65
69
|
|
66
70
|
if env["rack.input"]
|
@@ -108,7 +112,7 @@ module Rack
|
|
108
112
|
|
109
113
|
def sanitize_io(io, uri_encoded = false)
|
110
114
|
input = io.read
|
111
|
-
sanitized_input = sanitize_string(input)
|
115
|
+
sanitized_input = sanitize_string(strip_byte_order_mark(input))
|
112
116
|
if uri_encoded
|
113
117
|
sanitized_input = sanitize_uri_encoded_string(sanitized_input).
|
114
118
|
force_encoding(Encoding::UTF_8)
|
@@ -201,5 +205,13 @@ module Rack
|
|
201
205
|
to
|
202
206
|
end
|
203
207
|
end
|
208
|
+
|
209
|
+
UTF8_BOM = "\xef\xbb\xbf".force_encoding(Encoding::BINARY).freeze
|
210
|
+
UTF8_BOM_SIZE = UTF8_BOM.bytesize
|
211
|
+
|
212
|
+
def strip_byte_order_mark(input)
|
213
|
+
return input unless input.start_with?(UTF8_BOM)
|
214
|
+
input.byteslice(UTF8_BOM_SIZE..-1)
|
215
|
+
end
|
204
216
|
end
|
205
217
|
end
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.3.
|
6
|
-
gem.authors = ["
|
5
|
+
gem.version = '1.3.2'
|
6
|
+
gem.authors = ["whitequark"]
|
7
7
|
gem.email = ["whitequark@whitequark.org"]
|
8
8
|
gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
|
9
9
|
%{invalid UTF8 characters in request URI and headers.}
|
@@ -15,9 +15,9 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
|
18
|
-
gem.required_ruby_version = '>= 1.9'
|
18
|
+
gem.required_ruby_version = '>= 1.9.3'
|
19
19
|
|
20
|
-
gem.add_dependency "rack", '
|
20
|
+
gem.add_dependency "rack", '>= 1.0', '< 3.0'
|
21
21
|
|
22
22
|
gem.add_development_dependency "bacon"
|
23
23
|
gem.add_development_dependency "bacon-colored_output"
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -179,7 +179,7 @@ describe Rack::UTF8Sanitizer do
|
|
179
179
|
}
|
180
180
|
end
|
181
181
|
|
182
|
-
def sanitize_form_data(request_env = request_env)
|
182
|
+
def sanitize_form_data(request_env = request_env())
|
183
183
|
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
184
184
|
@response_env = @app.(request_env)
|
185
185
|
sanitized_input = @response_env['rack.input'].read
|
@@ -227,6 +227,17 @@ describe Rack::UTF8Sanitizer do
|
|
227
227
|
end
|
228
228
|
end
|
229
229
|
|
230
|
+
it "strip UTF-8 BOM from StringIO rack.input" do
|
231
|
+
input = %(\xef\xbb\xbf{"Hello": "World"})
|
232
|
+
@rack_input = StringIO.new input
|
233
|
+
|
234
|
+
sanitize_form_data(request_env.merge("CONTENT_TYPE" => "application/json")) do |sanitized_input|
|
235
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
236
|
+
sanitized_input.should.be.valid_encoding
|
237
|
+
sanitized_input.should == '{"Hello": "World"}'
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
230
241
|
it "sanitizes StringIO rack.input with form encoded bad encoding" do
|
231
242
|
input = "foo=bla&foo=baz&quux%ED=bar%ED"
|
232
243
|
@rack_input = StringIO.new input
|
@@ -302,4 +313,88 @@ describe Rack::UTF8Sanitizer do
|
|
302
313
|
end
|
303
314
|
end
|
304
315
|
end
|
316
|
+
|
317
|
+
describe "with custom content-type" do
|
318
|
+
def request_env
|
319
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
320
|
+
{
|
321
|
+
"REQUEST_METHOD" => "POST",
|
322
|
+
"CONTENT_TYPE" => "application/vnd.api+json",
|
323
|
+
"HTTP_USER_AGENT" => @plain_input,
|
324
|
+
"rack.input" => @rack_input,
|
325
|
+
}
|
326
|
+
end
|
327
|
+
|
328
|
+
def sanitize_data(request_env = request_env())
|
329
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
330
|
+
@response_env = @app.(request_env)
|
331
|
+
sanitized_input = @response_env['rack.input'].read
|
332
|
+
|
333
|
+
yield sanitized_input if block_given?
|
334
|
+
end
|
335
|
+
|
336
|
+
it "does not sanitize custom content-type by default" do
|
337
|
+
input = "foo=bla&quux=bar\xED"
|
338
|
+
@rack_input = StringIO.new input
|
339
|
+
|
340
|
+
env = request_env
|
341
|
+
sanitize_data(env) do |sanitized_input|
|
342
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
343
|
+
sanitized_input.should.be.valid_encoding
|
344
|
+
sanitized_input.should == input
|
345
|
+
end
|
346
|
+
end
|
347
|
+
|
348
|
+
it "sanitizes custom content-type if additional_content_types given" do
|
349
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
|
350
|
+
input = "foo=bla&quux=bar\xED"
|
351
|
+
@rack_input = StringIO.new input
|
352
|
+
|
353
|
+
env = request_env
|
354
|
+
sanitize_data(env) do |sanitized_input|
|
355
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
356
|
+
sanitized_input.should.be.valid_encoding
|
357
|
+
sanitized_input.should != input
|
358
|
+
end
|
359
|
+
end
|
360
|
+
|
361
|
+
it "sanitizes default content-type if additional_content_types given" do
|
362
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
|
363
|
+
input = "foo=bla&quux=bar\xED"
|
364
|
+
@rack_input = StringIO.new input
|
365
|
+
|
366
|
+
env = request_env.update('CONTENT_TYPE' => 'application/json')
|
367
|
+
sanitize_data(env) do |sanitized_input|
|
368
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
369
|
+
sanitized_input.should.be.valid_encoding
|
370
|
+
sanitized_input.should != input
|
371
|
+
end
|
372
|
+
end
|
373
|
+
|
374
|
+
it "sanitizes custom content-type if sanitizable_content_types given" do
|
375
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
|
376
|
+
input = "foo=bla&quux=bar\xED"
|
377
|
+
@rack_input = StringIO.new input
|
378
|
+
|
379
|
+
env = request_env
|
380
|
+
sanitize_data(env) do |sanitized_input|
|
381
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
382
|
+
sanitized_input.should.be.valid_encoding
|
383
|
+
sanitized_input.should != input
|
384
|
+
end
|
385
|
+
end
|
386
|
+
|
387
|
+
it "does not sanitize default content-type if sanitizable_content_types does not include it" do
|
388
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
|
389
|
+
input = "foo=bla&quux=bar\xED"
|
390
|
+
@rack_input = StringIO.new input
|
391
|
+
|
392
|
+
env = request_env.update('CONTENT_TYPE' => 'application/json')
|
393
|
+
sanitize_data(env) do |sanitized_input|
|
394
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
395
|
+
sanitized_input.should.be.valid_encoding
|
396
|
+
sanitized_input.should == input
|
397
|
+
end
|
398
|
+
end
|
399
|
+
end
|
305
400
|
end
|
metadata
CHANGED
@@ -1,29 +1,35 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.1
|
4
|
+
version: 1.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
7
|
+
- whitequark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.0'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '3.0'
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
|
-
- - "
|
27
|
+
- - ">="
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '1.0'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '3.0'
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: bacon
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -95,7 +101,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
95
101
|
requirements:
|
96
102
|
- - ">="
|
97
103
|
- !ruby/object:Gem::Version
|
98
|
-
version:
|
104
|
+
version: 1.9.3
|
99
105
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
106
|
requirements:
|
101
107
|
- - ">="
|
@@ -103,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
103
109
|
version: '0'
|
104
110
|
requirements: []
|
105
111
|
rubyforge_project:
|
106
|
-
rubygems_version: 2.4.
|
112
|
+
rubygems_version: 2.4.5.1
|
107
113
|
signing_key:
|
108
114
|
specification_version: 4
|
109
115
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|