rack-utf8_sanitizer 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +14 -0
- data/lib/rack/utf8_sanitizer.rb +18 -6
- data/rack-utf8_sanitizer.gemspec +4 -4
- data/test/test_utf8_sanitizer.rb +96 -1
- metadata +13 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef9e02d0857dfe41f06d2422727019029dc9d50a
|
4
|
+
data.tar.gz: 6946827dd89a0a13cf0d8b4f0325c321d3c4db71
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2b3f4ad2011e98e573803d751419474ac4bb1cd402dc8dd1dec8063d34a043fbd1ff7866ccc0aa84d9e8456c685c7b2b8f4addae4108d41825d816e521aa129f
|
7
|
+
data.tar.gz: 2aed120d37855fbacb7400d82fdd5da9273c8ea5eaefccc16f297006e3e8cd70f6cc88c7b4b2db296ef61f5424b14a111a36ba0da60047206adc47bf83399d7c
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -43,6 +43,18 @@ For fields with "raw data", the algorithm is applied once and the (UTF-8 encoded
|
|
43
43
|
|
44
44
|
For fields with "percent-encoded data", the algorithm is applied twice to catch both invalid characters appearing as-is and invalid characters appearing in the percent encoding. The percent encoded, ASCII-8BIT encoded result is left in the environment.
|
45
45
|
|
46
|
+
### Sanitizable content types
|
47
|
+
|
48
|
+
The default content types to be sanitized are 'text/plain', 'application/x-www-form-urlencoded', 'application/json', 'text/javascript'. You may wish to modify this, for example if your app accepts specific or custom media types in the CONTENT_TYPE header. If you want to change the sanitizable content types, you can pass options when using Rack::UTF8Sanitizer.
|
49
|
+
|
50
|
+
To add sanitizable content types to the list of defaults, pass the `additional_content_types` option when using Rack::UTF8Sanitizer, e.g.
|
51
|
+
|
52
|
+
config.middleware.insert 0, Rack::UTF8Sanitizer, additional_content_types: ['application/vnd.api+json']
|
53
|
+
|
54
|
+
To explicitly set sanitizable content types and override the defaults, use the `sanitizable_content_types` option:
|
55
|
+
|
56
|
+
config.middleware.insert 0, Rack::UTF8Sanitizer, sanitizable_content_types: ['application/vnd.api+json']
|
57
|
+
|
46
58
|
## Contributing
|
47
59
|
|
48
60
|
1. Fork it
|
@@ -50,3 +62,5 @@ For fields with "percent-encoded data", the algorithm is applied twice to catch
|
|
50
62
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
51
63
|
4. Push to the branch (`git push origin my-new-feature`)
|
52
64
|
5. Create new Pull Request
|
65
|
+
|
66
|
+
To run the tests, run `rake spec` in the project directory.
|
data/lib/rack/utf8_sanitizer.rb
CHANGED
@@ -7,8 +7,12 @@ module Rack
|
|
7
7
|
class UTF8Sanitizer
|
8
8
|
StringIO = ::StringIO
|
9
9
|
|
10
|
-
|
10
|
+
# options[:sanitizable_content_types] Array
|
11
|
+
# options[:additional_content_types] Array
|
12
|
+
def initialize(app, options={})
|
11
13
|
@app = app
|
14
|
+
@sanitizable_content_types = options[:sanitizable_content_types]
|
15
|
+
@sanitizable_content_types ||= SANITIZABLE_CONTENT_TYPES + (options[:additional_content_types] || [])
|
12
16
|
end
|
13
17
|
|
14
18
|
def call(env)
|
@@ -23,18 +27,18 @@ module Rack
|
|
23
27
|
HTTP_REFERER
|
24
28
|
ORIGINAL_FULLPATH
|
25
29
|
ORIGINAL_SCRIPT_NAME
|
26
|
-
)
|
30
|
+
).map(&:freeze).freeze
|
27
31
|
|
28
32
|
SANITIZABLE_CONTENT_TYPES = %w(
|
29
33
|
text/plain
|
30
34
|
application/x-www-form-urlencoded
|
31
35
|
application/json
|
32
36
|
text/javascript
|
33
|
-
)
|
37
|
+
).map(&:freeze).freeze
|
34
38
|
|
35
39
|
URI_ENCODED_CONTENT_TYPES = %w(
|
36
40
|
application/x-www-form-urlencoded
|
37
|
-
)
|
41
|
+
).map(&:freeze).freeze
|
38
42
|
|
39
43
|
def sanitize(env)
|
40
44
|
sanitize_rack_input(env)
|
@@ -60,7 +64,7 @@ module Rack
|
|
60
64
|
content_type = env['CONTENT_TYPE']
|
61
65
|
content_type &&= content_type.split(/\s*[;,]\s*/, 2).first
|
62
66
|
content_type &&= content_type.downcase
|
63
|
-
return unless
|
67
|
+
return unless @sanitizable_content_types.any? {|type| content_type == type }
|
64
68
|
uri_encoded = URI_ENCODED_CONTENT_TYPES.any? {|type| content_type == type}
|
65
69
|
|
66
70
|
if env["rack.input"]
|
@@ -108,7 +112,7 @@ module Rack
|
|
108
112
|
|
109
113
|
def sanitize_io(io, uri_encoded = false)
|
110
114
|
input = io.read
|
111
|
-
sanitized_input = sanitize_string(input)
|
115
|
+
sanitized_input = sanitize_string(strip_byte_order_mark(input))
|
112
116
|
if uri_encoded
|
113
117
|
sanitized_input = sanitize_uri_encoded_string(sanitized_input).
|
114
118
|
force_encoding(Encoding::UTF_8)
|
@@ -201,5 +205,13 @@ module Rack
|
|
201
205
|
to
|
202
206
|
end
|
203
207
|
end
|
208
|
+
|
209
|
+
UTF8_BOM = "\xef\xbb\xbf".force_encoding(Encoding::BINARY).freeze
|
210
|
+
UTF8_BOM_SIZE = UTF8_BOM.bytesize
|
211
|
+
|
212
|
+
def strip_byte_order_mark(input)
|
213
|
+
return input unless input.start_with?(UTF8_BOM)
|
214
|
+
input.byteslice(UTF8_BOM_SIZE..-1)
|
215
|
+
end
|
204
216
|
end
|
205
217
|
end
|
data/rack-utf8_sanitizer.gemspec
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "rack-utf8_sanitizer"
|
5
|
-
gem.version = '1.3.1'
|
6
|
-
gem.authors = ["
|
5
|
+
gem.version = '1.3.2'
|
6
|
+
gem.authors = ["whitequark"]
|
7
7
|
gem.email = ["whitequark@whitequark.org"]
|
8
8
|
gem.description = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
|
9
9
|
%{invalid UTF8 characters in request URI and headers.}
|
@@ -15,9 +15,9 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
|
18
|
-
gem.required_ruby_version = '>= 1.9'
|
18
|
+
gem.required_ruby_version = '>= 1.9.3'
|
19
19
|
|
20
|
-
gem.add_dependency "rack", '
|
20
|
+
gem.add_dependency "rack", '>= 1.0', '< 3.0'
|
21
21
|
|
22
22
|
gem.add_development_dependency "bacon"
|
23
23
|
gem.add_development_dependency "bacon-colored_output"
|
data/test/test_utf8_sanitizer.rb
CHANGED
@@ -179,7 +179,7 @@ describe Rack::UTF8Sanitizer do
|
|
179
179
|
}
|
180
180
|
end
|
181
181
|
|
182
|
-
def sanitize_form_data(request_env = request_env)
|
182
|
+
def sanitize_form_data(request_env = request_env())
|
183
183
|
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
184
184
|
@response_env = @app.(request_env)
|
185
185
|
sanitized_input = @response_env['rack.input'].read
|
@@ -227,6 +227,17 @@ describe Rack::UTF8Sanitizer do
|
|
227
227
|
end
|
228
228
|
end
|
229
229
|
|
230
|
+
it "strip UTF-8 BOM from StringIO rack.input" do
|
231
|
+
input = %(\xef\xbb\xbf{"Hello": "World"})
|
232
|
+
@rack_input = StringIO.new input
|
233
|
+
|
234
|
+
sanitize_form_data(request_env.merge("CONTENT_TYPE" => "application/json")) do |sanitized_input|
|
235
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
236
|
+
sanitized_input.should.be.valid_encoding
|
237
|
+
sanitized_input.should == '{"Hello": "World"}'
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
230
241
|
it "sanitizes StringIO rack.input with form encoded bad encoding" do
|
231
242
|
input = "foo=bla&foo=baz&quux%ED=bar%ED"
|
232
243
|
@rack_input = StringIO.new input
|
@@ -302,4 +313,88 @@ describe Rack::UTF8Sanitizer do
|
|
302
313
|
end
|
303
314
|
end
|
304
315
|
end
|
316
|
+
|
317
|
+
describe "with custom content-type" do
|
318
|
+
def request_env
|
319
|
+
@plain_input = "foo bar лол".force_encoding('UTF-8')
|
320
|
+
{
|
321
|
+
"REQUEST_METHOD" => "POST",
|
322
|
+
"CONTENT_TYPE" => "application/vnd.api+json",
|
323
|
+
"HTTP_USER_AGENT" => @plain_input,
|
324
|
+
"rack.input" => @rack_input,
|
325
|
+
}
|
326
|
+
end
|
327
|
+
|
328
|
+
def sanitize_data(request_env = request_env())
|
329
|
+
@uri_input = "http://bar/foo+%2F%3A+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
|
330
|
+
@response_env = @app.(request_env)
|
331
|
+
sanitized_input = @response_env['rack.input'].read
|
332
|
+
|
333
|
+
yield sanitized_input if block_given?
|
334
|
+
end
|
335
|
+
|
336
|
+
it "does not sanitize custom content-type by default" do
|
337
|
+
input = "foo=bla&quux=bar\xED"
|
338
|
+
@rack_input = StringIO.new input
|
339
|
+
|
340
|
+
env = request_env
|
341
|
+
sanitize_data(env) do |sanitized_input|
|
342
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
343
|
+
sanitized_input.should.be.valid_encoding
|
344
|
+
sanitized_input.should == input
|
345
|
+
end
|
346
|
+
end
|
347
|
+
|
348
|
+
it "sanitizes custom content-type if additional_content_types given" do
|
349
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
|
350
|
+
input = "foo=bla&quux=bar\xED"
|
351
|
+
@rack_input = StringIO.new input
|
352
|
+
|
353
|
+
env = request_env
|
354
|
+
sanitize_data(env) do |sanitized_input|
|
355
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
356
|
+
sanitized_input.should.be.valid_encoding
|
357
|
+
sanitized_input.should != input
|
358
|
+
end
|
359
|
+
end
|
360
|
+
|
361
|
+
it "sanitizes default content-type if additional_content_types given" do
|
362
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, additional_content_types: ["application/vnd.api+json"])
|
363
|
+
input = "foo=bla&quux=bar\xED"
|
364
|
+
@rack_input = StringIO.new input
|
365
|
+
|
366
|
+
env = request_env.update('CONTENT_TYPE' => 'application/json')
|
367
|
+
sanitize_data(env) do |sanitized_input|
|
368
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
369
|
+
sanitized_input.should.be.valid_encoding
|
370
|
+
sanitized_input.should != input
|
371
|
+
end
|
372
|
+
end
|
373
|
+
|
374
|
+
it "sanitizes custom content-type if sanitizable_content_types given" do
|
375
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
|
376
|
+
input = "foo=bla&quux=bar\xED"
|
377
|
+
@rack_input = StringIO.new input
|
378
|
+
|
379
|
+
env = request_env
|
380
|
+
sanitize_data(env) do |sanitized_input|
|
381
|
+
sanitized_input.encoding.should == Encoding::UTF_8
|
382
|
+
sanitized_input.should.be.valid_encoding
|
383
|
+
sanitized_input.should != input
|
384
|
+
end
|
385
|
+
end
|
386
|
+
|
387
|
+
it "does not sanitize default content-type if sanitizable_content_types does not include it" do
|
388
|
+
@app = Rack::UTF8Sanitizer.new(-> env { env }, sanitizable_content_types: ["application/vnd.api+json"])
|
389
|
+
input = "foo=bla&quux=bar\xED"
|
390
|
+
@rack_input = StringIO.new input
|
391
|
+
|
392
|
+
env = request_env.update('CONTENT_TYPE' => 'application/json')
|
393
|
+
sanitize_data(env) do |sanitized_input|
|
394
|
+
sanitized_input.encoding.should == Encoding::ASCII_8BIT
|
395
|
+
sanitized_input.should.be.valid_encoding
|
396
|
+
sanitized_input.should == input
|
397
|
+
end
|
398
|
+
end
|
399
|
+
end
|
305
400
|
end
|
metadata
CHANGED
@@ -1,29 +1,35 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rack-utf8_sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.1
|
4
|
+
version: 1.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
7
|
+
- whitequark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.0'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '3.0'
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
|
-
- - "
|
27
|
+
- - ">="
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '1.0'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '3.0'
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: bacon
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -95,7 +101,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
95
101
|
requirements:
|
96
102
|
- - ">="
|
97
103
|
- !ruby/object:Gem::Version
|
98
|
-
version: '1.9'
|
104
|
+
version: 1.9.3
|
99
105
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
106
|
requirements:
|
101
107
|
- - ">="
|
@@ -103,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
103
109
|
version: '0'
|
104
110
|
requirements: []
|
105
111
|
rubyforge_project:
|
106
|
-
rubygems_version: 2.4.
|
112
|
+
rubygems_version: 2.4.5.1
|
107
113
|
signing_key:
|
108
114
|
specification_version: 4
|
109
115
|
summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
|