rack-utf8_sanitizer 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
@@ -0,0 +1,11 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 1.9.2
5
+ - 1.9.3
6
+ - 2.0.0
7
+ - jruby-19mode
8
+ - rbx-19mode
9
+
10
+ script:
11
+ - rake spec
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rack-utf8_sanitizer.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Peter Zotov
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,48 @@
1
+ # Rack::UTF8Sanitizer
2
+
3
+ Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters in request URI and headers.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'rack-utf8_sanitizer'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install rack-utf8_sanitizer
18
+
19
+ For Rails, add this to your `application.rb`:
20
+
21
+ config.middleware.insert_before "Rack::Lock", Rack::UTF8Sanitizer
22
+
23
+ For Rack apps, add this to `config.ru`:
24
+
25
+ use Rack::UTF8Sanitizer
26
+
27
+ ## Usage
28
+
29
+ Rack::UTF8Sanitizer divides all keys in the [Rack environment](http://rack.rubyforge.org/doc/SPEC.html) into two distinct groups: keys which contain raw data and keys which contain percent-encoded data. The fields which are treated as percent-encoded are: `SCRIPT_NAME`, `REQUEST_PATH`, `REQUEST_URI`, `PATH_INFO`, `QUERY_STRING`, `HTTP_REFERER`.
30
+
31
+ The generic sanitization algorithm is as follows:
32
+
33
+ 1. Force the encoding to UTF-8.
34
+ 2. If the result contains invalid characters:
35
+ 1. Force the encoding to ASCII-8BIT.
36
+ 2. Re-encode it as UTF-8, replacing invalid and undefined characters as U+FFFD.
37
+
38
+ For fields with "raw data", the algorithm is applied once and the (UTF-8 encoded) result is left in the environment.
39
+
40
+ For fields with "percent-encoded data", the algorithm is applied twice to catch both invalid characters appearing as-is and invalid characters appearing in the percent encoding. The percent-encoded result, which contains only ASCII characters and is tagged as US-ASCII, is left in the environment.
41
+
42
+ ## Contributing
43
+
44
+ 1. Fork it
45
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
46
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
47
+ 4. Push to the branch (`git push origin my-new-feature`)
48
+ 5. Create new Pull Request
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task :default => :spec
4
+
5
+ desc "Run tests"
6
+ task :spec do
7
+ sh 'bacon -a'
8
+ end
@@ -0,0 +1,78 @@
1
+ require 'uri'
2
+
3
module Rack
  # Rack middleware which makes sure that the request URI fields and the HTTP
  # headers of the environment only contain valid UTF-8 before the wrapped
  # application sees them.
  #
  # Header values (+HTTP_*+) are left as valid UTF-8 strings. URI-like values
  # (see URI_FIELDS) are left as percent-encoded, ASCII-only strings tagged
  # as US-ASCII.
  class UTF8Sanitizer
    # @param app [#call] the next Rack application in the chain.
    def initialize(app)
      @app = app
    end

    # Sanitizes +env+ in place and passes it on to the wrapped application.
    #
    # @param env [Hash] the Rack environment.
    # @return whatever the wrapped application returns.
    def call(env)
      @app.call(sanitize(env))
    end

    # Environment keys whose values are percent-encoded.
    # http://rack.rubyforge.org/doc/SPEC.html
    URI_FIELDS = %w(
      SCRIPT_NAME
      REQUEST_PATH REQUEST_URI PATH_INFO
      QUERY_STRING
      HTTP_REFERER
    ).freeze

    # Characters which must be percent-encoded when a URI value is written
    # back. This is the character set the obsolete URI.encode escaped, except
    # that "%" is left alone: any "%XX" still present at that point is an
    # escape which was deliberately kept and must not be encoded twice.
    UNSAFE = /[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]%]/

    # A single percent-encoded byte, e.g. "%E0".
    ESCAPED_BYTE = /%[a-fA-F\d]{2}/

    # Sanitizes every URI field and every HTTP header of +env+ in place.
    # Values which are not strings are left untouched.
    #
    # @param env [Hash] the Rack environment.
    # @return [Hash] the same +env+ object.
    def sanitize(env)
      # Only values of existing keys are replaced, which is safe to do while
      # iterating over the hash.
      env.each do |key, value|
        next unless value.is_a?(String)

        if URI_FIELDS.include?(key)
          env[key] = transfer_frozen(value, sanitize_uri_string(value))
        elsif key =~ /\AHTTP_/
          # Just sanitize the headers and leave them in UTF-8. There is
          # no reason to have UTF-8 in headers, but if it's valid, let it be.
          env[key] = transfer_frozen(value, sanitize_string(value))
        end
      end
    end

    protected

    # Sanitizes a percent-encoded value.
    #
    # Invalid UTF-8 may be present both as raw bytes and in percent-encoded
    # form. So the raw value is sanitized first, then the escapes which can
    # carry UTF-8 data are decoded, the result is sanitized again and
    # everything unsafe is encoded back.
    #
    # Escapes of ASCII bytes are never decoded: ASCII is always valid UTF-8,
    # and decoding reserved characters such as "%26" or "%2B" would change
    # the meaning of the URI.
    #
    # @param input [String]
    # @return [String] a new ASCII-only string tagged as US-ASCII.
    def sanitize_uri_string(input)
      raw = sanitize_string(input).force_encoding('ASCII-8BIT')

      escape_unsafe(sanitize_string(unescape_high_bytes(raw)))
    end

    # Decodes the "%XX" escapes which denote a byte >= 0x80 and leaves all
    # other escapes exactly as they were written.
    #
    # @param input [String] an ASCII-8BIT string.
    # @return [String] a new ASCII-8BIT string.
    def unescape_high_bytes(input)
      input.gsub(ESCAPED_BYTE) do |escape|
        byte = escape[1, 2].hex
        byte >= 0x80 ? [byte].pack('C') : escape
      end
    end

    # Percent-encodes every byte of every character matched by UNSAFE.
    #
    # @param input [String] a valid UTF-8 string.
    # @return [String] a new ASCII-only string tagged as US-ASCII.
    def escape_unsafe(input)
      escaped = input.gsub(UNSAFE) do |char|
        char.bytes.map { |byte| format('%%%02X', byte) }.join
      end

      escaped.force_encoding('US-ASCII')
    end

    # Returns a valid UTF-8 copy of +input+. If the bytes of +input+ are not
    # valid UTF-8, the string is treated as binary data and transcoded, which
    # replaces every byte >= 0x80 with U+FFFD. Anything which is not a String
    # is returned unchanged.
    #
    # @param input [Object]
    # @return [Object]
    def sanitize_string(input)
      return input unless input.is_a?(String)

      input = input.dup.force_encoding('UTF-8')
      return input if input.valid_encoding?

      input.
        force_encoding('ASCII-8BIT').
        encode!('UTF-8',
                invalid: :replace,
                undef:   :replace)
    end

    # Freezes +to+ when +from+ is frozen, so that sanitizing does not change
    # the frozen status of a value.
    #
    # @return [Object] +to+
    def transfer_frozen(from, to)
      if from.frozen?
        to.freeze
      else
        to
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for rack-utf8_sanitizer.
Gem::Specification.new do |gem|
  gem.name          = "rack-utf8_sanitizer"
  gem.version       = '1.0.0'
  gem.authors       = ["Peter Zotov"]
  gem.email         = ["whitequark@whitequark.org"]
  gem.description   = %{Rack::UTF8Sanitizer is a Rack middleware which cleans up } <<
                      %{invalid UTF8 characters in request URI and headers.}
  gem.summary       = gem.description
  gem.homepage      = "http://github.com/whitequark/rack-utf8_sanitizer"
  # The package ships an MIT license text; declare it so that the built gem
  # does not end up with an empty license list.
  gem.licenses      = ['MIT']

  # Package every file tracked by git; executables and test files are
  # selected from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.required_ruby_version = '>= 1.9'

  gem.add_dependency             "rack", '~> 1.0'

  gem.add_development_dependency "bacon"
  gem.add_development_dependency "bacon-colored_output"
end
@@ -0,0 +1,138 @@
1
+ # encoding:ascii-8bit
2
+
3
+ require 'bacon/colored_output'
4
+ require 'rack/utf8_sanitizer'
5
+
6
+ describe Rack::UTF8Sanitizer do
7
+ before do
8
+ @app = Rack::UTF8Sanitizer.new(-> env { env })
9
+ end
10
+
11
+ shared :does_sanitize_plain do
12
+ it "sanitizes plaintext entity (HTTP_USER_AGENT)" do
13
+ env = @app.({ "HTTP_USER_AGENT" => @plain_input })
14
+ result = env["HTTP_USER_AGENT"]
15
+
16
+ result.encoding.should == Encoding::UTF_8
17
+ result.should.be.valid_encoding
18
+ end
19
+ end
20
+
21
+ shared :does_sanitize_uri do
22
+ it "sanitizes URI-like entity (REQUEST_PATH)" do
23
+ env = @app.({ "REQUEST_PATH" => @uri_input })
24
+ result = env["REQUEST_PATH"]
25
+
26
+ result.encoding.should == Encoding::US_ASCII
27
+ result.should.be.valid_encoding
28
+ end
29
+ end
30
+
31
+ describe "with invalid UTF-8 input" do
32
+ before do
33
+ @plain_input = "foo\xe0".force_encoding('UTF-8')
34
+ @uri_input = "foo%E0".force_encoding('UTF-8')
35
+ end
36
+
37
+ behaves_like :does_sanitize_plain
38
+ behaves_like :does_sanitize_uri
39
+ end
40
+
41
+ describe "with invalid, incorrectly percent-encoded UTF-8 URI input" do
42
+ before do
43
+ @uri_input = "foo%E0\xe0".force_encoding('UTF-8')
44
+ end
45
+
46
+ behaves_like :does_sanitize_uri
47
+ end
48
+
49
+ describe "with invalid ASCII-8BIT input" do
50
+ before do
51
+ @plain_input = "foo\xe0"
52
+ @uri_input = "foo%E0"
53
+ end
54
+
55
+ behaves_like :does_sanitize_plain
56
+ behaves_like :does_sanitize_uri
57
+ end
58
+
59
+ describe "with invalid, incorrectly percent-encoded ASCII-8BIT URI input" do
60
+ before do
61
+ @uri_input = "foo%E0\xe0"
62
+ end
63
+
64
+ behaves_like :does_sanitize_uri
65
+ end
66
+
67
+ shared :identity_plain do
68
+ it "does not change plaintext entity (HTTP_USER_AGENT)" do
69
+ env = @app.({ "HTTP_USER_AGENT" => @plain_input })
70
+ result = env["HTTP_USER_AGENT"]
71
+
72
+ result.encoding.should == Encoding::UTF_8
73
+ result.should.be.valid_encoding
74
+ result.should == @plain_input
75
+ end
76
+ end
77
+
78
+ shared :identity_uri do
79
+ it "does not change URI-like entity (REQUEST_PATH)" do
80
+ env = @app.({ "REQUEST_PATH" => @uri_input })
81
+ result = env["REQUEST_PATH"]
82
+
83
+ result.encoding.should == Encoding::US_ASCII
84
+ result.should.be.valid_encoding
85
+ result.should == @uri_input
86
+ end
87
+ end
88
+
89
+ describe "with valid UTF-8 input" do
90
+ before do
91
+ @plain_input = "foo bar лол".force_encoding('UTF-8')
92
+ @uri_input = "foo+bar+%D0%BB%D0%BE%D0%BB".force_encoding('UTF-8')
93
+ end
94
+
95
+ behaves_like :identity_plain
96
+ behaves_like :identity_uri
97
+ end
98
+
99
describe "with valid, not percent-encoded UTF-8 URI input" do
  before do
    @uri_input = "foo+bar+лол".force_encoding('UTF-8')
  end

  it "does not change URI-like entity (REQUEST_PATH)" do
    env    = @app.({ "REQUEST_PATH" => @uri_input })
    result = env["REQUEST_PATH"]

    result.encoding.should == Encoding::US_ASCII
    result.should.be.valid_encoding
    # The expected value is spelled out instead of being computed with
    # URI.encode, which is obsolete and no longer exists in Ruby 3.0+.
    result.should == "foo+bar+%D0%BB%D0%BE%D0%BB"
  end
end
113
+
114
+ describe "with valid ASCII-8BIT input" do
115
+ before do
116
+ @plain_input = "bar baz"
117
+ @uri_input = "bar+baz"
118
+ end
119
+
120
+ behaves_like :identity_plain
121
+ behaves_like :identity_uri
122
+ end
123
+
124
+ describe "with frozen strings" do
125
+ before do
126
+ @plain_input = "bar baz".freeze
127
+ @uri_input = "bar+baz".freeze
128
+ end
129
+
130
+ it "preserves the frozen? status of input" do
131
+ env = @app.({ "HTTP_USER_AGENT" => @plain_input,
132
+ "REQUEST_PATH" => @uri_input })
133
+
134
+ env["HTTP_USER_AGENT"].should.be.frozen
135
+ env["REQUEST_PATH"].should.be.frozen
136
+ end
137
+ end
138
+ end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rack-utf8_sanitizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Peter Zotov
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-03-05 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rack
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: bacon
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: bacon-colored_output
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8
63
+ characters in request URI and headers.
64
+ email:
65
+ - whitequark@whitequark.org
66
+ executables: []
67
+ extensions: []
68
+ extra_rdoc_files: []
69
+ files:
70
+ - .gitignore
71
+ - .travis.yml
72
+ - Gemfile
73
+ - LICENSE.txt
74
+ - README.md
75
+ - Rakefile
76
+ - lib/rack/utf8_sanitizer.rb
77
+ - rack-utf8_sanitizer.gemspec
78
+ - test/test_utf8_sanitizer.rb
79
+ homepage: http://github.com/whitequark/rack-utf8_sanitizer
80
+ licenses: []
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '1.9'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ none: false
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubyforge_project:
99
+ rubygems_version: 1.8.23
100
+ signing_key:
101
+ specification_version: 3
102
+ summary: Rack::UTF8Sanitizer is a Rack middleware which cleans up invalid UTF8 characters
103
+ in request URI and headers.
104
+ test_files:
105
+ - test/test_utf8_sanitizer.rb
106
+ has_rdoc: