utf8-cleaner 0.0.6 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b6e265c0aefe9f9c6fa6082b86f21c81a92400b6
4
- data.tar.gz: 12dd12dceef125122d2877dbc9e9e597055ddfac
3
+ metadata.gz: 34470c6433b9f2fc055514464d9c4794881ed579
4
+ data.tar.gz: 5135b3b7e6ea404b3dcc57d07ea9aee798a528d4
5
5
  SHA512:
6
- metadata.gz: b794d47e0c9460ef5ec9eadc9340f705a491b0c48370efb82310d2bf21ff7713d655a4fdbb312c4788913879476be5a4c8823e8f494d8a2fe7376ecc4dd58a6c
7
- data.tar.gz: 0bf1fb1a6d23600a3f5ff4ae7d1f62c7fded5a0d7d0233ebc85103b12c51a849a7ef83708a21568a28bd005e8ea1199ffff6b0d05b0c186a8889624e190f5e19
6
+ metadata.gz: 35b02095acfd32a1a5c1a380ed017e130c25dae095fc2f1bee135a1451f1394f6018eba74f7d4567b3158fbf8978d79ad968fd07a947fde054bf6d0e38766e68
7
+ data.tar.gz: 9fef626eb986add0193d716339e6df9e7faf22cc8767c7257b2514265ccd3f2294360df45f6c04afd4977f88ced34b4e51f9160231c12e041e0e586ec35bf64d
data/.travis.yml CHANGED
@@ -1,4 +1,5 @@
1
1
  language: ruby
2
2
  rvm:
3
3
  - 1.9.3
4
- - 2.0.0
4
+ - 2.0.0
5
+ - 2.1.1
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # UTF8Cleaner
2
2
 
3
- [<img src="https://secure.travis-ci.org/singlebrook/utf8-cleaner.png" />](http://travis-ci.org/singlebrook/utf8-cleaner)
3
+ [![Build Status](https://secure.travis-ci.org/singlebrook/utf8-cleaner.png?branch=master)](http://travis-ci.org/singlebrook/utf8-cleaner)
4
4
 
5
5
  Removes invalid UTF-8 characters from the environment so that your app doesn't choke
6
6
  on them. This prevents errors like "invalid byte sequence in UTF-8".
@@ -21,7 +21,7 @@ Or install it yourself as:
21
21
 
22
22
  If you're not running Rails, you'll have to add the middleware to your config.ru:
23
23
 
24
- require 'uf8-cleaner'
24
+ require 'utf8-cleaner'
25
25
  use UTF8Cleaner::Middleware
26
26
 
27
27
  ## Usage
@@ -40,4 +40,6 @@ There's nothing to "use". It just works!
40
40
 
41
41
  Original middleware author: @phoet - https://gist.github.com/phoet/1336754
42
42
 
43
- Ruby 1.9.3 compatibility: @pithyless - https://gist.github.com/pithyless/3639014
43
+ * Ruby 1.9.3 compatibility: @pithyless - https://gist.github.com/pithyless/3639014
44
+ * Code review and cleanup: @nextmat
45
+ * POST body sanitization: @salrepe
@@ -20,22 +20,37 @@ module UTF8Cleaner
20
20
 
21
21
  private
22
22
 
23
- def is_valid_utf8(string)
24
- utf8 = string.dup.force_encoding('UTF-8')
25
- string == utf8 && utf8.valid_encoding?
26
- rescue EncodingError
27
- false
23
+ def sanitize_env(env)
24
+ sanitize_env_keys(env)
25
+ sanitize_env_rack_input(env)
26
+ env
28
27
  end
29
28
 
30
- def sanitize_env(env)
29
+ def sanitize_env_keys(env)
31
30
  SANITIZE_ENV_KEYS.each do |key|
32
31
  next unless value = env[key]
32
+ cleaned_value = cleaned_uri_string(value)
33
+ env[key] = cleaned_value if cleaned_value
34
+ end
35
+ end
33
36
 
34
- if value.include?('%')
35
- env[key] = URIString.new(value).cleaned
36
- end
37
+ def sanitize_env_rack_input(env)
38
+ case env['CONTENT_TYPE']
39
+ when 'application/x-www-form-urlencoded'
40
+ cleaned_value = cleaned_uri_string(env['rack.input'].read)
41
+ env['rack.input'] = StringIO.new(cleaned_value) if cleaned_value
42
+ env['rack.input'].rewind
43
+ when 'multipart/form-data'
44
+ # Don't process the data since it may contain binary content
45
+ else
46
+ # Unknown content type. Leave it alone
47
+ end
48
+ end
49
+
50
+ def cleaned_uri_string(value)
51
+ if value.include?('%')
52
+ URIString.new(value).cleaned
37
53
  end
38
- env
39
54
  end
40
55
  end
41
56
  end
@@ -14,10 +14,6 @@ module UTF8Cleaner
14
14
  end
15
15
  end
16
16
 
17
- def encoded?
18
- data.include?('%')
19
- end
20
-
21
17
  def valid?
22
18
  valid_uri_encoded_utf8(data)
23
19
  end
@@ -1,3 +1,3 @@
1
1
  module UTF8Cleaner
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.9"
3
3
  end
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require 'rack/lint'
2
3
 
3
4
  describe UTF8Cleaner::Middleware do
4
5
  let :env do
@@ -6,7 +7,9 @@ describe UTF8Cleaner::Middleware do
6
7
  'PATH_INFO' => 'foo/%FFbar%2e%2fbaz%26%3B',
7
8
  'QUERY_STRING' => 'foo=bar%FF',
8
9
  'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
9
- 'REQUEST_URI' => '%C3%89%E2%9C%93'
10
+ 'REQUEST_URI' => '%C3%89%E2%9C%93',
11
+ 'rack.input' => StringIO.new("foo=%FFbar%F8"),
12
+ 'CONTENT_TYPE' => 'application/x-www-form-urlencoded'
10
13
  }
11
14
  end
12
15
 
@@ -17,10 +20,32 @@ describe UTF8Cleaner::Middleware do
17
20
  describe "removes invalid UTF-8 sequences" do
18
21
  it { new_env['QUERY_STRING'].should == 'foo=bar' }
19
22
  it { new_env['HTTP_REFERER'].should == 'http://example.com/blog+Result:+++++' }
23
+ it { new_env['rack.input'].read.should == 'foo=bar' }
20
24
  end
21
25
 
22
26
  describe "leaves all valid characters untouched" do
23
27
  it { new_env['PATH_INFO'].should == 'foo/bar%2e%2fbaz%26%3B' }
24
28
  it { new_env['REQUEST_URI'].should == '%C3%89%E2%9C%93' }
25
29
  end
26
- end
30
+
31
+ describe "when rack.input is wrapped" do
32
+ # rack.input responds only to methods gets, each, rewind, read and close
33
+ # Rack::Lint::InputWrapper is the class that servers' input wrappers are based on
34
+ it "removes invalid UTF-8 sequences" do
35
+ wrapped_rack_input = Rack::Lint::InputWrapper.new(StringIO.new("foo=%FFbar%F8"))
36
+ env.merge!('rack.input' => wrapped_rack_input)
37
+ new_env = UTF8Cleaner::Middleware.new(nil).send(:sanitize_env, env)
38
+ new_env['rack.input'].read.should == 'foo=bar'
39
+ end
40
+ end
41
+
42
+ describe "when binary data is POSTed" do
43
+ before do
44
+ env['CONTENT_TYPE'] = 'multipart/form-data'
45
+ end
46
+ it "leaves the body alone" do
47
+ env['rack.input'].rewind
48
+ new_env['rack.input'].read.should == "foo=%FFbar%F8"
49
+ end
50
+ end
51
+ end
@@ -22,15 +22,6 @@ module UTF8Cleaner
22
22
  it { complex_invalid_string.cleaned.should eq('foo/bar%2e%2fbaz%26%3B%E2%9C%93baz') }
23
23
  end
24
24
 
25
- describe '#encoded?' do
26
- it { encoded_string.should be_encoded }
27
- it { invalid_string.should be_encoded }
28
- it { multibyte_string.should be_encoded }
29
- it { complex_invalid_string.should be_encoded }
30
-
31
- it { ascii_string.should_not be_encoded }
32
- end
33
-
34
25
  describe '#valid?' do
35
26
  it { ascii_string.should be_valid }
36
27
  it { encoded_string.should be_valid }
data/utf8-cleaner.gemspec CHANGED
@@ -21,4 +21,5 @@ Gem::Specification.new do |gem|
21
21
  gem.add_development_dependency "guard"
22
22
  gem.add_development_dependency "guard-rspec"
23
23
  gem.add_development_dependency "rspec"
24
+ gem.add_development_dependency "rack"
24
25
  end
metadata CHANGED
@@ -1,69 +1,83 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8-cleaner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leon Miller-Out
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-10-16 00:00:00.000000000 Z
11
+ date: 2014-04-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: guard
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: guard-rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rack
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
67
81
  - !ruby/object:Gem::Version
68
82
  version: '0'
69
83
  description: Removes invalid UTF8 characters from the URL and other env vars
@@ -73,8 +87,8 @@ executables: []
73
87
  extensions: []
74
88
  extra_rdoc_files: []
75
89
  files:
76
- - .gitignore
77
- - .travis.yml
90
+ - ".gitignore"
91
+ - ".travis.yml"
78
92
  - Gemfile
79
93
  - Guardfile
80
94
  - LICENSE.txt
@@ -98,17 +112,17 @@ require_paths:
98
112
  - lib
99
113
  required_ruby_version: !ruby/object:Gem::Requirement
100
114
  requirements:
101
- - - '>='
115
+ - - ">="
102
116
  - !ruby/object:Gem::Version
103
117
  version: '0'
104
118
  required_rubygems_version: !ruby/object:Gem::Requirement
105
119
  requirements:
106
- - - '>='
120
+ - - ">="
107
121
  - !ruby/object:Gem::Version
108
122
  version: '0'
109
123
  requirements: []
110
124
  rubyforge_project:
111
- rubygems_version: 2.0.3
125
+ rubygems_version: 2.2.2
112
126
  signing_key:
113
127
  specification_version: 4
114
128
  summary: Prevent annoying error reports of "invalid byte sequence in UTF-8"