utf8-cleaner 0.0.6 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b6e265c0aefe9f9c6fa6082b86f21c81a92400b6
4
- data.tar.gz: 12dd12dceef125122d2877dbc9e9e597055ddfac
3
+ metadata.gz: 34470c6433b9f2fc055514464d9c4794881ed579
4
+ data.tar.gz: 5135b3b7e6ea404b3dcc57d07ea9aee798a528d4
5
5
  SHA512:
6
- metadata.gz: b794d47e0c9460ef5ec9eadc9340f705a491b0c48370efb82310d2bf21ff7713d655a4fdbb312c4788913879476be5a4c8823e8f494d8a2fe7376ecc4dd58a6c
7
- data.tar.gz: 0bf1fb1a6d23600a3f5ff4ae7d1f62c7fded5a0d7d0233ebc85103b12c51a849a7ef83708a21568a28bd005e8ea1199ffff6b0d05b0c186a8889624e190f5e19
6
+ metadata.gz: 35b02095acfd32a1a5c1a380ed017e130c25dae095fc2f1bee135a1451f1394f6018eba74f7d4567b3158fbf8978d79ad968fd07a947fde054bf6d0e38766e68
7
+ data.tar.gz: 9fef626eb986add0193d716339e6df9e7faf22cc8767c7257b2514265ccd3f2294360df45f6c04afd4977f88ced34b4e51f9160231c12e041e0e586ec35bf64d
data/.travis.yml CHANGED
@@ -1,4 +1,5 @@
1
1
  language: ruby
2
2
  rvm:
3
3
  - 1.9.3
4
- - 2.0.0
4
+ - 2.0.0
5
+ - 2.1.1
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # UTF8Cleaner
2
2
 
3
- [<img src="https://secure.travis-ci.org/singlebrook/utf8-cleaner.png" />](http://travis-ci.org/singlebrook/utf8-cleaner)
3
+ [![Build Status](https://secure.travis-ci.org/singlebrook/utf8-cleaner.png?branch=master)](http://travis-ci.org/singlebrook/utf8-cleaner)
4
4
 
5
5
  Removes invalid UTF-8 characters from the environment so that your app doesn't choke
6
6
  on them. This prevents errors like "invalid byte sequence in UTF-8".
@@ -21,7 +21,7 @@ Or install it yourself as:
21
21
 
22
22
  If you're not running Rails, you'll have to add the middleware to your config.ru:
23
23
 
24
- require 'uf8-cleaner'
24
+ require 'utf8-cleaner'
25
25
  use UTF8Cleaner::Middleware
26
26
 
27
27
  ## Usage
@@ -40,4 +40,6 @@ There's nothing to "use". It just works!
40
40
 
41
41
  Original middleware author: @phoet - https://gist.github.com/phoet/1336754
42
42
 
43
- Ruby 1.9.3 compatibility: @pithyless - https://gist.github.com/pithyless/3639014
43
+ * Ruby 1.9.3 compatibility: @pithyless - https://gist.github.com/pithyless/3639014
44
+ * Code review and cleanup: @nextmat
45
+ * POST body sanitization: @salrepe
@@ -20,22 +20,37 @@ module UTF8Cleaner
20
20
 
21
21
  private
22
22
 
23
- def is_valid_utf8(string)
24
- utf8 = string.dup.force_encoding('UTF-8')
25
- string == utf8 && utf8.valid_encoding?
26
- rescue EncodingError
27
- false
23
+ def sanitize_env(env)
24
+ sanitize_env_keys(env)
25
+ sanitize_env_rack_input(env)
26
+ env
28
27
  end
29
28
 
30
- def sanitize_env(env)
29
+ def sanitize_env_keys(env)
31
30
  SANITIZE_ENV_KEYS.each do |key|
32
31
  next unless value = env[key]
32
+ cleaned_value = cleaned_uri_string(value)
33
+ env[key] = cleaned_value if cleaned_value
34
+ end
35
+ end
33
36
 
34
- if value.include?('%')
35
- env[key] = URIString.new(value).cleaned
36
- end
37
+ def sanitize_env_rack_input(env)
38
+ case env['CONTENT_TYPE']
39
+ when 'application/x-www-form-urlencoded'
40
+ cleaned_value = cleaned_uri_string(env['rack.input'].read)
41
+ env['rack.input'] = StringIO.new(cleaned_value) if cleaned_value
42
+ env['rack.input'].rewind
43
+ when 'multipart/form-data'
44
+ # Don't process the data since it may contain binary content
45
+ else
46
+ # Unknown content type. Leave it alone
47
+ end
48
+ end
49
+
50
+ def cleaned_uri_string(value)
51
+ if value.include?('%')
52
+ URIString.new(value).cleaned
37
53
  end
38
- env
39
54
  end
40
55
  end
41
56
  end
@@ -14,10 +14,6 @@ module UTF8Cleaner
14
14
  end
15
15
  end
16
16
 
17
- def encoded?
18
- data.include?('%')
19
- end
20
-
21
17
  def valid?
22
18
  valid_uri_encoded_utf8(data)
23
19
  end
@@ -1,3 +1,3 @@
1
1
  module UTF8Cleaner
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.9"
3
3
  end
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require 'rack/lint'
2
3
 
3
4
  describe UTF8Cleaner::Middleware do
4
5
  let :env do
@@ -6,7 +7,9 @@ describe UTF8Cleaner::Middleware do
6
7
  'PATH_INFO' => 'foo/%FFbar%2e%2fbaz%26%3B',
7
8
  'QUERY_STRING' => 'foo=bar%FF',
8
9
  'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
9
- 'REQUEST_URI' => '%C3%89%E2%9C%93'
10
+ 'REQUEST_URI' => '%C3%89%E2%9C%93',
11
+ 'rack.input' => StringIO.new("foo=%FFbar%F8"),
12
+ 'CONTENT_TYPE' => 'application/x-www-form-urlencoded'
10
13
  }
11
14
  end
12
15
 
@@ -17,10 +20,32 @@ describe UTF8Cleaner::Middleware do
17
20
  describe "removes invalid UTF-8 sequences" do
18
21
  it { new_env['QUERY_STRING'].should == 'foo=bar' }
19
22
  it { new_env['HTTP_REFERER'].should == 'http://example.com/blog+Result:+++++' }
23
+ it { new_env['rack.input'].read.should == 'foo=bar' }
20
24
  end
21
25
 
22
26
  describe "leaves all valid characters untouched" do
23
27
  it { new_env['PATH_INFO'].should == 'foo/bar%2e%2fbaz%26%3B' }
24
28
  it { new_env['REQUEST_URI'].should == '%C3%89%E2%9C%93' }
25
29
  end
26
- end
30
+
31
+ describe "when rack.input is wrapped" do
32
+ # rack.input responds only to methods gets, each, rewind, read and close
33
+ # Rack::Lint::InputWrapper is the class which servers wrappers are based on
34
+ it "removes invalid UTF-8 sequences" do
35
+ wrapped_rack_input = Rack::Lint::InputWrapper.new(StringIO.new("foo=%FFbar%F8"))
36
+ env.merge!('rack.input' => wrapped_rack_input)
37
+ new_env = UTF8Cleaner::Middleware.new(nil).send(:sanitize_env, env)
38
+ new_env['rack.input'].read.should == 'foo=bar'
39
+ end
40
+ end
41
+
42
+ describe "when binary data is POSTed" do
43
+ before do
44
+ env['CONTENT_TYPE'] = 'multipart/form-data'
45
+ end
46
+ it "leaves the body alone" do
47
+ env['rack.input'].rewind
48
+ new_env['rack.input'].read.should == "foo=%FFbar%F8"
49
+ end
50
+ end
51
+ end
@@ -22,15 +22,6 @@ module UTF8Cleaner
22
22
  it { complex_invalid_string.cleaned.should eq('foo/bar%2e%2fbaz%26%3B%E2%9C%93baz') }
23
23
  end
24
24
 
25
- describe '#encoded?' do
26
- it { encoded_string.should be_encoded }
27
- it { invalid_string.should be_encoded }
28
- it { multibyte_string.should be_encoded }
29
- it { complex_invalid_string.should be_encoded }
30
-
31
- it { ascii_string.should_not be_encoded }
32
- end
33
-
34
25
  describe '#valid?' do
35
26
  it { ascii_string.should be_valid }
36
27
  it { encoded_string.should be_valid }
data/utf8-cleaner.gemspec CHANGED
@@ -21,4 +21,5 @@ Gem::Specification.new do |gem|
21
21
  gem.add_development_dependency "guard"
22
22
  gem.add_development_dependency "guard-rspec"
23
23
  gem.add_development_dependency "rspec"
24
+ gem.add_development_dependency "rack"
24
25
  end
metadata CHANGED
@@ -1,69 +1,83 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8-cleaner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leon Miller-Out
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-10-16 00:00:00.000000000 Z
11
+ date: 2014-04-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: guard
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: guard-rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rack
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
67
81
  - !ruby/object:Gem::Version
68
82
  version: '0'
69
83
  description: Removes invalid UTF8 characters from the URL and other env vars
@@ -73,8 +87,8 @@ executables: []
73
87
  extensions: []
74
88
  extra_rdoc_files: []
75
89
  files:
76
- - .gitignore
77
- - .travis.yml
90
+ - ".gitignore"
91
+ - ".travis.yml"
78
92
  - Gemfile
79
93
  - Guardfile
80
94
  - LICENSE.txt
@@ -98,17 +112,17 @@ require_paths:
98
112
  - lib
99
113
  required_ruby_version: !ruby/object:Gem::Requirement
100
114
  requirements:
101
- - - '>='
115
+ - - ">="
102
116
  - !ruby/object:Gem::Version
103
117
  version: '0'
104
118
  required_rubygems_version: !ruby/object:Gem::Requirement
105
119
  requirements:
106
- - - '>='
120
+ - - ">="
107
121
  - !ruby/object:Gem::Version
108
122
  version: '0'
109
123
  requirements: []
110
124
  rubyforge_project:
111
- rubygems_version: 2.0.3
125
+ rubygems_version: 2.2.2
112
126
  signing_key:
113
127
  specification_version: 4
114
128
  summary: Prevent annoying error reports of "invalid byte sequence in UTF-8"