utf8-cleaner 0.0.6 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/README.md +5 -3
- data/lib/utf8-cleaner/middleware.rb +25 -10
- data/lib/utf8-cleaner/uri_string.rb +0 -4
- data/lib/utf8-cleaner/version.rb +1 -1
- data/spec/middleware_spec.rb +27 -2
- data/spec/uri_string_spec.rb +0 -9
- data/utf8-cleaner.gemspec +1 -0
- metadata +29 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34470c6433b9f2fc055514464d9c4794881ed579
|
4
|
+
data.tar.gz: 5135b3b7e6ea404b3dcc57d07ea9aee798a528d4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35b02095acfd32a1a5c1a380ed017e130c25dae095fc2f1bee135a1451f1394f6018eba74f7d4567b3158fbf8978d79ad968fd07a947fde054bf6d0e38766e68
|
7
|
+
data.tar.gz: 9fef626eb986add0193d716339e6df9e7faf22cc8767c7257b2514265ccd3f2294360df45f6c04afd4977f88ced34b4e51f9160231c12e041e0e586ec35bf64d
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# UTF8Cleaner
|
2
2
|
|
3
|
-
[
|
3
|
+
[Build Status](http://travis-ci.org/singlebrook/utf8-cleaner)
|
4
4
|
|
5
5
|
Removes invalid UTF-8 characters from the environment so that your app doesn't choke
|
6
6
|
on them. This prevents errors like "invalid byte sequence in UTF-8".
|
@@ -21,7 +21,7 @@ Or install it yourself as:
|
|
21
21
|
|
22
22
|
If you're not running Rails, you'll have to add the middleware to your config.ru:
|
23
23
|
|
24
|
-
require '
|
24
|
+
require 'utf8-cleaner'
|
25
25
|
use UTF8Cleaner::Middleware
|
26
26
|
|
27
27
|
## Usage
|
@@ -40,4 +40,6 @@ There's nothing to "use". It just works!
|
|
40
40
|
|
41
41
|
Original middleware author: @phoet - https://gist.github.com/phoet/1336754
|
42
42
|
|
43
|
-
Ruby 1.9.3 compatibility: @pithyless - https://gist.github.com/pithyless/3639014
|
43
|
+
* Ruby 1.9.3 compatibility: @pithyless - https://gist.github.com/pithyless/3639014
|
44
|
+
* Code review and cleanup: @nextmat
|
45
|
+
* POST body sanitization: @salrepe
|
@@ -20,22 +20,37 @@ module UTF8Cleaner
|
|
20
20
|
|
21
21
|
private
|
22
22
|
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
false
|
23
|
+
def sanitize_env(env)
|
24
|
+
sanitize_env_keys(env)
|
25
|
+
sanitize_env_rack_input(env)
|
26
|
+
env
|
28
27
|
end
|
29
28
|
|
30
|
-
def
|
29
|
+
def sanitize_env_keys(env)
|
31
30
|
SANITIZE_ENV_KEYS.each do |key|
|
32
31
|
next unless value = env[key]
|
32
|
+
cleaned_value = cleaned_uri_string(value)
|
33
|
+
env[key] = cleaned_value if cleaned_value
|
34
|
+
end
|
35
|
+
end
|
33
36
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
+
def sanitize_env_rack_input(env)
|
38
|
+
case env['CONTENT_TYPE']
|
39
|
+
when 'application/x-www-form-urlencoded'
|
40
|
+
cleaned_value = cleaned_uri_string(env['rack.input'].read)
|
41
|
+
env['rack.input'] = StringIO.new(cleaned_value) if cleaned_value
|
42
|
+
env['rack.input'].rewind
|
43
|
+
when 'multipart/form-data'
|
44
|
+
# Don't process the data since it may contain binary content
|
45
|
+
else
|
46
|
+
# Unknown content type. Leave it alone
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def cleaned_uri_string(value)
|
51
|
+
if value.include?('%')
|
52
|
+
URIString.new(value).cleaned
|
37
53
|
end
|
38
|
-
env
|
39
54
|
end
|
40
55
|
end
|
41
56
|
end
|
data/lib/utf8-cleaner/version.rb
CHANGED
data/spec/middleware_spec.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
+
require 'rack/lint'
|
2
3
|
|
3
4
|
describe UTF8Cleaner::Middleware do
|
4
5
|
let :env do
|
@@ -6,7 +7,9 @@ describe UTF8Cleaner::Middleware do
|
|
6
7
|
'PATH_INFO' => 'foo/%FFbar%2e%2fbaz%26%3B',
|
7
8
|
'QUERY_STRING' => 'foo=bar%FF',
|
8
9
|
'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
|
9
|
-
'REQUEST_URI' => '%C3%89%E2%9C%93'
|
10
|
+
'REQUEST_URI' => '%C3%89%E2%9C%93',
|
11
|
+
'rack.input' => StringIO.new("foo=%FFbar%F8"),
|
12
|
+
'CONTENT_TYPE' => 'application/x-www-form-urlencoded'
|
10
13
|
}
|
11
14
|
end
|
12
15
|
|
@@ -17,10 +20,32 @@ describe UTF8Cleaner::Middleware do
|
|
17
20
|
describe "removes invalid UTF-8 sequences" do
|
18
21
|
it { new_env['QUERY_STRING'].should == 'foo=bar' }
|
19
22
|
it { new_env['HTTP_REFERER'].should == 'http://example.com/blog+Result:+++++' }
|
23
|
+
it { new_env['rack.input'].read.should == 'foo=bar' }
|
20
24
|
end
|
21
25
|
|
22
26
|
describe "leaves all valid characters untouched" do
|
23
27
|
it { new_env['PATH_INFO'].should == 'foo/bar%2e%2fbaz%26%3B' }
|
24
28
|
it { new_env['REQUEST_URI'].should == '%C3%89%E2%9C%93' }
|
25
29
|
end
|
26
|
-
|
30
|
+
|
31
|
+
describe "when rack.input is wrapped" do
|
32
|
+
# rack.input responds only to the methods gets, each, rewind, read, and close
|
33
|
+
# Rack::Lint::InputWrapper is the class that servers' input wrappers are based on
|
34
|
+
it "removes invalid UTF-8 sequences" do
|
35
|
+
wrapped_rack_input = Rack::Lint::InputWrapper.new(StringIO.new("foo=%FFbar%F8"))
|
36
|
+
env.merge!('rack.input' => wrapped_rack_input)
|
37
|
+
new_env = UTF8Cleaner::Middleware.new(nil).send(:sanitize_env, env)
|
38
|
+
new_env['rack.input'].read.should == 'foo=bar'
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe "when binary data is POSTed" do
|
43
|
+
before do
|
44
|
+
env['CONTENT_TYPE'] = 'multipart/form-data'
|
45
|
+
end
|
46
|
+
it "leaves the body alone" do
|
47
|
+
env['rack.input'].rewind
|
48
|
+
new_env['rack.input'].read.should == "foo=%FFbar%F8"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
data/spec/uri_string_spec.rb
CHANGED
@@ -22,15 +22,6 @@ module UTF8Cleaner
|
|
22
22
|
it { complex_invalid_string.cleaned.should eq('foo/bar%2e%2fbaz%26%3B%E2%9C%93baz') }
|
23
23
|
end
|
24
24
|
|
25
|
-
describe '#encoded?' do
|
26
|
-
it { encoded_string.should be_encoded }
|
27
|
-
it { invalid_string.should be_encoded }
|
28
|
-
it { multibyte_string.should be_encoded }
|
29
|
-
it { complex_invalid_string.should be_encoded }
|
30
|
-
|
31
|
-
it { ascii_string.should_not be_encoded }
|
32
|
-
end
|
33
|
-
|
34
25
|
describe '#valid?' do
|
35
26
|
it { ascii_string.should be_valid }
|
36
27
|
it { encoded_string.should be_valid }
|
data/utf8-cleaner.gemspec
CHANGED
metadata
CHANGED
@@ -1,69 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: utf8-cleaner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Leon Miller-Out
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-04-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: guard
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: guard-rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rspec
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rack
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: '0'
|
69
83
|
description: Removes invalid UTF8 characters from the URL and other env vars
|
@@ -73,8 +87,8 @@ executables: []
|
|
73
87
|
extensions: []
|
74
88
|
extra_rdoc_files: []
|
75
89
|
files:
|
76
|
-
- .gitignore
|
77
|
-
- .travis.yml
|
90
|
+
- ".gitignore"
|
91
|
+
- ".travis.yml"
|
78
92
|
- Gemfile
|
79
93
|
- Guardfile
|
80
94
|
- LICENSE.txt
|
@@ -98,17 +112,17 @@ require_paths:
|
|
98
112
|
- lib
|
99
113
|
required_ruby_version: !ruby/object:Gem::Requirement
|
100
114
|
requirements:
|
101
|
-
- -
|
115
|
+
- - ">="
|
102
116
|
- !ruby/object:Gem::Version
|
103
117
|
version: '0'
|
104
118
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
119
|
requirements:
|
106
|
-
- -
|
120
|
+
- - ">="
|
107
121
|
- !ruby/object:Gem::Version
|
108
122
|
version: '0'
|
109
123
|
requirements: []
|
110
124
|
rubyforge_project:
|
111
|
-
rubygems_version: 2.
|
125
|
+
rubygems_version: 2.2.2
|
112
126
|
signing_key:
|
113
127
|
specification_version: 4
|
114
128
|
summary: Prevent annoying error reports of "invalid byte sequence in UTF-8"
|