utf8-cleaner 0.0.9 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/CHANGELOG.md +10 -0
- data/Guardfile +1 -1
- data/lib/utf8-cleaner/middleware.rb +9 -5
- data/lib/utf8-cleaner/uri_string.rb +5 -0
- data/lib/utf8-cleaner/version.rb +1 -1
- data/spec/middleware_spec.rb +67 -40
- data/spec/spec_helper.rb +0 -4
- data/spec/uri_string_spec.rb +11 -11
- data/utf8-cleaner.gemspec +2 -0
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4f4b8427b2de1cca2aad8cf672164873552352c
|
4
|
+
data.tar.gz: f4c6e987bd3dcdcdd459b896ca6e8768fe103ad8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5984ac142b4497cc4dfb3626f09b79a5c74c0e27ac449239876a68cf1d73adc49433b163573d037ed5c91963b7c020e9cb8653a68a75c5eb09b13cf006f8533f
|
7
|
+
data.tar.gz: b9640a1a528a8873338dd08c1bf278994fec361a91c4dfc6562d01c21871f8029b48b120bc35453ff8fc7c948fe4c02df4d3da59dac68504393990d882f7c29e
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
data/Guardfile
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# A sample Guardfile
|
2
2
|
# More info at https://github.com/guard/guard#readme
|
3
3
|
|
4
|
-
guard :rspec do
|
4
|
+
guard :rspec, cmd: 'bundle exec rspec -b' do
|
5
5
|
watch(%r{^spec/.+_spec\.rb$})
|
6
6
|
watch(%r{^lib/utf8-cleaner/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
|
7
7
|
watch('spec/spec_helper.rb') { "spec" }
|
data/lib/utf8-cleaner/middleware.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
+
require 'active_support/multibyte/unicode'
|
2
|
+
|
1
3
|
module UTF8Cleaner
|
2
4
|
class Middleware
|
3
5
|
|
4
6
|
SANITIZE_ENV_KEYS = [
|
5
7
|
"HTTP_REFERER",
|
8
|
+
"HTTP_USER_AGENT",
|
6
9
|
"PATH_INFO",
|
7
10
|
"QUERY_STRING",
|
8
11
|
"REQUEST_PATH",
|
@@ -20,6 +23,8 @@ module UTF8Cleaner
|
|
20
23
|
|
21
24
|
private
|
22
25
|
|
26
|
+
include ActiveSupport::Multibyte::Unicode
|
27
|
+
|
23
28
|
def sanitize_env(env)
|
24
29
|
sanitize_env_keys(env)
|
25
30
|
sanitize_env_rack_input(env)
|
@@ -29,8 +34,7 @@ module UTF8Cleaner
|
|
29
34
|
def sanitize_env_keys(env)
|
30
35
|
SANITIZE_ENV_KEYS.each do |key|
|
31
36
|
next unless value = env[key]
|
32
|
-
|
33
|
-
env[key] = cleaned_value if cleaned_value
|
37
|
+
env[key] = cleaned_uri_string(value)
|
34
38
|
end
|
35
39
|
end
|
36
40
|
|
@@ -48,9 +52,9 @@ module UTF8Cleaner
|
|
48
52
|
end
|
49
53
|
|
50
54
|
def cleaned_uri_string(value)
|
51
|
-
if value.
|
52
|
-
|
53
|
-
|
55
|
+
value = tidy_bytes(value) if value && !value.ascii_only?
|
56
|
+
value = URIString.new(value).cleaned if value.include?('%')
|
57
|
+
value
|
54
58
|
end
|
55
59
|
end
|
56
60
|
end
|
data/lib/utf8-cleaner/version.rb
CHANGED
data/spec/middleware_spec.rb
CHANGED
@@ -1,51 +1,78 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
require 'rack/lint'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
|
10
|
-
'REQUEST_URI' => '%C3%89%E2%9C%93',
|
11
|
-
'rack.input' => StringIO.new("foo=%FFbar%F8"),
|
12
|
-
'CONTENT_TYPE' => 'application/x-www-form-urlencoded'
|
13
|
-
}
|
14
|
-
end
|
4
|
+
module UTF8Cleaner
|
5
|
+
describe Middleware do
|
6
|
+
let :new_env do
|
7
|
+
Middleware.new(nil).send(:sanitize_env, env)
|
8
|
+
end
|
15
9
|
|
16
|
-
|
17
|
-
|
18
|
-
|
10
|
+
describe "with a big nasty env" do
|
11
|
+
let :env do
|
12
|
+
{
|
13
|
+
'PATH_INFO' => 'foo/%FFbar%2e%2fbaz%26%3B',
|
14
|
+
'QUERY_STRING' => 'foo=bar%FF',
|
15
|
+
'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
|
16
|
+
'HTTP_USER_AGENT' => "Android Versi\xF3n/4.0",
|
17
|
+
'REQUEST_URI' => '%C3%89%E2%9C%93',
|
18
|
+
'rack.input' => StringIO.new("foo=%FFbar%F8"),
|
19
|
+
'CONTENT_TYPE' => 'application/x-www-form-urlencoded'
|
20
|
+
}
|
21
|
+
end
|
19
22
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
23
|
+
describe "removes invalid %-encoded UTF-8 sequences" do
|
24
|
+
it { expect(new_env['QUERY_STRING']).to eq('foo=bar') }
|
25
|
+
it { expect(new_env['HTTP_REFERER']).to eq('http://example.com/blog+Result:+++++') }
|
26
|
+
it { expect(new_env['rack.input'].read).to eq('foo=bar') }
|
27
|
+
end
|
25
28
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
end
|
29
|
+
describe 'replaces \x-encoded characters from the ISO-8859-1 and CP1252 code pages with their UTF-8 equivalents' do
|
30
|
+
it { expect(new_env['HTTP_USER_AGENT']).to eq('Android Versión/4.0') }
|
31
|
+
end
|
30
32
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
33
|
+
describe "leaves all valid characters untouched" do
|
34
|
+
it { expect(new_env['PATH_INFO']).to eq('foo/bar%2e%2fbaz%26%3B') }
|
35
|
+
it { expect(new_env['REQUEST_URI']).to eq('%C3%89%E2%9C%93') }
|
36
|
+
end
|
37
|
+
|
38
|
+
describe "when rack.input is wrapped" do
|
39
|
+
# rack.input responds only to methods gets, each, rewind, read and close
|
40
|
+
# Rack::Lint::InputWrapper is the class which servers wrappers are based on
|
41
|
+
it "removes invalid UTF-8 sequences" do
|
42
|
+
wrapped_rack_input = Rack::Lint::InputWrapper.new(StringIO.new("foo=%FFbar%F8"))
|
43
|
+
env.merge!('rack.input' => wrapped_rack_input)
|
44
|
+
new_env = Middleware.new(nil).send(:sanitize_env, env)
|
45
|
+
expect(new_env['rack.input'].read).to eq('foo=bar')
|
46
|
+
end
|
47
|
+
end
|
41
48
|
|
42
|
-
|
43
|
-
|
44
|
-
|
49
|
+
describe "when binary data is POSTed" do
|
50
|
+
before do
|
51
|
+
env['CONTENT_TYPE'] = 'multipart/form-data'
|
52
|
+
end
|
53
|
+
it "leaves the body alone" do
|
54
|
+
env['rack.input'].rewind
|
55
|
+
expect(new_env['rack.input'].read).to eq "foo=%FFbar%F8"
|
56
|
+
end
|
57
|
+
end
|
45
58
|
end
|
46
|
-
|
47
|
-
|
48
|
-
|
59
|
+
|
60
|
+
describe "with a minimal env" do
|
61
|
+
let(:env) do
|
62
|
+
{
|
63
|
+
'PATH_INFO' => '/this/is/safe',
|
64
|
+
'QUERY_STRING' => 'foo=bar%FF'
|
65
|
+
}
|
66
|
+
end
|
67
|
+
|
68
|
+
it "only runs URIString cleaning on potentially unclean strings" do
|
69
|
+
expect(URIString).to receive(:new).once.and_call_original
|
70
|
+
new_env
|
71
|
+
end
|
72
|
+
|
73
|
+
it "leaves clean values alone" do
|
74
|
+
expect(new_env['PATH_INFO']).to eq('/this/is/safe')
|
75
|
+
end
|
49
76
|
end
|
50
77
|
end
|
51
|
-
end
|
78
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require 'rspec/autorun'
|
3
|
-
|
4
2
|
require 'utf8-cleaner'
|
5
3
|
require 'uri'
|
6
4
|
|
7
5
|
RSpec.configure do |config|
|
8
|
-
config.treat_symbols_as_metadata_keys_with_true_values = true
|
9
|
-
|
10
6
|
# Run specs in random order to surface order dependencies. If you find an
|
11
7
|
# order dependency and want to debug it, you can fix the order by providing
|
12
8
|
# the seed, which is printed after each run.
|
data/spec/uri_string_spec.rb
CHANGED
@@ -11,24 +11,24 @@ module UTF8Cleaner
|
|
11
11
|
# foo/ bar. / baz& ; √ baz
|
12
12
|
|
13
13
|
describe '#new' do
|
14
|
-
it { encoded_string.
|
14
|
+
it { expect(encoded_string).to be_a(URIString) }
|
15
15
|
end
|
16
16
|
|
17
17
|
describe '#cleaned' do
|
18
|
-
it { invalid_string.cleaned.
|
19
|
-
it { ascii_string.cleaned.
|
20
|
-
it { encoded_string.cleaned.
|
21
|
-
it { multibyte_string.cleaned.
|
22
|
-
it { complex_invalid_string.cleaned.
|
18
|
+
it { expect(invalid_string.cleaned).to eq('') }
|
19
|
+
it { expect(ascii_string.cleaned).to eq('foo') }
|
20
|
+
it { expect(encoded_string.cleaned).to eq('%26') }
|
21
|
+
it { expect(multibyte_string.cleaned).to eq('%E2%9C%93') }
|
22
|
+
it { expect(complex_invalid_string.cleaned).to eq('foo/bar%2e%2fbaz%26%3B%E2%9C%93baz') }
|
23
23
|
end
|
24
24
|
|
25
25
|
describe '#valid?' do
|
26
|
-
it { ascii_string.
|
27
|
-
it { encoded_string.
|
28
|
-
it { multibyte_string.
|
26
|
+
it { expect(ascii_string).to be_valid }
|
27
|
+
it { expect(encoded_string).to be_valid }
|
28
|
+
it { expect(multibyte_string).to be_valid }
|
29
29
|
|
30
|
-
it { invalid_string.
|
31
|
-
it { complex_invalid_string.
|
30
|
+
it { expect(invalid_string).to_not be_valid }
|
31
|
+
it { expect(complex_invalid_string).to_not be_valid }
|
32
32
|
end
|
33
33
|
|
34
34
|
end
|
data/utf8-cleaner.gemspec
CHANGED
@@ -17,6 +17,8 @@ Gem::Specification.new do |gem|
|
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
|
20
|
+
gem.add_dependency 'activesupport'
|
21
|
+
|
20
22
|
gem.add_development_dependency "rake"
|
21
23
|
gem.add_development_dependency "guard"
|
22
24
|
gem.add_development_dependency "guard-rspec"
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: utf8-cleaner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Leon Miller-Out
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: rake
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -89,6 +103,7 @@ extra_rdoc_files: []
|
|
89
103
|
files:
|
90
104
|
- ".gitignore"
|
91
105
|
- ".travis.yml"
|
106
|
+
- CHANGELOG.md
|
92
107
|
- Gemfile
|
93
108
|
- Guardfile
|
94
109
|
- LICENSE.txt
|