utf8-cleaner 0.0.9 → 0.1.1

Sign up to get free protection for your applications and access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 34470c6433b9f2fc055514464d9c4794881ed579
4
- data.tar.gz: 5135b3b7e6ea404b3dcc57d07ea9aee798a528d4
3
+ metadata.gz: d4f4b8427b2de1cca2aad8cf672164873552352c
4
+ data.tar.gz: f4c6e987bd3dcdcdd459b896ca6e8768fe103ad8
5
5
  SHA512:
6
- metadata.gz: 35b02095acfd32a1a5c1a380ed017e130c25dae095fc2f1bee135a1451f1394f6018eba74f7d4567b3158fbf8978d79ad968fd07a947fde054bf6d0e38766e68
7
- data.tar.gz: 9fef626eb986add0193d716339e6df9e7faf22cc8767c7257b2514265ccd3f2294360df45f6c04afd4977f88ced34b4e51f9160231c12e041e0e586ec35bf64d
6
+ metadata.gz: 5984ac142b4497cc4dfb3626f09b79a5c74c0e27ac449239876a68cf1d73adc49433b163573d037ed5c91963b7c020e9cb8653a68a75c5eb09b13cf006f8533f
7
+ data.tar.gz: b9640a1a528a8873338dd08c1bf278994fec361a91c4dfc6562d01c21871f8029b48b120bc35453ff8fc7c948fe4c02df4d3da59dac68504393990d882f7c29e
data/.travis.yml CHANGED
@@ -2,4 +2,5 @@ language: ruby
2
2
  rvm:
3
3
  - 1.9.3
4
4
  - 2.0.0
5
- - 2.1.1
5
+ - 2.1.1
6
+ - 2.2.1
data/CHANGELOG.md ADDED
@@ -0,0 +1,10 @@
1
+ # CHANGELOG
2
+
3
+ ## v0.1.1
4
+
5
+ * Now cleans HTTP_USER_AGENT
6
+ * Replaces some Windows (ISO-8859-1 and CP1252) characters with UTF8 equivalents
7
+
8
+ ## v0.1.0
9
+
10
+ Broken.
data/Guardfile CHANGED
@@ -1,7 +1,7 @@
1
1
  # A sample Guardfile
2
2
  # More info at https://github.com/guard/guard#readme
3
3
 
4
- guard :rspec do
4
+ guard :rspec, cmd: 'bundle exec rspec -b' do
5
5
  watch(%r{^spec/.+_spec\.rb$})
6
6
  watch(%r{^lib/utf8-cleaner/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
7
7
  watch('spec/spec_helper.rb') { "spec" }
@@ -1,8 +1,11 @@
1
+ require 'active_support/multibyte/unicode'
2
+
1
3
  module UTF8Cleaner
2
4
  class Middleware
3
5
 
4
6
  SANITIZE_ENV_KEYS = [
5
7
  "HTTP_REFERER",
8
+ "HTTP_USER_AGENT",
6
9
  "PATH_INFO",
7
10
  "QUERY_STRING",
8
11
  "REQUEST_PATH",
@@ -20,6 +23,8 @@ module UTF8Cleaner
20
23
 
21
24
  private
22
25
 
26
+ include ActiveSupport::Multibyte::Unicode
27
+
23
28
  def sanitize_env(env)
24
29
  sanitize_env_keys(env)
25
30
  sanitize_env_rack_input(env)
@@ -29,8 +34,7 @@ module UTF8Cleaner
29
34
  def sanitize_env_keys(env)
30
35
  SANITIZE_ENV_KEYS.each do |key|
31
36
  next unless value = env[key]
32
- cleaned_value = cleaned_uri_string(value)
33
- env[key] = cleaned_value if cleaned_value
37
+ env[key] = cleaned_uri_string(value)
34
38
  end
35
39
  end
36
40
 
@@ -48,9 +52,9 @@ module UTF8Cleaner
48
52
  end
49
53
 
50
54
  def cleaned_uri_string(value)
51
- if value.include?('%')
52
- URIString.new(value).cleaned
53
- end
55
+ value = tidy_bytes(value) if value && !value.ascii_only?
56
+ value = URIString.new(value).cleaned if value.include?('%')
57
+ value
54
58
  end
55
59
  end
56
60
  end
@@ -16,6 +16,11 @@ module UTF8Cleaner
16
16
 
17
17
  def valid?
18
18
  valid_uri_encoded_utf8(data)
19
+ rescue ArgumentError => e
20
+ if e.message =~ /invalid byte sequence/
21
+ return false
22
+ end
23
+ raise e
19
24
  end
20
25
 
21
26
  private
@@ -1,3 +1,3 @@
1
1
  module UTF8Cleaner
2
- VERSION = "0.0.9"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -1,51 +1,78 @@
1
1
  require 'spec_helper'
2
2
  require 'rack/lint'
3
3
 
4
- describe UTF8Cleaner::Middleware do
5
- let :env do
6
- {
7
- 'PATH_INFO' => 'foo/%FFbar%2e%2fbaz%26%3B',
8
- 'QUERY_STRING' => 'foo=bar%FF',
9
- 'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
10
- 'REQUEST_URI' => '%C3%89%E2%9C%93',
11
- 'rack.input' => StringIO.new("foo=%FFbar%F8"),
12
- 'CONTENT_TYPE' => 'application/x-www-form-urlencoded'
13
- }
14
- end
4
+ module UTF8Cleaner
5
+ describe Middleware do
6
+ let :new_env do
7
+ Middleware.new(nil).send(:sanitize_env, env)
8
+ end
15
9
 
16
- let :new_env do
17
- UTF8Cleaner::Middleware.new(nil).send(:sanitize_env, env)
18
- end
10
+ describe "with a big nasty env" do
11
+ let :env do
12
+ {
13
+ 'PATH_INFO' => 'foo/%FFbar%2e%2fbaz%26%3B',
14
+ 'QUERY_STRING' => 'foo=bar%FF',
15
+ 'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
16
+ 'HTTP_USER_AGENT' => "Android Versi\xF3n/4.0",
17
+ 'REQUEST_URI' => '%C3%89%E2%9C%93',
18
+ 'rack.input' => StringIO.new("foo=%FFbar%F8"),
19
+ 'CONTENT_TYPE' => 'application/x-www-form-urlencoded'
20
+ }
21
+ end
19
22
 
20
- describe "removes invalid UTF-8 sequences" do
21
- it { new_env['QUERY_STRING'].should == 'foo=bar' }
22
- it { new_env['HTTP_REFERER'].should == 'http://example.com/blog+Result:+++++' }
23
- it { new_env['rack.input'].read.should == 'foo=bar' }
24
- end
23
+ describe "removes invalid %-encoded UTF-8 sequences" do
24
+ it { expect(new_env['QUERY_STRING']).to eq('foo=bar') }
25
+ it { expect(new_env['HTTP_REFERER']).to eq('http://example.com/blog+Result:+++++') }
26
+ it { expect(new_env['rack.input'].read).to eq('foo=bar') }
27
+ end
25
28
 
26
- describe "leaves all valid characters untouched" do
27
- it { new_env['PATH_INFO'].should == 'foo/bar%2e%2fbaz%26%3B' }
28
- it { new_env['REQUEST_URI'].should == '%C3%89%E2%9C%93' }
29
- end
29
+ describe 'replaces \x-encoded characters from the ISO-8859-1 and CP1252 code pages with their UTF-8 equivalents' do
30
+ it { expect(new_env['HTTP_USER_AGENT']).to eq('Android Versión/4.0') }
31
+ end
30
32
 
31
- describe "when rack.input is wrapped" do
32
- # rack.input responds only to methods gets, each, rewind, read and close
33
- # Rack::Lint::InputWrapper is the class which servers wrappers are based on
34
- it "removes invalid UTF-8 sequences" do
35
- wrapped_rack_input = Rack::Lint::InputWrapper.new(StringIO.new("foo=%FFbar%F8"))
36
- env.merge!('rack.input' => wrapped_rack_input)
37
- new_env = UTF8Cleaner::Middleware.new(nil).send(:sanitize_env, env)
38
- new_env['rack.input'].read.should == 'foo=bar'
39
- end
40
- end
33
+ describe "leaves all valid characters untouched" do
34
+ it { expect(new_env['PATH_INFO']).to eq('foo/bar%2e%2fbaz%26%3B') }
35
+ it { expect(new_env['REQUEST_URI']).to eq('%C3%89%E2%9C%93') }
36
+ end
37
+
38
+ describe "when rack.input is wrapped" do
39
+ # rack.input responds only to methods gets, each, rewind, read and close
40
+ # Rack::Lint::InputWrapper is the class which servers wrappers are based on
41
+ it "removes invalid UTF-8 sequences" do
42
+ wrapped_rack_input = Rack::Lint::InputWrapper.new(StringIO.new("foo=%FFbar%F8"))
43
+ env.merge!('rack.input' => wrapped_rack_input)
44
+ new_env = Middleware.new(nil).send(:sanitize_env, env)
45
+ expect(new_env['rack.input'].read).to eq('foo=bar')
46
+ end
47
+ end
41
48
 
42
- describe "when binary data is POSTed" do
43
- before do
44
- env['CONTENT_TYPE'] = 'multipart/form-data'
49
+ describe "when binary data is POSTed" do
50
+ before do
51
+ env['CONTENT_TYPE'] = 'multipart/form-data'
52
+ end
53
+ it "leaves the body alone" do
54
+ env['rack.input'].rewind
55
+ expect(new_env['rack.input'].read).to eq "foo=%FFbar%F8"
56
+ end
57
+ end
45
58
  end
46
- it "leaves the body alone" do
47
- env['rack.input'].rewind
48
- new_env['rack.input'].read.should == "foo=%FFbar%F8"
59
+
60
+ describe "with a minimal env" do
61
+ let(:env) do
62
+ {
63
+ 'PATH_INFO' => '/this/is/safe',
64
+ 'QUERY_STRING' => 'foo=bar%FF'
65
+ }
66
+ end
67
+
68
+ it "only runs URIString cleaning on potentially unclean strings" do
69
+ expect(URIString).to receive(:new).once.and_call_original
70
+ new_env
71
+ end
72
+
73
+ it "leaves clean values alone" do
74
+ expect(new_env['PATH_INFO']).to eq('/this/is/safe')
75
+ end
49
76
  end
50
77
  end
51
- end
78
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,12 +1,8 @@
1
1
  require 'rubygems'
2
- require 'rspec/autorun'
3
-
4
2
  require 'utf8-cleaner'
5
3
  require 'uri'
6
4
 
7
5
  RSpec.configure do |config|
8
- config.treat_symbols_as_metadata_keys_with_true_values = true
9
-
10
6
  # Run specs in random order to surface order dependencies. If you find an
11
7
  # order dependency and want to debug it, you can fix the order by providing
12
8
  # the seed, which is printed after each run.
@@ -11,24 +11,24 @@ module UTF8Cleaner
11
11
  # foo/ bar. / baz& ; √ baz
12
12
 
13
13
  describe '#new' do
14
- it { encoded_string.should be_a URIString }
14
+ it { expect(encoded_string).to be_a(URIString) }
15
15
  end
16
16
 
17
17
  describe '#cleaned' do
18
- it { invalid_string.cleaned.should eq('') }
19
- it { ascii_string.cleaned.should eq('foo') }
20
- it { encoded_string.cleaned.should eq('%26') }
21
- it { multibyte_string.cleaned.should eq('%E2%9C%93') }
22
- it { complex_invalid_string.cleaned.should eq('foo/bar%2e%2fbaz%26%3B%E2%9C%93baz') }
18
+ it { expect(invalid_string.cleaned).to eq('') }
19
+ it { expect(ascii_string.cleaned).to eq('foo') }
20
+ it { expect(encoded_string.cleaned).to eq('%26') }
21
+ it { expect(multibyte_string.cleaned).to eq('%E2%9C%93') }
22
+ it { expect(complex_invalid_string.cleaned).to eq('foo/bar%2e%2fbaz%26%3B%E2%9C%93baz') }
23
23
  end
24
24
 
25
25
  describe '#valid?' do
26
- it { ascii_string.should be_valid }
27
- it { encoded_string.should be_valid }
28
- it { multibyte_string.should be_valid }
26
+ it { expect(ascii_string).to be_valid }
27
+ it { expect(encoded_string).to be_valid }
28
+ it { expect(multibyte_string).to be_valid }
29
29
 
30
- it { invalid_string.should_not be_valid }
31
- it { complex_invalid_string.should_not be_valid }
30
+ it { expect(invalid_string).to_not be_valid }
31
+ it { expect(complex_invalid_string).to_not be_valid }
32
32
  end
33
33
 
34
34
  end
data/utf8-cleaner.gemspec CHANGED
@@ -17,6 +17,8 @@ Gem::Specification.new do |gem|
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
19
 
20
+ gem.add_dependency 'activesupport'
21
+
20
22
  gem.add_development_dependency "rake"
21
23
  gem.add_development_dependency "guard"
22
24
  gem.add_development_dependency "guard-rspec"
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8-cleaner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leon Miller-Out
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-16 00:00:00.000000000 Z
11
+ date: 2015-09-19 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: rake
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -89,6 +103,7 @@ extra_rdoc_files: []
89
103
  files:
90
104
  - ".gitignore"
91
105
  - ".travis.yml"
106
+ - CHANGELOG.md
92
107
  - Gemfile
93
108
  - Guardfile
94
109
  - LICENSE.txt