utf8-cleaner 0.0.9 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 34470c6433b9f2fc055514464d9c4794881ed579
4
- data.tar.gz: 5135b3b7e6ea404b3dcc57d07ea9aee798a528d4
3
+ metadata.gz: d4f4b8427b2de1cca2aad8cf672164873552352c
4
+ data.tar.gz: f4c6e987bd3dcdcdd459b896ca6e8768fe103ad8
5
5
  SHA512:
6
- metadata.gz: 35b02095acfd32a1a5c1a380ed017e130c25dae095fc2f1bee135a1451f1394f6018eba74f7d4567b3158fbf8978d79ad968fd07a947fde054bf6d0e38766e68
7
- data.tar.gz: 9fef626eb986add0193d716339e6df9e7faf22cc8767c7257b2514265ccd3f2294360df45f6c04afd4977f88ced34b4e51f9160231c12e041e0e586ec35bf64d
6
+ metadata.gz: 5984ac142b4497cc4dfb3626f09b79a5c74c0e27ac449239876a68cf1d73adc49433b163573d037ed5c91963b7c020e9cb8653a68a75c5eb09b13cf006f8533f
7
+ data.tar.gz: b9640a1a528a8873338dd08c1bf278994fec361a91c4dfc6562d01c21871f8029b48b120bc35453ff8fc7c948fe4c02df4d3da59dac68504393990d882f7c29e
data/.travis.yml CHANGED
@@ -2,4 +2,5 @@ language: ruby
2
2
  rvm:
3
3
  - 1.9.3
4
4
  - 2.0.0
5
- - 2.1.1
5
+ - 2.1.1
6
+ - 2.2.1
data/CHANGELOG.md ADDED
@@ -0,0 +1,10 @@
1
+ # CHANGELOG
2
+
3
+ ## v0.1.1
4
+
5
+ * Now cleans HTTP_USER_AGENT
6
+ * Replaces some Windows (ISO-8859-1 and CP1252) characters with UTF8 equivalents
7
+
8
+ ## v0.1.0
9
+
10
+ Broken.
data/Guardfile CHANGED
@@ -1,7 +1,7 @@
1
1
  # A sample Guardfile
2
2
  # More info at https://github.com/guard/guard#readme
3
3
 
4
- guard :rspec do
4
+ guard :rspec, cmd: 'bundle exec rspec -b' do
5
5
  watch(%r{^spec/.+_spec\.rb$})
6
6
  watch(%r{^lib/utf8-cleaner/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
7
7
  watch('spec/spec_helper.rb') { "spec" }
@@ -1,8 +1,11 @@
1
+ require 'active_support/multibyte/unicode'
2
+
1
3
  module UTF8Cleaner
2
4
  class Middleware
3
5
 
4
6
  SANITIZE_ENV_KEYS = [
5
7
  "HTTP_REFERER",
8
+ "HTTP_USER_AGENT",
6
9
  "PATH_INFO",
7
10
  "QUERY_STRING",
8
11
  "REQUEST_PATH",
@@ -20,6 +23,8 @@ module UTF8Cleaner
20
23
 
21
24
  private
22
25
 
26
+ include ActiveSupport::Multibyte::Unicode
27
+
23
28
  def sanitize_env(env)
24
29
  sanitize_env_keys(env)
25
30
  sanitize_env_rack_input(env)
@@ -29,8 +34,7 @@ module UTF8Cleaner
29
34
  def sanitize_env_keys(env)
30
35
  SANITIZE_ENV_KEYS.each do |key|
31
36
  next unless value = env[key]
32
- cleaned_value = cleaned_uri_string(value)
33
- env[key] = cleaned_value if cleaned_value
37
+ env[key] = cleaned_uri_string(value)
34
38
  end
35
39
  end
36
40
 
@@ -48,9 +52,9 @@ module UTF8Cleaner
48
52
  end
49
53
 
50
54
  def cleaned_uri_string(value)
51
- if value.include?('%')
52
- URIString.new(value).cleaned
53
- end
55
+ value = tidy_bytes(value) if value && !value.ascii_only?
56
+ value = URIString.new(value).cleaned if value.include?('%')
57
+ value
54
58
  end
55
59
  end
56
60
  end
@@ -16,6 +16,11 @@ module UTF8Cleaner
16
16
 
17
17
  def valid?
18
18
  valid_uri_encoded_utf8(data)
19
+ rescue ArgumentError => e
20
+ if e.message =~ /invalid byte sequence/
21
+ return false
22
+ end
23
+ raise e
19
24
  end
20
25
 
21
26
  private
@@ -1,3 +1,3 @@
1
1
  module UTF8Cleaner
2
- VERSION = "0.0.9"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -1,51 +1,78 @@
1
1
  require 'spec_helper'
2
2
  require 'rack/lint'
3
3
 
4
- describe UTF8Cleaner::Middleware do
5
- let :env do
6
- {
7
- 'PATH_INFO' => 'foo/%FFbar%2e%2fbaz%26%3B',
8
- 'QUERY_STRING' => 'foo=bar%FF',
9
- 'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
10
- 'REQUEST_URI' => '%C3%89%E2%9C%93',
11
- 'rack.input' => StringIO.new("foo=%FFbar%F8"),
12
- 'CONTENT_TYPE' => 'application/x-www-form-urlencoded'
13
- }
14
- end
4
+ module UTF8Cleaner
5
+ describe Middleware do
6
+ let :new_env do
7
+ Middleware.new(nil).send(:sanitize_env, env)
8
+ end
15
9
 
16
- let :new_env do
17
- UTF8Cleaner::Middleware.new(nil).send(:sanitize_env, env)
18
- end
10
+ describe "with a big nasty env" do
11
+ let :env do
12
+ {
13
+ 'PATH_INFO' => 'foo/%FFbar%2e%2fbaz%26%3B',
14
+ 'QUERY_STRING' => 'foo=bar%FF',
15
+ 'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
16
+ 'HTTP_USER_AGENT' => "Android Versi\xF3n/4.0",
17
+ 'REQUEST_URI' => '%C3%89%E2%9C%93',
18
+ 'rack.input' => StringIO.new("foo=%FFbar%F8"),
19
+ 'CONTENT_TYPE' => 'application/x-www-form-urlencoded'
20
+ }
21
+ end
19
22
 
20
- describe "removes invalid UTF-8 sequences" do
21
- it { new_env['QUERY_STRING'].should == 'foo=bar' }
22
- it { new_env['HTTP_REFERER'].should == 'http://example.com/blog+Result:+++++' }
23
- it { new_env['rack.input'].read.should == 'foo=bar' }
24
- end
23
+ describe "removes invalid %-encoded UTF-8 sequences" do
24
+ it { expect(new_env['QUERY_STRING']).to eq('foo=bar') }
25
+ it { expect(new_env['HTTP_REFERER']).to eq('http://example.com/blog+Result:+++++') }
26
+ it { expect(new_env['rack.input'].read).to eq('foo=bar') }
27
+ end
25
28
 
26
- describe "leaves all valid characters untouched" do
27
- it { new_env['PATH_INFO'].should == 'foo/bar%2e%2fbaz%26%3B' }
28
- it { new_env['REQUEST_URI'].should == '%C3%89%E2%9C%93' }
29
- end
29
+ describe 'replaces \x-encoded characters from the ISO-8859-1 and CP1252 code pages with their UTF-8 equivalents' do
30
+ it { expect(new_env['HTTP_USER_AGENT']).to eq('Android Versión/4.0') }
31
+ end
30
32
 
31
- describe "when rack.input is wrapped" do
32
- # rack.input responds only to methods gets, each, rewind, read and close
33
- # Rack::Lint::InputWrapper is the class which servers wrappers are based on
34
- it "removes invalid UTF-8 sequences" do
35
- wrapped_rack_input = Rack::Lint::InputWrapper.new(StringIO.new("foo=%FFbar%F8"))
36
- env.merge!('rack.input' => wrapped_rack_input)
37
- new_env = UTF8Cleaner::Middleware.new(nil).send(:sanitize_env, env)
38
- new_env['rack.input'].read.should == 'foo=bar'
39
- end
40
- end
33
+ describe "leaves all valid characters untouched" do
34
+ it { expect(new_env['PATH_INFO']).to eq('foo/bar%2e%2fbaz%26%3B') }
35
+ it { expect(new_env['REQUEST_URI']).to eq('%C3%89%E2%9C%93') }
36
+ end
37
+
38
+ describe "when rack.input is wrapped" do
39
+ # rack.input responds only to methods gets, each, rewind, read and close
40
+ # Rack::Lint::InputWrapper is the class which servers wrappers are based on
41
+ it "removes invalid UTF-8 sequences" do
42
+ wrapped_rack_input = Rack::Lint::InputWrapper.new(StringIO.new("foo=%FFbar%F8"))
43
+ env.merge!('rack.input' => wrapped_rack_input)
44
+ new_env = Middleware.new(nil).send(:sanitize_env, env)
45
+ expect(new_env['rack.input'].read).to eq('foo=bar')
46
+ end
47
+ end
41
48
 
42
- describe "when binary data is POSTed" do
43
- before do
44
- env['CONTENT_TYPE'] = 'multipart/form-data'
49
+ describe "when binary data is POSTed" do
50
+ before do
51
+ env['CONTENT_TYPE'] = 'multipart/form-data'
52
+ end
53
+ it "leaves the body alone" do
54
+ env['rack.input'].rewind
55
+ expect(new_env['rack.input'].read).to eq "foo=%FFbar%F8"
56
+ end
57
+ end
45
58
  end
46
- it "leaves the body alone" do
47
- env['rack.input'].rewind
48
- new_env['rack.input'].read.should == "foo=%FFbar%F8"
59
+
60
+ describe "with a minimal env" do
61
+ let(:env) do
62
+ {
63
+ 'PATH_INFO' => '/this/is/safe',
64
+ 'QUERY_STRING' => 'foo=bar%FF'
65
+ }
66
+ end
67
+
68
+ it "only runs URIString cleaning on potentially unclean strings" do
69
+ expect(URIString).to receive(:new).once.and_call_original
70
+ new_env
71
+ end
72
+
73
+ it "leaves clean values alone" do
74
+ expect(new_env['PATH_INFO']).to eq('/this/is/safe')
75
+ end
49
76
  end
50
77
  end
51
- end
78
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,12 +1,8 @@
1
1
  require 'rubygems'
2
- require 'rspec/autorun'
3
-
4
2
  require 'utf8-cleaner'
5
3
  require 'uri'
6
4
 
7
5
  RSpec.configure do |config|
8
- config.treat_symbols_as_metadata_keys_with_true_values = true
9
-
10
6
  # Run specs in random order to surface order dependencies. If you find an
11
7
  # order dependency and want to debug it, you can fix the order by providing
12
8
  # the seed, which is printed after each run.
@@ -11,24 +11,24 @@ module UTF8Cleaner
11
11
  # foo/ bar. / baz& ; √ baz
12
12
 
13
13
  describe '#new' do
14
- it { encoded_string.should be_a URIString }
14
+ it { expect(encoded_string).to be_a(URIString) }
15
15
  end
16
16
 
17
17
  describe '#cleaned' do
18
- it { invalid_string.cleaned.should eq('') }
19
- it { ascii_string.cleaned.should eq('foo') }
20
- it { encoded_string.cleaned.should eq('%26') }
21
- it { multibyte_string.cleaned.should eq('%E2%9C%93') }
22
- it { complex_invalid_string.cleaned.should eq('foo/bar%2e%2fbaz%26%3B%E2%9C%93baz') }
18
+ it { expect(invalid_string.cleaned).to eq('') }
19
+ it { expect(ascii_string.cleaned).to eq('foo') }
20
+ it { expect(encoded_string.cleaned).to eq('%26') }
21
+ it { expect(multibyte_string.cleaned).to eq('%E2%9C%93') }
22
+ it { expect(complex_invalid_string.cleaned).to eq('foo/bar%2e%2fbaz%26%3B%E2%9C%93baz') }
23
23
  end
24
24
 
25
25
  describe '#valid?' do
26
- it { ascii_string.should be_valid }
27
- it { encoded_string.should be_valid }
28
- it { multibyte_string.should be_valid }
26
+ it { expect(ascii_string).to be_valid }
27
+ it { expect(encoded_string).to be_valid }
28
+ it { expect(multibyte_string).to be_valid }
29
29
 
30
- it { invalid_string.should_not be_valid }
31
- it { complex_invalid_string.should_not be_valid }
30
+ it { expect(invalid_string).to_not be_valid }
31
+ it { expect(complex_invalid_string).to_not be_valid }
32
32
  end
33
33
 
34
34
  end
data/utf8-cleaner.gemspec CHANGED
@@ -17,6 +17,8 @@ Gem::Specification.new do |gem|
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
19
 
20
+ gem.add_dependency 'activesupport'
21
+
20
22
  gem.add_development_dependency "rake"
21
23
  gem.add_development_dependency "guard"
22
24
  gem.add_development_dependency "guard-rspec"
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8-cleaner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leon Miller-Out
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-16 00:00:00.000000000 Z
11
+ date: 2015-09-19 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: rake
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -89,6 +103,7 @@ extra_rdoc_files: []
89
103
  files:
90
104
  - ".gitignore"
91
105
  - ".travis.yml"
106
+ - CHANGELOG.md
92
107
  - Gemfile
93
108
  - Guardfile
94
109
  - LICENSE.txt