utf8-cleaner 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in utf8-cleaner.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Leon Miller-Out
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # UTF8Cleaner
2
+
3
+ Removes invalid UTF-8 characters from the environment so that your app doesn't choke
4
+ on them. This prevents errors like "invalid byte sequence in UTF-8".
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'utf8-cleaner'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install utf8-cleaner
19
+
20
+ If you're not running Rails, you'll have to add the middleware to your config.ru:
21
+
22
+ require 'utf8-cleaner'
23
+ use UTF8Cleaner::Middleware
24
+
25
+ ## Usage
26
+
27
+ There's nothing to "use". It just works!
28
+
29
+ ## Contributing
30
+
31
+ 1. Fork it
32
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
33
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
34
+ 4. Push to the branch (`git push origin my-new-feature`)
35
+ 5. Create new Pull Request
36
+
37
+ ## Credits
38
+
39
+ Original middleware author: @phoet - https://gist.github.com/phoet/1336754
40
+
41
+ Ruby 1.9.3 compatibility: @pithyless - https://gist.github.com/pithyless/3639014
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,60 @@
module UTF8Cleaner
  # Rack middleware that removes invalid UTF-8 byte sequences from selected
  # request environment values before handing the env to the wrapped app.
  class Middleware

    # Env keys whose values are percent-encoded URI text. They are decoded,
    # cleaned, and then percent-encoded again.
    SANITIZE_ENV_KEYS = [
      "HTTP_REFERER",
      "PATH_INFO",
      "QUERY_STRING",
      "REQUEST_PATH",
      "REQUEST_URI",
    ].freeze

    # Env keys whose values are cleaned as-is (no percent decoding).
    RAW_ENV_KEYS = ["HTTP_COOKIE"].freeze

    # A single percent escape such as "%2F".
    PERCENT_ESCAPE = /%[a-fA-F\d]{2}/

    # Characters that get percent-encoded on the way back out: everything
    # outside the RFC 2396 reserved and unreserved sets. This is the same set
    # the legacy URI.encode used, so output stays identical to older releases.
    URI_UNSAFE = %r{[^\-_.!~*'()a-zA-Z\d;/?:@&=+$,\[\]]}

    # app - the next Rack application in the stack.
    def initialize(app)
      @app = app
    end

    # Cleans the env in place and passes it on to the wrapped application.
    # Returns whatever the wrapped application returns.
    def call(env)
      @app.call(sanitize_env(env))
    end

    private

    # Cleans the known keys of +env+ in place and returns +env+.
    # Missing keys and non-String values are left untouched.
    def sanitize_env(env)
      SANITIZE_ENV_KEYS.each do |key|
        value = env[key]
        next unless value.is_a?(String)
        env[key] = percent_encode(sanitize_string(percent_decode(value)))
      end
      RAW_ENV_KEYS.each do |key|
        value = env[key]
        next unless value
        fixed = sanitize_string(value)
        env[key] = fixed if fixed
      end
      env
    end

    # Returns a UTF-8 copy of +string+ with invalid byte sequences removed.
    # Anything that is not a String is returned unchanged.
    def sanitize_string(string)
      return string unless string.is_a? String

      # Try it as UTF-8 directly
      cleaned = string.dup.force_encoding(Encoding::UTF_8)
      cleaned.valid_encoding? ? cleaned : utf8clean(string)
    rescue EncodingError
      utf8clean(string)
    end

    # Forces +string+ to UTF-8, throwing out the invalid bits.
    def utf8clean(string)
      if string.respond_to?(:scrub)
        # String#scrub (Ruby 2.1+) drops invalid sequences in a single pass.
        string.dup.force_encoding(Encoding::UTF_8).scrub('')
      elsif RUBY_VERSION >= "1.9.3"
        # These converters don't exist in 1.9.2
        string.encode('UTF-16', 'UTF-8', :invalid => :replace, :replace => '').encode('UTF-8', 'UTF-16')
      else
        string.chars.select { |char| char.valid_encoding? }.join
      end
    end

    # Replaces every %XX escape in +string+ with the byte it stands for.
    # Works on a binary copy so that bytes which are not valid in the string's
    # declared encoding cannot make gsub raise. Returns a binary String.
    def percent_decode(string)
      bytes = string.dup.force_encoding(Encoding::BINARY)
      bytes.gsub(PERCENT_ESCAPE) { |escape| [escape[1, 2]].pack('H2') }
    end

    # Percent-encodes, byte by byte, every character of +string+ matched by
    # URI_UNSAFE. Expects validly encoded input; returns a US-ASCII String.
    def percent_encode(string)
      encoded = string.gsub(URI_UNSAFE) do |char|
        char.each_byte.map { |byte| format('%%%02X', byte) }.join
      end
      encoded.force_encoding(Encoding::US_ASCII)
    end
  end
end
@@ -0,0 +1,7 @@
module UTF8Cleaner
  # Rails hook that puts UTF8Cleaner::Middleware at the very front of the
  # application's middleware stack.
  class Railtie < Rails::Railtie
    initializer "utf8-cleaner.insert_middleware" do |app|
      # Register the class itself rather than its name: String middleware
      # names were deprecated in Rails 5.0 and are rejected by later releases.
      app.config.middleware.insert_before 0, UTF8Cleaner::Middleware
    end
  end
end
@@ -0,0 +1,3 @@
module UTF8Cleaner
  # Gem version string; frozen so it cannot be mutated at runtime.
  VERSION = "0.0.4".freeze
end
@@ -0,0 +1,3 @@
1
+ require "utf8-cleaner/version"
2
+ require "utf8-cleaner/middleware"
3
+ require "utf8-cleaner/railtie" if defined? Rails
@@ -0,0 +1,33 @@
# -*- encoding: utf-8 -*-
require 'spec_helper'

# Exercises the private sanitize_env step directly, so no Rack app is needed.
describe 'UTF8Cleaner::Middleware' do
  let :env do
    {
      'PATH_INFO' => 'foo/bar%2e%2fbaz',
      'QUERY_STRING' => 'foo=bar%FF',
      'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
      'REQUEST_URI' => '%C3%89',
      'HTTP_COOKIE' => "session=abc\xFFdef"
    }
  end

  let :new_env do
    UTF8Cleaner::Middleware.new(nil).send(:sanitize_env, env)
  end

  it "removes invalid UTF-8 sequences" do
    expect(new_env['QUERY_STRING']).to eq('foo=bar')
  end

  it "turns valid %-escaped ASCII chars into their ASCII equivalents" do
    expect(new_env['PATH_INFO']).to eq('foo/bar./baz')
  end

  it "leaves valid %-escaped UTF-8 chars alone" do
    expect(new_env['REQUEST_URI']).to eq('%C3%89')
  end

  it "handles an awful URL" do
    expect(new_env['HTTP_REFERER']).to eq('http://example.com/blog+Result:+++++')
  end

  it "removes raw invalid bytes from the cookie header" do
    expect(new_env['HTTP_COOKIE']).to eq('session=abcdef')
  end
end
@@ -0,0 +1,15 @@
1
+ require 'rubygems'
2
+ require 'rspec/autorun'
3
+
4
+ require 'utf8-cleaner'
5
+
6
+ RSpec.configure do |config|
7
+ # Run specs in random order to surface order dependencies. If you find an
8
+ # order dependency and want to debug it, you can fix the order by providing
9
+ # the seed, which is printed after each run.
10
+ # --seed 1234
11
+ config.order = "random"
12
+
13
+ config.filter_run :focus => true
14
+ config.run_all_when_everything_filtered = true
15
+ end
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'utf8-cleaner/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "utf8-cleaner"
8
+ gem.version = UTF8Cleaner::VERSION
9
+ gem.authors = ["Leon Miller-Out"]
10
+ gem.email = ["leon@singlebrook.com"]
11
+ gem.description = %q{Removes invalid UTF8 characters from the URL and other env vars}
12
+ gem.summary = %q{Prevent annoying error reports of "invalid byte sequence in UTF-8"}
13
+ gem.homepage = "https://github.com/singlebrook/utf8-cleaner"
14
+
15
+ gem.files = `git ls-files`.split($/)
16
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
+ gem.require_paths = ["lib"]
19
+
20
+ gem.add_development_dependency "rspec"
21
+ end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: utf8-cleaner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.4
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Leon Miller-Out
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-23 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Removes invalid UTF8 characters from the URL and other env vars
31
+ email:
32
+ - leon@singlebrook.com
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - .gitignore
38
+ - Gemfile
39
+ - LICENSE.txt
40
+ - README.md
41
+ - Rakefile
42
+ - lib/utf8-cleaner.rb
43
+ - lib/utf8-cleaner/middleware.rb
44
+ - lib/utf8-cleaner/railtie.rb
45
+ - lib/utf8-cleaner/version.rb
46
+ - spec/middleware_spec.rb
47
+ - spec/spec_helper.rb
48
+ - utf8-cleaner.gemspec
49
+ homepage: https://github.com/singlebrook/utf8-cleaner
50
+ licenses: []
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 1.8.23
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Prevent annoying error reports of "invalid byte sequence in UTF-8"
73
+ test_files:
74
+ - spec/middleware_spec.rb
75
+ - spec/spec_helper.rb