utf8-cleaner 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in utf8-cleaner.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Leon Miller-Out
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # UTF8Cleaner
2
+
3
+ Removes invalid UTF-8 characters from the environment so that your app doesn't choke
4
+ on them. This prevents errors like "invalid byte sequence in UTF-8".
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'utf8-cleaner'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install utf8-cleaner
19
+
20
+ If you're not running Rails, you'll have to add the middleware to your config.ru:
21
+
22
+ require 'utf8-cleaner'
23
+ use UTF8Cleaner::Middleware
24
+
25
+ ## Usage
26
+
27
+ There's nothing to "use". It just works!
28
+
29
+ ## Contributing
30
+
31
+ 1. Fork it
32
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
33
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
34
+ 4. Push to the branch (`git push origin my-new-feature`)
35
+ 5. Create new Pull Request
36
+
37
+ ## Credits
38
+
39
+ Original middleware author: @phoet - https://gist.github.com/phoet/1336754
40
+
41
+ Ruby 1.9.3 compatibility: @pithyless - https://gist.github.com/pithyless/3639014
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,60 @@
1
require 'uri'

module UTF8Cleaner
  # Rack middleware that removes invalid UTF-8 byte sequences from selected
  # request environment values before handing the request to the wrapped app.
  class Middleware

    # Env keys that carry %-escaped URI data. Their values are unescaped,
    # cleaned, and then escaped again.
    SANITIZE_ENV_KEYS = [
      "HTTP_REFERER",
      "PATH_INFO",
      "QUERY_STRING",
      "REQUEST_PATH",
      "REQUEST_URI",
    ]

    # Parser providing the escape/unescape behaviour of the old
    # URI.encode / URI.decode helpers. Those module methods were obsolete and
    # have been removed in Ruby 3.0, so the parser instance is used directly.
    # Older Rubies without URI::RFC2396_Parser fall back to the default parser.
    URI_PARSER = defined?(URI::RFC2396_Parser) ? URI::RFC2396_Parser.new : URI::DEFAULT_PARSER

    # @param app [#call] the next Rack application in the stack
    def initialize(app)
      @app = app
    end

    # Cleans the env (in place) and forwards it to the wrapped application.
    #
    # @param env [Hash] the Rack environment
    # @return whatever the wrapped application returns
    def call(env)
      @app.call(sanitize_env(env))
    end

    private

    # Sanitizes the URI-style keys and the cookie header of +env+.
    #
    # @param env [Hash] the Rack environment; modified in place
    # @return [Hash] the same +env+ object
    def sanitize_env(env)
      SANITIZE_ENV_KEYS.each do |key|
        value = env[key]
        next unless value

        # Unescape a binary-tagged copy: running the unescape regexp over a
        # string that is tagged UTF-8 but already holds raw invalid bytes
        # would raise ArgumentError before we get a chance to clean it.
        decoded = URI_PARSER.unescape(value.dup.force_encoding('BINARY'))
        env[key] = URI_PARSER.escape(sanitize_string(decoded))
      end
      # Cookies are not %-unescaped; only raw invalid bytes are dropped.
      ["HTTP_COOKIE"].each do |key|
        value = env[key]
        next unless value

        fixed = sanitize_string(value)
        env[key] = fixed if fixed
      end
      env
    end

    # Returns +string+ as valid UTF-8, dropping bytes that are not.
    # Non-String arguments are returned untouched.
    #
    # @param string [Object]
    # @return [String, Object]
    def sanitize_string(string)
      return string unless string.is_a? String

      # Try it as UTF-8 directly
      cleaned = string.dup.force_encoding('UTF-8')
      if cleaned.valid_encoding?
        cleaned
      else
        utf8clean(string)
      end
    rescue EncodingError
      utf8clean(string)
    end

    # Forces +string+ to UTF-8, throwing out invalid bits.
    #
    # @param string [String]
    # @return [String]
    def utf8clean(string)
      if RUBY_VERSION >= "1.9.3"
        # These converters don't exist in 1.9.2. Round-tripping through
        # UTF-16 makes the transcoder actually inspect (and drop) bad bytes.
        string.encode('UTF-16', 'UTF-8', :invalid => :replace, :replace => '').encode('UTF-8', 'UTF-16')
      else
        string.chars.select{|i| i.valid_encoding?}.join
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module UTF8Cleaner
  # Railtie that registers UTF8Cleaner::Middleware at index 0 of the Rails
  # middleware stack, i.e. ahead of every other middleware.
  class Railtie < Rails::Railtie
    initializer "utf8-cleaner.insert_middleware" do |app|
      # Pass the class itself rather than its name: newer Rails versions no
      # longer accept String middleware names, while the constant works on
      # old and new versions alike.
      app.config.middleware.insert_before 0, UTF8Cleaner::Middleware
    end
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the utf8-cleaner gem.
module UTF8Cleaner
  # Release number of this gem; read by the gemspec.
  VERSION = '0.0.4'
end
@@ -0,0 +1,3 @@
1
+ require "utf8-cleaner/version"
2
+ require "utf8-cleaner/middleware"
3
+ require "utf8-cleaner/railtie" if defined? Rails
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Drives the private sanitize_env method directly with a hand-built env hash
# and checks the cleaned value of each key.
describe 'UTF8Cleaner::Middleware' do
  let(:env) do
    {
      'PATH_INFO'    => 'foo/bar%2e%2fbaz',
      'QUERY_STRING' => 'foo=bar%FF',
      'HTTP_REFERER' => 'http://example.com/blog+Result:+%ED%E5+%ED%E0%F8%EB%EE%F1%FC+%F4%EE%F0%EC%FB+%E4%EB%FF+%EE%F2%EF%F0%E0%E2%EA%E8',
      'REQUEST_URI'  => '%C3%89'
    }
  end

  # No downstream app is needed because only sanitize_env is exercised.
  let(:new_env) { UTF8Cleaner::Middleware.new(nil).send(:sanitize_env, env) }

  it "removes invalid UTF-8 sequences" do
    new_env['QUERY_STRING'].should == 'foo=bar'
  end

  it "turns valid %-escaped ASCII chars into their ASCII equivalents" do
    new_env['PATH_INFO'].should == 'foo/bar./baz'
  end

  it "leaves valid %-escaped UTF-8 chars alone" do
    new_env['REQUEST_URI'].should == '%C3%89'
  end

  it "handles an awful URL" do
    new_env['HTTP_REFERER'].should == 'http://example.com/blog+Result:+++++'
  end
end
@@ -0,0 +1,15 @@
1
+ require 'rubygems'
2
+ require 'rspec/autorun'
3
+
4
+ require 'utf8-cleaner'
5
+
6
RSpec.configure do |c|
  # Randomize spec order so hidden order dependencies show up. To reproduce
  # a particular ordering while debugging, pass the seed printed at the end
  # of the run:
  #     --seed 1234
  c.order = "random"

  # Run only examples tagged :focus, or everything when none are tagged.
  c.filter_run(:focus => true)
  c.run_all_when_everything_filtered = true
end
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
# Put the gem's own lib/ directory on the load path so the version file can
# be required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'utf8-cleaner/version'

Gem::Specification.new do |gem|
  gem.name          = "utf8-cleaner"
  gem.version       = UTF8Cleaner::VERSION
  gem.authors       = ["Leon Miller-Out"]
  gem.email         = ["leon@singlebrook.com"]
  gem.description   = %q{Removes invalid UTF8 characters from the URL and other env vars}
  gem.summary       = %q{Prevent annoying error reports of "invalid byte sequence in UTF-8"}
  gem.homepage      = "https://github.com/singlebrook/utf8-cleaner"
  # Declare the license shipped in LICENSE.txt so it shows up in the
  # published gem metadata instead of an empty license list.
  gem.license       = "MIT"

  # Package every file tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "rspec"
end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: utf8-cleaner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.4
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Leon Miller-Out
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-23 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Removes invalid UTF8 characters from the URL and other env vars
31
+ email:
32
+ - leon@singlebrook.com
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - .gitignore
38
+ - Gemfile
39
+ - LICENSE.txt
40
+ - README.md
41
+ - Rakefile
42
+ - lib/utf8-cleaner.rb
43
+ - lib/utf8-cleaner/middleware.rb
44
+ - lib/utf8-cleaner/railtie.rb
45
+ - lib/utf8-cleaner/version.rb
46
+ - spec/middleware_spec.rb
47
+ - spec/spec_helper.rb
48
+ - utf8-cleaner.gemspec
49
+ homepage: https://github.com/singlebrook/utf8-cleaner
50
+ licenses: []
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 1.8.23
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Prevent annoying error reports of "invalid byte sequence in UTF-8"
73
+ test_files:
74
+ - spec/middleware_spec.rb
75
+ - spec/spec_helper.rb