utf8_converter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 731755377192fe051e2d39ac746b0fa5d072727f
4
+ data.tar.gz: a0e77a244a4f1ee80c7358b8f62fdf0be14a1822
5
+ SHA512:
6
+ metadata.gz: a18279072b7d850addfe33ed2ac6389c98261bfe6fbf7548ece399e23023f55bcbcae0f4401d51499efad80963e891a568677aa22c9a8314a2299c457b51d8db
7
+ data.tar.gz: f364e576ebb24eb8dfa187b17de34bba37cfc4314258201131b07885b3e97fe19ebd16183af6d0923bee132bd2869e782ec04961927e711c6e4a69acf4f1747e
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in utf8_converter.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017 newint33h
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # UTF8 Converter
2
+
3
+ A Ruby gem that attempts to convert texts from unknown encodings to UTF8.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'utf8_converter'
11
+ ```
12
+
13
+ And execute:
14
+
15
+ $ bundle install
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install utf8_converter
20
+
21
+ ## Usage
22
+
23
+ The normal usage for converting texts to UTF8 is the following:
24
+
25
+ ```
26
+ require 'utf8_converter'
27
+
28
+ unknown_text = "R\xE9sum\xE9" # ISO-8859-1 bytes, not valid UTF-8
29
+
30
+ # Make a copy in UTF8 encoding
31
+ puts unknown_text.to_utf8
32
+ # Résumé
33
+
34
+ # Convert the actual variable to UTF8
35
+ unknown_text.to_utf8!
36
+ puts unknown_text
37
+ # Résumé
38
+
39
+ ```
40
+
41
+ However, the previous code only converts text from the encodings listed in the
42
+ following variable (its default value is shown below):
43
+
44
+ ```
45
+ p UTF8Converter.common_encodings
46
+ # => [#<Encoding:ISO-8859-1 (autoload)>, #<Encoding:Windows-1252 (autoload)>]
47
+ ```
48
+
49
+ You can define the common encodings your application is expecting to receive:
50
+
51
+ ```
52
+ UTF8Converter.common_encodings = UTF8Converter::DEFAULT_COMMON_ENCODINGS
53
+ # or
54
+ UTF8Converter.common_encodings = [Encoding::ISO_8859_1]
55
+ ```
56
+
57
+ Text in any other encoding will have its invalid byte sequences replaced with
58
+ a default replacement character:
59
+
60
+ ```
61
+ UTF8Converter.common_encodings = []
62
+
63
+ puts "A\xF1o".to_utf8
64
+ # A?o
65
+
66
+ UTF8Converter.default_replace_character = ''
67
+ puts "A\xF1o".to_utf8
68
+ # Ao
69
+ ```
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
# Declares the :test task: 'test' and 'lib' are appended to the task's libs and
# every file matching test/**/*_test.rb is registered as a test file.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test', 'lib')
  test_task.test_files = FileList['test/**/*_test.rb']
end

# The default task depends on :test.
task default: :test
@@ -0,0 +1,52 @@
1
+ # encoding: utf-8
2
+ require 'utf8_converter/version'
3
+
4
# Keeps the process-wide conversion settings (the candidate source encodings and
# the replacement character) together with the helpers that convert a string to
# UTF-8 in place.
class UTF8Converter
  # Encodings tried, in this order, when a string's bytes are not valid UTF-8.
  # Frozen so that changes to the active list can never alter the defaults.
  DEFAULT_COMMON_ENCODINGS = [
    Encoding::ISO_8859_1,
    Encoding::Windows_1252
  ].freeze
  # Replacement used for invalid byte sequences when no candidate encoding fits.
  DEFAULT_REPLACE_CHARACTER = '?'.freeze

  class << self
    attr_reader :common_encodings
    attr_accessor :default_replace_character

    # Sets the candidate encodings. A private, unfrozen copy is stored so the
    # active list never aliases DEFAULT_COMMON_ENCODINGS (or any caller-owned
    # array). nil becomes an empty list and a single encoding becomes a
    # one-element list.
    def common_encodings=(encodings)
      @common_encodings = Array(encodings).dup
    end
  end

  # Start from a copy of the defaults; see common_encodings= for the reason.
  @common_encodings = DEFAULT_COMMON_ENCODINGS.dup
  @default_replace_character = DEFAULT_REPLACE_CHARACTER

  # Reinterprets the bytes of +string+ as +encoding+ and transcodes them to
  # UTF-8 in place.
  #
  # Returns true when the conversion succeeded. When it raises any
  # StandardError (for example an unknown encoding name or an unconvertible
  # byte), the encoding tag the string had on entry is restored and false is
  # returned.
  def self.try_convert_from_encoding_to_utf8!(string, encoding)
    original_encoding = string.encoding
    begin
      string.force_encoding(encoding).encode!(Encoding::UTF_8)
      true
    rescue StandardError
      string.force_encoding(original_encoding)
      false
    end
  end

  # Converts +string+ to UTF-8 in place and returns it.
  #
  # 1. If the bytes already form valid UTF-8, only the encoding tag changes.
  # 2. Otherwise each entry of common_encodings is tried in order and the first
  #    one that transcodes without error wins.
  # 3. Otherwise the invalid byte sequences are replaced with
  #    default_replace_character.
  def self.convert_to_utf8!(string)
    # From here on the string is tagged UTF-8, whatever its previous tag was.
    return string if string.force_encoding(Encoding::UTF_8).valid_encoding?

    @common_encodings.each do |encoding|
      return string if try_convert_from_encoding_to_utf8!(string, encoding)
    end
    string.encode!(Encoding::UTF_8, invalid: :replace, replace: @default_replace_character)
  end
end
41
+
42
# Reopens String so that the UTF8Converter conversion is available on strings
class String
  # Converts the receiver in place by handing it to
  # UTF8Converter.convert_to_utf8! and returns that call's result.
  def to_utf8!
    UTF8Converter.convert_to_utf8!(self)
  end

  # Non-destructive variant: duplicates the receiver and runs to_utf8! on the
  # duplicate, leaving the receiver itself untouched.
  def to_utf8
    copy = dup
    copy.to_utf8!
  end
end
@@ -0,0 +1,6 @@
1
+ # encoding: utf-8
2
+
3
# Reopens UTF8Converter to define the version of the gem
class UTF8Converter
  # Frozen so the version string cannot be mutated at runtime.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
# Put the gem's lib/ directory on the load path so the version file can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'utf8_converter/version'

Gem::Specification.new do |gem|
  gem.name    = 'utf8_converter'
  gem.version = UTF8Converter::VERSION
  gem.authors = ['Jorge Del Rio']
  gem.email   = ['jdelrios@gmail.com']

  gem.summary     = 'A gem to force all kind of text into UTF8 encoding'
  gem.description = 'This gem attempts to convert texts from unknown encodings to UTF8'
  gem.homepage    = 'https://github.com/newint33h/utf8_converter.git'
  gem.license     = 'MIT'

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir = 'exe'
  # Executables are the base names of the packaged files under exe/.
  gem.executables = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ['lib']

  {
    'bundler' => '~> 1.10',
    'rake' => '~> 10.0',
    'minitest' => '~> 5'
  }.each do |dependency, requirement|
    gem.add_development_dependency dependency, requirement
  end
end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: utf8_converter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jorge Del Rio
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-04-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '5'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '5'
55
+ description: This gem attempts to convert texts from unknown encodings to UTF8
56
+ email:
57
+ - jdelrios@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - Gemfile
63
+ - LICENSE
64
+ - README.md
65
+ - Rakefile
66
+ - lib/utf8_converter.rb
67
+ - lib/utf8_converter/version.rb
68
+ - utf8_converter.gemspec
69
+ homepage: https://github.com/newint33h/utf8_converter.git
70
+ licenses:
71
+ - MIT
72
+ metadata: {}
73
+ post_install_message:
74
+ rdoc_options: []
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ requirements: []
88
+ rubyforge_project:
89
+ rubygems_version: 2.5.1
90
+ signing_key:
91
+ specification_version: 4
92
+ summary: A gem to force all kind of text into UTF8 encoding
93
+ test_files: []