RubyGems - uchardet - Versions diffs - 0.1.3 → 0.2.0 - Mend

uchardet 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

checksums.yaml +7 -0
data/.gitignore +3 -0
data/.travis.yml +5 -0
data/Gemfile +2 -0
data/Gemfile.lock +25 -0
data/LICENSE.txt +21 -0
data/README.md +60 -0
data/Rakefile +19 -18
data/bin/console +15 -0
data/bin/setup +8 -0
data/{bin → exe}/uchardet +0 -2
data/ext/extconf.rb +12 -0
data/ext/{uchardet/uchardet.c → uchardet.c} +55 -45
data/lib/uchardet.rb +2 -18
data/lib/uchardet/cli.rb +11 -7
data/lib/uchardet/version.rb +3 -0
data/uchardet.gemspec +27 -0
metadata +102 -72
data/History.txt +0 -11
data/Manifest.txt +0 -18
data/README.rdoc +0 -52
data/ext/uchardet/extconf.rb +0 -12
data/script/console +0 -10
data/script/destroy +0 -14
data/script/generate +0 -14
data/tasks/extconf.rake +0 -13
data/tasks/extconf/uchardet.rake +0 -43
data/test/test_helper.rb +0 -3
data/test/test_uchardet.rb +0 -22
data/test/test_uchardet_cli.rb +0 -14
data/test/test_uchardet_extn.rb +0 -101

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 3f88bdf62e92c58a707c9099d3b00128ba1e944d7ed242cc756401e4d6ccdecc
+  data.tar.gz: 3e7ad2051d31269fdffde17c3f247836c8c69e525b6da8c27b0e8b7c58ef9880
+SHA512:
+  metadata.gz: e0e698ab4a3eec93dc0cbde2aff53728fb98947266703f34ab29c93ede539b0ef32a4e32589d62b0bff319e485e6c2f7b4dad38d8299d98b97ab32167a4749fc
+  data.tar.gz: 4bd4efbb61bf4cad064d5c78b3b8d412790691bdfea58dd062c7e4168ccd37791b56a62803e9604efa03527b84e6bbdcf8607995ef2c9ea913397779636009ce

data/.gitignore ADDED

@@ -0,0 +1,3 @@
+*.so
+/pkg/
+/tmp/

data/.travis.yml ADDED

@@ -0,0 +1,5 @@
+sudo: false
+language: ruby
+rvm:
+  - 2.5.1
+before_install: gem install bundler -v 1.16.2

data/Gemfile ADDED

@@ -0,0 +1,2 @@
+source "https://rubygems.org"
+gemspec

data/Gemfile.lock ADDED

@@ -0,0 +1,25 @@
+PATH
+  remote: .
+  specs:
+    uchardet (0.2.0)
+GEM
+  remote: https://rubygems.org/
+  specs:
+    minitest (5.11.3)
+    rake (10.5.0)
+    rake-compiler (1.0.4)
+      rake
+PLATFORMS
+  ruby
+DEPENDENCIES
+  bundler (~> 1.16)
+  minitest (~> 5.0)
+  rake (~> 10.0)
+  rake-compiler (~> 1.0)
+  uchardet!
+BUNDLED WITH
+   1.16.2

data/LICENSE.txt ADDED

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2009-2018 Dmitri Goutnik
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/README.md ADDED

@@ -0,0 +1,60 @@
+# Uchardet
+Fast character set encoding detection using International Components for Unicode library: [International Components for Unicode](http://site.icu-project.org/)
+* https://rubygems.org/gems/uchardet
+* https://github.com/dmgk/uchardet
+* https://www.rubydoc.info/gems/uchardet/
+## Installation
+Add this line to your application's Gemfile:
+```ruby
+gem 'uchardet'
+```
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install uchardet
+## Usage
+```ruby
+  require 'open-uri'
+  require 'uchardet'
+  text = open('https://raw.githubusercontent.com/dmgk/uchardet/master/test/samples/shift_jis.txt').read
+  encoding = ICU::UCharsetDetector.detect(text)
+  encoding # => {:encoding=>"Shift_JIS", :confidence=>100, :language=>"ja"}
+```
+From command line:
+```
+  $ uchardet
+  Usage: uchardet [options] file
+      -l, --list                       Display list of detectable character sets.
+      -s, --strip                      Strip HTML or XML markup before detection.
+      -e, --encoding                   Hint the charset detector about possible encoding.
+      -a, --all                        Show all matching encodings.
+      -h, --help                       Show this help message.
+      -v, --version                    Show version.
+  $ uchardet `which uchardet`
+  ISO-8859-1 (confidence 25%)
+```
+## Contributing
+Bug reports and pull requests are welcome on GitHub at https://github.com/dmgk/uchardet
+## License
+The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).

data/Rakefile CHANGED

@@ -1,23 +1,24 @@
-require 'rubygems'
-gem 'hoe', '>= 2.1.0'
-require 'hoe'
-require 'fileutils'
-require './lib/uchardet'
+require 'bundler/gem_tasks'
+require 'rake/extensiontask'
+require 'rake/testtask'
-Hoe.plugin :newgem
+GEMSPEC = Gem::Specification.load("uchardet.gemspec")
-# Generate all the Rake tasks
-# Run 'rake -T' to see list of generated tasks (from gem root directory)
-$hoe = Hoe.spec 'uchardet' do
-  self.developer 'Dmitri Goutnik', 'dg@syrec.org'
-  self.readme_file = 'README.rdoc'
-  self.extra_rdoc_files = ['README.rdoc']
-  self.rubyforge_name = self.name
+# Rake::ExtensionTask.new(:uchardet_ext) do |t|
+#   t.lib_dir = 'lib/uchardet'
+# end
+Rake::ExtensionTask.new(:uchardet_ext, GEMSPEC) do |t|
+  t.ext_dir = 'ext'
+end
+Rake::TestTask.new(:test) do |t|
+  t.libs << 'test'
+  t.libs << 'lib'
+  t.test_files = FileList['test/**/*_test.rb']
 end
-require 'newgem/tasks'
-Dir['tasks/**/*.rake'].each { |t| load t }
+task build: :compile
+task test: :compile
-# TODO - want other tests/tasks run by default? Add them to the list
-# remove_task :default
-# task :default => [:spec, :features]
+task default: :test

data/bin/console ADDED

@@ -0,0 +1,15 @@
+#!/usr/bin/env ruby
+require "bundler/setup"
+require "uchardet"
+require 'open-uri'
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier. You can also use a different console, if you like.
+# (If you use this, don't forget to add pry to your Gemfile!)
+# require "pry"
+# Pry.start
+require "irb"
+IRB.start(__FILE__)

data/bin/setup ADDED

@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set -euo pipefail
+IFS=$'\n\t'
+set -vx
+bundle install
+# Do any other automated setup that you need to do here

data/{bin → exe}/uchardet RENAMED

@@ -1,8 +1,6 @@
 #!/usr/bin/env ruby
 require 'rubygems'
-require File.expand_path(
-  File.join(File.dirname(__FILE__), %w[.. lib uchardet]))
 require "uchardet/cli"
 Uchardet::CLI.execute(STDOUT, ARGV)

data/ext/extconf.rb ADDED

@@ -0,0 +1,12 @@
+require 'mkmf'
+icu_config = `which icu-config`.strip
+if icu_config.empty?
+  abort %q{Could not find ICU libraries and/or development tools. Try installing "icu-devtools" or "icu" package.}
+end
+$LIBS << ' ' + `#{icu_config} --ldflags-system`.chomp
+$LIBS << ' ' + `#{icu_config} --ldflags-libsonly`.chomp
+$LDFLAGS << ' ' + `#{icu_config} --ldflags-searchpath`.chomp
+create_makefile('uchardet_ext')

data/ext/{uchardet/uchardet.c → uchardet.c} RENAMED

@@ -56,7 +56,7 @@ UCharsetDetector_get_input_filtered(VALUE self)
  *
  * Enable filtering of input text. If filtering is enabled,
  * text within angle brackets ("<" and ">") will be removed
- * before detection, which will remove most HTML or xml markup.
+ * before detection, which will remove most HTML or XML markup.
  */
 static VALUE
 UCharsetDetector_set_input_filtered(VALUE self, VALUE flag)
@@ -110,7 +110,7 @@ UCharsetDetector_get_declared_encoding(VALUE self)
  *
  * Set the declared encoding for charset detection.
  * The declared encoding of an input text is an encoding obtained
- * by the user from an http header or xml declaration or similar source that
+ * by the user from an HTTP header or XML declaration or similar source that
  * can be provided as an additional hint to the charset detector.
  */
 static VALUE
@@ -123,12 +123,12 @@ static void
 set_text(VALUE self, VALUE text)
 {
     if (!NIL_P(text)) {
-        text = StringValue(text);
         UErrorCode status = U_ZERO_ERROR;
         UCharsetDetector *detector;
         Data_Get_Struct(self, UCharsetDetector, detector);
+        text = StringValue(text);
         ucsdet_setText(detector, StringValuePtr(text), RSTRING_LEN(text), &status);
         ensure(status);
@@ -140,12 +140,12 @@ static void
 set_declared_encoding(VALUE self, VALUE declared_encoding)
 {
     if (!NIL_P(declared_encoding)){
-        declared_encoding = StringValue(declared_encoding);
         UErrorCode status = U_ZERO_ERROR;
         UCharsetDetector *detector;
         Data_Get_Struct(self, UCharsetDetector, detector);
+        declared_encoding = StringValue(declared_encoding);
         ucsdet_setDeclaredEncoding(detector, StringValuePtr(declared_encoding), RSTRING_LEN(declared_encoding), &status);
         ensure(status);
@@ -183,7 +183,8 @@ UCharsetDetector_initialize(int argc, VALUE *argv, VALUE self)
  * call-seq:
  *   detect(text=nil, declared_encoding=nil)
  *
- * Return the charset that best matches the supplied input data.
+ * Return the charset that best matches the supplied input data. If no match
+ * could be found, this method returns nil.
  *
  * Note though, that because the detection
  * only looks at the start of the input data,
@@ -199,28 +200,32 @@ UCharsetDetector_detect(int argc, VALUE *argv, VALUE self)
 {
     VALUE text;
     VALUE declared_encoding;
+    UErrorCode status = U_ZERO_ERROR;
+    UCharsetDetector *detector;
+    const UCharsetMatch *match = NULL;
+    const char *encoding_name = "";
+    int32_t encoding_confidence = 0;
+    const char *encoding_language = "";
+    VALUE hash = rb_hash_new();
     rb_scan_args(argc, argv, "02", &text, &declared_encoding);
     set_text(self, text);
     set_declared_encoding(self, declared_encoding);
-    UErrorCode status = U_ZERO_ERROR;
-    UCharsetDetector *detector;
     Data_Get_Struct(self, UCharsetDetector, detector);
-    const UCharsetMatch *match = ucsdet_detect(detector, &status);
-    ensure(status);
-    const char *encoding_name = ucsdet_getName(match, &status);
-    ensure(status);
-    int32_t encoding_confidence = ucsdet_getConfidence(match, &status);
+    match = ucsdet_detect(detector, &status);
     ensure(status);
-    const char *encoding_language = ucsdet_getLanguage(match, &status);
-    ensure(status);
-    VALUE hash = rb_hash_new();
+    if (match) {
+        encoding_name = ucsdet_getName(match, &status);
+        ensure(status);
+        encoding_confidence = ucsdet_getConfidence(match, &status);
+        ensure(status);
+        encoding_language = ucsdet_getLanguage(match, &status);
+        ensure(status);
+    }
     rb_hash_aset(hash, ID2SYM(rb_intern("encoding")), rb_str_new2(encoding_name));
     rb_hash_aset(hash, ID2SYM(rb_intern("confidence")), INT2NUM(encoding_confidence));
     rb_hash_aset(hash, ID2SYM(rb_intern("language")), rb_str_new2(encoding_language));
@@ -249,37 +254,41 @@ UCharsetDetector_detect_all(int argc, VALUE *argv, VALUE self)
 {
     VALUE text;
     VALUE declared_encoding;
+    UCharsetDetector *detector;
+    UErrorCode status = U_ZERO_ERROR;
+    const UCharsetMatch **matches = NULL;
+    int32_t matches_found = 0;
+    VALUE ary = rb_ary_new();
+    int i;
     rb_scan_args(argc, argv, "02", &text, &declared_encoding);
     set_text(self, text);
     set_declared_encoding(self, declared_encoding);
-    UCharsetDetector *detector;
     Data_Get_Struct(self, UCharsetDetector, detector);
-    UErrorCode status = U_ZERO_ERROR;
-    int32_t matches_found;
-    const UCharsetMatch **matches = ucsdet_detectAll(detector, &matches_found, &status);
+    matches = ucsdet_detectAll(detector, &matches_found, &status);
     ensure(status);
-    VALUE ary = rb_ary_new();
-    int i = 0;
     for (i = 0; i < matches_found; i++) {
-        const char *encoding_name = ucsdet_getName(matches[i], &status);
-        ensure(status);
+        const char *encoding_name = "";
+        int32_t encoding_confidence = 0;
+        const char *encoding_language = "";
+        VALUE hash = rb_hash_new();
-        int32_t encoding_confidence = ucsdet_getConfidence(matches[i], &status);
-        ensure(status);
-        const char *encoding_language = ucsdet_getLanguage(matches[i], &status);
-        ensure(status);
+        if (matches[i]) {
+            encoding_name = ucsdet_getName(matches[i], &status);
+            ensure(status);
+            encoding_confidence = ucsdet_getConfidence(matches[i], &status);
+            ensure(status);
+            encoding_language = ucsdet_getLanguage(matches[i], &status);
+            ensure(status);
+        }
-        VALUE hash = rb_hash_new();
         rb_hash_aset(hash, ID2SYM(rb_intern("encoding")), rb_str_new2(encoding_name));
         rb_hash_aset(hash, ID2SYM(rb_intern("confidence")), INT2NUM(encoding_confidence));
         rb_hash_aset(hash, ID2SYM(rb_intern("language")), rb_str_new2(encoding_language));
         rb_ary_push(ary, hash);
     }
@@ -296,16 +305,17 @@ static VALUE
 UCharsetDetector_get_detectable_charsets(VALUE self)
 {
     UCharsetDetector *detector;
-    Data_Get_Struct(self, UCharsetDetector, detector);
     UErrorCode status = U_ZERO_ERROR;
+    UEnumeration *charsets = NULL;
+    const char *charset_name = "";
+    int32_t result_length = 0;
+    VALUE ary = rb_ary_new();
+    Data_Get_Struct(self, UCharsetDetector, detector);
-    UEnumeration *charsets = ucsdet_getAllDetectableCharsets(detector, &status);
+    charsets = ucsdet_getAllDetectableCharsets(detector, &status);
     ensure(status);
-    VALUE ary = rb_ary_new();
-    int32_t result_length;
-    const char *charset_name;
     while (charset_name = uenum_next(charsets, &result_length, &status)) {
         ensure(status);
         rb_ary_push(ary, rb_str_new2(charset_name));
@@ -318,7 +328,7 @@ UCharsetDetector_get_detectable_charsets(VALUE self)
 /*
 */
 void
-Init_uchardet()
+Init_uchardet_ext()
 {
     VALUE mICU = rb_define_module("ICU");

data/lib/uchardet.rb CHANGED

@@ -1,35 +1,19 @@
-$:.unshift(File.dirname(__FILE__)) unless
-  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
-module Uchardet
-  VERSION = '0.1.3'
-end
-begin
-  require 'uchardet.so'
-rescue LoadError
-  # uh-oh
-end
+require 'uchardet_ext'
+require 'uchardet/version'
 module ICU  # :main: README
   class UCharsetDetector  # :main: README
-    ##
     # Shortcut for ICU::UCharsetDetector#detect
-    #
     def self.detect(*args)
       self.new.detect(*args)
     end
-    ##
     # Shortcut for ICU::UCharsetDetector#detect_all
-    #
     def self.detect_all(*args)
       self.new.detect_all(*args)
     end
-    ##
     # Shortcut for ICU::UCharsetDetector#detectable_charsets
-    #
     def self.detectable_charsets
       self.new.detectable_charsets
     end

data/lib/uchardet/cli.rb CHANGED

@@ -1,18 +1,19 @@
 require 'optparse'
+require 'uchardet'
 module Uchardet
   class CLI
     def self.execute(stdout, args=[])
       @stdout = stdout
       @options = {
-        :input_filtered => false,
-        :declared_encoding => nil,
-        :detect_all => false,
-        :path => nil
+        input_filtered: false,
+        declared_encoding: nil,
+        detect_all: false,
+        path: nil
       }
-      parser = OptionParser.new do |opts|
-        opts.banner = <<-BANNER.gsub(/^\s*/,'')
+      OptionParser.new do |opts|
+        opts.banner = <<-BANNER.gsub(/^\s*/, '')
           Usage: #{File.basename($0)} [options] file
         BANNER
@@ -31,6 +32,9 @@ module Uchardet
         opts.on("-h", "--help",
                 "Show this help message."
                 ) { @stdout.puts opts; exit }
+        opts.on("-v", "--version",
+                "Show version."
+                ) { @stdout.puts Uchardet::VERSION; exit }
         if args.empty?
           @stdout.puts opts
@@ -54,7 +58,7 @@ module Uchardet
     end
     def self.list
-      ICU::UCharsetDetector.detectable_charsets.uniq.sort.each { |name| @stdout.puts name }
+      ICU::UCharsetDetector.detectable_charsets.uniq.sort.each {|name| @stdout.puts name}
     end
     def self.detect

data/lib/uchardet/version.rb ADDED

@@ -0,0 +1,3 @@
+module Uchardet
+  VERSION = "0.2.0"
+end

data/uchardet.gemspec ADDED

@@ -0,0 +1,27 @@
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'uchardet/version'
+Gem::Specification.new do |spec|
+  spec.name          = 'uchardet'
+  spec.version       = Uchardet::VERSION
+  spec.authors       = ['Dmitri Goutnik']
+  spec.email         = ['dg@syrec.org']
+  spec.summary       = 'Fast character set encoding detection using International Components for Unicode library.'
+  spec.homepage      = 'https://github.com/dmgk/uchardet'
+  spec.license       = 'MIT'
+  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
+    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+  end
+  spec.bindir        = 'exe'
+  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+  spec.require_paths = ['lib']
+  spec.extensions    = ['ext/extconf.rb']
+  spec.add_development_dependency 'bundler', '~> 1.16'
+  spec.add_development_dependency 'rake', '~> 10.0'
+  spec.add_development_dependency 'rake-compiler', '~> 1.0'
+  spec.add_development_dependency 'minitest', '~> 5.0'
+end

metadata CHANGED

@@ -1,89 +1,119 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: uchardet
-version: !ruby/object:Gem::Version
-  version: 0.1.3
+version: !ruby/object:Gem::Version
+  version: 0.2.0
 platform: ruby
-authors:
+authors:
 - Dmitri Goutnik
 autorequire:
-bindir: bin
+bindir: exe
 cert_chain: []
-date: 2009-12-20 00:00:00 +03:00
-default_executable:
-dependencies:
-- !ruby/object:Gem::Dependency
-  name: hoe
+date: 2018-05-29 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.16'
   type: :development
-  version_requirement:
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 2.4.0
-    version:
-description: Fast character set encoding detection using International Components for Unicode C++ library.
-email:
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.16'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+- !ruby/object:Gem::Dependency
+  name: rake-compiler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+- !ruby/object:Gem::Dependency
+  name: minitest
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.0'
+description:
+email:
 - dg@syrec.org
-executables:
+executables:
 - uchardet
-extensions:
-- ext/uchardet/extconf.rb
-extra_rdoc_files:
-- History.txt
-- Manifest.txt
-- README.rdoc
-files:
-- History.txt
-- Manifest.txt
-- README.rdoc
+extensions:
+- ext/extconf.rb
+extra_rdoc_files: []
+files:
+- ".gitignore"
+- ".travis.yml"
+- Gemfile
+- Gemfile.lock
+- LICENSE.txt
+- README.md
 - Rakefile
-- bin/uchardet
-- ext/uchardet/extconf.rb
-- ext/uchardet/uchardet.c
+- bin/console
+- bin/setup
+- exe/uchardet
+- ext/extconf.rb
+- ext/uchardet.c
 - lib/uchardet.rb
 - lib/uchardet/cli.rb
-- script/console
-- script/destroy
-- script/generate
-- tasks/extconf.rake
-- tasks/extconf/uchardet.rake
-- test/test_helper.rb
-- test/test_uchardet.rb
-- test/test_uchardet_cli.rb
-- test/test_uchardet_extn.rb
-has_rdoc: true
-homepage: http://rubyforge.org/projects/uchardet/
-licenses: []
+- lib/uchardet/version.rb
+- uchardet.gemspec
+homepage: https://github.com/dmgk/uchardet
+licenses:
+- MIT
+metadata: {}
 post_install_message:
-rdoc_options:
-- --main
-- README.rdoc
-require_paths:
+rdoc_options: []
+require_paths:
 - lib
-- ext/uchardet
-required_ruby_version: !ruby/object:Gem::Requirement
-  requirements:
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
   - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
-  version:
-required_rubygems_version: !ruby/object:Gem::Requirement
-  requirements:
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
   - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
-  version:
+    - !ruby/object:Gem::Version
+      version: '0'
 requirements: []
-rubyforge_project: uchardet
-rubygems_version: 1.3.5
+rubyforge_project:
+rubygems_version: 2.7.7
 signing_key:
-specification_version: 3
-summary: Fast character set encoding detection using International Components for Unicode C++ library.
-test_files:
-- test/test_helper.rb
-- test/test_uchardet.rb
-- test/test_uchardet_cli.rb
-- test/test_uchardet_extn.rb
+specification_version: 4
+summary: Fast character set encoding detection using International Components for
+  Unicode library.
+test_files: []

data/History.txt DELETED

@@ -1,11 +0,0 @@
-=== 0.1.1 2009-12-19
-* Initial release
-=== 0.1.2 2009-12-20
-* Documentation and code cleanup.
-=== 0.1.3 2009-12-20
-* extconf.rb fixes for Debian/Ubuntu builds

data/Manifest.txt DELETED

@@ -1,18 +0,0 @@
-History.txt
-Manifest.txt
-README.rdoc
-Rakefile
-bin/uchardet
-ext/uchardet/extconf.rb
-ext/uchardet/uchardet.c
-lib/uchardet.rb
-lib/uchardet/cli.rb
-script/console
-script/destroy
-script/generate
-tasks/extconf.rake
-tasks/extconf/uchardet.rake
-test/test_helper.rb
-test/test_uchardet.rb
-test/test_uchardet_cli.rb
-test/test_uchardet_extn.rb

data/README.rdoc DELETED

@@ -1,52 +0,0 @@
-= uchardet
-* http://rubyforge.org/projects/uchardet/
-* http://github.com/invisiblellama/uchardet
-* http://uchardet.rubyforge.org/rdoc/
-== DESCRIPTION:
-Fast character set encoding detection using International Components for Unicode C++ library.
-== SYNOPSIS:
-  require 'open-uri'
-  require 'uchardet'
-  encoding = ICU::UCharsetDetector.detect open('http://google.jp').read
-  encoding # => {:language=>"ja", :encoding=>"Shift_JIS", :confidence=>100}
-From command line:
-  $ uchardet
-  Usage: uchardet [options] file
-      -l, --list                       Display list of detectable character sets.
-      -s, --strip                      Strip HTML or XML markup before detection.
-      -e, --encoding                   Hint the charset detector about possible encoding.
-      -a, --all                        Show all matching encodings.
-      -h, --help                       Show this help message.
-  $ uchardet `which uchardet`
-  ISO-8859-1 (confidence 60%)
-== REQUIREMENTS:
-ICU[http://site.icu-project.org/] (International Components for Unicode):
-on Mac OS X:
-  sudo port install icu
-on Debian/Ubuntu
-  sudo apt-get install libicu-dev
-== INSTALL:
-  sudo gem install uchardet
-== LICENSE:
-Copyright (c) 2009 Dmitri Goutnik, released under the MIT license.

data/ext/uchardet/extconf.rb DELETED

@@ -1,12 +0,0 @@
-require 'mkmf'
-icu_config = `which icu-config`.strip
-if icu_config.empty?
-  abort "ICU seems to be missing. Try 'port install icu' or 'apt-get install libicu-dev'"
-end
-$LIBS << ' ' + `#{icu_config} --ldflags-system`.strip
-$LIBS << ' ' + `#{icu_config} --ldflags-libsonly`.strip
-$LDFLAGS <<  ' ' + `#{icu_config} --ldflags-searchpath`.strip
-create_makefile("uchardet")

data/script/console DELETED

@@ -1,10 +0,0 @@
-#!/usr/bin/env ruby
-# File: script/console
-irb = RUBY_PLATFORM =~ /(:?mswin|mingw)/ ? 'irb.bat' : 'irb'
-libs =  " -r irb/completion"
-# Perhaps use a console_lib to store any extra methods I may want available in the cosole
-# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
-libs <<  " -r #{File.dirname(__FILE__) + '/../lib/chardet-icu.rb'}"
-puts "Loading chardet-icu gem"
-exec "#{irb} #{libs} --simple-prompt"

data/script/destroy DELETED

@@ -1,14 +0,0 @@
-#!/usr/bin/env ruby
-APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
-begin
-  require 'rubigen'
-rescue LoadError
-  require 'rubygems'
-  require 'rubigen'
-end
-require 'rubigen/scripts/destroy'
-ARGV.shift if ['--help', '-h'].include?(ARGV[0])
-RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
-RubiGen::Scripts::Destroy.new.run(ARGV)

data/script/generate DELETED

@@ -1,14 +0,0 @@
-#!/usr/bin/env ruby
-APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
-begin
-  require 'rubigen'
-rescue LoadError
-  require 'rubygems'
-  require 'rubigen'
-end
-require 'rubigen/scripts/generate'
-ARGV.shift if ['--help', '-h'].include?(ARGV[0])
-RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
-RubiGen::Scripts::Generate.new.run(ARGV)

data/tasks/extconf.rake DELETED

@@ -1,13 +0,0 @@
-namespace :extconf do
-  desc "Compiles the Ruby extension"
-  task :compile
-end
-task :compile => "extconf:compile"
-task :test => :compile
-BIN = "*.{o,bundle,jar,so,obj,pdb,lib,def,exp}"
-$hoe.clean_globs |= ["ext/**/#{BIN}", "lib/**/#{BIN}", 'ext/**/Makefile']
-$hoe.spec.require_paths = Dir['{lib,ext/*}']
-$hoe.spec.extensions = FileList["ext/**/extconf.rb"].to_a

data/tasks/extconf/uchardet.rake DELETED

@@ -1,43 +0,0 @@
-namespace :extconf do
-  extension = File.basename(__FILE__, '.rake')
-  ext = "ext/#{extension}"
-  ext_so = "#{ext}/#{extension}.#{Config::CONFIG['DLEXT']}"
-  ext_files = FileList[
-    "#{ext}/*.c",
-    "#{ext}/*.h",
-    "#{ext}/*.rl",
-    "#{ext}/extconf.rb",
-    "#{ext}/Makefile",
-    # "lib"
-  ]
-  task :compile => extension do
-    if Dir.glob("**/#{extension}.{o,so,dll}").length == 0
-      STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
-      STDERR.puts "Gem actually failed to build.  Your system is"
-      STDERR.puts "NOT configured properly to build #{GEM_NAME}."
-      STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
-      exit(1)
-    end
-  end
-  desc "Builds just the #{extension} extension"
-  task extension.to_sym => ["#{ext}/Makefile", ext_so ]
-  file "#{ext}/Makefile" => ["#{ext}/extconf.rb"] do
-    Dir.chdir(ext) do ruby "extconf.rb" end
-  end
-  file ext_so => ext_files do
-    Dir.chdir(ext) do
-      sh(RUBY_PLATFORM =~ /win32/ ? 'nmake' : 'make') do |ok, res|
-        if !ok
-          require "fileutils"
-          FileUtils.rm Dir.glob('*.{so,o,dll,bundle}')
-        end
-      end
-    end
-  end
-end

data/test/test_helper.rb DELETED

@@ -1,3 +0,0 @@
-require 'stringio'
-require 'test/unit'
-require File.dirname(__FILE__) + '/../lib/uchardet'

data/test/test_uchardet.rb DELETED

@@ -1,22 +0,0 @@
-# encoding: utf-8
-require File.dirname(__FILE__) + '/test_helper.rb'
-class TestUchardet < Test::Unit::TestCase   # :nodoc:
-  def test_detect
-    detector = ICU::UCharsetDetector.new
-    assert_equal(detector.detect(''), ICU::UCharsetDetector.detect(''))
-  end
-  def test_detect_all
-    detector = ICU::UCharsetDetector.new
-    assert_equal(detector.detect_all('∑'), ICU::UCharsetDetector.detect_all('∑'))
-  end
-  def test_detectable_charsets
-    detector = ICU::UCharsetDetector.new
-    assert_equal(detector.detectable_charsets, ICU::UCharsetDetector.detectable_charsets)
-  end
-end

data/test/test_uchardet_cli.rb DELETED

@@ -1,14 +0,0 @@
-require File.join(File.dirname(__FILE__), "test_helper.rb")
-require 'uchardet/cli'
-class TestUchardetCli < Test::Unit::TestCase
-  def setup
-    Uchardet::CLI.execute(@stdout_io = StringIO.new, [])
-    @stdout_io.rewind
-    @stdout = @stdout_io.read
-  end
-  def test_print_default_output
-    assert_match(/Usage: .* \[options\] file/, @stdout)
-  end
-end

data/test/test_uchardet_extn.rb DELETED

@@ -1,101 +0,0 @@
-# encoding: utf-8
-require "test/unit"
-$:.unshift File.dirname(__FILE__) + "/../ext/uchardet"
-require "uchardet.so"
-class TestUchardetExtn < Test::Unit::TestCase   # :nodoc:
-  def test_init
-    assert_not_nil(ICU::UCharsetDetector)
-    assert_nothing_raised do
-      detector = ICU::UCharsetDetector.new
-      assert_not_nil(detector)
-      detector = ICU::UCharsetDetector.new nil
-      assert_not_nil(detector)
-      detector = ICU::UCharsetDetector.new 'some text'
-      assert_not_nil(detector)
-    end
-    assert_raise(TypeError) do
-      detector = ICU::UCharsetDetector.new 0
-    end
-    assert_raise(TypeError) do
-      detector = ICU::UCharsetDetector.new Time.now
-    end
-  end
-  def test_detect
-    detector = ICU::UCharsetDetector.new
-    assert_raise(ICU::Error) do
-      detector.detect
-    end
-    e = detector.detect '∂∆∂∆∂∆'
-    assert(e.is_a? Hash)
-    assert(e.has_key? :encoding)
-    assert(e.has_key? :confidence)
-    assert(e.has_key? :language)
-    assert_equal('utf-8', e[:encoding].downcase)
-    e = detector.detect '··', 'utf-8'
-    assert_equal('utf-8', e[:encoding].downcase)
-    e = detector.detect '··', 'Shift_JIS'
-    assert_equal('utf-8', e[:encoding].downcase)
-  end
-  def test_detect_all
-    detector = ICU::UCharsetDetector.new
-    assert_raise(ICU::Error) do
-      detector.detect_all
-    end
-    a = detector.detect_all '€‹€‹€'
-    assert(a.is_a? Array)
-    assert_equal(false, a.empty?)
-    assert(a[0].is_a? Hash)
-    assert(a[0].has_key? :encoding)
-    assert(a[0].has_key? :confidence)
-    assert(a[0].has_key? :language)
-  end
-  def test_input_filtered_accessor
-    detector = ICU::UCharsetDetector.new
-    assert_equal(false, detector.input_filtered?)
-    detector.input_filtered = true
-    assert_equal(true, detector.input_filtered?)
-    detector.input_filtered = ''
-    assert_equal(true, detector.input_filtered?)
-    detector.input_filtered = nil
-    assert_equal(false, detector.input_filtered?)
-  end
-  def test_text_accessor
-    detector = ICU::UCharsetDetector.new
-    assert_equal(nil, detector.text)
-    detector = ICU::UCharsetDetector.new 'blah'
-    assert_equal('blah', detector.text)
-    detector.text = 'test'
-    assert_equal('test', detector.text)
-    detector.detect
-    assert_equal('test', detector.text)
-  end
-  def test_declared_encoding_accessor
-    detector = ICU::UCharsetDetector.new
-    assert_equal(nil, detector.declared_encoding)
-    detector.declared_encoding = 'iso-8859-15'
-    assert_equal('iso-8859-15', detector.declared_encoding)
-    detector.detect 'test'
-    assert_equal('iso-8859-15', detector.declared_encoding)
-  end
-  def test_detectable_charsets
-    detector = ICU::UCharsetDetector.new
-    assert_not_nil(detector.detectable_charsets)
-    assert(detector.detectable_charsets.is_a? Array)
-  end
-end