language_detection 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +5 -5
- data/Rakefile +15 -2
- data/language_detection.gemspec +3 -4
- data/lib/language_detection.rb +8 -9
- data/lib/language_detection/language.rb +18 -0
- data/lib/language_detection/version.rb +1 -1
- data/test/language_detection_test.rb +10 -11
- metadata +8 -23
    
        data/README.md
    CHANGED
    
    | @@ -22,15 +22,15 @@ Or install it yourself as: | |
| 22 22 | 
             
            >> require 'language_detection'
         | 
| 23 23 | 
             
            => true
         | 
| 24 24 | 
             
            >> language = LanguageDetection.perform("This is some example text for language detection")
         | 
| 25 | 
            -
            =>  | 
| 25 | 
            +
            => #<LanguageDetection::Language:0x007fae0404f628 @name="english", @code="en", @reliable=true, @text_bytes=51, @details=[#<LanguageDetection::Language:0x007fae0404eb10 @name="english", @code="en", @details=[], @percent=100, @score=49.43273905996759>]>
         | 
| 26 26 | 
             
            >> language.name
         | 
| 27 | 
            -
            => " | 
| 27 | 
            +
            => "english"
         | 
| 28 28 | 
             
            >> language.code
         | 
| 29 29 | 
             
            => "en"
         | 
| 30 30 | 
             
            >> language.reliable
         | 
| 31 31 | 
             
            => true
         | 
| 32 32 | 
             
            >> language.details # contains up to 3 languages sorted by score
         | 
| 33 | 
            -
            => [ | 
| 33 | 
            +
            => [#<LanguageDetection::Language:0x007fae0404eb10 @name="english", @code="en", @details=[], @percent=100, @score=49.43273905996759>]
         | 
| 34 34 | 
             
            >> language.details.first.percent
         | 
| 35 35 | 
             
            => 100
         | 
| 36 36 | 
             
            >> language.details.first.score
         | 
| @@ -61,7 +61,7 @@ which provides `Article#language` method using `Article#to_s` method as paramete | |
| 61 61 | 
             
            ```ruby
         | 
| 62 62 | 
             
            >> article = Article.new :title => "Web development that doesn't hurt", :content => "Tens of thousands of Rails applications are already live..."
         | 
| 63 63 | 
             
            >> article.language
         | 
| 64 | 
            -
            =>  | 
| 64 | 
            +
            => #<LanguageDetection::Language:0x007fae049dd8e8 @name="english", @code="en", @reliable=true, @text_bytes=93, @details=[#<LanguageDetection::Language:0x007fae049dd118 @name="english", @code="en", @details=[], @percent=100, @score=80.22690437601297>]>
         | 
| 65 65 | 
             
            ```
         | 
| 66 66 |  | 
| 67 67 | 
             
            or you can add `String#language` method by `require 'language_detection/string'`
         | 
| @@ -72,7 +72,7 @@ or you can add `String#language` method by `require 'language_detection/string'` | |
| 72 72 | 
             
            >> require 'language_detection/string'
         | 
| 73 73 | 
             
            => true
         | 
| 74 74 | 
             
            >> "Web development that doesn't hurt".language
         | 
| 75 | 
            -
            =>  | 
| 75 | 
            +
            => #<LanguageDetection::Language:0x007fae049cfec8 @name="english", @code="en", @reliable=true, @text_bytes=36, @details=[#<LanguageDetection::Language:0x007fae049cf7e8 @name="english", @code="en", @details=[], @percent=100, @score=39.70826580226905>]>
         | 
| 76 76 | 
             
            ```
         | 
| 77 77 |  | 
| 78 78 |  | 
    
        data/Rakefile
    CHANGED
    
    | @@ -1,11 +1,24 @@ | |
| 1 1 | 
             
            require "bundler/gem_tasks"
         | 
| 2 | 
            +
            require 'rake/testtask'
         | 
| 2 3 |  | 
| 3 4 | 
             
            task :default => :test
         | 
| 4 5 |  | 
| 5 | 
            -
             | 
| 6 | 
            +
            desc "Compile extension"
         | 
| 7 | 
            +
            task :compile do
         | 
| 8 | 
            +
              path = File.expand_path("ext/cld/cld.so", File.dirname(__FILE__))
         | 
| 9 | 
            +
             | 
| 10 | 
            +
              if !File.exists?(path) || ENV['RECOMPILE']
         | 
| 11 | 
            +
                puts "Compiling extension..."
         | 
| 12 | 
            +
                `cd #{File.expand_path("ext/cld/")} && make`
         | 
| 13 | 
            +
              else
         | 
| 14 | 
            +
                puts "Extension already compiled. To recompile set env variable RECOMPILE=true."
         | 
| 15 | 
            +
              end
         | 
| 16 | 
            +
            end
         | 
| 17 | 
            +
             | 
| 6 18 | 
             
            Rake::TestTask.new(:test) do |test|
         | 
| 19 | 
            +
              Rake::Task["compile"].invoke
         | 
| 20 | 
            +
             | 
| 7 21 | 
             
              test.libs << 'lib' << 'test'
         | 
| 8 22 | 
             
              test.test_files = FileList['test/*_test.rb']
         | 
| 9 23 | 
             
              test.verbose = true
         | 
| 10 | 
            -
              # test.warning = true
         | 
| 11 24 | 
             
            end
         | 
    
        data/language_detection.gemspec
    CHANGED
    
    | @@ -8,9 +8,9 @@ Gem::Specification.new do |gem| | |
| 8 8 | 
             
              gem.version       = LanguageDetection::VERSION
         | 
| 9 9 | 
             
              gem.authors       = ["Vojtech Hyza"]
         | 
| 10 10 | 
             
              gem.email         = ["vhyza@vhyza.eu"]
         | 
| 11 | 
            -
              gem.description   = %q{Language  | 
| 12 | 
            -
              gem.summary       = %q{ | 
| 13 | 
            -
              gem.homepage      = ""
         | 
| 11 | 
            +
              gem.description   = %q{Ruby bindings for Chromium Compact Language Detector}
         | 
| 12 | 
            +
              gem.summary       = %q{Ruby bindings for Chromium Compact Language Detector}
         | 
| 13 | 
            +
              gem.homepage      = "https://github.com/vhyza/language_detection"
         | 
| 14 14 |  | 
| 15 15 | 
             
              gem.files         = `git ls-files`.split($/)
         | 
| 16 16 | 
             
              gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
         | 
| @@ -19,7 +19,6 @@ Gem::Specification.new do |gem| | |
| 19 19 | 
             
              gem.extensions    = ["ext/cld/extconf.rb"]
         | 
| 20 20 |  | 
| 21 21 | 
             
              gem.add_dependency "ffi"
         | 
| 22 | 
            -
              gem.add_dependency "hashr"
         | 
| 23 22 | 
             
              gem.add_dependency "rake"
         | 
| 24 23 |  | 
| 25 24 | 
             
              gem.add_development_dependency "shoulda"
         | 
    
        data/lib/language_detection.rb
    CHANGED
    
    | @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            require "language_detection/version"
         | 
| 2 | 
            +
            require "language_detection/language"
         | 
| 2 3 | 
             
            require "ffi"
         | 
| 3 | 
            -
            require "hashr"
         | 
| 4 4 |  | 
| 5 5 | 
             
            module LanguageDetection
         | 
| 6 6 |  | 
| @@ -10,12 +10,11 @@ module LanguageDetection | |
| 10 10 | 
             
                result             = language_detection(text.to_s, is_plain_text)
         | 
| 11 11 |  | 
| 12 12 | 
             
                language           = parse_result(result, result.members - [:details])
         | 
| 13 | 
            -
                language[:details] = []
         | 
| 14 13 |  | 
| 15 | 
            -
                details = FFI::Pointer.new(LanguageDetection:: | 
| 14 | 
            +
                details = FFI::Pointer.new(LanguageDetection::DetailStruct, result[:details])
         | 
| 16 15 | 
             
                3.times do |i|
         | 
| 17 | 
            -
                  detail = parse_result(LanguageDetection:: | 
| 18 | 
            -
                  language | 
| 16 | 
            +
                  detail = parse_result(LanguageDetection::DetailStruct.new(details[i]))
         | 
| 17 | 
            +
                  language.details << detail unless detail.code == 'un'
         | 
| 19 18 | 
             
                end
         | 
| 20 19 |  | 
| 21 20 | 
             
                language
         | 
| @@ -28,19 +27,19 @@ module LanguageDetection | |
| 28 27 | 
             
              private
         | 
| 29 28 |  | 
| 30 29 | 
             
              def self.parse_result(result, members = result.members)
         | 
| 31 | 
            -
                 | 
| 30 | 
            +
                Language.new(Hash[ members.map {|member| [member.to_sym, result[member]]} ])
         | 
| 32 31 | 
             
              end
         | 
| 33 32 |  | 
| 34 33 | 
             
              extend FFI::Library
         | 
| 35 34 |  | 
| 36 | 
            -
              class  | 
| 35 | 
            +
              class DetailStruct < FFI::Struct
         | 
| 37 36 | 
             
                layout :name,    :string,
         | 
| 38 37 | 
             
                       :code,    :string,
         | 
| 39 38 | 
             
                       :percent, :int,
         | 
| 40 39 | 
             
                       :score,   :double
         | 
| 41 40 | 
             
              end
         | 
| 42 41 |  | 
| 43 | 
            -
              class  | 
| 42 | 
            +
              class LanguageStruct < FFI::Struct
         | 
| 44 43 | 
             
                layout :name,       :string,
         | 
| 45 44 | 
             
                       :code,       :string,
         | 
| 46 45 | 
             
                       :reliable,   :bool,
         | 
| @@ -49,6 +48,6 @@ module LanguageDetection | |
| 49 48 | 
             
              end
         | 
| 50 49 |  | 
| 51 50 | 
             
              ffi_lib File.expand_path("../../ext/cld/cld.so", __FILE__)
         | 
| 52 | 
            -
              attach_function "language_detection","language_detection", [:buffer_in, :bool],  | 
| 51 | 
            +
              attach_function "language_detection","language_detection", [:buffer_in, :bool], LanguageStruct.by_value
         | 
| 53 52 |  | 
| 54 53 | 
             
            end
         | 
| @@ -0,0 +1,18 @@ | |
| 1 | 
            +
            module LanguageDetection
         | 
| 2 | 
            +
             | 
| 3 | 
            +
              class Language
         | 
| 4 | 
            +
             | 
| 5 | 
            +
                attr_accessor :name, :code, :reliable, :text_bytes, :details, :percent, :score
         | 
| 6 | 
            +
             | 
| 7 | 
            +
                def initialize(attributes = {})
         | 
| 8 | 
            +
                  attributes.each_pair do |attribute, value|
         | 
| 9 | 
            +
                    self.send("#{attribute}=", value)
         | 
| 10 | 
            +
                  end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                  @details ||= []
         | 
| 13 | 
            +
                  @name.downcase!
         | 
| 14 | 
            +
                end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
              end
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            end
         | 
| @@ -7,24 +7,23 @@ class LanguageDetectionTest < Test::Unit::TestCase | |
| 7 7 |  | 
| 8 8 | 
             
              context "Language detection" do
         | 
| 9 9 |  | 
| 10 | 
            -
                should "be able to convert result from native call to  | 
| 10 | 
            +
                should "be able to convert result from native call to Language instance" do
         | 
| 11 11 | 
             
                  result        = LanguageDetection.language_detection("this is some text", false)
         | 
| 12 12 | 
             
                  parsed_result = LanguageDetection.parse_result(result)
         | 
| 13 13 |  | 
| 14 | 
            -
                  assert_kind_of LanguageDetection:: | 
| 15 | 
            -
                  assert_kind_of  | 
| 14 | 
            +
                  assert_kind_of LanguageDetection::LanguageStruct, result
         | 
| 15 | 
            +
                  assert_kind_of LanguageDetection::Language, parsed_result
         | 
| 16 16 |  | 
| 17 | 
            -
                  assert_equal " | 
| 18 | 
            -
                  assert_nil   parsed_result.non_existing_property
         | 
| 17 | 
            +
                  assert_equal "english", parsed_result.name
         | 
| 19 18 | 
             
                end
         | 
| 20 19 |  | 
| 21 | 
            -
                should "convert details from FFI pointer to  | 
| 20 | 
            +
                should "convert details from FFI pointer to Language instance" do
         | 
| 22 21 | 
             
                  language = LanguageDetection.perform("this is some text")
         | 
| 23 22 |  | 
| 24 | 
            -
                  assert_kind_of Array, | 
| 25 | 
            -
                  assert_kind_of  | 
| 26 | 
            -
                  assert_equal " | 
| 27 | 
            -
                  assert_equal 65, | 
| 23 | 
            +
                  assert_kind_of Array,                       language.details
         | 
| 24 | 
            +
                  assert_kind_of LanguageDetection::Language, language.details.first
         | 
| 25 | 
            +
                  assert_equal "english",                     language.details.first.name
         | 
| 26 | 
            +
                  assert_equal 65,                            language.details.first.percent
         | 
| 28 27 | 
             
                end
         | 
| 29 28 |  | 
| 30 29 | 
             
                should "recognize languages in testing data" do
         | 
| @@ -67,7 +66,7 @@ class LanguageDetectionTest < Test::Unit::TestCase | |
| 67 66 |  | 
| 68 67 | 
             
                should "return detected language" do
         | 
| 69 68 | 
             
                  language = @article.language
         | 
| 70 | 
            -
                  assert_equal " | 
| 69 | 
            +
                  assert_equal "english", language.name
         | 
| 71 70 | 
             
                  assert_equal true,      language.reliable
         | 
| 72 71 | 
             
                  assert_equal 100,       language.details.first.percent
         | 
| 73 72 | 
             
                end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: language_detection
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0. | 
| 4 | 
            +
              version: 0.0.2
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2012-10- | 
| 12 | 
            +
            date: 2012-10-09 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: ffi
         | 
| @@ -27,22 +27,6 @@ dependencies: | |
| 27 27 | 
             
                - - ! '>='
         | 
| 28 28 | 
             
                  - !ruby/object:Gem::Version
         | 
| 29 29 | 
             
                    version: '0'
         | 
| 30 | 
            -
            - !ruby/object:Gem::Dependency
         | 
| 31 | 
            -
              name: hashr
         | 
| 32 | 
            -
              requirement: !ruby/object:Gem::Requirement
         | 
| 33 | 
            -
                none: false
         | 
| 34 | 
            -
                requirements:
         | 
| 35 | 
            -
                - - ! '>='
         | 
| 36 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 37 | 
            -
                    version: '0'
         | 
| 38 | 
            -
              type: :runtime
         | 
| 39 | 
            -
              prerelease: false
         | 
| 40 | 
            -
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 41 | 
            -
                none: false
         | 
| 42 | 
            -
                requirements:
         | 
| 43 | 
            -
                - - ! '>='
         | 
| 44 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 45 | 
            -
                    version: '0'
         | 
| 46 30 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 47 31 | 
             
              name: rake
         | 
| 48 32 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -107,7 +91,7 @@ dependencies: | |
| 107 91 | 
             
                - - ! '>='
         | 
| 108 92 | 
             
                  - !ruby/object:Gem::Version
         | 
| 109 93 | 
             
                    version: '0'
         | 
| 110 | 
            -
            description: Language  | 
| 94 | 
            +
            description: Ruby bindings for Chromium Compact Language Detector
         | 
| 111 95 | 
             
            email:
         | 
| 112 96 | 
             
            - vhyza@vhyza.eu
         | 
| 113 97 | 
             
            executables: []
         | 
| @@ -209,12 +193,13 @@ files: | |
| 209 193 | 
             
            - ext/cld/languages/public/languages.h
         | 
| 210 194 | 
             
            - language_detection.gemspec
         | 
| 211 195 | 
             
            - lib/language_detection.rb
         | 
| 196 | 
            +
            - lib/language_detection/language.rb
         | 
| 212 197 | 
             
            - lib/language_detection/string.rb
         | 
| 213 198 | 
             
            - lib/language_detection/version.rb
         | 
| 214 199 | 
             
            - test/_helper.rb
         | 
| 215 200 | 
             
            - test/fixtures/languages.csv
         | 
| 216 201 | 
             
            - test/language_detection_test.rb
         | 
| 217 | 
            -
            homepage:  | 
| 202 | 
            +
            homepage: https://github.com/vhyza/language_detection
         | 
| 218 203 | 
             
            licenses: []
         | 
| 219 204 | 
             
            post_install_message: 
         | 
| 220 205 | 
             
            rdoc_options: []
         | 
| @@ -228,7 +213,7 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 228 213 | 
             
                  version: '0'
         | 
| 229 214 | 
             
                  segments:
         | 
| 230 215 | 
             
                  - 0
         | 
| 231 | 
            -
                  hash:  | 
| 216 | 
            +
                  hash: 3522077385673025298
         | 
| 232 217 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 233 218 | 
             
              none: false
         | 
| 234 219 | 
             
              requirements:
         | 
| @@ -237,13 +222,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 237 222 | 
             
                  version: '0'
         | 
| 238 223 | 
             
                  segments:
         | 
| 239 224 | 
             
                  - 0
         | 
| 240 | 
            -
                  hash:  | 
| 225 | 
            +
                  hash: 3522077385673025298
         | 
| 241 226 | 
             
            requirements: []
         | 
| 242 227 | 
             
            rubyforge_project: 
         | 
| 243 228 | 
             
            rubygems_version: 1.8.24
         | 
| 244 229 | 
             
            signing_key: 
         | 
| 245 230 | 
             
            specification_version: 3
         | 
| 246 | 
            -
            summary:  | 
| 231 | 
            +
            summary: Ruby bindings for Chromium Compact Language Detector
         | 
| 247 232 | 
             
            test_files:
         | 
| 248 233 | 
             
            - test/_helper.rb
         | 
| 249 234 | 
             
            - test/fixtures/languages.csv
         |