language_detection 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 4458c90ef41ad87f046ea204849c94f68b7c770f1ff210a53135e70104b10b14
4
+ data.tar.gz: 931f7cbbae50ee8d4e2aa119c41a610ee222c6fc50722e7ec2637517b1b891d4
5
+ SHA512:
6
+ metadata.gz: e0d50b6ee77faf6c3c7b29254f9d3c80e822652530ba3b1f5d44ca944f9d05167841929e03e3f62208d67db43a8542432f42ce0931f37dd5140fdbb154be01c5
7
+ data.tar.gz: 82dee57902451034039a8445ada9e712319d12c75272e73da90ed04ddcc1c22f491e1be557f13032169f0e4dd3b647999c3b4a8e081bdff1aa5412dcb9fbc69a
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
+ .DS_Store
1
2
  *.gem
2
3
  *.rbc
3
4
  .bundle
@@ -17,3 +18,4 @@ test/version_tmp
17
18
  tmp
18
19
  ext/cld/*.o
19
20
  ext/cld/*.a
21
+ ext/cld/*.so
data/README.md CHANGED
@@ -22,15 +22,15 @@ Or install it yourself as:
22
22
  >> require 'language_detection'
23
23
  => true
24
24
  >> language = LanguageDetection.perform("This is some example text for language detection")
25
- => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>51, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>49.43273905996759}]}
25
+ => #<LanguageDetection::Language:0x007fae0404f628 @name="english", @code="en", @reliable=true, @text_bytes=51, @details=[#<LanguageDetection::Language:0x007fae0404eb10 @name="english", @code="en", @details=[], @percent=100, @score=49.43273905996759>]>
26
26
  >> language.name
27
- => "ENGLISH"
27
+ => "english"
28
28
  >> language.code
29
29
  => "en"
30
30
  >> language.reliable
31
31
  => true
32
32
  >> language.details # contains up to 3 languages sorted by score
33
- => [{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>49.43273905996759}]
33
+ => [#<LanguageDetection::Language:0x007fae0404eb10 @name="english", @code="en", @details=[], @percent=100, @score=49.43273905996759>]
34
34
  >> language.details.first.percent
35
35
  => 100
36
36
  >> language.details.first.score
@@ -61,7 +61,7 @@ which provides `Article#language` method using `Article#to_s` method as paramete
61
61
  ```ruby
62
62
  >> article = Article.new :title => "Web development that doesn't hurt", :content => "Tens of thousands of Rails applications are already live..."
63
63
  >> article.language
64
- => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>93, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>80.22690437601297}]}
64
+ => #<LanguageDetection::Language:0x007fae049dd8e8 @name="english", @code="en", @reliable=true, @text_bytes=93, @details=[#<LanguageDetection::Language:0x007fae049dd118 @name="english", @code="en", @details=[], @percent=100, @score=80.22690437601297>]>
65
65
  ```
66
66
 
67
67
  or you can add `String#language` method by `require 'language_detection/string'`
@@ -72,7 +72,7 @@ or you can add `String#language` method by `require 'language_detection/string'`
72
72
  >> require 'language_detection/string'
73
73
  => true
74
74
  >> "Web development that doesn't hurt".language
75
- => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>36, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>39.70826580226905}]}
75
+ => #<LanguageDetection::Language:0x007fae049cfec8 @name="english", @code="en", @reliable=true, @text_bytes=36, @details=[#<LanguageDetection::Language:0x007fae049cf7e8 @name="english", @code="en", @details=[], @percent=100, @score=39.70826580226905>]>
76
76
  ```
77
77
 
78
78
 
data/Rakefile CHANGED
@@ -1,11 +1,24 @@
1
1
  require "bundler/gem_tasks"
2
+ require 'rake/testtask'
2
3
 
3
4
  task :default => :test
4
5
 
5
- require 'rake/testtask'
6
desc "Compile extension"
# Builds ext/cld/cld.so with make unless it already exists.
# Set the RECOMPILE environment variable to any value to force a rebuild.
task :compile do
  # Resolve both paths against this Rakefile, not the current working
  # directory, so the existence check and the make invocation always agree.
  ext_dir = File.expand_path("ext/cld", File.dirname(__FILE__))
  library = File.join(ext_dir, "cld.so")

  if !File.exist?(library) || ENV['RECOMPILE']
    puts "Compiling extension..."
    # `sh` streams make's output and raises when make exits non-zero, so a
    # broken build stops here instead of surfacing later as a load error.
    Dir.chdir(ext_dir) { sh "make" }
  else
    puts "Extension already compiled. To recompile set env variable RECOMPILE=true."
  end
end

# Defines the :test task, which runs every test/*_test.rb file.
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.test_files = FileList['test/*_test.rb']
  test.verbose = true
end

# Compile as a prerequisite of :test rather than invoking it while the test
# task is being configured; this way merely loading the Rakefile (rake -T,
# rake build, ...) never triggers a compilation.
task :test => :compile
data/ext/cld/Makefile CHANGED
@@ -1,6 +1,6 @@
1
1
  # TODO: Generate Makefile
2
2
 
3
- CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS
3
+ CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS -ansi
4
4
  LDFLAGS=-L.
5
5
  CC=g++
6
6
  AR=ar
@@ -7,10 +7,14 @@ Gem::Specification.new do |gem|
7
7
  gem.name = "language_detection"
8
8
  gem.version = LanguageDetection::VERSION
9
9
  gem.authors = ["Vojtech Hyza"]
10
+ gem.license = 'MIT'
10
11
  gem.email = ["vhyza@vhyza.eu"]
11
- gem.description = %q{Language detection}
12
- gem.summary = %q{Wrapped Chrome's compact language detector}
13
- gem.homepage = ""
12
+ gem.description = %q{Ruby bindings for Chromium Compact Language Detector}
13
+ gem.summary = <<-EOF
14
+ Ruby bindings for Chromium Compact Language Detector ([source](http://src.chromium.org/viewvc/chrome/trunk/src/third_party/cld/)).
15
+ This gem is using source codes from [chromium-compact-language-detector](http://code.google.com/p/chromium-compact-language-detector/) port.
16
+ EOF
17
+ gem.homepage = "https://github.com/vhyza/language_detection"
14
18
 
15
19
  gem.files = `git ls-files`.split($/)
16
20
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
@@ -18,11 +22,12 @@ Gem::Specification.new do |gem|
18
22
  gem.require_paths = ["lib"]
19
23
  gem.extensions = ["ext/cld/extconf.rb"]
20
24
 
21
- gem.add_dependency "ffi"
22
- gem.add_dependency "hashr"
23
- gem.add_dependency "rake"
25
+ gem.add_runtime_dependency "ffi", "~> 1.12"
24
26
 
25
- gem.add_development_dependency "shoulda"
26
- gem.add_development_dependency "mocha"
27
- gem.add_development_dependency "turn"
27
+ gem.add_development_dependency "rake", "~> 13"
28
+ gem.add_development_dependency "shoulda", "~> 4"
29
+ gem.add_development_dependency "mocha", "~> 2"
30
+ gem.add_development_dependency "test-unit", "~> 3"
31
+
32
+ gem.required_ruby_version = [ ">= 2.5.0", "< 3.3.0" ]
28
33
  end
@@ -0,0 +1,18 @@
1
module LanguageDetection

  # Plain value object holding one language detection result.
  #
  # Every attribute is optional. +details+ defaults to an empty array and
  # +name+, when given, is stored lower-cased.
  class Language

    attr_accessor :name, :code, :reliable, :text_bytes, :details, :percent, :score

    # Builds a Language from a hash of attribute names to values.
    #
    # @param attributes [Hash] keys must name one of the accessors above
    # @raise [NoMethodError] when a key does not match any accessor
    def initialize(attributes = {})
      attributes.each_pair do |attribute, value|
        # public_send keeps assignment restricted to the public writers.
        public_send("#{attribute}=", value)
      end

      @details ||= []
      # Non-destructive downcase: tolerates a missing name, works with frozen
      # strings and leaves the caller's original string untouched.
      @name &&= @name.downcase
    end

  end

end
@@ -1,3 +1,3 @@
1
1
  module LanguageDetection
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -1,6 +1,6 @@
1
1
  require "language_detection/version"
2
+ require "language_detection/language"
2
3
  require "ffi"
3
- require "hashr"
4
4
 
5
5
  module LanguageDetection
6
6
 
@@ -10,12 +10,11 @@ module LanguageDetection
10
10
  result = language_detection(text.to_s, is_plain_text)
11
11
 
12
12
  language = parse_result(result, result.members - [:details])
13
- language[:details] = []
14
13
 
15
- details = FFI::Pointer.new(LanguageDetection::Detail, result[:details])
14
+ details = FFI::Pointer.new(LanguageDetection::DetailStruct, result[:details])
16
15
  3.times do |i|
17
- detail = parse_result(LanguageDetection::Detail.new(details[i]))
18
- language[:details] << detail unless detail.code == 'un'
16
+ detail = parse_result(LanguageDetection::DetailStruct.new(details[i]))
17
+ language.details << detail unless detail.code == 'un'
19
18
  end
20
19
 
21
20
  language
@@ -28,19 +27,19 @@ module LanguageDetection
28
27
  private
29
28
 
30
29
  def self.parse_result(result, members = result.members)
31
- Hashr.new(Hash[ members.map {|member| [member.to_sym, result[member]]} ])
30
+ Language.new(Hash[ members.map {|member| [member.to_sym, result[member]]} ])
32
31
  end
33
32
 
34
33
  extend FFI::Library
35
34
 
36
- class Detail < FFI::Struct
35
+ class DetailStruct < FFI::Struct
37
36
  layout :name, :string,
38
37
  :code, :string,
39
38
  :percent, :int,
40
39
  :score, :double
41
40
  end
42
41
 
43
- class Language < FFI::Struct
42
+ class LanguageStruct < FFI::Struct
44
43
  layout :name, :string,
45
44
  :code, :string,
46
45
  :reliable, :bool,
@@ -49,6 +48,6 @@ module LanguageDetection
49
48
  end
50
49
 
51
50
  ffi_lib File.expand_path("../../ext/cld/cld.so", __FILE__)
52
- attach_function "language_detection","language_detection", [:buffer_in, :bool], Language.by_value
51
+ attach_function "language_detection","language_detection", [:buffer_in, :bool], LanguageStruct.by_value
53
52
 
54
53
  end
data/test/_helper.rb CHANGED
@@ -2,8 +2,7 @@ require 'bundler/setup'
2
2
 
3
3
  require 'test/unit'
4
4
  require 'shoulda'
5
- require 'turn' unless ENV["TM_FILEPATH"] || ENV["CI"]
6
- require 'mocha'
5
+ require 'mocha/test_unit'
7
6
  require File.join(File.expand_path('../../lib/language_detection.rb', __FILE__))
8
7
 
9
8
  class Test::Unit::TestCase
@@ -12,4 +11,4 @@ class Test::Unit::TestCase
12
11
  File.read File.expand_path("../fixtures/#{name}", __FILE__)
13
12
  end
14
13
 
15
- end
14
+ end
@@ -1,30 +1,29 @@
1
1
  # encoding: utf-8
2
2
 
3
- require '_helper'
3
+ require './test/_helper'
4
4
  require 'csv'
5
5
 
6
6
  class LanguageDetectionTest < Test::Unit::TestCase
7
7
 
8
8
  context "Language detection" do
9
9
 
10
- should "be able to convert result from native call to Hashr instance" do
10
+ should "be able to convert result from native call to Language instance" do
11
11
  result = LanguageDetection.language_detection("this is some text", false)
12
12
  parsed_result = LanguageDetection.parse_result(result)
13
13
 
14
- assert_kind_of LanguageDetection::Language, result
15
- assert_kind_of Hashr, parsed_result
14
+ assert_kind_of LanguageDetection::LanguageStruct, result
15
+ assert_kind_of LanguageDetection::Language, parsed_result
16
16
 
17
- assert_equal "ENGLISH", parsed_result.name
18
- assert_nil parsed_result.non_existing_property
17
+ assert_equal "english", parsed_result.name
19
18
  end
20
19
 
21
- should "convert details from FFI pointer to Hashr instance" do
20
+ should "convert details from FFI pointer to Language instance" do
22
21
  language = LanguageDetection.perform("this is some text")
23
22
 
24
- assert_kind_of Array, language.details
25
- assert_kind_of Hashr, language.details.first
26
- assert_equal "ENGLISH", language.details.first.name
27
- assert_equal 65, language.details.first.percent
23
+ assert_kind_of Array, language.details
24
+ assert_kind_of LanguageDetection::Language, language.details.first
25
+ assert_equal "english", language.details.first.name
26
+ assert_equal 65, language.details.first.percent
28
27
  end
29
28
 
30
29
  should "recognize languages in testing data" do
@@ -67,7 +66,7 @@ class LanguageDetectionTest < Test::Unit::TestCase
67
66
 
68
67
  should "return detected language" do
69
68
  language = @article.language
70
- assert_equal "ENGLISH", language.name
69
+ assert_equal "english", language.name
71
70
  assert_equal true, language.reliable
72
71
  assert_equal 100, language.details.first.percent
73
72
  end
@@ -85,4 +84,4 @@ class LanguageDetectionTest < Test::Unit::TestCase
85
84
  end
86
85
 
87
86
 
88
- end
87
+ end
metadata CHANGED
@@ -1,113 +1,86 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: language_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Vojtech Hyza
9
- autorequire:
8
+ autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-10-01 00:00:00.000000000 Z
11
+ date: 2022-12-22 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: ffi
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
- version: '0'
19
+ version: '1.12'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
- version: '0'
30
- - !ruby/object:Gem::Dependency
31
- name: hashr
32
- requirement: !ruby/object:Gem::Requirement
33
- none: false
34
- requirements:
35
- - - ! '>='
36
- - !ruby/object:Gem::Version
37
- version: '0'
38
- type: :runtime
39
- prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ! '>='
44
- - !ruby/object:Gem::Version
45
- version: '0'
26
+ version: '1.12'
46
27
  - !ruby/object:Gem::Dependency
47
28
  name: rake
48
29
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
30
  requirements:
51
- - - ! '>='
31
+ - - "~>"
52
32
  - !ruby/object:Gem::Version
53
- version: '0'
54
- type: :runtime
33
+ version: '13'
34
+ type: :development
55
35
  prerelease: false
56
36
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
37
  requirements:
59
- - - ! '>='
38
+ - - "~>"
60
39
  - !ruby/object:Gem::Version
61
- version: '0'
40
+ version: '13'
62
41
  - !ruby/object:Gem::Dependency
63
42
  name: shoulda
64
43
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
44
  requirements:
67
- - - ! '>='
45
+ - - "~>"
68
46
  - !ruby/object:Gem::Version
69
- version: '0'
47
+ version: '4'
70
48
  type: :development
71
49
  prerelease: false
72
50
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
51
  requirements:
75
- - - ! '>='
52
+ - - "~>"
76
53
  - !ruby/object:Gem::Version
77
- version: '0'
54
+ version: '4'
78
55
  - !ruby/object:Gem::Dependency
79
56
  name: mocha
80
57
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
58
  requirements:
83
- - - ! '>='
59
+ - - "~>"
84
60
  - !ruby/object:Gem::Version
85
- version: '0'
61
+ version: '2'
86
62
  type: :development
87
63
  prerelease: false
88
64
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
65
  requirements:
91
- - - ! '>='
66
+ - - "~>"
92
67
  - !ruby/object:Gem::Version
93
- version: '0'
68
+ version: '2'
94
69
  - !ruby/object:Gem::Dependency
95
- name: turn
70
+ name: test-unit
96
71
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
72
  requirements:
99
- - - ! '>='
73
+ - - "~>"
100
74
  - !ruby/object:Gem::Version
101
- version: '0'
75
+ version: '3'
102
76
  type: :development
103
77
  prerelease: false
104
78
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
79
  requirements:
107
- - - ! '>='
80
+ - - "~>"
108
81
  - !ruby/object:Gem::Version
109
- version: '0'
110
- description: Language detection
82
+ version: '3'
83
+ description: Ruby bindings for Chromium Compact Language Detector
111
84
  email:
112
85
  - vhyza@vhyza.eu
113
86
  executables: []
@@ -115,7 +88,7 @@ extensions:
115
88
  - ext/cld/extconf.rb
116
89
  extra_rdoc_files: []
117
90
  files:
118
- - .gitignore
91
+ - ".gitignore"
119
92
  - Gemfile
120
93
  - LICENSE.txt
121
94
  - README.md
@@ -140,7 +113,6 @@ files:
140
113
  - ext/cld/base/template_util.h
141
114
  - ext/cld/base/type_traits.h
142
115
  - ext/cld/base/vlog_is_on.h
143
- - ext/cld/cld.so
144
116
  - ext/cld/encodings/compact_lang_det/cldutil.cc
145
117
  - ext/cld/encodings/compact_lang_det/cldutil.h
146
118
  - ext/cld/encodings/compact_lang_det/cldutil_dbg.h
@@ -209,41 +181,40 @@ files:
209
181
  - ext/cld/languages/public/languages.h
210
182
  - language_detection.gemspec
211
183
  - lib/language_detection.rb
184
+ - lib/language_detection/language.rb
212
185
  - lib/language_detection/string.rb
213
186
  - lib/language_detection/version.rb
214
187
  - test/_helper.rb
215
188
  - test/fixtures/languages.csv
216
189
  - test/language_detection_test.rb
217
- homepage: ''
218
- licenses: []
219
- post_install_message:
190
+ homepage: https://github.com/vhyza/language_detection
191
+ licenses:
192
+ - MIT
193
+ metadata: {}
194
+ post_install_message:
220
195
  rdoc_options: []
221
196
  require_paths:
222
197
  - lib
223
198
  required_ruby_version: !ruby/object:Gem::Requirement
224
- none: false
225
199
  requirements:
226
- - - ! '>='
200
+ - - ">="
227
201
  - !ruby/object:Gem::Version
228
- version: '0'
229
- segments:
230
- - 0
231
- hash: 301210449373780646
202
+ version: 2.5.0
203
+ - - "<"
204
+ - !ruby/object:Gem::Version
205
+ version: 3.3.0
232
206
  required_rubygems_version: !ruby/object:Gem::Requirement
233
- none: false
234
207
  requirements:
235
- - - ! '>='
208
+ - - ">="
236
209
  - !ruby/object:Gem::Version
237
210
  version: '0'
238
- segments:
239
- - 0
240
- hash: 301210449373780646
241
211
  requirements: []
242
- rubyforge_project:
243
- rubygems_version: 1.8.24
244
- signing_key:
245
- specification_version: 3
246
- summary: Wrapped Chrome's compact language detector
212
+ rubygems_version: 3.3.7
213
+ signing_key:
214
+ specification_version: 4
215
+ summary: Ruby bindings for Chromium Compact Language Detector ([source](http://src.chromium.org/viewvc/chrome/trunk/src/third_party/cld/)).
216
+ This gem is using source codes from [chromium-compact-language-detector](http://code.google.com/p/chromium-compact-language-detector/)
217
+ port.
247
218
  test_files:
248
219
  - test/_helper.rb
249
220
  - test/fixtures/languages.csv
data/ext/cld/cld.so DELETED
Binary file