language_detection 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 4458c90ef41ad87f046ea204849c94f68b7c770f1ff210a53135e70104b10b14
4
+ data.tar.gz: 931f7cbbae50ee8d4e2aa119c41a610ee222c6fc50722e7ec2637517b1b891d4
5
+ SHA512:
6
+ metadata.gz: e0d50b6ee77faf6c3c7b29254f9d3c80e822652530ba3b1f5d44ca944f9d05167841929e03e3f62208d67db43a8542432f42ce0931f37dd5140fdbb154be01c5
7
+ data.tar.gz: 82dee57902451034039a8445ada9e712319d12c75272e73da90ed04ddcc1c22f491e1be557f13032169f0e4dd3b647999c3b4a8e081bdff1aa5412dcb9fbc69a
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
+ .DS_Store
1
2
  *.gem
2
3
  *.rbc
3
4
  .bundle
@@ -17,3 +18,4 @@ test/version_tmp
17
18
  tmp
18
19
  ext/cld/*.o
19
20
  ext/cld/*.a
21
+ ext/cld/*.so
data/README.md CHANGED
@@ -22,15 +22,15 @@ Or install it yourself as:
22
22
  >> require 'language_detection'
23
23
  => true
24
24
  >> language = LanguageDetection.perform("This is some example text for language detection")
25
- => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>51, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>49.43273905996759}]}
25
+ => #<LanguageDetection::Language:0x007fae0404f628 @name="english", @code="en", @reliable=true, @text_bytes=51, @details=[#<LanguageDetection::Language:0x007fae0404eb10 @name="english", @code="en", @details=[], @percent=100, @score=49.43273905996759>]>
26
26
  >> language.name
27
- => "ENGLISH"
27
+ => "english"
28
28
  >> language.code
29
29
  => "en"
30
30
  >> language.reliable
31
31
  => true
32
32
  >> language.details # contains up to 3 languages sorted by score
33
- => [{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>49.43273905996759}]
33
+ => [#<LanguageDetection::Language:0x007fae0404eb10 @name="english", @code="en", @details=[], @percent=100, @score=49.43273905996759>]
34
34
  >> language.details.first.percent
35
35
  => 100
36
36
  >> language.details.first.score
@@ -61,7 +61,7 @@ which provides `Article#language` method using `Article#to_s` method as paramete
61
61
  ```ruby
62
62
  >> article = Article.new :title => "Web development that doesn't hurt", :content => "Tens of thousands of Rails applications are already live..."
63
63
  >> article.language
64
- => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>93, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>80.22690437601297}]}
64
+ => #<LanguageDetection::Language:0x007fae049dd8e8 @name="english", @code="en", @reliable=true, @text_bytes=93, @details=[#<LanguageDetection::Language:0x007fae049dd118 @name="english", @code="en", @details=[], @percent=100, @score=80.22690437601297>]>
65
65
  ```
66
66
 
67
67
  or you can add `String#language` method by `require 'language_detection/string'`
@@ -72,7 +72,7 @@ or you can add `String#language` method by `require 'language_detection/string'`
72
72
  >> require 'language_detection/string'
73
73
  => true
74
74
  >> "Web development that doesn't hurt".language
75
- => {:name=>"ENGLISH", :code=>"en", :reliable=>true, :text_bytes=>36, :details=>[{:name=>"ENGLISH", :code=>"en", :percent=>100, :score=>39.70826580226905}]}
75
+ => #<LanguageDetection::Language:0x007fae049cfec8 @name="english", @code="en", @reliable=true, @text_bytes=36, @details=[#<LanguageDetection::Language:0x007fae049cf7e8 @name="english", @code="en", @details=[], @percent=100, @score=39.70826580226905>]>
76
76
  ```
77
77
 
78
78
 
data/Rakefile CHANGED
@@ -1,11 +1,24 @@
1
1
  require "bundler/gem_tasks"
2
+ require 'rake/testtask'
2
3
 
3
4
  task :default => :test
4
5
 
5
- require 'rake/testtask'
6
desc "Compile extension"
task :compile do
  # Resolve everything against the Rakefile's own directory so the task does
  # not depend on the directory rake happens to be started from.
  ext_dir = File.expand_path("ext/cld", File.dirname(__FILE__))
  path = File.join(ext_dir, "cld.so")

  # Build when the shared library is missing, or when the caller forces a
  # rebuild by setting the RECOMPILE environment variable to any value.
  if !File.exist?(path) || ENV['RECOMPILE']
    puts "Compiling extension..."
    # `sh` aborts the task when make exits non-zero (backticks would silently
    # discard the failure), and Dir.chdir avoids interpolating the path into
    # a shell string, which breaks on directories containing spaces.
    Dir.chdir(ext_dir) { sh "make" }
  else
    puts "Extension already compiled. To recompile set env variable RECOMPILE=true."
  end
end
17
+
6
18
  Rake::TestTask.new(:test) do |test|
19
+ Rake::Task["compile"].invoke
20
+
7
21
  test.libs << 'lib' << 'test'
8
22
  test.test_files = FileList['test/*_test.rb']
9
23
  test.verbose = true
10
- # test.warning = true
11
24
  end
data/ext/cld/Makefile CHANGED
@@ -1,6 +1,6 @@
1
1
  # TODO: Generate Makefile
2
2
 
3
- CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS
3
+ CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS -ansi
4
4
  LDFLAGS=-L.
5
5
  CC=g++
6
6
  AR=ar
@@ -7,10 +7,14 @@ Gem::Specification.new do |gem|
7
7
  gem.name = "language_detection"
8
8
  gem.version = LanguageDetection::VERSION
9
9
  gem.authors = ["Vojtech Hyza"]
10
+ gem.license = 'MIT'
10
11
  gem.email = ["vhyza@vhyza.eu"]
11
- gem.description = %q{Language detection}
12
- gem.summary = %q{Wrapped Chrome's compact language detector}
13
- gem.homepage = ""
12
+ gem.description = %q{Ruby bindings for Chromium Compact Language Detector}
13
+ gem.summary = <<-EOF
14
+ Ruby bindings for Chromium Compact Language Detector ([source](http://src.chromium.org/viewvc/chrome/trunk/src/third_party/cld/)).
15
+ This gem is using source codes from [chromium-compact-language-detector](http://code.google.com/p/chromium-compact-language-detector/) port.
16
+ EOF
17
+ gem.homepage = "https://github.com/vhyza/language_detection"
14
18
 
15
19
  gem.files = `git ls-files`.split($/)
16
20
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
@@ -18,11 +22,12 @@ Gem::Specification.new do |gem|
18
22
  gem.require_paths = ["lib"]
19
23
  gem.extensions = ["ext/cld/extconf.rb"]
20
24
 
21
- gem.add_dependency "ffi"
22
- gem.add_dependency "hashr"
23
- gem.add_dependency "rake"
25
+ gem.add_runtime_dependency "ffi", "~> 1.12"
24
26
 
25
- gem.add_development_dependency "shoulda"
26
- gem.add_development_dependency "mocha"
27
- gem.add_development_dependency "turn"
27
+ gem.add_development_dependency "rake", "~> 13"
28
+ gem.add_development_dependency "shoulda", "~> 4"
29
+ gem.add_development_dependency "mocha", "~> 2"
30
+ gem.add_development_dependency "test-unit", "~> 3"
31
+
32
+ gem.required_ruby_version = [ ">= 2.5.0", "< 3.3.0" ]
28
33
  end
@@ -0,0 +1,18 @@
1
module LanguageDetection

  # Plain value object holding the attributes of one detected language.
  #
  # All attributes are optional and exposed through read/write accessors:
  # name, code, reliable, text_bytes, details, percent and score.
  class Language

    attr_accessor :name, :code, :reliable, :text_bytes, :details, :percent, :score

    # Builds a Language from a hash of attribute values.
    #
    # attributes - Hash mapping attribute names (Symbol or String) to values.
    #              Each key must match one of the accessors declared above;
    #              an unknown key raises NoMethodError from the setter call.
    #
    # After assignment, +details+ defaults to an empty Array when it was not
    # supplied (or was nil), and +name+ is stored lower-cased.
    def initialize(attributes = {})
      attributes.each_pair do |attribute, value|
        # public_send keeps hash keys from reaching private `=` methods.
        public_send("#{attribute}=", value)
      end

      @details ||= []
      # Use the non-destructive downcase: the bang variant raised
      # NoMethodError when no name was given, failed on frozen strings and
      # modified the caller's string in place.
      @name = @name.downcase if @name
    end

  end

end
@@ -1,3 +1,3 @@
1
1
  module LanguageDetection
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -1,6 +1,6 @@
1
1
  require "language_detection/version"
2
+ require "language_detection/language"
2
3
  require "ffi"
3
- require "hashr"
4
4
 
5
5
  module LanguageDetection
6
6
 
@@ -10,12 +10,11 @@ module LanguageDetection
10
10
  result = language_detection(text.to_s, is_plain_text)
11
11
 
12
12
  language = parse_result(result, result.members - [:details])
13
- language[:details] = []
14
13
 
15
- details = FFI::Pointer.new(LanguageDetection::Detail, result[:details])
14
+ details = FFI::Pointer.new(LanguageDetection::DetailStruct, result[:details])
16
15
  3.times do |i|
17
- detail = parse_result(LanguageDetection::Detail.new(details[i]))
18
- language[:details] << detail unless detail.code == 'un'
16
+ detail = parse_result(LanguageDetection::DetailStruct.new(details[i]))
17
+ language.details << detail unless detail.code == 'un'
19
18
  end
20
19
 
21
20
  language
@@ -28,19 +27,19 @@ module LanguageDetection
28
27
  private
29
28
 
30
29
  def self.parse_result(result, members = result.members)
31
- Hashr.new(Hash[ members.map {|member| [member.to_sym, result[member]]} ])
30
+ Language.new(Hash[ members.map {|member| [member.to_sym, result[member]]} ])
32
31
  end
33
32
 
34
33
  extend FFI::Library
35
34
 
36
- class Detail < FFI::Struct
35
+ class DetailStruct < FFI::Struct
37
36
  layout :name, :string,
38
37
  :code, :string,
39
38
  :percent, :int,
40
39
  :score, :double
41
40
  end
42
41
 
43
- class Language < FFI::Struct
42
+ class LanguageStruct < FFI::Struct
44
43
  layout :name, :string,
45
44
  :code, :string,
46
45
  :reliable, :bool,
@@ -49,6 +48,6 @@ module LanguageDetection
49
48
  end
50
49
 
51
50
  ffi_lib File.expand_path("../../ext/cld/cld.so", __FILE__)
52
- attach_function "language_detection","language_detection", [:buffer_in, :bool], Language.by_value
51
+ attach_function "language_detection","language_detection", [:buffer_in, :bool], LanguageStruct.by_value
53
52
 
54
53
  end
data/test/_helper.rb CHANGED
@@ -2,8 +2,7 @@ require 'bundler/setup'
2
2
 
3
3
  require 'test/unit'
4
4
  require 'shoulda'
5
- require 'turn' unless ENV["TM_FILEPATH"] || ENV["CI"]
6
- require 'mocha'
5
+ require 'mocha/test_unit'
7
6
  require File.join(File.expand_path('../../lib/language_detection.rb', __FILE__))
8
7
 
9
8
  class Test::Unit::TestCase
@@ -12,4 +11,4 @@ class Test::Unit::TestCase
12
11
  File.read File.expand_path("../fixtures/#{name}", __FILE__)
13
12
  end
14
13
 
15
- end
14
+ end
@@ -1,30 +1,29 @@
1
1
  # encoding: utf-8
2
2
 
3
- require '_helper'
3
+ require './test/_helper'
4
4
  require 'csv'
5
5
 
6
6
  class LanguageDetectionTest < Test::Unit::TestCase
7
7
 
8
8
  context "Language detection" do
9
9
 
10
- should "be able to convert result from native call to Hashr instance" do
10
+ should "be able to convert result from native call to Language instance" do
11
11
  result = LanguageDetection.language_detection("this is some text", false)
12
12
  parsed_result = LanguageDetection.parse_result(result)
13
13
 
14
- assert_kind_of LanguageDetection::Language, result
15
- assert_kind_of Hashr, parsed_result
14
+ assert_kind_of LanguageDetection::LanguageStruct, result
15
+ assert_kind_of LanguageDetection::Language, parsed_result
16
16
 
17
- assert_equal "ENGLISH", parsed_result.name
18
- assert_nil parsed_result.non_existing_property
17
+ assert_equal "english", parsed_result.name
19
18
  end
20
19
 
21
- should "convert details from FFI pointer to Hashr instance" do
20
+ should "convert details from FFI pointer to Language instance" do
22
21
  language = LanguageDetection.perform("this is some text")
23
22
 
24
- assert_kind_of Array, language.details
25
- assert_kind_of Hashr, language.details.first
26
- assert_equal "ENGLISH", language.details.first.name
27
- assert_equal 65, language.details.first.percent
23
+ assert_kind_of Array, language.details
24
+ assert_kind_of LanguageDetection::Language, language.details.first
25
+ assert_equal "english", language.details.first.name
26
+ assert_equal 65, language.details.first.percent
28
27
  end
29
28
 
30
29
  should "recognize languages in testing data" do
@@ -67,7 +66,7 @@ class LanguageDetectionTest < Test::Unit::TestCase
67
66
 
68
67
  should "return detected language" do
69
68
  language = @article.language
70
- assert_equal "ENGLISH", language.name
69
+ assert_equal "english", language.name
71
70
  assert_equal true, language.reliable
72
71
  assert_equal 100, language.details.first.percent
73
72
  end
@@ -85,4 +84,4 @@ class LanguageDetectionTest < Test::Unit::TestCase
85
84
  end
86
85
 
87
86
 
88
- end
87
+ end
metadata CHANGED
@@ -1,113 +1,86 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: language_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Vojtech Hyza
9
- autorequire:
8
+ autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-10-01 00:00:00.000000000 Z
11
+ date: 2022-12-22 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: ffi
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
- version: '0'
19
+ version: '1.12'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
- version: '0'
30
- - !ruby/object:Gem::Dependency
31
- name: hashr
32
- requirement: !ruby/object:Gem::Requirement
33
- none: false
34
- requirements:
35
- - - ! '>='
36
- - !ruby/object:Gem::Version
37
- version: '0'
38
- type: :runtime
39
- prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ! '>='
44
- - !ruby/object:Gem::Version
45
- version: '0'
26
+ version: '1.12'
46
27
  - !ruby/object:Gem::Dependency
47
28
  name: rake
48
29
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
30
  requirements:
51
- - - ! '>='
31
+ - - "~>"
52
32
  - !ruby/object:Gem::Version
53
- version: '0'
54
- type: :runtime
33
+ version: '13'
34
+ type: :development
55
35
  prerelease: false
56
36
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
37
  requirements:
59
- - - ! '>='
38
+ - - "~>"
60
39
  - !ruby/object:Gem::Version
61
- version: '0'
40
+ version: '13'
62
41
  - !ruby/object:Gem::Dependency
63
42
  name: shoulda
64
43
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
44
  requirements:
67
- - - ! '>='
45
+ - - "~>"
68
46
  - !ruby/object:Gem::Version
69
- version: '0'
47
+ version: '4'
70
48
  type: :development
71
49
  prerelease: false
72
50
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
51
  requirements:
75
- - - ! '>='
52
+ - - "~>"
76
53
  - !ruby/object:Gem::Version
77
- version: '0'
54
+ version: '4'
78
55
  - !ruby/object:Gem::Dependency
79
56
  name: mocha
80
57
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
58
  requirements:
83
- - - ! '>='
59
+ - - "~>"
84
60
  - !ruby/object:Gem::Version
85
- version: '0'
61
+ version: '2'
86
62
  type: :development
87
63
  prerelease: false
88
64
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
65
  requirements:
91
- - - ! '>='
66
+ - - "~>"
92
67
  - !ruby/object:Gem::Version
93
- version: '0'
68
+ version: '2'
94
69
  - !ruby/object:Gem::Dependency
95
- name: turn
70
+ name: test-unit
96
71
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
72
  requirements:
99
- - - ! '>='
73
+ - - "~>"
100
74
  - !ruby/object:Gem::Version
101
- version: '0'
75
+ version: '3'
102
76
  type: :development
103
77
  prerelease: false
104
78
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
79
  requirements:
107
- - - ! '>='
80
+ - - "~>"
108
81
  - !ruby/object:Gem::Version
109
- version: '0'
110
- description: Language detection
82
+ version: '3'
83
+ description: Ruby bindings for Chromium Compact Language Detector
111
84
  email:
112
85
  - vhyza@vhyza.eu
113
86
  executables: []
@@ -115,7 +88,7 @@ extensions:
115
88
  - ext/cld/extconf.rb
116
89
  extra_rdoc_files: []
117
90
  files:
118
- - .gitignore
91
+ - ".gitignore"
119
92
  - Gemfile
120
93
  - LICENSE.txt
121
94
  - README.md
@@ -140,7 +113,6 @@ files:
140
113
  - ext/cld/base/template_util.h
141
114
  - ext/cld/base/type_traits.h
142
115
  - ext/cld/base/vlog_is_on.h
143
- - ext/cld/cld.so
144
116
  - ext/cld/encodings/compact_lang_det/cldutil.cc
145
117
  - ext/cld/encodings/compact_lang_det/cldutil.h
146
118
  - ext/cld/encodings/compact_lang_det/cldutil_dbg.h
@@ -209,41 +181,40 @@ files:
209
181
  - ext/cld/languages/public/languages.h
210
182
  - language_detection.gemspec
211
183
  - lib/language_detection.rb
184
+ - lib/language_detection/language.rb
212
185
  - lib/language_detection/string.rb
213
186
  - lib/language_detection/version.rb
214
187
  - test/_helper.rb
215
188
  - test/fixtures/languages.csv
216
189
  - test/language_detection_test.rb
217
- homepage: ''
218
- licenses: []
219
- post_install_message:
190
+ homepage: https://github.com/vhyza/language_detection
191
+ licenses:
192
+ - MIT
193
+ metadata: {}
194
+ post_install_message:
220
195
  rdoc_options: []
221
196
  require_paths:
222
197
  - lib
223
198
  required_ruby_version: !ruby/object:Gem::Requirement
224
- none: false
225
199
  requirements:
226
- - - ! '>='
200
+ - - ">="
227
201
  - !ruby/object:Gem::Version
228
- version: '0'
229
- segments:
230
- - 0
231
- hash: 301210449373780646
202
+ version: 2.5.0
203
+ - - "<"
204
+ - !ruby/object:Gem::Version
205
+ version: 3.3.0
232
206
  required_rubygems_version: !ruby/object:Gem::Requirement
233
- none: false
234
207
  requirements:
235
- - - ! '>='
208
+ - - ">="
236
209
  - !ruby/object:Gem::Version
237
210
  version: '0'
238
- segments:
239
- - 0
240
- hash: 301210449373780646
241
211
  requirements: []
242
- rubyforge_project:
243
- rubygems_version: 1.8.24
244
- signing_key:
245
- specification_version: 3
246
- summary: Wrapped Chrome's compact language detector
212
+ rubygems_version: 3.3.7
213
+ signing_key:
214
+ specification_version: 4
215
+ summary: Ruby bindings for Chromium Compact Language Detector ([source](http://src.chromium.org/viewvc/chrome/trunk/src/third_party/cld/)).
216
+ This gem is using source codes from [chromium-compact-language-detector](http://code.google.com/p/chromium-compact-language-detector/)
217
+ port.
247
218
  test_files:
248
219
  - test/_helper.rb
249
220
  - test/fixtures/languages.csv
data/ext/cld/cld.so DELETED
Binary file