language_detection 0.0.1 → 0.0.2
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/README.md +5 -5
- data/Rakefile +15 -2
- data/language_detection.gemspec +3 -4
- data/lib/language_detection.rb +8 -9
- data/lib/language_detection/language.rb +18 -0
- data/lib/language_detection/version.rb +1 -1
- data/test/language_detection_test.rb +10 -11
- metadata +8 -23
data/README.md
CHANGED
@@ -22,15 +22,15 @@ Or install it yourself as:
|
|
22
22
|
>> require 'language_detection'
|
23
23
|
=> true
|
24
24
|
>> language = LanguageDetection.perform("This is some example text for language detection")
|
25
|
-
=>
|
25
|
+
=> #<LanguageDetection::Language:0x007fae0404f628 @name="english", @code="en", @reliable=true, @text_bytes=51, @details=[#<LanguageDetection::Language:0x007fae0404eb10 @name="english", @code="en", @details=[], @percent=100, @score=49.43273905996759>]>
|
26
26
|
>> language.name
|
27
|
-
=> "
|
27
|
+
=> "english"
|
28
28
|
>> language.code
|
29
29
|
=> "en"
|
30
30
|
>> language.reliable
|
31
31
|
=> true
|
32
32
|
>> language.details # contains up to 3 languages sorted by score
|
33
|
-
=> [
|
33
|
+
=> [#<LanguageDetection::Language:0x007fae0404eb10 @name="english", @code="en", @details=[], @percent=100, @score=49.43273905996759>]
|
34
34
|
>> language.details.first.percent
|
35
35
|
=> 100
|
36
36
|
>> language.details.first.score
|
@@ -61,7 +61,7 @@ which provides `Article#language` method using `Article#to_s` method as paramete
|
|
61
61
|
```ruby
|
62
62
|
>> article = Article.new :title => "Web development that doesn't hurt", :content => "Tens of thousands of Rails applications are already live..."
|
63
63
|
>> article.language
|
64
|
-
=>
|
64
|
+
=> #<LanguageDetection::Language:0x007fae049dd8e8 @name="english", @code="en", @reliable=true, @text_bytes=93, @details=[#<LanguageDetection::Language:0x007fae049dd118 @name="english", @code="en", @details=[], @percent=100, @score=80.22690437601297>]>
|
65
65
|
```
|
66
66
|
|
67
67
|
or you can add `String#language` method by `require 'language_detection/string'`
|
@@ -72,7 +72,7 @@ or you can add `String#language` method by `require 'language_detection/string'`
|
|
72
72
|
>> require 'language_detection/string'
|
73
73
|
=> true
|
74
74
|
>> "Web development that doesn't hurt".language
|
75
|
-
=>
|
75
|
+
=> #<LanguageDetection::Language:0x007fae049cfec8 @name="english", @code="en", @reliable=true, @text_bytes=36, @details=[#<LanguageDetection::Language:0x007fae049cf7e8 @name="english", @code="en", @details=[], @percent=100, @score=39.70826580226905>]>
|
76
76
|
```
|
77
77
|
|
78
78
|
|
data/Rakefile
CHANGED
@@ -1,11 +1,24 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
+
require 'rake/testtask'
|
2
3
|
|
3
4
|
task :default => :test
|
4
5
|
|
5
|
-
|
6
|
+
desc "Compile extension"
|
7
|
+
task :compile do
|
8
|
+
path = File.expand_path("ext/cld/cld.so", File.dirname(__FILE__))
|
9
|
+
|
10
|
+
if !File.exists?(path) || ENV['RECOMPILE']
|
11
|
+
puts "Compiling extension..."
|
12
|
+
`cd #{File.expand_path("ext/cld/")} && make`
|
13
|
+
else
|
14
|
+
puts "Extension already compiled. To recompile set env variable RECOMPILE=true."
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
6
18
|
Rake::TestTask.new(:test) do |test|
|
19
|
+
Rake::Task["compile"].invoke
|
20
|
+
|
7
21
|
test.libs << 'lib' << 'test'
|
8
22
|
test.test_files = FileList['test/*_test.rb']
|
9
23
|
test.verbose = true
|
10
|
-
# test.warning = true
|
11
24
|
end
|
data/language_detection.gemspec
CHANGED
@@ -8,9 +8,9 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.version = LanguageDetection::VERSION
|
9
9
|
gem.authors = ["Vojtech Hyza"]
|
10
10
|
gem.email = ["vhyza@vhyza.eu"]
|
11
|
-
gem.description = %q{Language
|
12
|
-
gem.summary = %q{
|
13
|
-
gem.homepage = ""
|
11
|
+
gem.description = %q{Ruby bindings for Chromium Compact Language Detector}
|
12
|
+
gem.summary = %q{Ruby bindings for Chromium Compact Language Detector}
|
13
|
+
gem.homepage = "https://github.com/vhyza/language_detection"
|
14
14
|
|
15
15
|
gem.files = `git ls-files`.split($/)
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
@@ -19,7 +19,6 @@ Gem::Specification.new do |gem|
|
|
19
19
|
gem.extensions = ["ext/cld/extconf.rb"]
|
20
20
|
|
21
21
|
gem.add_dependency "ffi"
|
22
|
-
gem.add_dependency "hashr"
|
23
22
|
gem.add_dependency "rake"
|
24
23
|
|
25
24
|
gem.add_development_dependency "shoulda"
|
data/lib/language_detection.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require "language_detection/version"
|
2
|
+
require "language_detection/language"
|
2
3
|
require "ffi"
|
3
|
-
require "hashr"
|
4
4
|
|
5
5
|
module LanguageDetection
|
6
6
|
|
@@ -10,12 +10,11 @@ module LanguageDetection
|
|
10
10
|
result = language_detection(text.to_s, is_plain_text)
|
11
11
|
|
12
12
|
language = parse_result(result, result.members - [:details])
|
13
|
-
language[:details] = []
|
14
13
|
|
15
|
-
details = FFI::Pointer.new(LanguageDetection::
|
14
|
+
details = FFI::Pointer.new(LanguageDetection::DetailStruct, result[:details])
|
16
15
|
3.times do |i|
|
17
|
-
detail = parse_result(LanguageDetection::
|
18
|
-
language
|
16
|
+
detail = parse_result(LanguageDetection::DetailStruct.new(details[i]))
|
17
|
+
language.details << detail unless detail.code == 'un'
|
19
18
|
end
|
20
19
|
|
21
20
|
language
|
@@ -28,19 +27,19 @@ module LanguageDetection
|
|
28
27
|
private
|
29
28
|
|
30
29
|
def self.parse_result(result, members = result.members)
|
31
|
-
|
30
|
+
Language.new(Hash[ members.map {|member| [member.to_sym, result[member]]} ])
|
32
31
|
end
|
33
32
|
|
34
33
|
extend FFI::Library
|
35
34
|
|
36
|
-
class
|
35
|
+
class DetailStruct < FFI::Struct
|
37
36
|
layout :name, :string,
|
38
37
|
:code, :string,
|
39
38
|
:percent, :int,
|
40
39
|
:score, :double
|
41
40
|
end
|
42
41
|
|
43
|
-
class
|
42
|
+
class LanguageStruct < FFI::Struct
|
44
43
|
layout :name, :string,
|
45
44
|
:code, :string,
|
46
45
|
:reliable, :bool,
|
@@ -49,6 +48,6 @@ module LanguageDetection
|
|
49
48
|
end
|
50
49
|
|
51
50
|
ffi_lib File.expand_path("../../ext/cld/cld.so", __FILE__)
|
52
|
-
attach_function "language_detection","language_detection", [:buffer_in, :bool],
|
51
|
+
attach_function "language_detection","language_detection", [:buffer_in, :bool], LanguageStruct.by_value
|
53
52
|
|
54
53
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module LanguageDetection
|
2
|
+
|
3
|
+
class Language
|
4
|
+
|
5
|
+
attr_accessor :name, :code, :reliable, :text_bytes, :details, :percent, :score
|
6
|
+
|
7
|
+
def initialize(attributes = {})
|
8
|
+
attributes.each_pair do |attribute, value|
|
9
|
+
self.send("#{attribute}=", value)
|
10
|
+
end
|
11
|
+
|
12
|
+
@details ||= []
|
13
|
+
@name.downcase!
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
@@ -7,24 +7,23 @@ class LanguageDetectionTest < Test::Unit::TestCase
|
|
7
7
|
|
8
8
|
context "Language detection" do
|
9
9
|
|
10
|
-
should "be able to convert result from native call to
|
10
|
+
should "be able to convert result from native call to Language instance" do
|
11
11
|
result = LanguageDetection.language_detection("this is some text", false)
|
12
12
|
parsed_result = LanguageDetection.parse_result(result)
|
13
13
|
|
14
|
-
assert_kind_of LanguageDetection::
|
15
|
-
assert_kind_of
|
14
|
+
assert_kind_of LanguageDetection::LanguageStruct, result
|
15
|
+
assert_kind_of LanguageDetection::Language, parsed_result
|
16
16
|
|
17
|
-
assert_equal "
|
18
|
-
assert_nil parsed_result.non_existing_property
|
17
|
+
assert_equal "english", parsed_result.name
|
19
18
|
end
|
20
19
|
|
21
|
-
should "convert details from FFI pointer to
|
20
|
+
should "convert details from FFI pointer to Language instance" do
|
22
21
|
language = LanguageDetection.perform("this is some text")
|
23
22
|
|
24
|
-
assert_kind_of Array,
|
25
|
-
assert_kind_of
|
26
|
-
assert_equal "
|
27
|
-
assert_equal 65,
|
23
|
+
assert_kind_of Array, language.details
|
24
|
+
assert_kind_of LanguageDetection::Language, language.details.first
|
25
|
+
assert_equal "english", language.details.first.name
|
26
|
+
assert_equal 65, language.details.first.percent
|
28
27
|
end
|
29
28
|
|
30
29
|
should "recognize languages in testing data" do
|
@@ -67,7 +66,7 @@ class LanguageDetectionTest < Test::Unit::TestCase
|
|
67
66
|
|
68
67
|
should "return detected language" do
|
69
68
|
language = @article.language
|
70
|
-
assert_equal "
|
69
|
+
assert_equal "english", language.name
|
71
70
|
assert_equal true, language.reliable
|
72
71
|
assert_equal 100, language.details.first.percent
|
73
72
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: language_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-10-
|
12
|
+
date: 2012-10-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ffi
|
@@ -27,22 +27,6 @@ dependencies:
|
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
|
-
name: hashr
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
|
-
requirements:
|
35
|
-
- - ! '>='
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: '0'
|
38
|
-
type: :runtime
|
39
|
-
prerelease: false
|
40
|
-
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
|
-
requirements:
|
43
|
-
- - ! '>='
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
version: '0'
|
46
30
|
- !ruby/object:Gem::Dependency
|
47
31
|
name: rake
|
48
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -107,7 +91,7 @@ dependencies:
|
|
107
91
|
- - ! '>='
|
108
92
|
- !ruby/object:Gem::Version
|
109
93
|
version: '0'
|
110
|
-
description: Language
|
94
|
+
description: Ruby bindings for Chromium Compact Language Detector
|
111
95
|
email:
|
112
96
|
- vhyza@vhyza.eu
|
113
97
|
executables: []
|
@@ -209,12 +193,13 @@ files:
|
|
209
193
|
- ext/cld/languages/public/languages.h
|
210
194
|
- language_detection.gemspec
|
211
195
|
- lib/language_detection.rb
|
196
|
+
- lib/language_detection/language.rb
|
212
197
|
- lib/language_detection/string.rb
|
213
198
|
- lib/language_detection/version.rb
|
214
199
|
- test/_helper.rb
|
215
200
|
- test/fixtures/languages.csv
|
216
201
|
- test/language_detection_test.rb
|
217
|
-
homepage:
|
202
|
+
homepage: https://github.com/vhyza/language_detection
|
218
203
|
licenses: []
|
219
204
|
post_install_message:
|
220
205
|
rdoc_options: []
|
@@ -228,7 +213,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
228
213
|
version: '0'
|
229
214
|
segments:
|
230
215
|
- 0
|
231
|
-
hash:
|
216
|
+
hash: 3522077385673025298
|
232
217
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
233
218
|
none: false
|
234
219
|
requirements:
|
@@ -237,13 +222,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
237
222
|
version: '0'
|
238
223
|
segments:
|
239
224
|
- 0
|
240
|
-
hash:
|
225
|
+
hash: 3522077385673025298
|
241
226
|
requirements: []
|
242
227
|
rubyforge_project:
|
243
228
|
rubygems_version: 1.8.24
|
244
229
|
signing_key:
|
245
230
|
specification_version: 3
|
246
|
-
summary:
|
231
|
+
summary: Ruby bindings for Chromium Compact Language Detector
|
247
232
|
test_files:
|
248
233
|
- test/_helper.rb
|
249
234
|
- test/fixtures/languages.csv
|