language_detection 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +5 -5
- data/Rakefile +15 -2
- data/language_detection.gemspec +3 -4
- data/lib/language_detection.rb +8 -9
- data/lib/language_detection/language.rb +18 -0
- data/lib/language_detection/version.rb +1 -1
- data/test/language_detection_test.rb +10 -11
- metadata +8 -23
data/README.md
CHANGED
@@ -22,15 +22,15 @@ Or install it yourself as:
|
|
22
22
|
>> require 'language_detection'
|
23
23
|
=> true
|
24
24
|
>> language = LanguageDetection.perform("This is some example text for language detection")
|
25
|
-
=>
|
25
|
+
=> #<LanguageDetection::Language:0x007fae0404f628 @name="english", @code="en", @reliable=true, @text_bytes=51, @details=[#<LanguageDetection::Language:0x007fae0404eb10 @name="english", @code="en", @details=[], @percent=100, @score=49.43273905996759>]>
|
26
26
|
>> language.name
|
27
|
-
=> "
|
27
|
+
=> "english"
|
28
28
|
>> language.code
|
29
29
|
=> "en"
|
30
30
|
>> language.reliable
|
31
31
|
=> true
|
32
32
|
>> language.details # contains up to 3 languages sorted by score
|
33
|
-
=> [
|
33
|
+
=> [#<LanguageDetection::Language:0x007fae0404eb10 @name="english", @code="en", @details=[], @percent=100, @score=49.43273905996759>]
|
34
34
|
>> language.details.first.percent
|
35
35
|
=> 100
|
36
36
|
>> language.details.first.score
|
@@ -61,7 +61,7 @@ which provides `Article#language` method using `Article#to_s` method as paramete
|
|
61
61
|
```ruby
|
62
62
|
>> article = Article.new :title => "Web development that doesn't hurt", :content => "Tens of thousands of Rails applications are already live..."
|
63
63
|
>> article.language
|
64
|
-
=>
|
64
|
+
=> #<LanguageDetection::Language:0x007fae049dd8e8 @name="english", @code="en", @reliable=true, @text_bytes=93, @details=[#<LanguageDetection::Language:0x007fae049dd118 @name="english", @code="en", @details=[], @percent=100, @score=80.22690437601297>]>
|
65
65
|
```
|
66
66
|
|
67
67
|
or you can add `String#language` method by `require 'language_detection/string'`
|
@@ -72,7 +72,7 @@ or you can add `String#language` method by `require 'language_detection/string'`
|
|
72
72
|
>> require 'language_detection/string'
|
73
73
|
=> true
|
74
74
|
>> "Web development that doesn't hurt".language
|
75
|
-
=>
|
75
|
+
=> #<LanguageDetection::Language:0x007fae049cfec8 @name="english", @code="en", @reliable=true, @text_bytes=36, @details=[#<LanguageDetection::Language:0x007fae049cf7e8 @name="english", @code="en", @details=[], @percent=100, @score=39.70826580226905>]>
|
76
76
|
```
|
77
77
|
|
78
78
|
|
data/Rakefile
CHANGED
@@ -1,11 +1,24 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
+
require 'rake/testtask'
|
2
3
|
|
3
4
|
task :default => :test
|
4
5
|
|
5
|
-
|
6
|
+
desc "Compile extension"
|
7
|
+
task :compile do
|
8
|
+
path = File.expand_path("ext/cld/cld.so", File.dirname(__FILE__))
|
9
|
+
|
10
|
+
if !File.exists?(path) || ENV['RECOMPILE']
|
11
|
+
puts "Compiling extension..."
|
12
|
+
`cd #{File.expand_path("ext/cld/")} && make`
|
13
|
+
else
|
14
|
+
puts "Extension already compiled. To recompile set env variable RECOMPILE=true."
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
6
18
|
Rake::TestTask.new(:test) do |test|
|
19
|
+
Rake::Task["compile"].invoke
|
20
|
+
|
7
21
|
test.libs << 'lib' << 'test'
|
8
22
|
test.test_files = FileList['test/*_test.rb']
|
9
23
|
test.verbose = true
|
10
|
-
# test.warning = true
|
11
24
|
end
|
data/language_detection.gemspec
CHANGED
@@ -8,9 +8,9 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.version = LanguageDetection::VERSION
|
9
9
|
gem.authors = ["Vojtech Hyza"]
|
10
10
|
gem.email = ["vhyza@vhyza.eu"]
|
11
|
-
gem.description = %q{Language
|
12
|
-
gem.summary = %q{
|
13
|
-
gem.homepage = ""
|
11
|
+
gem.description = %q{Ruby bindings for Chromium Compact Language Detector}
|
12
|
+
gem.summary = %q{Ruby bindings for Chromium Compact Language Detector}
|
13
|
+
gem.homepage = "https://github.com/vhyza/language_detection"
|
14
14
|
|
15
15
|
gem.files = `git ls-files`.split($/)
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
@@ -19,7 +19,6 @@ Gem::Specification.new do |gem|
|
|
19
19
|
gem.extensions = ["ext/cld/extconf.rb"]
|
20
20
|
|
21
21
|
gem.add_dependency "ffi"
|
22
|
-
gem.add_dependency "hashr"
|
23
22
|
gem.add_dependency "rake"
|
24
23
|
|
25
24
|
gem.add_development_dependency "shoulda"
|
data/lib/language_detection.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require "language_detection/version"
|
2
|
+
require "language_detection/language"
|
2
3
|
require "ffi"
|
3
|
-
require "hashr"
|
4
4
|
|
5
5
|
module LanguageDetection
|
6
6
|
|
@@ -10,12 +10,11 @@ module LanguageDetection
|
|
10
10
|
result = language_detection(text.to_s, is_plain_text)
|
11
11
|
|
12
12
|
language = parse_result(result, result.members - [:details])
|
13
|
-
language[:details] = []
|
14
13
|
|
15
|
-
details = FFI::Pointer.new(LanguageDetection::
|
14
|
+
details = FFI::Pointer.new(LanguageDetection::DetailStruct, result[:details])
|
16
15
|
3.times do |i|
|
17
|
-
detail = parse_result(LanguageDetection::
|
18
|
-
language
|
16
|
+
detail = parse_result(LanguageDetection::DetailStruct.new(details[i]))
|
17
|
+
language.details << detail unless detail.code == 'un'
|
19
18
|
end
|
20
19
|
|
21
20
|
language
|
@@ -28,19 +27,19 @@ module LanguageDetection
|
|
28
27
|
private
|
29
28
|
|
30
29
|
def self.parse_result(result, members = result.members)
|
31
|
-
|
30
|
+
Language.new(Hash[ members.map {|member| [member.to_sym, result[member]]} ])
|
32
31
|
end
|
33
32
|
|
34
33
|
extend FFI::Library
|
35
34
|
|
36
|
-
class
|
35
|
+
class DetailStruct < FFI::Struct
|
37
36
|
layout :name, :string,
|
38
37
|
:code, :string,
|
39
38
|
:percent, :int,
|
40
39
|
:score, :double
|
41
40
|
end
|
42
41
|
|
43
|
-
class
|
42
|
+
class LanguageStruct < FFI::Struct
|
44
43
|
layout :name, :string,
|
45
44
|
:code, :string,
|
46
45
|
:reliable, :bool,
|
@@ -49,6 +48,6 @@ module LanguageDetection
|
|
49
48
|
end
|
50
49
|
|
51
50
|
ffi_lib File.expand_path("../../ext/cld/cld.so", __FILE__)
|
52
|
-
attach_function "language_detection","language_detection", [:buffer_in, :bool],
|
51
|
+
attach_function "language_detection","language_detection", [:buffer_in, :bool], LanguageStruct.by_value
|
53
52
|
|
54
53
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module LanguageDetection
|
2
|
+
|
3
|
+
class Language
|
4
|
+
|
5
|
+
attr_accessor :name, :code, :reliable, :text_bytes, :details, :percent, :score
|
6
|
+
|
7
|
+
def initialize(attributes = {})
|
8
|
+
attributes.each_pair do |attribute, value|
|
9
|
+
self.send("#{attribute}=", value)
|
10
|
+
end
|
11
|
+
|
12
|
+
@details ||= []
|
13
|
+
@name.downcase!
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
@@ -7,24 +7,23 @@ class LanguageDetectionTest < Test::Unit::TestCase
|
|
7
7
|
|
8
8
|
context "Language detection" do
|
9
9
|
|
10
|
-
should "be able to convert result from native call to
|
10
|
+
should "be able to convert result from native call to Language instance" do
|
11
11
|
result = LanguageDetection.language_detection("this is some text", false)
|
12
12
|
parsed_result = LanguageDetection.parse_result(result)
|
13
13
|
|
14
|
-
assert_kind_of LanguageDetection::
|
15
|
-
assert_kind_of
|
14
|
+
assert_kind_of LanguageDetection::LanguageStruct, result
|
15
|
+
assert_kind_of LanguageDetection::Language, parsed_result
|
16
16
|
|
17
|
-
assert_equal "
|
18
|
-
assert_nil parsed_result.non_existing_property
|
17
|
+
assert_equal "english", parsed_result.name
|
19
18
|
end
|
20
19
|
|
21
|
-
should "convert details from FFI pointer to
|
20
|
+
should "convert details from FFI pointer to Language instance" do
|
22
21
|
language = LanguageDetection.perform("this is some text")
|
23
22
|
|
24
|
-
assert_kind_of Array,
|
25
|
-
assert_kind_of
|
26
|
-
assert_equal "
|
27
|
-
assert_equal 65,
|
23
|
+
assert_kind_of Array, language.details
|
24
|
+
assert_kind_of LanguageDetection::Language, language.details.first
|
25
|
+
assert_equal "english", language.details.first.name
|
26
|
+
assert_equal 65, language.details.first.percent
|
28
27
|
end
|
29
28
|
|
30
29
|
should "recognize languages in testing data" do
|
@@ -67,7 +66,7 @@ class LanguageDetectionTest < Test::Unit::TestCase
|
|
67
66
|
|
68
67
|
should "return detected language" do
|
69
68
|
language = @article.language
|
70
|
-
assert_equal "
|
69
|
+
assert_equal "english", language.name
|
71
70
|
assert_equal true, language.reliable
|
72
71
|
assert_equal 100, language.details.first.percent
|
73
72
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: language_detection
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-10-
|
12
|
+
date: 2012-10-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ffi
|
@@ -27,22 +27,6 @@ dependencies:
|
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
|
-
name: hashr
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
|
-
requirements:
|
35
|
-
- - ! '>='
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: '0'
|
38
|
-
type: :runtime
|
39
|
-
prerelease: false
|
40
|
-
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
|
-
requirements:
|
43
|
-
- - ! '>='
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
version: '0'
|
46
30
|
- !ruby/object:Gem::Dependency
|
47
31
|
name: rake
|
48
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -107,7 +91,7 @@ dependencies:
|
|
107
91
|
- - ! '>='
|
108
92
|
- !ruby/object:Gem::Version
|
109
93
|
version: '0'
|
110
|
-
description: Language
|
94
|
+
description: Ruby bindings for Chromium Compact Language Detector
|
111
95
|
email:
|
112
96
|
- vhyza@vhyza.eu
|
113
97
|
executables: []
|
@@ -209,12 +193,13 @@ files:
|
|
209
193
|
- ext/cld/languages/public/languages.h
|
210
194
|
- language_detection.gemspec
|
211
195
|
- lib/language_detection.rb
|
196
|
+
- lib/language_detection/language.rb
|
212
197
|
- lib/language_detection/string.rb
|
213
198
|
- lib/language_detection/version.rb
|
214
199
|
- test/_helper.rb
|
215
200
|
- test/fixtures/languages.csv
|
216
201
|
- test/language_detection_test.rb
|
217
|
-
homepage:
|
202
|
+
homepage: https://github.com/vhyza/language_detection
|
218
203
|
licenses: []
|
219
204
|
post_install_message:
|
220
205
|
rdoc_options: []
|
@@ -228,7 +213,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
228
213
|
version: '0'
|
229
214
|
segments:
|
230
215
|
- 0
|
231
|
-
hash:
|
216
|
+
hash: 3522077385673025298
|
232
217
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
233
218
|
none: false
|
234
219
|
requirements:
|
@@ -237,13 +222,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
237
222
|
version: '0'
|
238
223
|
segments:
|
239
224
|
- 0
|
240
|
-
hash:
|
225
|
+
hash: 3522077385673025298
|
241
226
|
requirements: []
|
242
227
|
rubyforge_project:
|
243
228
|
rubygems_version: 1.8.24
|
244
229
|
signing_key:
|
245
230
|
specification_version: 3
|
246
|
-
summary:
|
231
|
+
summary: Ruby bindings for Chromium Compact Language Detector
|
247
232
|
test_files:
|
248
233
|
- test/_helper.rb
|
249
234
|
- test/fixtures/languages.csv
|