cld 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/cld/thunk.cc +1 -2
- data/lib/cld.rb +3 -3
- data/lib/cld/version.rb +1 -1
- data/spec/cld_spec.rb +23 -0
- metadata +2 -2
data/ext/cld/thunk.cc
CHANGED
@@ -12,8 +12,7 @@ typedef struct {
|
|
12
12
|
} RESULT;
|
13
13
|
|
14
14
|
extern "C" {
|
15
|
-
RESULT detectLanguageThunkInt(const char * src) {
|
16
|
-
bool is_plain_text = true;
|
15
|
+
RESULT detectLanguageThunkInt(const char * src, bool is_plain_text) {
|
17
16
|
bool do_allow_extended_languages = true;
|
18
17
|
bool do_pick_summary_language = false;
|
19
18
|
bool do_remove_weak_matches = false;
|
data/lib/cld.rb
CHANGED
@@ -4,8 +4,8 @@ require "ffi"
|
|
4
4
|
module CLD
|
5
5
|
extend FFI::Library
|
6
6
|
|
7
|
-
def self.detect_language(text)
|
8
|
-
result = detect_language_ext(text)
|
7
|
+
def self.detect_language(text, is_plain_text=true)
|
8
|
+
result = detect_language_ext(text.to_s, is_plain_text)
|
9
9
|
Hash[ result.members.map {|member| [member.to_sym, result[member]]} ]
|
10
10
|
end
|
11
11
|
|
@@ -17,5 +17,5 @@ module CLD
|
|
17
17
|
|
18
18
|
GEM_ROOT = File.expand_path("../../", __FILE__)
|
19
19
|
ffi_lib "#{GEM_ROOT}/ext/cld/cld.so"
|
20
|
-
attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in], ReturnValue.by_value
|
20
|
+
attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in, :bool], ReturnValue.by_value
|
21
21
|
end
|
data/lib/cld/version.rb
CHANGED
data/spec/cld_spec.rb
CHANGED
@@ -19,6 +19,21 @@ describe CLD do
|
|
19
19
|
it { subject[:reliable].should be_true }
|
20
20
|
end
|
21
21
|
|
22
|
+
context "French in HTML - using CLD html " do
|
23
|
+
subject { CLD.detect_language("<html><head><body><script>A large amount of english in the script which should be ignored if using html in detect_language.</script><p>plus ça change, plus c'est la même chose</p></body></html>", false) }
|
24
|
+
|
25
|
+
it { subject[:name].should eq("FRENCH") }
|
26
|
+
it { subject[:code].should eq("fr") }
|
27
|
+
|
28
|
+
end
|
29
|
+
context "French in HTML - using CLD text " do
|
30
|
+
subject { CLD.detect_language("<html><head><body><script>A large amount of english in the script which should be ignored if using html in detect_language.</script><p>plus ça change, plus c'est la même chose</p></body></html>", true) }
|
31
|
+
|
32
|
+
it { subject[:name].should eq("ENGLISH") }
|
33
|
+
it { subject[:code].should eq("en") }
|
34
|
+
|
35
|
+
end
|
36
|
+
|
22
37
|
context "Simplified Chinese text" do
|
23
38
|
subject { CLD.detect_language("你好吗箭体") }
|
24
39
|
|
@@ -41,4 +56,12 @@ describe CLD do
|
|
41
56
|
it { subject[:reliable].should be_true }
|
42
57
|
end
|
43
58
|
|
59
|
+
context "nil for text" do
|
60
|
+
subject { CLD.detect_language(nil) }
|
61
|
+
|
62
|
+
it { subject[:name].should eq("Unknown") }
|
63
|
+
it { subject[:code].should eq("un") }
|
64
|
+
it { subject[:reliable].should be_true }
|
65
|
+
end
|
66
|
+
|
44
67
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: cld
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.5.0
|
5
|
+
version: 0.6.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jason Toy
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-05-31 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ffi
|