cld 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/cld/thunk.cc +1 -2
- data/lib/cld.rb +3 -3
- data/lib/cld/version.rb +1 -1
- data/spec/cld_spec.rb +23 -0
- metadata +2 -2
data/ext/cld/thunk.cc
CHANGED
@@ -12,8 +12,7 @@ typedef struct {
|
|
12
12
|
} RESULT;
|
13
13
|
|
14
14
|
extern "C" {
|
15
|
-
RESULT detectLanguageThunkInt(const char * src) {
|
16
|
-
bool is_plain_text = true;
|
15
|
+
RESULT detectLanguageThunkInt(const char * src, bool is_plain_text) {
|
17
16
|
bool do_allow_extended_languages = true;
|
18
17
|
bool do_pick_summary_language = false;
|
19
18
|
bool do_remove_weak_matches = false;
|
data/lib/cld.rb
CHANGED
@@ -4,8 +4,8 @@ require "ffi"
|
|
4
4
|
module CLD
|
5
5
|
extend FFI::Library
|
6
6
|
|
7
|
-
def self.detect_language(text)
|
8
|
-
result = detect_language_ext(text)
|
7
|
+
def self.detect_language(text, is_plain_text=true)
|
8
|
+
result = detect_language_ext(text.to_s, is_plain_text)
|
9
9
|
Hash[ result.members.map {|member| [member.to_sym, result[member]]} ]
|
10
10
|
end
|
11
11
|
|
@@ -17,5 +17,5 @@ module CLD
|
|
17
17
|
|
18
18
|
GEM_ROOT = File.expand_path("../../", __FILE__)
|
19
19
|
ffi_lib "#{GEM_ROOT}/ext/cld/cld.so"
|
20
|
-
attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in], ReturnValue.by_value
|
20
|
+
attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in, :bool], ReturnValue.by_value
|
21
21
|
end
|
data/lib/cld/version.rb
CHANGED
data/spec/cld_spec.rb
CHANGED
@@ -19,6 +19,21 @@ describe CLD do
|
|
19
19
|
it { subject[:reliable].should be_true }
|
20
20
|
end
|
21
21
|
|
22
|
+
context "French in HTML - using CLD html " do
|
23
|
+
subject { CLD.detect_language("<html><head><body><script>A large amount of english in the script which should be ignored if using html in detect_language.</script><p>plus ça change, plus c'est la même chose</p></body></html>", false) }
|
24
|
+
|
25
|
+
it { subject[:name].should eq("FRENCH") }
|
26
|
+
it { subject[:code].should eq("fr") }
|
27
|
+
|
28
|
+
end
|
29
|
+
context "French in HTML - using CLD text " do
|
30
|
+
subject { CLD.detect_language("<html><head><body><script>A large amount of english in the script which should be ignored if using html in detect_language.</script><p>plus ça change, plus c'est la même chose</p></body></html>", true) }
|
31
|
+
|
32
|
+
it { subject[:name].should eq("ENGLISH") }
|
33
|
+
it { subject[:code].should eq("en") }
|
34
|
+
|
35
|
+
end
|
36
|
+
|
22
37
|
context "Simplified Chinese text" do
|
23
38
|
subject { CLD.detect_language("你好吗箭体") }
|
24
39
|
|
@@ -41,4 +56,12 @@ describe CLD do
|
|
41
56
|
it { subject[:reliable].should be_true }
|
42
57
|
end
|
43
58
|
|
59
|
+
context "nil for text" do
|
60
|
+
subject { CLD.detect_language(nil) }
|
61
|
+
|
62
|
+
it { subject[:name].should eq("Unknown") }
|
63
|
+
it { subject[:code].should eq("un") }
|
64
|
+
it { subject[:reliable].should be_true }
|
65
|
+
end
|
66
|
+
|
44
67
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: cld
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.5.0
|
5
|
+
version: 0.6.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jason Toy
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-05-31 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ffi
|