cld 0.5.0 → 0.6.0

Sign up to get free protection for your applications and access to all the features.
data/ext/cld/thunk.cc CHANGED
@@ -12,8 +12,7 @@ typedef struct {
12
12
  } RESULT;
13
13
 
14
14
  extern "C" {
15
- RESULT detectLanguageThunkInt(const char * src) {
16
- bool is_plain_text = true;
15
+ RESULT detectLanguageThunkInt(const char * src, bool is_plain_text) {
17
16
  bool do_allow_extended_languages = true;
18
17
  bool do_pick_summary_language = false;
19
18
  bool do_remove_weak_matches = false;
data/lib/cld.rb CHANGED
@@ -4,8 +4,8 @@ require "ffi"
4
4
  module CLD
5
5
  extend FFI::Library
6
6
 
7
- def self.detect_language(text)
8
- result = detect_language_ext(text)
7
+ def self.detect_language(text, is_plain_text=true)
8
+ result = detect_language_ext(text.to_s, is_plain_text)
9
9
  Hash[ result.members.map {|member| [member.to_sym, result[member]]} ]
10
10
  end
11
11
 
@@ -17,5 +17,5 @@ module CLD
17
17
 
18
18
  GEM_ROOT = File.expand_path("../../", __FILE__)
19
19
  ffi_lib "#{GEM_ROOT}/ext/cld/cld.so"
20
- attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in], ReturnValue.by_value
20
+ attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in, :bool], ReturnValue.by_value
21
21
  end
data/lib/cld/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CLD
2
- VERSION = "0.5.0"
2
+ VERSION = "0.6.0"
3
3
  end
data/spec/cld_spec.rb CHANGED
@@ -19,6 +19,21 @@ describe CLD do
19
19
  it { subject[:reliable].should be_true }
20
20
  end
21
21
 
22
+ context "French in HTML - using CLD html " do
23
+ subject { CLD.detect_language("<html><head><body><script>A large amount of english in the script which should be ignored if using html in detect_language.</script><p>plus ça change, plus c'est la même chose</p></body></html>", false) }
24
+
25
+ it { subject[:name].should eq("FRENCH") }
26
+ it { subject[:code].should eq("fr") }
27
+
28
+ end
29
+ context "French in HTML - using CLD text " do
30
+ subject { CLD.detect_language("<html><head><body><script>A large amount of english in the script which should be ignored if using html in detect_language.</script><p>plus ça change, plus c'est la même chose</p></body></html>", true) }
31
+
32
+ it { subject[:name].should eq("ENGLISH") }
33
+ it { subject[:code].should eq("en") }
34
+
35
+ end
36
+
22
37
  context "Simplified Chinese text" do
23
38
  subject { CLD.detect_language("你好吗箭体") }
24
39
 
@@ -41,4 +56,12 @@ describe CLD do
41
56
  it { subject[:reliable].should be_true }
42
57
  end
43
58
 
59
+ context "nil for text" do
60
+ subject { CLD.detect_language(nil) }
61
+
62
+ it { subject[:name].should eq("Unknown") }
63
+ it { subject[:code].should eq("un") }
64
+ it { subject[:reliable].should be_true }
65
+ end
66
+
44
67
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: cld
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.5.0
5
+ version: 0.6.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jason Toy
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-02-11 00:00:00 Z
13
+ date: 2012-05-31 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ffi