cld 0.5.0 → 0.6.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/ext/cld/thunk.cc CHANGED
@@ -12,8 +12,7 @@ typedef struct {
12
12
  } RESULT;
13
13
 
14
14
  extern "C" {
15
- RESULT detectLanguageThunkInt(const char * src) {
16
- bool is_plain_text = true;
15
+ RESULT detectLanguageThunkInt(const char * src, bool is_plain_text) {
17
16
  bool do_allow_extended_languages = true;
18
17
  bool do_pick_summary_language = false;
19
18
  bool do_remove_weak_matches = false;
data/lib/cld.rb CHANGED
@@ -4,8 +4,8 @@ require "ffi"
4
4
  module CLD
5
5
  extend FFI::Library
6
6
 
7
- def self.detect_language(text)
8
- result = detect_language_ext(text)
7
+ def self.detect_language(text, is_plain_text=true)
8
+ result = detect_language_ext(text.to_s, is_plain_text)
9
9
  Hash[ result.members.map {|member| [member.to_sym, result[member]]} ]
10
10
  end
11
11
 
@@ -17,5 +17,5 @@ module CLD
17
17
 
18
18
  GEM_ROOT = File.expand_path("../../", __FILE__)
19
19
  ffi_lib "#{GEM_ROOT}/ext/cld/cld.so"
20
- attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in], ReturnValue.by_value
20
+ attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in, :bool], ReturnValue.by_value
21
21
  end
data/lib/cld/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CLD
2
- VERSION = "0.5.0"
2
+ VERSION = "0.6.0"
3
3
  end
data/spec/cld_spec.rb CHANGED
@@ -19,6 +19,21 @@ describe CLD do
19
19
  it { subject[:reliable].should be_true }
20
20
  end
21
21
 
22
+ context "French in HTML - using CLD html " do
23
+ subject { CLD.detect_language("<html><head><body><script>A large amount of english in the script which should be ignored if using html in detect_language.</script><p>plus ça change, plus c'est la même chose</p></body></html>", false) }
24
+
25
+ it { subject[:name].should eq("FRENCH") }
26
+ it { subject[:code].should eq("fr") }
27
+
28
+ end
29
+ context "French in HTML - using CLD text " do
30
+ subject { CLD.detect_language("<html><head><body><script>A large amount of english in the script which should be ignored if using html in detect_language.</script><p>plus ça change, plus c'est la même chose</p></body></html>", true) }
31
+
32
+ it { subject[:name].should eq("ENGLISH") }
33
+ it { subject[:code].should eq("en") }
34
+
35
+ end
36
+
22
37
  context "Simplified Chinese text" do
23
38
  subject { CLD.detect_language("你好吗箭体") }
24
39
 
@@ -41,4 +56,12 @@ describe CLD do
41
56
  it { subject[:reliable].should be_true }
42
57
  end
43
58
 
59
+ context "nil for text" do
60
+ subject { CLD.detect_language(nil) }
61
+
62
+ it { subject[:name].should eq("Unknown") }
63
+ it { subject[:code].should eq("un") }
64
+ it { subject[:reliable].should be_true }
65
+ end
66
+
44
67
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: cld
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.5.0
5
+ version: 0.6.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jason Toy
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-02-11 00:00:00 Z
13
+ date: 2012-05-31 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ffi