cld 0.5.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ext/cld/Makefile.am +28 -0
- data/ext/cld/Makefile.in +790 -0
- data/ext/cld/aclocal.m4 +8895 -0
- data/ext/cld/base/build_config.h +5 -0
- data/ext/cld/build_aux/config.guess +1500 -0
- data/ext/cld/build_aux/config.sub +1616 -0
- data/ext/cld/build_aux/depcomp +584 -0
- data/ext/cld/build_aux/install-sh +507 -0
- data/ext/cld/build_aux/ltmain.sh +8745 -0
- data/ext/cld/build_aux/missing +367 -0
- data/ext/cld/configure +17362 -0
- data/ext/cld/configure.ac +14 -0
- data/ext/cld/extconf.rb +5 -0
- data/ext/cld/languages/internal/languages.cc +1 -2
- data/ext/cld/thunk.cc +1 -2
- data/lib/cld.rb +4 -4
- data/lib/cld/version.rb +1 -1
- metadata +43 -59
- data/.gitignore +0 -20
- data/.rspec +0 -2
- data/Gemfile +0 -6
- data/Rakefile +0 -5
- data/cld.gemspec +0 -22
- data/ext/cld/Makefile +0 -31
- data/spec/cld_spec.rb +0 -44
- data/spec/spec_helper.rb +0 -6
data/ext/cld/extconf.rb
CHANGED
@@ -58,8 +58,7 @@ static const LanguageInfo kLanguageInfoTable[] = {
|
|
58
58
|
{ "IRISH", "ga", "gle", NULL},
|
59
59
|
{ "GALICIAN", "gl", "glg", NULL},
|
60
60
|
// Impossible to tell Tagalog from Filipino at the moment.
|
61
|
-
|
62
|
-
{ "TAGALOG", NULL, "fil", NULL},
|
61
|
+
{ "TAGALOG", "tl", "tgl", NULL},
|
63
62
|
{ "TURKISH", "tr", "tur", NULL},
|
64
63
|
{ "UKRAINIAN", "uk", "ukr", NULL},
|
65
64
|
{ "HINDI", "hi", "hin", NULL},
|
data/ext/cld/thunk.cc
CHANGED
@@ -12,8 +12,7 @@ typedef struct {
|
|
12
12
|
} RESULT;
|
13
13
|
|
14
14
|
extern "C" {
|
15
|
-
RESULT detectLanguageThunkInt(const char * src) {
|
16
|
-
bool is_plain_text = true;
|
15
|
+
RESULT detectLanguageThunkInt(const char * src, bool is_plain_text) {
|
17
16
|
bool do_allow_extended_languages = true;
|
18
17
|
bool do_pick_summary_language = false;
|
19
18
|
bool do_remove_weak_matches = false;
|
data/lib/cld.rb
CHANGED
@@ -4,8 +4,8 @@ require "ffi"
|
|
4
4
|
module CLD
|
5
5
|
extend FFI::Library
|
6
6
|
|
7
|
-
def self.detect_language(text)
|
8
|
-
result = detect_language_ext(text)
|
7
|
+
def self.detect_language(text, is_plain_text=true)
|
8
|
+
result = detect_language_ext(text.to_s, is_plain_text)
|
9
9
|
Hash[ result.members.map {|member| [member.to_sym, result[member]]} ]
|
10
10
|
end
|
11
11
|
|
@@ -16,6 +16,6 @@ module CLD
|
|
16
16
|
end
|
17
17
|
|
18
18
|
GEM_ROOT = File.expand_path("../../", __FILE__)
|
19
|
-
ffi_lib "#{GEM_ROOT}/ext/cld/cld.so"
|
20
|
-
attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in], ReturnValue.by_value
|
19
|
+
ffi_lib "#{GEM_ROOT}/ext/cld/lib/cld.so"
|
20
|
+
attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in, :bool], ReturnValue.by_value
|
21
21
|
end
|
data/lib/cld/version.rb
CHANGED
metadata
CHANGED
@@ -1,57 +1,42 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: cld
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
version: 0.5.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.10.0
|
6
5
|
platform: ruby
|
7
|
-
authors:
|
6
|
+
authors:
|
8
7
|
- Jason Toy
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2021-07-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
16
14
|
name: ffi
|
17
|
-
|
18
|
-
|
19
|
-
none: false
|
20
|
-
requirements:
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
21
17
|
- - ">="
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version:
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
24
20
|
type: :runtime
|
25
|
-
version_requirements: *id001
|
26
|
-
- !ruby/object:Gem::Dependency
|
27
|
-
name: rspec
|
28
21
|
prerelease: false
|
29
|
-
|
30
|
-
|
31
|
-
requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
32
24
|
- - ">="
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
version:
|
35
|
-
type: :development
|
36
|
-
version_requirements: *id002
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
37
27
|
description: Compact Language Detection for Ruby
|
38
|
-
email:
|
28
|
+
email:
|
39
29
|
- jtoy@jtoy.net
|
40
30
|
executables: []
|
41
|
-
|
42
|
-
extensions:
|
31
|
+
extensions:
|
43
32
|
- ext/cld/extconf.rb
|
44
33
|
extra_rdoc_files: []
|
45
|
-
|
46
|
-
files:
|
47
|
-
- .gitignore
|
48
|
-
- .rspec
|
49
|
-
- Gemfile
|
34
|
+
files:
|
50
35
|
- LICENSE
|
51
36
|
- README.md
|
52
|
-
-
|
53
|
-
- cld.
|
54
|
-
- ext/cld/
|
37
|
+
- ext/cld/Makefile.am
|
38
|
+
- ext/cld/Makefile.in
|
39
|
+
- ext/cld/aclocal.m4
|
55
40
|
- ext/cld/base/basictypes.h
|
56
41
|
- ext/cld/base/build_config.h
|
57
42
|
- ext/cld/base/casts.h
|
@@ -71,7 +56,15 @@ files:
|
|
71
56
|
- ext/cld/base/template_util.h
|
72
57
|
- ext/cld/base/type_traits.h
|
73
58
|
- ext/cld/base/vlog_is_on.h
|
59
|
+
- ext/cld/build_aux/config.guess
|
60
|
+
- ext/cld/build_aux/config.sub
|
61
|
+
- ext/cld/build_aux/depcomp
|
62
|
+
- ext/cld/build_aux/install-sh
|
63
|
+
- ext/cld/build_aux/ltmain.sh
|
64
|
+
- ext/cld/build_aux/missing
|
74
65
|
- ext/cld/cld_encodings.h
|
66
|
+
- ext/cld/configure
|
67
|
+
- ext/cld/configure.ac
|
75
68
|
- ext/cld/encodings/compact_lang_det/#cldutil.cc#
|
76
69
|
- ext/cld/encodings/compact_lang_det/#cldutil.h#
|
77
70
|
- ext/cld/encodings/compact_lang_det/#compact_lang_det_impl.h#
|
@@ -151,35 +144,26 @@ files:
|
|
151
144
|
- ext/cld/thunk.cc
|
152
145
|
- lib/cld.rb
|
153
146
|
- lib/cld/version.rb
|
154
|
-
|
155
|
-
- spec/spec_helper.rb
|
156
|
-
homepage: http://github.com/jtoy/cld
|
147
|
+
homepage: https://github.com/jtoy/cld
|
157
148
|
licenses: []
|
158
|
-
|
149
|
+
metadata: {}
|
159
150
|
post_install_message:
|
160
151
|
rdoc_options: []
|
161
|
-
|
162
|
-
require_paths:
|
152
|
+
require_paths:
|
163
153
|
- lib
|
164
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
165
|
-
|
166
|
-
requirements:
|
154
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
167
156
|
- - ">="
|
168
|
-
- !ruby/object:Gem::Version
|
169
|
-
version:
|
170
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
|
-
|
172
|
-
requirements:
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
159
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
160
|
+
requirements:
|
173
161
|
- - ">="
|
174
|
-
- !ruby/object:Gem::Version
|
175
|
-
version:
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
version: '0'
|
176
164
|
requirements: []
|
177
|
-
|
178
|
-
rubyforge_project:
|
179
|
-
rubygems_version: 1.8.11
|
165
|
+
rubygems_version: 3.2.16
|
180
166
|
signing_key:
|
181
|
-
specification_version:
|
167
|
+
specification_version: 4
|
182
168
|
summary: Compact Language Detection for Ruby
|
183
|
-
test_files:
|
184
|
-
- spec/cld_spec.rb
|
185
|
-
- spec/spec_helper.rb
|
169
|
+
test_files: []
|
data/.gitignore
DELETED
data/.rspec
DELETED
data/Gemfile
DELETED
data/Rakefile
DELETED
data/cld.gemspec
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
require File.expand_path('../lib/cld/version', __FILE__)
|
3
|
-
|
4
|
-
Gem::Specification.new do |gem|
|
5
|
-
gem.authors = ["Jason Toy"]
|
6
|
-
gem.email = ["jtoy@jtoy.net"]
|
7
|
-
gem.description = %q{Compact Language Detection for Ruby}
|
8
|
-
gem.summary = %q{Compact Language Detection for Ruby}
|
9
|
-
gem.homepage = "http://github.com/jtoy/cld"
|
10
|
-
|
11
|
-
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
12
|
-
gem.files = `git ls-files`.split("\n")
|
13
|
-
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
14
|
-
gem.extensions = ["ext/cld/extconf.rb"]
|
15
|
-
gem.name = "cld"
|
16
|
-
gem.require_paths = ["lib"]
|
17
|
-
gem.version = CLD::VERSION
|
18
|
-
|
19
|
-
gem.add_dependency "ffi"
|
20
|
-
|
21
|
-
gem.add_development_dependency "rspec"
|
22
|
-
end
|
data/ext/cld/Makefile
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS
|
2
|
-
LDFLAGS=-L.
|
3
|
-
CC=g++
|
4
|
-
AR=ar
|
5
|
-
SOURCES=encodings/compact_lang_det/cldutil.cc \
|
6
|
-
encodings/compact_lang_det/cldutil_dbg_empty.cc \
|
7
|
-
encodings/compact_lang_det/compact_lang_det.cc \
|
8
|
-
encodings/compact_lang_det/compact_lang_det_impl.cc \
|
9
|
-
encodings/compact_lang_det/ext_lang_enc.cc \
|
10
|
-
encodings/compact_lang_det/getonescriptspan.cc \
|
11
|
-
encodings/compact_lang_det/letterscript_enum.cc \
|
12
|
-
encodings/compact_lang_det/tote.cc \
|
13
|
-
encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc \
|
14
|
-
encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc \
|
15
|
-
encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc \
|
16
|
-
encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc \
|
17
|
-
encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc \
|
18
|
-
encodings/compact_lang_det/win/cld_htmlutils_windows.cc \
|
19
|
-
encodings/compact_lang_det/win/cld_unilib_windows.cc \
|
20
|
-
encodings/compact_lang_det/win/cld_utf8statetable.cc \
|
21
|
-
encodings/compact_lang_det/win/cld_utf8utils_windows.cc \
|
22
|
-
encodings/internal/encodings.cc \
|
23
|
-
languages/internal/languages.cc \
|
24
|
-
thunk.cc
|
25
|
-
|
26
|
-
install:
|
27
|
-
rm -f *.o
|
28
|
-
rm -f libcld.a
|
29
|
-
$(CC) -c $(CFLAGS) $(SOURCES)
|
30
|
-
$(AR) rcs libcld.a *.o
|
31
|
-
$(CC) -DCLD_WINDOWS -I. -L. -shared -o cld.so -lstdc++ *.o
|
data/spec/cld_spec.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
require "spec_helper"
|
3
|
-
|
4
|
-
describe CLD do
|
5
|
-
|
6
|
-
context "English text" do
|
7
|
-
subject { CLD.detect_language("This is a test") }
|
8
|
-
|
9
|
-
it { subject[:name].should eq("ENGLISH") }
|
10
|
-
it { subject[:code].should eq("en") }
|
11
|
-
it { subject[:reliable].should be_true }
|
12
|
-
end
|
13
|
-
|
14
|
-
context "French text" do
|
15
|
-
subject { CLD.detect_language("plus ça change, plus c'est la même chose") }
|
16
|
-
|
17
|
-
it { subject[:name].should eq("FRENCH") }
|
18
|
-
it { subject[:code].should eq("fr") }
|
19
|
-
it { subject[:reliable].should be_true }
|
20
|
-
end
|
21
|
-
|
22
|
-
context "Simplified Chinese text" do
|
23
|
-
subject { CLD.detect_language("你好吗箭体") }
|
24
|
-
|
25
|
-
it { subject[:name].should eq("Chinese") }
|
26
|
-
it { subject[:code].should eq("zh") }
|
27
|
-
end
|
28
|
-
|
29
|
-
context "Traditional Chinese text" do
|
30
|
-
subject { CLD.detect_language("你好嗎繁體") }
|
31
|
-
|
32
|
-
it { subject[:name].should eq("ChineseT") }
|
33
|
-
it { subject[:code].should eq("zh-TW") }
|
34
|
-
end
|
35
|
-
|
36
|
-
context "Unknown text" do
|
37
|
-
subject { CLD.detect_language("") }
|
38
|
-
|
39
|
-
it { subject[:name].should eq("Unknown") }
|
40
|
-
it { subject[:code].should eq("un") }
|
41
|
-
it { subject[:reliable].should be_true }
|
42
|
-
end
|
43
|
-
|
44
|
-
end
|