cld 0.5.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/cld/Makefile.am +28 -0
- data/ext/cld/Makefile.in +790 -0
- data/ext/cld/aclocal.m4 +8895 -0
- data/ext/cld/base/build_config.h +5 -0
- data/ext/cld/build_aux/config.guess +1500 -0
- data/ext/cld/build_aux/config.sub +1616 -0
- data/ext/cld/build_aux/depcomp +584 -0
- data/ext/cld/build_aux/install-sh +507 -0
- data/ext/cld/build_aux/ltmain.sh +8745 -0
- data/ext/cld/build_aux/missing +367 -0
- data/ext/cld/configure +17362 -0
- data/ext/cld/configure.ac +14 -0
- data/ext/cld/extconf.rb +5 -0
- data/ext/cld/languages/internal/languages.cc +1 -2
- data/ext/cld/thunk.cc +1 -2
- data/lib/cld.rb +4 -4
- data/lib/cld/version.rb +1 -1
- metadata +43 -59
- data/.gitignore +0 -20
- data/.rspec +0 -2
- data/Gemfile +0 -6
- data/Rakefile +0 -5
- data/cld.gemspec +0 -22
- data/ext/cld/Makefile +0 -31
- data/spec/cld_spec.rb +0 -44
- data/spec/spec_helper.rb +0 -6
data/ext/cld/languages/internal/languages.cc
CHANGED
@@ -58,8 +58,7 @@ static const LanguageInfo kLanguageInfoTable[] = {
|
|
58
58
|
{ "IRISH", "ga", "gle", NULL},
|
59
59
|
{ "GALICIAN", "gl", "glg", NULL},
|
60
60
|
// Impossible to tell Tagalog from Filipino at the moment.
|
61
|
-
|
62
|
-
{ "TAGALOG", NULL, "fil", NULL},
|
61
|
+
{ "TAGALOG", "tl", "tgl", NULL},
|
63
62
|
{ "TURKISH", "tr", "tur", NULL},
|
64
63
|
{ "UKRAINIAN", "uk", "ukr", NULL},
|
65
64
|
{ "HINDI", "hi", "hin", NULL},
|
data/ext/cld/thunk.cc
CHANGED
@@ -12,8 +12,7 @@ typedef struct {
|
|
12
12
|
} RESULT;
|
13
13
|
|
14
14
|
extern "C" {
|
15
|
-
RESULT detectLanguageThunkInt(const char * src) {
|
16
|
-
bool is_plain_text = true;
|
15
|
+
RESULT detectLanguageThunkInt(const char * src, bool is_plain_text) {
|
17
16
|
bool do_allow_extended_languages = true;
|
18
17
|
bool do_pick_summary_language = false;
|
19
18
|
bool do_remove_weak_matches = false;
|
data/lib/cld.rb
CHANGED
@@ -4,8 +4,8 @@ require "ffi"
|
|
4
4
|
module CLD
|
5
5
|
extend FFI::Library
|
6
6
|
|
7
|
-
def self.detect_language(text)
|
8
|
-
result = detect_language_ext(text)
|
7
|
+
def self.detect_language(text, is_plain_text=true)
|
8
|
+
result = detect_language_ext(text.to_s, is_plain_text)
|
9
9
|
Hash[ result.members.map {|member| [member.to_sym, result[member]]} ]
|
10
10
|
end
|
11
11
|
|
@@ -16,6 +16,6 @@ module CLD
|
|
16
16
|
end
|
17
17
|
|
18
18
|
GEM_ROOT = File.expand_path("../../", __FILE__)
|
19
|
-
ffi_lib "#{GEM_ROOT}/ext/cld/cld.so"
|
20
|
-
attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in], ReturnValue.by_value
|
19
|
+
ffi_lib "#{GEM_ROOT}/ext/cld/lib/cld.so"
|
20
|
+
attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in, :bool], ReturnValue.by_value
|
21
21
|
end
|
data/lib/cld/version.rb
CHANGED
metadata
CHANGED
@@ -1,57 +1,42 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: cld
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
version: 0.5.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.10.0
|
6
5
|
platform: ruby
|
7
|
-
authors:
|
6
|
+
authors:
|
8
7
|
- Jason Toy
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2021-07-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
16
14
|
name: ffi
|
17
|
-
|
18
|
-
|
19
|
-
none: false
|
20
|
-
requirements:
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
21
17
|
- - ">="
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version:
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
24
20
|
type: :runtime
|
25
|
-
version_requirements: *id001
|
26
|
-
- !ruby/object:Gem::Dependency
|
27
|
-
name: rspec
|
28
21
|
prerelease: false
|
29
|
-
|
30
|
-
|
31
|
-
requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
32
24
|
- - ">="
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
version:
|
35
|
-
type: :development
|
36
|
-
version_requirements: *id002
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
37
27
|
description: Compact Language Detection for Ruby
|
38
|
-
email:
|
28
|
+
email:
|
39
29
|
- jtoy@jtoy.net
|
40
30
|
executables: []
|
41
|
-
|
42
|
-
extensions:
|
31
|
+
extensions:
|
43
32
|
- ext/cld/extconf.rb
|
44
33
|
extra_rdoc_files: []
|
45
|
-
|
46
|
-
files:
|
47
|
-
- .gitignore
|
48
|
-
- .rspec
|
49
|
-
- Gemfile
|
34
|
+
files:
|
50
35
|
- LICENSE
|
51
36
|
- README.md
|
52
|
-
- Rakefile
|
53
|
-
- cld.gemspec
|
54
|
-
- ext/cld/Makefile
|
37
|
+
- ext/cld/Makefile.am
|
38
|
+
- ext/cld/Makefile.in
|
39
|
+
- ext/cld/aclocal.m4
|
55
40
|
- ext/cld/base/basictypes.h
|
56
41
|
- ext/cld/base/build_config.h
|
57
42
|
- ext/cld/base/casts.h
|
@@ -71,7 +56,15 @@ files:
|
|
71
56
|
- ext/cld/base/template_util.h
|
72
57
|
- ext/cld/base/type_traits.h
|
73
58
|
- ext/cld/base/vlog_is_on.h
|
59
|
+
- ext/cld/build_aux/config.guess
|
60
|
+
- ext/cld/build_aux/config.sub
|
61
|
+
- ext/cld/build_aux/depcomp
|
62
|
+
- ext/cld/build_aux/install-sh
|
63
|
+
- ext/cld/build_aux/ltmain.sh
|
64
|
+
- ext/cld/build_aux/missing
|
74
65
|
- ext/cld/cld_encodings.h
|
66
|
+
- ext/cld/configure
|
67
|
+
- ext/cld/configure.ac
|
75
68
|
- ext/cld/encodings/compact_lang_det/#cldutil.cc#
|
76
69
|
- ext/cld/encodings/compact_lang_det/#cldutil.h#
|
77
70
|
- ext/cld/encodings/compact_lang_det/#compact_lang_det_impl.h#
|
@@ -151,35 +144,26 @@ files:
|
|
151
144
|
- ext/cld/thunk.cc
|
152
145
|
- lib/cld.rb
|
153
146
|
- lib/cld/version.rb
|
154
|
-
|
155
|
-
- spec/spec_helper.rb
|
156
|
-
homepage: http://github.com/jtoy/cld
|
147
|
+
homepage: https://github.com/jtoy/cld
|
157
148
|
licenses: []
|
158
|
-
|
149
|
+
metadata: {}
|
159
150
|
post_install_message:
|
160
151
|
rdoc_options: []
|
161
|
-
|
162
|
-
require_paths:
|
152
|
+
require_paths:
|
163
153
|
- lib
|
164
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
165
|
-
|
166
|
-
requirements:
|
154
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
167
156
|
- - ">="
|
168
|
-
- !ruby/object:Gem::Version
|
169
|
-
version:
|
170
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
|
-
|
172
|
-
requirements:
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
159
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
160
|
+
requirements:
|
173
161
|
- - ">="
|
174
|
-
- !ruby/object:Gem::Version
|
175
|
-
version:
|
162
|
+
- !ruby/object:Gem::Version
|
163
|
+
version: '0'
|
176
164
|
requirements: []
|
177
|
-
|
178
|
-
rubyforge_project:
|
179
|
-
rubygems_version: 1.8.11
|
165
|
+
rubygems_version: 3.2.16
|
180
166
|
signing_key:
|
181
|
-
specification_version:
|
167
|
+
specification_version: 4
|
182
168
|
summary: Compact Language Detection for Ruby
|
183
|
-
test_files:
|
184
|
-
- spec/cld_spec.rb
|
185
|
-
- spec/spec_helper.rb
|
169
|
+
test_files: []
|
data/.gitignore
DELETED
data/.rspec
DELETED
data/Gemfile
DELETED
data/Rakefile
DELETED
data/cld.gemspec
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
require File.expand_path('../lib/cld/version', __FILE__)
|
3
|
-
|
4
|
-
Gem::Specification.new do |gem|
|
5
|
-
gem.authors = ["Jason Toy"]
|
6
|
-
gem.email = ["jtoy@jtoy.net"]
|
7
|
-
gem.description = %q{Compact Language Detection for Ruby}
|
8
|
-
gem.summary = %q{Compact Language Detection for Ruby}
|
9
|
-
gem.homepage = "http://github.com/jtoy/cld"
|
10
|
-
|
11
|
-
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
12
|
-
gem.files = `git ls-files`.split("\n")
|
13
|
-
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
14
|
-
gem.extensions = ["ext/cld/extconf.rb"]
|
15
|
-
gem.name = "cld"
|
16
|
-
gem.require_paths = ["lib"]
|
17
|
-
gem.version = CLD::VERSION
|
18
|
-
|
19
|
-
gem.add_dependency "ffi"
|
20
|
-
|
21
|
-
gem.add_development_dependency "rspec"
|
22
|
-
end
|
data/ext/cld/Makefile
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS
|
2
|
-
LDFLAGS=-L.
|
3
|
-
CC=g++
|
4
|
-
AR=ar
|
5
|
-
SOURCES=encodings/compact_lang_det/cldutil.cc \
|
6
|
-
encodings/compact_lang_det/cldutil_dbg_empty.cc \
|
7
|
-
encodings/compact_lang_det/compact_lang_det.cc \
|
8
|
-
encodings/compact_lang_det/compact_lang_det_impl.cc \
|
9
|
-
encodings/compact_lang_det/ext_lang_enc.cc \
|
10
|
-
encodings/compact_lang_det/getonescriptspan.cc \
|
11
|
-
encodings/compact_lang_det/letterscript_enum.cc \
|
12
|
-
encodings/compact_lang_det/tote.cc \
|
13
|
-
encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc \
|
14
|
-
encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc \
|
15
|
-
encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc \
|
16
|
-
encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc \
|
17
|
-
encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc \
|
18
|
-
encodings/compact_lang_det/win/cld_htmlutils_windows.cc \
|
19
|
-
encodings/compact_lang_det/win/cld_unilib_windows.cc \
|
20
|
-
encodings/compact_lang_det/win/cld_utf8statetable.cc \
|
21
|
-
encodings/compact_lang_det/win/cld_utf8utils_windows.cc \
|
22
|
-
encodings/internal/encodings.cc \
|
23
|
-
languages/internal/languages.cc \
|
24
|
-
thunk.cc
|
25
|
-
|
26
|
-
install:
|
27
|
-
rm -f *.o
|
28
|
-
rm -f libcld.a
|
29
|
-
$(CC) -c $(CFLAGS) $(SOURCES)
|
30
|
-
$(AR) rcs libcld.a *.o
|
31
|
-
$(CC) -DCLD_WINDOWS -I. -L. -shared -o cld.so -lstdc++ *.o
|
data/spec/cld_spec.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
require "spec_helper"
|
3
|
-
|
4
|
-
describe CLD do
|
5
|
-
|
6
|
-
context "English text" do
|
7
|
-
subject { CLD.detect_language("This is a test") }
|
8
|
-
|
9
|
-
it { subject[:name].should eq("ENGLISH") }
|
10
|
-
it { subject[:code].should eq("en") }
|
11
|
-
it { subject[:reliable].should be_true }
|
12
|
-
end
|
13
|
-
|
14
|
-
context "French text" do
|
15
|
-
subject { CLD.detect_language("plus ça change, plus c'est la même chose") }
|
16
|
-
|
17
|
-
it { subject[:name].should eq("FRENCH") }
|
18
|
-
it { subject[:code].should eq("fr") }
|
19
|
-
it { subject[:reliable].should be_true }
|
20
|
-
end
|
21
|
-
|
22
|
-
context "Simplified Chinese text" do
|
23
|
-
subject { CLD.detect_language("你好吗箭体") }
|
24
|
-
|
25
|
-
it { subject[:name].should eq("Chinese") }
|
26
|
-
it { subject[:code].should eq("zh") }
|
27
|
-
end
|
28
|
-
|
29
|
-
context "Traditional Chinese text" do
|
30
|
-
subject { CLD.detect_language("你好嗎繁體") }
|
31
|
-
|
32
|
-
it { subject[:name].should eq("ChineseT") }
|
33
|
-
it { subject[:code].should eq("zh-TW") }
|
34
|
-
end
|
35
|
-
|
36
|
-
context "Unknown text" do
|
37
|
-
subject { CLD.detect_language("") }
|
38
|
-
|
39
|
-
it { subject[:name].should eq("Unknown") }
|
40
|
-
it { subject[:code].should eq("un") }
|
41
|
-
it { subject[:reliable].should be_true }
|
42
|
-
end
|
43
|
-
|
44
|
-
end
|