cld3 3.5.2 → 3.5.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/{Gemfile → Steepfile} +5 -3
- data/cld3.gemspec +6 -6
- data/ext/cld3/nnet_language_identifier_c.cc +6 -11
- data/spec/cld3_spec.rb +99 -0
- metadata +14 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98b87b769946ad6f7a17f7d2789e180c8ecbe641357526808b40701f3e0d6c4f
|
4
|
+
data.tar.gz: c6e269df192b74be722a12ea44f0e4370905c6aae58ebfacc65d885c5ae521e3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1e9fe4f3413195037741c3663b69bf11bbabf1c49a54c5579197e3a7fc149758407e34fa8daff9a13f066556e53bfd793b770f232e3b6e67deb28a9c358ac907
|
7
|
+
data.tar.gz: 0a62a6957455e65f45eae97b893f238b2e59423be638bb624dc1df8e47bad67aab2e5ba607645b3d70ad5ba7af96085dfe41529262cc6a7f2f9014b5830453d7
|
data/README.md
CHANGED
@@ -19,14 +19,14 @@ cld3.find_language("здравствуйте") # => #<struct Struct::Result lang
|
|
19
19
|
### Prerequisites
|
20
20
|
* [Bundler](http://bundler.io/)
|
21
21
|
* C++ compiler
|
22
|
-
* [Rake](https://ruby.github.io/rake/)
|
23
22
|
* [RubyGems](https://rubygems.org/)
|
24
23
|
|
25
24
|
### Instructions
|
26
25
|
I (Akihiko Odaki) recommend setting up this library by installing it via `gem`.
|
27
26
|
|
28
27
|
You can also build this library by yourself. `Rakefile` includes a Rake task to
|
29
|
-
put this code into files buildable as a gem. Build a gem with `rake`
|
28
|
+
put this code into files buildable as a gem. Build a gem with `bundle exec rake`
|
29
|
+
command.
|
30
30
|
|
31
31
|
### Platform-specific information
|
32
32
|
|
@@ -42,7 +42,8 @@ I recommend [GCC](https://gcc.gnu.org/) as a C++ compiler.
|
|
42
42
|
|
43
43
|
If you cannot identify the cause of your problem, run spec of this library and
|
44
44
|
see whether the problem is reproducible with it or not. Spec is not included in
|
45
|
-
the gem, so clone the source code repository and then run
|
45
|
+
the gem, so clone the source code repository and then run
|
46
|
+
`bundle exec rake spec`.
|
46
47
|
The source code repository is at
|
47
48
|
https://github.com/akihikodaki/cld3-ruby.
|
48
49
|
|
data/{Gemfile → Steepfile}
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2021 Akihiko Odaki <akihiko.odaki@gmail.com>
|
2
2
|
# All Rights Reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -14,5 +14,7 @@
|
|
14
14
|
# limitations under the License.
|
15
15
|
#==============================================================================
|
16
16
|
|
17
|
-
|
18
|
-
|
17
|
+
target :lib do
|
18
|
+
signature "sig"
|
19
|
+
check "lib/cld3.rb"
|
20
|
+
end
|
data/cld3.gemspec
CHANGED
@@ -16,7 +16,7 @@
|
|
16
16
|
|
17
17
|
Gem::Specification.new do |gem|
|
18
18
|
gem.name = "cld3"
|
19
|
-
gem.version = "3.5.2"
|
19
|
+
gem.version = "3.5.3"
|
20
20
|
gem.summary = "Compact Language Detector v3 (CLD3)"
|
21
21
|
gem.description = "Compact Language Detector v3 (CLD3) is a neural network model for language identification."
|
22
22
|
gem.license = "Apache-2.0"
|
@@ -24,13 +24,13 @@ Gem::Specification.new do |gem|
|
|
24
24
|
gem.author = "Akihiko Odaki"
|
25
25
|
gem.email = "akihiko.odaki@gmail.com"
|
26
26
|
gem.required_ruby_version = [ ">= 2.7.0", "< 3.3.0" ]
|
27
|
-
gem.add_development_dependency "rbs",
|
28
|
-
gem.add_development_dependency "rspec",
|
29
|
-
gem.add_development_dependency "steep",
|
27
|
+
gem.add_development_dependency "rbs", "~> 2.8.0"
|
28
|
+
gem.add_development_dependency "rspec", "~> 3.12.0"
|
29
|
+
gem.add_development_dependency "steep", "~> 1.3.0"
|
30
30
|
gem.files = Dir[
|
31
|
-
"
|
31
|
+
"LICENSE", "LICENSE_CLD3", "README.md", "Steepfile",
|
32
32
|
"cld3.gemspec", "ext/**/*.c", "ext/**/*.cc", "ext/**/*.h",
|
33
|
-
"lib/**/*.rb", "sig
|
33
|
+
"lib/**/*.rb", "sig/**/*.rbs", "spec/**/*.rb"
|
34
34
|
]
|
35
35
|
gem.require_paths = [ "lib" ]
|
36
36
|
gem.extensions = [ "ext/cld3/extconf.rb" ]
|
data/ext/cld3/nnet_language_identifier_c.cc
CHANGED
@@ -24,7 +24,7 @@ limitations under the License.
|
|
24
24
|
#if defined _WIN32 || defined __CYGWIN__
|
25
25
|
#define EXPORT __declspec(dllexport)
|
26
26
|
#else
|
27
|
-
#define EXPORT __attribute__
|
27
|
+
#define EXPORT __attribute__((visibility("default")))
|
28
28
|
#endif
|
29
29
|
|
30
30
|
struct Result {
|
@@ -84,10 +84,9 @@ struct ResultVector {
|
|
84
84
|
};
|
85
85
|
|
86
86
|
template<typename T>
|
87
|
-
VALUE convert_protected(VALUE arg)
|
88
|
-
|
89
|
-
|
90
|
-
return result->convert();
|
87
|
+
VALUE convert_protected(VALUE arg) {
|
88
|
+
auto result = reinterpret_cast<const T *>(arg);
|
89
|
+
return result->convert();
|
91
90
|
}
|
92
91
|
|
93
92
|
static void dfree(void *arg) {
|
@@ -101,12 +100,8 @@ static size_t dsize(const void *data) {
|
|
101
100
|
}
|
102
101
|
|
103
102
|
static const rb_data_type_t data_type = {
|
104
|
-
|
105
|
-
|
106
|
-
.dfree = dfree,
|
107
|
-
.dsize = dsize,
|
108
|
-
},
|
109
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
103
|
+
"CLD3::NNetLanguageIdentifier", { nullptr, dfree, dsize }, nullptr, nullptr,
|
104
|
+
RUBY_TYPED_FREE_IMMEDIATELY
|
110
105
|
};
|
111
106
|
|
112
107
|
static VALUE find_language(VALUE obj,
|
data/spec/cld3_spec.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# Copyright 2017 Akihiko Odaki <akihiko.odaki@gmail.com>
|
2
|
+
# All Rights Reserved.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
#==============================================================================
|
16
|
+
|
17
|
+
require "bundler/setup"
|
18
|
+
Bundler.setup
|
19
|
+
|
20
|
+
require "rbs/test/setup"
|
21
|
+
require "cld3"
|
22
|
+
|
23
|
+
describe CLD3::NNetLanguageIdentifier do
|
24
|
+
describe "#initialize" do
|
25
|
+
it "is expected to raise ArgumentError with negative min_num_bytes" do
|
26
|
+
expect { described_class.new(-1, 1000) }.to raise_error(ArgumentError)
|
27
|
+
end
|
28
|
+
|
29
|
+
it "is expected to raise ArgumentError with min_num_bytes <= max_num_bytes" do
|
30
|
+
expect { described_class.new(0, 0) }.to raise_error(ArgumentError)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
context "initialized without parameters" do
|
35
|
+
let(:lang_id) { described_class.new }
|
36
|
+
|
37
|
+
describe "#find_language" do
|
38
|
+
subject { lang_id.find_language("This text is written in English.") }
|
39
|
+
it { is_expected.to be_nil }
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# See ext/cld3/ext/src/language_identifier_main.cc
|
44
|
+
context "initialized with custom parameters" do
|
45
|
+
let(:lang_id) { described_class.new(0, 1000) }
|
46
|
+
|
47
|
+
describe "#find_language" do
|
48
|
+
subject { lang_id.find_language text }
|
49
|
+
|
50
|
+
context "with an English text" do
|
51
|
+
let(:text) { "This text is written in English." }
|
52
|
+
it {
|
53
|
+
is_expected.to satisfy { |result|
|
54
|
+
result.language == :en &&
|
55
|
+
result.probability > 0 &&
|
56
|
+
result.probability < 1 &&
|
57
|
+
result.reliable? &&
|
58
|
+
result.proportion == 1 &&
|
59
|
+
result.byte_ranges == []
|
60
|
+
}
|
61
|
+
}
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
describe "#find_top_n_most_freq_langs" do
|
66
|
+
subject { lang_id.find_top_n_most_freq_langs text, 3 }
|
67
|
+
|
68
|
+
context "with an English text followed by a Russian text" do
|
69
|
+
let(:text) { "This piece of text is in English. Този текст е на Български." }
|
70
|
+
it {
|
71
|
+
is_expected.to satisfy { |results|
|
72
|
+
results.size == 2 &&
|
73
|
+
results[0].language == :bg &&
|
74
|
+
results[0].probability > 0 &&
|
75
|
+
results[0].probability < 1 &&
|
76
|
+
results[0].reliable? &&
|
77
|
+
results[0].proportion > 0 &&
|
78
|
+
results[0].proportion < 1 &&
|
79
|
+
results[0].byte_ranges.size == 1 &&
|
80
|
+
results[0].byte_ranges[0].start_index == 34 &&
|
81
|
+
results[0].byte_ranges[0].end_index == 81 &&
|
82
|
+
results[0].byte_ranges[0].probability == results[0].probability &&
|
83
|
+
results.size == 2 &&
|
84
|
+
results[1].language == :en &&
|
85
|
+
results[1].probability > 0 &&
|
86
|
+
results[1].probability < 1 &&
|
87
|
+
results[1].reliable? &&
|
88
|
+
results[1].proportion > 0 &&
|
89
|
+
results[1].proportion < 1 &&
|
90
|
+
results[1].byte_ranges.size == 1 &&
|
91
|
+
results[1].byte_ranges[0].start_index == 0 &&
|
92
|
+
results[1].byte_ranges[0].end_index == 34 &&
|
93
|
+
results[1].byte_ranges[0].probability == results[1].probability
|
94
|
+
}
|
95
|
+
}
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
metadata
CHANGED
@@ -1,75 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cld3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.5.2
|
4
|
+
version: 3.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Akihiko Odaki
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-03-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbs
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 2.8.0
|
20
|
-
- - "<"
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: 2.9.0
|
23
20
|
type: :development
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
26
23
|
requirements:
|
27
|
-
- - "
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: 2.8.0
|
30
|
-
- - "<"
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: 2.9.0
|
33
27
|
- !ruby/object:Gem::Dependency
|
34
28
|
name: rspec
|
35
29
|
requirement: !ruby/object:Gem::Requirement
|
36
30
|
requirements:
|
37
|
-
- - "
|
31
|
+
- - "~>"
|
38
32
|
- !ruby/object:Gem::Version
|
39
33
|
version: 3.12.0
|
40
|
-
- - "<"
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
version: 3.13.0
|
43
34
|
type: :development
|
44
35
|
prerelease: false
|
45
36
|
version_requirements: !ruby/object:Gem::Requirement
|
46
37
|
requirements:
|
47
|
-
- - "
|
38
|
+
- - "~>"
|
48
39
|
- !ruby/object:Gem::Version
|
49
40
|
version: 3.12.0
|
50
|
-
- - "<"
|
51
|
-
- !ruby/object:Gem::Version
|
52
|
-
version: 3.13.0
|
53
41
|
- !ruby/object:Gem::Dependency
|
54
42
|
name: steep
|
55
43
|
requirement: !ruby/object:Gem::Requirement
|
56
44
|
requirements:
|
57
|
-
- - "
|
45
|
+
- - "~>"
|
58
46
|
- !ruby/object:Gem::Version
|
59
47
|
version: 1.3.0
|
60
|
-
- - "<"
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: 1.4.0
|
63
48
|
type: :development
|
64
49
|
prerelease: false
|
65
50
|
version_requirements: !ruby/object:Gem::Requirement
|
66
51
|
requirements:
|
67
|
-
- - "
|
52
|
+
- - "~>"
|
68
53
|
- !ruby/object:Gem::Version
|
69
54
|
version: 1.3.0
|
70
|
-
- - "<"
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
version: 1.4.0
|
73
55
|
description: Compact Language Detector v3 (CLD3) is a neural network model for language
|
74
56
|
identification.
|
75
57
|
email: akihiko.odaki@gmail.com
|
@@ -78,10 +60,10 @@ extensions:
|
|
78
60
|
- ext/cld3/extconf.rb
|
79
61
|
extra_rdoc_files: []
|
80
62
|
files:
|
81
|
-
- Gemfile
|
82
63
|
- LICENSE
|
83
64
|
- LICENSE_CLD3
|
84
65
|
- README.md
|
66
|
+
- Steepfile
|
85
67
|
- cld3.gemspec
|
86
68
|
- ext/cld3/base.cc
|
87
69
|
- ext/cld3/base.h
|
@@ -149,11 +131,12 @@ files:
|
|
149
131
|
- ext/cld3/workspace.h
|
150
132
|
- lib/cld3.rb
|
151
133
|
- sig/cld3.rbs
|
134
|
+
- spec/cld3_spec.rb
|
152
135
|
homepage: https://github.com/akihikodaki/cld3-ruby
|
153
136
|
licenses:
|
154
137
|
- Apache-2.0
|
155
138
|
metadata: {}
|
156
|
-
post_install_message:
|
139
|
+
post_install_message:
|
157
140
|
rdoc_options: []
|
158
141
|
require_paths:
|
159
142
|
- lib
|
@@ -171,8 +154,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
171
154
|
- !ruby/object:Gem::Version
|
172
155
|
version: '0'
|
173
156
|
requirements: []
|
174
|
-
rubygems_version: 3.
|
175
|
-
signing_key:
|
157
|
+
rubygems_version: 3.4.6
|
158
|
+
signing_key:
|
176
159
|
specification_version: 4
|
177
160
|
summary: Compact Language Detector v3 (CLD3)
|
178
161
|
test_files: []
|