whatlang 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +16 -0
- data/CHANGELOG.md +9 -1
- data/Cargo.lock +77 -31
- data/Cargo.toml +3 -3
- data/README.md +1 -1
- data/Rakefile +3 -8
- data/lib/whatlang.rb +10 -0
- data/src/lib.rs +13 -6
- data/test/{whatlang_test.rb → test_whatlang.rb} +19 -1
- metadata +8 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 99c048c830a5c3379fa6faf225a88ffe9d203a926b731f45c556f9e3eb85c3fe
|
|
4
|
+
data.tar.gz: cdcf4bc5852541b6f3cd6b507091e3d7d176ae0f51c73b35784721762c62f2f3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3cdff99de9c30099bb5387de242f941c1d87a3e26e42b6ae8d2755489261fbe863a56b8d12403f4f5095d3850fa8ce25fa5f2fa092694329c274fd60d88f75e5
|
|
7
|
+
data.tar.gz: d2b19fdc20d7c9e8923a6fc6c0e4b3c020d17d2c2ceaead9765e580194b21ab96d4e19243ad5a0e32debce429a30723305e527774f329ef4fbc3eeabb8df814d
|
data/.gitlab-ci.yml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
default:
|
|
2
|
+
image: ruby:3.2.2
|
|
3
|
+
|
|
4
|
+
before_script:
|
|
5
|
+
- apt-get update && apt-get install -y clang
|
|
6
|
+
- gem update --system '3.4.22'
|
|
7
|
+
- gem install bundler -v 2.4.22
|
|
8
|
+
- gem install tomlrb
|
|
9
|
+
- bundle install
|
|
10
|
+
- curl https://sh.rustup.rs -sSf | sh -s -- -y
|
|
11
|
+
|
|
12
|
+
test:
|
|
13
|
+
variables:
|
|
14
|
+
RB_SYS_FORCE_INSTALL_RUST_TOOLCHAIN: 'true'
|
|
15
|
+
script:
|
|
16
|
+
- source "$HOME/.cargo/env" && bundle exec rake test
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
|
+
0.1.6
|
|
2
|
+
=====
|
|
3
|
+
|
|
4
|
+
* Check argument and return value. Thanks [@bendangelo][] for filing an issue!
|
|
5
|
+
* Update Whatlang version to 0.16.4, which includes a bug fix for Czech
|
|
6
|
+
|
|
1
7
|
0.1.5
|
|
2
|
-
|
|
8
|
+
=====
|
|
3
9
|
|
|
4
10
|
* Update Whatlang version to 0.16.0, which includes addition of Armenian
|
|
5
11
|
|
|
@@ -28,3 +34,5 @@
|
|
|
28
34
|
=====
|
|
29
35
|
|
|
30
36
|
* Initial release
|
|
37
|
+
|
|
38
|
+
[@bendangelo]: https://gitlab.com/bendangelo
|
data/Cargo.lock
CHANGED
|
@@ -4,39 +4,36 @@ version = 3
|
|
|
4
4
|
|
|
5
5
|
[[package]]
|
|
6
6
|
name = "ahash"
|
|
7
|
-
version = "0.7
|
|
7
|
+
version = "0.8.7"
|
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
-
checksum = "
|
|
9
|
+
checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01"
|
|
10
10
|
dependencies = [
|
|
11
|
-
"
|
|
11
|
+
"cfg-if",
|
|
12
12
|
"once_cell",
|
|
13
13
|
"version_check",
|
|
14
|
+
"zerocopy",
|
|
14
15
|
]
|
|
15
16
|
|
|
16
17
|
[[package]]
|
|
17
|
-
name = "
|
|
18
|
-
version = "
|
|
18
|
+
name = "allocator-api2"
|
|
19
|
+
version = "0.2.16"
|
|
19
20
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
20
|
-
checksum = "
|
|
21
|
+
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
|
|
21
22
|
|
|
22
23
|
[[package]]
|
|
23
|
-
name = "
|
|
24
|
-
version = "0.
|
|
24
|
+
name = "cfg-if"
|
|
25
|
+
version = "1.0.0"
|
|
25
26
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
26
|
-
checksum = "
|
|
27
|
-
dependencies = [
|
|
28
|
-
"cfg-if",
|
|
29
|
-
"libc",
|
|
30
|
-
"wasi",
|
|
31
|
-
]
|
|
27
|
+
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|
32
28
|
|
|
33
29
|
[[package]]
|
|
34
30
|
name = "hashbrown"
|
|
35
|
-
version = "0.
|
|
31
|
+
version = "0.14.3"
|
|
36
32
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
37
|
-
checksum = "
|
|
33
|
+
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
|
|
38
34
|
dependencies = [
|
|
39
35
|
"ahash",
|
|
36
|
+
"allocator-api2",
|
|
40
37
|
]
|
|
41
38
|
|
|
42
39
|
[[package]]
|
|
@@ -47,43 +44,72 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
|
|
47
44
|
|
|
48
45
|
[[package]]
|
|
49
46
|
name = "libc"
|
|
50
|
-
version = "0.2.
|
|
47
|
+
version = "0.2.152"
|
|
51
48
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
52
|
-
checksum = "
|
|
49
|
+
checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
|
|
53
50
|
|
|
54
51
|
[[package]]
|
|
55
52
|
name = "once_cell"
|
|
56
|
-
version = "1.
|
|
53
|
+
version = "1.19.0"
|
|
57
54
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
58
|
-
checksum = "
|
|
55
|
+
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
|
56
|
+
|
|
57
|
+
[[package]]
|
|
58
|
+
name = "proc-macro2"
|
|
59
|
+
version = "1.0.76"
|
|
60
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
61
|
+
checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c"
|
|
62
|
+
dependencies = [
|
|
63
|
+
"unicode-ident",
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
[[package]]
|
|
67
|
+
name = "quote"
|
|
68
|
+
version = "1.0.35"
|
|
69
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
70
|
+
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
|
|
71
|
+
dependencies = [
|
|
72
|
+
"proc-macro2",
|
|
73
|
+
]
|
|
59
74
|
|
|
60
75
|
[[package]]
|
|
61
76
|
name = "rutie"
|
|
62
|
-
version = "0.
|
|
77
|
+
version = "0.9.0"
|
|
63
78
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
64
|
-
checksum = "
|
|
79
|
+
checksum = "e5e8e4f6480c30609e3480adfab87b8d4792525225a1caf98b371fbc9a7b698a"
|
|
65
80
|
dependencies = [
|
|
66
81
|
"lazy_static",
|
|
67
82
|
"libc",
|
|
68
83
|
]
|
|
69
84
|
|
|
70
85
|
[[package]]
|
|
71
|
-
name = "
|
|
72
|
-
version = "0.
|
|
86
|
+
name = "syn"
|
|
87
|
+
version = "2.0.48"
|
|
73
88
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
74
|
-
checksum = "
|
|
89
|
+
checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
|
|
90
|
+
dependencies = [
|
|
91
|
+
"proc-macro2",
|
|
92
|
+
"quote",
|
|
93
|
+
"unicode-ident",
|
|
94
|
+
]
|
|
75
95
|
|
|
76
96
|
[[package]]
|
|
77
|
-
name = "
|
|
78
|
-
version = "0.
|
|
97
|
+
name = "unicode-ident"
|
|
98
|
+
version = "1.0.12"
|
|
79
99
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
80
|
-
checksum = "
|
|
100
|
+
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
|
101
|
+
|
|
102
|
+
[[package]]
|
|
103
|
+
name = "version_check"
|
|
104
|
+
version = "0.9.4"
|
|
105
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
106
|
+
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
|
81
107
|
|
|
82
108
|
[[package]]
|
|
83
109
|
name = "whatlang"
|
|
84
|
-
version = "0.16.
|
|
110
|
+
version = "0.16.4"
|
|
85
111
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
86
|
-
checksum = "
|
|
112
|
+
checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0"
|
|
87
113
|
dependencies = [
|
|
88
114
|
"hashbrown",
|
|
89
115
|
"once_cell",
|
|
@@ -91,8 +117,28 @@ dependencies = [
|
|
|
91
117
|
|
|
92
118
|
[[package]]
|
|
93
119
|
name = "whatlang-rb"
|
|
94
|
-
version = "0.1.
|
|
120
|
+
version = "0.1.6"
|
|
95
121
|
dependencies = [
|
|
96
122
|
"rutie",
|
|
97
123
|
"whatlang",
|
|
98
124
|
]
|
|
125
|
+
|
|
126
|
+
[[package]]
|
|
127
|
+
name = "zerocopy"
|
|
128
|
+
version = "0.7.32"
|
|
129
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
130
|
+
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
|
|
131
|
+
dependencies = [
|
|
132
|
+
"zerocopy-derive",
|
|
133
|
+
]
|
|
134
|
+
|
|
135
|
+
[[package]]
|
|
136
|
+
name = "zerocopy-derive"
|
|
137
|
+
version = "0.7.32"
|
|
138
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
139
|
+
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
|
|
140
|
+
dependencies = [
|
|
141
|
+
"proc-macro2",
|
|
142
|
+
"quote",
|
|
143
|
+
"syn",
|
|
144
|
+
]
|
data/Cargo.toml
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "whatlang-rb"
|
|
3
|
-
version = "0.1.
|
|
3
|
+
version = "0.1.6"
|
|
4
4
|
authors = ["Kitaiti Makoto <KitaitiMakoto@gmail.com>"]
|
|
5
5
|
edition = "2018"
|
|
6
6
|
|
|
7
7
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
|
8
8
|
|
|
9
9
|
[dependencies]
|
|
10
|
-
rutie = { version = "0.
|
|
11
|
-
whatlang = "0.16.
|
|
10
|
+
rutie = { version = "0.9.0", features = ["no-link"] }
|
|
11
|
+
whatlang = "0.16.4"
|
|
12
12
|
|
|
13
13
|
[lib]
|
|
14
14
|
crate-type = ["cdylib"]
|
data/README.md
CHANGED
|
@@ -8,7 +8,7 @@ Features
|
|
|
8
8
|
|
|
9
9
|
Features are derived from original [Whatlang][] library, which includes:
|
|
10
10
|
|
|
11
|
-
* Supports [
|
|
11
|
+
* Supports [69 languages](https://github.com/greyblake/whatlang-rs/blob/master/SUPPORTED_LANGUAGES.md)
|
|
12
12
|
* Lightweight, fast and simple
|
|
13
13
|
* Recognizes not only a language, but also a script (Latin, Cyrillic, etc)
|
|
14
14
|
* Provides reliability information
|
data/Rakefile
CHANGED
|
@@ -5,12 +5,6 @@ require "rubygems/tasks"
|
|
|
5
5
|
require "rake/testtask"
|
|
6
6
|
require "yard"
|
|
7
7
|
|
|
8
|
-
Rake::TestTask.new(:test) do |t|
|
|
9
|
-
t.libs << "test"
|
|
10
|
-
t.libs << "lib"
|
|
11
|
-
t.test_files = FileList["test/**/*_test.rb"]
|
|
12
|
-
end
|
|
13
|
-
|
|
14
8
|
task default: :test
|
|
15
9
|
|
|
16
10
|
Gem::Tasks.new
|
|
@@ -23,8 +17,9 @@ RUST_SRC.each do |path|
|
|
|
23
17
|
file path
|
|
24
18
|
end
|
|
25
19
|
|
|
26
|
-
file RUST_TARGET => RUST_SRC do
|
|
20
|
+
file RUST_TARGET => RUST_SRC + ["Cargo.toml", "Cargo.lock"] do
|
|
27
21
|
sh "cargo build --release"
|
|
28
22
|
end
|
|
29
23
|
CLEAN.include RUST_TARGET
|
|
30
|
-
|
|
24
|
+
|
|
25
|
+
Rake::TestTask.new test: RUST_TARGET
|
data/lib/whatlang.rb
CHANGED
|
@@ -9,6 +9,8 @@ module Whatlang
|
|
|
9
9
|
raise ArgumentError, "Couldn't specify `allowlist' and `denylist' at a time. Choose one."
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
+
text = text.to_s
|
|
13
|
+
|
|
12
14
|
case
|
|
13
15
|
when allowlist
|
|
14
16
|
detect_with_allowlist(text, allowlist)
|
|
@@ -44,4 +46,12 @@ module Whatlang
|
|
|
44
46
|
@eng_name = eng_name
|
|
45
47
|
end
|
|
46
48
|
end
|
|
49
|
+
|
|
50
|
+
private
|
|
51
|
+
|
|
52
|
+
# Class returned by detect when no language is detected.
|
|
53
|
+
#
|
|
54
|
+
# Needed because Rutie causes a segmentation fault when it returns NilClass.
|
|
55
|
+
# Should be removed after https://github.com/danielpclark/rutie/pull/180 is solved.
|
|
56
|
+
NO_INFO = nil
|
|
47
57
|
end
|
data/src/lib.rs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
use rutie::{
|
|
2
|
-
methods, module, AnyException, AnyObject, Array, Boolean, Float, Module,
|
|
2
|
+
methods, module, AnyException, AnyObject, Array, Boolean, Float, Module, Object,
|
|
3
3
|
RString, VM,
|
|
4
4
|
};
|
|
5
5
|
use whatlang::{detect, detect_lang, detect_script, Detector, Info, Lang};
|
|
@@ -11,25 +11,25 @@ methods!(
|
|
|
11
11
|
_rtself,
|
|
12
12
|
|
|
13
13
|
fn wl_detect_without_options(text: RString) -> AnyObject {
|
|
14
|
-
detect(rstring(text).to_str()).map_or(
|
|
14
|
+
detect(rstring(text).to_str()).map_or(no_info(), rinfo)
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
fn wl_detect_with_allowlist(text: RString, list: Array) -> AnyObject {
|
|
18
18
|
let detector = Detector::with_allowlist(rarray_to_lang_list(list));
|
|
19
|
-
detector.detect(rstring(text).to_str()).map_or(
|
|
19
|
+
detector.detect(rstring(text).to_str()).map_or(no_info(), rinfo)
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
fn wl_detect_with_denylist(text: RString, list: Array) -> AnyObject {
|
|
23
23
|
let detector = Detector::with_denylist(rarray_to_lang_list(list));
|
|
24
|
-
detector.detect(rstring(text).to_str()).map_or(
|
|
24
|
+
detector.detect(rstring(text).to_str()).map_or(no_info(), rinfo)
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
fn wl_detect_lang(text: RString) -> AnyObject {
|
|
28
|
-
detect_lang(rstring(text).to_str()).map_or(
|
|
28
|
+
detect_lang(rstring(text).to_str()).map_or(no_info(), rlang)
|
|
29
29
|
}
|
|
30
30
|
|
|
31
31
|
fn wl_detect_script(text: RString) -> AnyObject {
|
|
32
|
-
detect_script(rstring(text).to_str()).map_or(
|
|
32
|
+
detect_script(rstring(text).to_str()).map_or(no_info(), |script| {
|
|
33
33
|
RString::new_utf8(script.name()).into()
|
|
34
34
|
})
|
|
35
35
|
}
|
|
@@ -75,6 +75,13 @@ fn rlang(lang: Lang) -> AnyObject {
|
|
|
75
75
|
])
|
|
76
76
|
}
|
|
77
77
|
|
|
78
|
+
// Needed because Rutie causes a segmentation fault when it returns NilClass.
|
|
79
|
+
// Should be removed after https://github.com/danielpclark/rutie/pull/180 is solved.
|
|
80
|
+
fn no_info() -> AnyObject {
|
|
81
|
+
Module::from_existing("Whatlang")
|
|
82
|
+
.const_get("NO_INFO")
|
|
83
|
+
}
|
|
84
|
+
|
|
78
85
|
#[allow(non_snake_case)]
|
|
79
86
|
#[no_mangle]
|
|
80
87
|
pub extern "C" fn Init_whatlang() {
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "helper"
|
|
4
4
|
|
|
5
|
-
class
|
|
5
|
+
class TestWhatlang < Test::Unit::TestCase
|
|
6
6
|
def test_detect
|
|
7
7
|
text = "Ĉu vi ne volas eklerni Esperanton? Bonvolu!"
|
|
8
8
|
list = ["eng", "ita"]
|
|
@@ -43,4 +43,22 @@ class WhatlangTest < Test::Unit::TestCase
|
|
|
43
43
|
text = "Благодаря Эсперанто вы обрётете друзей по всему миру!"
|
|
44
44
|
assert_equal "Cyrillic", Whatlang.detect_script(text)
|
|
45
45
|
end
|
|
46
|
+
|
|
47
|
+
def test_detect_empty_string
|
|
48
|
+
assert_nil Whatlang.detect("")
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def test_detect_nil
|
|
52
|
+
assert_nil Whatlang.detect(nil)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def test_inspect
|
|
56
|
+
assert_nothing_raised do
|
|
57
|
+
Whatlang.detect("こんにちは").inspect
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def test_numeric
|
|
62
|
+
assert_nil Whatlang.detect(1234)
|
|
63
|
+
end
|
|
46
64
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: whatlang
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kitaiti Makoto
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2024-01-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rutie
|
|
@@ -103,6 +103,7 @@ extensions:
|
|
|
103
103
|
extra_rdoc_files: []
|
|
104
104
|
files:
|
|
105
105
|
- ".gitignore"
|
|
106
|
+
- ".gitlab-ci.yml"
|
|
106
107
|
- ".yardopts"
|
|
107
108
|
- BSDL
|
|
108
109
|
- CHANGELOG.md
|
|
@@ -116,7 +117,7 @@ files:
|
|
|
116
117
|
- lib/whatlang.rb
|
|
117
118
|
- src/lib.rs
|
|
118
119
|
- test/helper.rb
|
|
119
|
-
- test/
|
|
120
|
+
- test/test_whatlang.rb
|
|
120
121
|
- whatlang.gemspec
|
|
121
122
|
homepage: https://gitlab.com/KitaitiMakoto/whatlang-rb
|
|
122
123
|
licenses:
|
|
@@ -125,7 +126,7 @@ metadata:
|
|
|
125
126
|
homepage_uri: https://gitlab.com/KitaitiMakoto/whatlang-rb
|
|
126
127
|
source_code_uri: https://gitlab.com/KitaitiMakoto/whatlang-rb
|
|
127
128
|
changelog_uri: https://gitlab.com/KitaitiMakoto/whatlang-rb/-/blob/master/CHANGELOG.md
|
|
128
|
-
post_install_message:
|
|
129
|
+
post_install_message:
|
|
129
130
|
rdoc_options: []
|
|
130
131
|
require_paths:
|
|
131
132
|
- lib
|
|
@@ -140,8 +141,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
140
141
|
- !ruby/object:Gem::Version
|
|
141
142
|
version: '0'
|
|
142
143
|
requirements: []
|
|
143
|
-
rubygems_version: 3.3
|
|
144
|
-
signing_key:
|
|
144
|
+
rubygems_version: 3.5.3
|
|
145
|
+
signing_key:
|
|
145
146
|
specification_version: 4
|
|
146
147
|
summary: Fast natural language detection library.
|
|
147
148
|
test_files: []
|