whichlang 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.gitlab-ci.yml +16 -0
- data/.yardopts +4 -0
- data/BSDL +22 -0
- data/CHANGELOG.md +4 -0
- data/COPYING +56 -0
- data/Cargo.lock +278 -0
- data/Cargo.toml +14 -0
- data/Gemfile +6 -0
- data/README.md +86 -0
- data/Rakefile +25 -0
- data/ext/Rakefile +23 -0
- data/lib/whichlang.rb +14 -0
- data/src/lib.rs +31 -0
- data/test/helper.rb +2 -0
- data/test/test_whichlang.rb +21 -0
- data/whichlang.gemspec +32 -0
- metadata +164 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5bd76e0e787f3f21b90952817fbdb74b0c62cdeca99ed5713f50b7286cfa3d37
|
4
|
+
data.tar.gz: a8b8fc66fca53b06ac211b8da26de20826bf22a218ab56d59155103e5f26f071
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 215f5dd15c54d5cd15309c32ab2a1d057ed0294ddca7e291255013ca5b49ce09c154d4e5b2217a18620795a32752a088d0e68f6f6e6fc201eab0b2adb98ca01d
|
7
|
+
data.tar.gz: 6330cc7ae47785ff1c8cba0bd18586e52e1ee0d1831716d0c22a72e2b709aa8c6fe5d94f7f8ace675775cf76bea249674f9a18ab0e9adbfbf0241488cef52dac
|
data/.gitignore
ADDED
data/.gitlab-ci.yml
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
default:
|
2
|
+
image: ruby:3.2.2
|
3
|
+
|
4
|
+
before_script:
|
5
|
+
- apt-get update && apt-get install -y clang
|
6
|
+
- gem update --system '3.4.22'
|
7
|
+
- gem install bundler -v 2.4.22
|
8
|
+
- gem install tomlrb
|
9
|
+
- bundle install
|
10
|
+
- curl https://sh.rustup.rs -sSf | sh -s -- -y
|
11
|
+
|
12
|
+
test:
|
13
|
+
variables:
|
14
|
+
RB_SYS_FORCE_INSTALL_RUST_TOOLCHAIN: 'true'
|
15
|
+
script:
|
16
|
+
- source "$HOME/.cargo/env" && bundle exec rake test
|
data/.yardopts
ADDED
data/BSDL
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (C) 2024 Ben D'Angelo. All rights reserved.
|
2
|
+
|
3
|
+
Redistribution and use in source and binary forms, with or without
|
4
|
+
modification, are permitted provided that the following conditions
|
5
|
+
are met:
|
6
|
+
1. Redistributions of source code must retain the above copyright
|
7
|
+
notice, this list of conditions and the following disclaimer.
|
8
|
+
2. Redistributions in binary form must reproduce the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer in the
|
10
|
+
documentation and/or other materials provided with the distribution.
|
11
|
+
|
12
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
13
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
14
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
15
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
16
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
17
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
18
|
+
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
19
|
+
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
20
|
+
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
21
|
+
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
22
|
+
SUCH DAMAGE.
|
data/CHANGELOG.md
ADDED
data/COPYING
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
Ruby is copyrighted free software by Ben D'Angelo <ben@bendangelo.me>.
|
2
|
+
You can redistribute it and/or modify it under either the terms of the
|
3
|
+
2-clause BSDL (see the file BSDL), or the conditions below:
|
4
|
+
|
5
|
+
1. You may make and give away verbatim copies of the source form of the
|
6
|
+
software without restriction, provided that you duplicate all of the
|
7
|
+
original copyright notices and associated disclaimers.
|
8
|
+
|
9
|
+
2. You may modify your copy of the software in any way, provided that
|
10
|
+
you do at least ONE of the following:
|
11
|
+
|
12
|
+
a) place your modifications in the Public Domain or otherwise
|
13
|
+
make them Freely Available, such as by posting said
|
14
|
+
modifications to Usenet or an equivalent medium, or by allowing
|
15
|
+
the author to include your modifications in the software.
|
16
|
+
|
17
|
+
b) use the modified software only within your corporation or
|
18
|
+
organization.
|
19
|
+
|
20
|
+
c) give non-standard binaries non-standard names, with
|
21
|
+
instructions on where to get the original software distribution.
|
22
|
+
|
23
|
+
d) make other distribution arrangements with the author.
|
24
|
+
|
25
|
+
3. You may distribute the software in object code or binary form,
|
26
|
+
provided that you do at least ONE of the following:
|
27
|
+
|
28
|
+
a) distribute the binaries and library files of the software,
|
29
|
+
together with instructions (in the manual page or equivalent)
|
30
|
+
on where to get the original distribution.
|
31
|
+
|
32
|
+
b) accompany the distribution with the machine-readable source of
|
33
|
+
the software.
|
34
|
+
|
35
|
+
c) give non-standard binaries non-standard names, with
|
36
|
+
instructions on where to get the original software distribution.
|
37
|
+
|
38
|
+
d) make other distribution arrangements with the author.
|
39
|
+
|
40
|
+
4. You may modify and include the part of the software into any other
|
41
|
+
software (possibly commercial). But some files in the distribution
|
42
|
+
are not written by the author, so that they are not under these terms.
|
43
|
+
|
44
|
+
For the list of those files and their copying conditions, see the
|
45
|
+
file LEGAL.
|
46
|
+
|
47
|
+
5. The scripts and library files supplied as input to or produced as
|
48
|
+
output from the software do not automatically fall under the
|
49
|
+
copyright of the software, but belong to whomever generated them,
|
50
|
+
and may be sold commercially, and may be aggregated with this
|
51
|
+
software.
|
52
|
+
|
53
|
+
6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
|
54
|
+
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
|
55
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
56
|
+
PURPOSE.
|
data/Cargo.lock
ADDED
@@ -0,0 +1,278 @@
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
2
|
+
# It is not intended for manual editing.
|
3
|
+
version = 3
|
4
|
+
|
5
|
+
[[package]]
|
6
|
+
name = "aho-corasick"
|
7
|
+
version = "1.1.2"
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
|
+
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
|
10
|
+
dependencies = [
|
11
|
+
"memchr",
|
12
|
+
]
|
13
|
+
|
14
|
+
[[package]]
|
15
|
+
name = "bindgen"
|
16
|
+
version = "0.69.1"
|
17
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
18
|
+
checksum = "9ffcebc3849946a7170a05992aac39da343a90676ab392c51a4280981d6379c2"
|
19
|
+
dependencies = [
|
20
|
+
"bitflags",
|
21
|
+
"cexpr",
|
22
|
+
"clang-sys",
|
23
|
+
"lazy_static",
|
24
|
+
"lazycell",
|
25
|
+
"peeking_take_while",
|
26
|
+
"proc-macro2",
|
27
|
+
"quote",
|
28
|
+
"regex",
|
29
|
+
"rustc-hash",
|
30
|
+
"shlex",
|
31
|
+
"syn",
|
32
|
+
]
|
33
|
+
|
34
|
+
[[package]]
|
35
|
+
name = "bitflags"
|
36
|
+
version = "2.4.1"
|
37
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
38
|
+
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
|
39
|
+
|
40
|
+
[[package]]
|
41
|
+
name = "cexpr"
|
42
|
+
version = "0.6.0"
|
43
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
44
|
+
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
45
|
+
dependencies = [
|
46
|
+
"nom",
|
47
|
+
]
|
48
|
+
|
49
|
+
[[package]]
|
50
|
+
name = "cfg-if"
|
51
|
+
version = "1.0.0"
|
52
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
53
|
+
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
54
|
+
|
55
|
+
[[package]]
|
56
|
+
name = "clang-sys"
|
57
|
+
version = "1.6.1"
|
58
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
59
|
+
checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
|
60
|
+
dependencies = [
|
61
|
+
"glob",
|
62
|
+
"libc",
|
63
|
+
"libloading",
|
64
|
+
]
|
65
|
+
|
66
|
+
[[package]]
|
67
|
+
name = "glob"
|
68
|
+
version = "0.3.1"
|
69
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
70
|
+
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
71
|
+
|
72
|
+
[[package]]
|
73
|
+
name = "lazy_static"
|
74
|
+
version = "1.4.0"
|
75
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
76
|
+
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
77
|
+
|
78
|
+
[[package]]
|
79
|
+
name = "lazycell"
|
80
|
+
version = "1.3.0"
|
81
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
82
|
+
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
83
|
+
|
84
|
+
[[package]]
|
85
|
+
name = "libc"
|
86
|
+
version = "0.2.151"
|
87
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
88
|
+
checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
|
89
|
+
|
90
|
+
[[package]]
|
91
|
+
name = "libloading"
|
92
|
+
version = "0.7.4"
|
93
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
94
|
+
checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
|
95
|
+
dependencies = [
|
96
|
+
"cfg-if",
|
97
|
+
"winapi",
|
98
|
+
]
|
99
|
+
|
100
|
+
[[package]]
|
101
|
+
name = "memchr"
|
102
|
+
version = "2.7.1"
|
103
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
104
|
+
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
|
105
|
+
|
106
|
+
[[package]]
|
107
|
+
name = "minimal-lexical"
|
108
|
+
version = "0.2.1"
|
109
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
110
|
+
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
111
|
+
|
112
|
+
[[package]]
|
113
|
+
name = "nom"
|
114
|
+
version = "7.1.3"
|
115
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
116
|
+
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
117
|
+
dependencies = [
|
118
|
+
"memchr",
|
119
|
+
"minimal-lexical",
|
120
|
+
]
|
121
|
+
|
122
|
+
[[package]]
|
123
|
+
name = "peeking_take_while"
|
124
|
+
version = "0.1.2"
|
125
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
126
|
+
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
127
|
+
|
128
|
+
[[package]]
|
129
|
+
name = "proc-macro2"
|
130
|
+
version = "1.0.71"
|
131
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
132
|
+
checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8"
|
133
|
+
dependencies = [
|
134
|
+
"unicode-ident",
|
135
|
+
]
|
136
|
+
|
137
|
+
[[package]]
|
138
|
+
name = "quote"
|
139
|
+
version = "1.0.33"
|
140
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
141
|
+
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
142
|
+
dependencies = [
|
143
|
+
"proc-macro2",
|
144
|
+
]
|
145
|
+
|
146
|
+
[[package]]
|
147
|
+
name = "rb-sys"
|
148
|
+
version = "0.9.85"
|
149
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
150
|
+
checksum = "05b780e6858b0b0eced1d55d0f097c024b77a37b41f83bd35341130f78e37c51"
|
151
|
+
dependencies = [
|
152
|
+
"rb-sys-build",
|
153
|
+
]
|
154
|
+
|
155
|
+
[[package]]
|
156
|
+
name = "rb-sys-build"
|
157
|
+
version = "0.9.85"
|
158
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
159
|
+
checksum = "44957a3bc513dad1b0f20bdd0ee3b82e729a59da44086a6b40d8bc71958a6db8"
|
160
|
+
dependencies = [
|
161
|
+
"bindgen",
|
162
|
+
"lazy_static",
|
163
|
+
"proc-macro2",
|
164
|
+
"quote",
|
165
|
+
"regex",
|
166
|
+
"shell-words",
|
167
|
+
"syn",
|
168
|
+
]
|
169
|
+
|
170
|
+
[[package]]
|
171
|
+
name = "regex"
|
172
|
+
version = "1.10.2"
|
173
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
174
|
+
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
|
175
|
+
dependencies = [
|
176
|
+
"aho-corasick",
|
177
|
+
"memchr",
|
178
|
+
"regex-automata",
|
179
|
+
"regex-syntax",
|
180
|
+
]
|
181
|
+
|
182
|
+
[[package]]
|
183
|
+
name = "regex-automata"
|
184
|
+
version = "0.4.3"
|
185
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
186
|
+
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
|
187
|
+
dependencies = [
|
188
|
+
"aho-corasick",
|
189
|
+
"memchr",
|
190
|
+
"regex-syntax",
|
191
|
+
]
|
192
|
+
|
193
|
+
[[package]]
|
194
|
+
name = "regex-syntax"
|
195
|
+
version = "0.8.2"
|
196
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
197
|
+
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
|
198
|
+
|
199
|
+
[[package]]
|
200
|
+
name = "rustc-hash"
|
201
|
+
version = "1.1.0"
|
202
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
203
|
+
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
204
|
+
|
205
|
+
[[package]]
|
206
|
+
name = "rutie"
|
207
|
+
version = "0.8.4"
|
208
|
+
source = "git+https://github.com/MelianLabs/rutie.git?branch=segmentation_fault#929bc8b16d2a769733b900f1782e92eeb4012e0e"
|
209
|
+
dependencies = [
|
210
|
+
"lazy_static",
|
211
|
+
"libc",
|
212
|
+
"rb-sys",
|
213
|
+
]
|
214
|
+
|
215
|
+
[[package]]
|
216
|
+
name = "shell-words"
|
217
|
+
version = "1.1.0"
|
218
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
219
|
+
checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
|
220
|
+
|
221
|
+
[[package]]
|
222
|
+
name = "shlex"
|
223
|
+
version = "1.2.0"
|
224
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
225
|
+
checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380"
|
226
|
+
|
227
|
+
[[package]]
|
228
|
+
name = "syn"
|
229
|
+
version = "2.0.43"
|
230
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
231
|
+
checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53"
|
232
|
+
dependencies = [
|
233
|
+
"proc-macro2",
|
234
|
+
"quote",
|
235
|
+
"unicode-ident",
|
236
|
+
]
|
237
|
+
|
238
|
+
[[package]]
|
239
|
+
name = "unicode-ident"
|
240
|
+
version = "1.0.12"
|
241
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
242
|
+
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
243
|
+
|
244
|
+
[[package]]
|
245
|
+
name = "whichlang"
|
246
|
+
version = "0.1.0"
|
247
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
248
|
+
checksum = "213a57fbc76ff74e9dec77cf62e47fa4e4e01dec898dc09cc6873d992eed2ef9"
|
249
|
+
|
250
|
+
[[package]]
|
251
|
+
name = "whichlang-rb"
|
252
|
+
version = "0.1.0"
|
253
|
+
dependencies = [
|
254
|
+
"rutie",
|
255
|
+
"whichlang",
|
256
|
+
]
|
257
|
+
|
258
|
+
[[package]]
|
259
|
+
name = "winapi"
|
260
|
+
version = "0.3.9"
|
261
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
262
|
+
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
263
|
+
dependencies = [
|
264
|
+
"winapi-i686-pc-windows-gnu",
|
265
|
+
"winapi-x86_64-pc-windows-gnu",
|
266
|
+
]
|
267
|
+
|
268
|
+
[[package]]
|
269
|
+
name = "winapi-i686-pc-windows-gnu"
|
270
|
+
version = "0.4.0"
|
271
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
272
|
+
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
273
|
+
|
274
|
+
[[package]]
|
275
|
+
name = "winapi-x86_64-pc-windows-gnu"
|
276
|
+
version = "0.4.0"
|
277
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
278
|
+
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
data/Cargo.toml
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
[package]
|
2
|
+
name = "whichlang-rb"
|
3
|
+
version = "0.1.0"
|
4
|
+
authors = ["Ben D'Angelo <ben@bendangelo.me>", "Kitaiti Makoto <KitaitiMakoto@gmail.com>"]
|
5
|
+
edition = "2018"
|
6
|
+
|
7
|
+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
8
|
+
|
9
|
+
[dependencies]
|
10
|
+
rutie = { git = "https://github.com/MelianLabs/rutie.git", branch = "segmentation_fault", features = ["no-link"] }
|
11
|
+
whichlang = "0.1.0"
|
12
|
+
|
13
|
+
[lib]
|
14
|
+
crate-type = ["cdylib"]
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
Whichlang
|
2
|
+
========
|
3
|
+
|
4
|
+
Ruby bindings for [Whichlang](https://github.com/quickwit-oss/whichlang/tree/main), a natural language detection library for Rust.
|
5
|
+
|
6
|
+
This is a fork of the original [whatlang-rb](https://gitlab.com/KitaitiMakoto/whatlang-rb), but it provides an interface to the [whichlang](https://github.com/quickwit-oss/whichlang/tree/main) library instead. Whichlang is faster and detects languages better (although it supports fewer languages).
|
7
|
+
|
8
|
+
Features
|
9
|
+
--------
|
10
|
+
|
11
|
+
Features are derived from the original [Whichlang][] library, which includes:
|
12
|
+
|
13
|
+
* Throughput above 100 MB/s for short and long strings.
|
14
|
+
* Good accuracy (99.5% on my validation dataset, but it really depends on the size of your input.)
|
15
|
+
* Supported languages: Arabic, Dutch, English, French, German, Hindi, Italian, Japanese, Korean, Mandarin, Portuguese, Russian, Spanish, Swedish, Turkish, and Vietnamese.
|
16
|
+
|
17
|
+
Installation
|
18
|
+
------------
|
19
|
+
|
20
|
+
### Requirements
|
21
|
+
|
22
|
+
You need Rust's build environment to install this gem.
|
23
|
+
|
24
|
+
For Unix-like systems, run
|
25
|
+
|
26
|
+
% curl https://sh.rustup.rs -sSf | sh
|
27
|
+
|
28
|
+
For Windows, download and run [installer][].
|
29
|
+
|
30
|
+
See [Rust official installation page][] for details.
|
31
|
+
|
32
|
+
### Gem installation
|
33
|
+
|
34
|
+
Add this line to your application's Gemfile:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
gem 'whichlang'
|
38
|
+
```
|
39
|
+
|
40
|
+
And then execute:
|
41
|
+
|
42
|
+
$ bundle install
|
43
|
+
|
44
|
+
Or install it yourself as:
|
45
|
+
|
46
|
+
$ gem install whichlang
|
47
|
+
|
48
|
+
Usage
|
49
|
+
-----
|
50
|
+
|
51
|
+
require "whichlang"
|
52
|
+
|
53
|
+
text = "Благодаря Эсперанто вы обрётете друзей по всему миру!"
|
54
|
+
|
55
|
+
info = Whichlang.detect(text) # => "rus"
|
56
|
+
|
57
|
+
text = "Jen la trinkejo fermitis, ni iras tra mallumo kaj pluvo."
|
58
|
+
|
59
|
+
info = Whichlang.detect(text) # => "spa"
|
60
|
+
|
61
|
+
# blank spaces and nil are ignored
|
62
|
+
info = Whichlang.detect(" ") # => nil
|
63
|
+
info = Whichlang.detect("") # => nil
|
64
|
+
info = Whichlang.detect(nil) # => nil
|
65
|
+
|
66
|
+
Development
|
67
|
+
-----------
|
68
|
+
|
69
|
+
After checking out the repo, run `bundle config set local vendor/bundle && bundle install` to install dependencies. Then, run `bundle exec rake test` to run the tests. You can also run `bundle exec rake console` for an interactive prompt that will allow you to experiment.
|
70
|
+
|
71
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `Cargo.toml`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
72
|
+
|
73
|
+
Contributing
|
74
|
+
------------
|
75
|
+
|
76
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/bendangelo/whichlang-rb.
|
77
|
+
|
78
|
+
License
|
79
|
+
-------
|
80
|
+
|
81
|
+
This RubyGem is distributed under Ruby's license. See the {file:COPYING} file.
|
82
|
+
|
83
|
+
[Whichlang]: https://github.com/quickwit-oss/whichlang/tree/main
|
84
|
+
[installer]: https://static.rust-lang.org/rustup/dist/i686-pc-windows-gnu/rustup-init.exe
|
85
|
+
[Rust official installation page]: https://www.rust-lang.org/tools/install
|
86
|
+
[whatlang-rb]: https://gitlab.com/KitaitiMakoto/whatlang-rb
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rake/clean"
|
4
|
+
require "rubygems/tasks"
|
5
|
+
require "rake/testtask"
|
6
|
+
require "yard"
|
7
|
+
|
8
|
+
task default: :test
|
9
|
+
|
10
|
+
Gem::Tasks.new
|
11
|
+
YARD::Rake::YardocTask.new
|
12
|
+
|
13
|
+
RUST_TARGET = "target/release/libwhichlang_rb.so"
|
14
|
+
RUST_SRC = FileList["src/**/*.rs"]
|
15
|
+
|
16
|
+
RUST_SRC.each do |path|
|
17
|
+
file path
|
18
|
+
end
|
19
|
+
|
20
|
+
file RUST_TARGET => RUST_SRC + ["Cargo.toml", "Cargo.lock"] do
|
21
|
+
sh "cargo build --release"
|
22
|
+
end
|
23
|
+
CLEAN.include RUST_TARGET
|
24
|
+
|
25
|
+
Rake::TestTask.new test: RUST_TARGET
|
data/ext/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
task :default do
|
2
|
+
cargo_exists = `cargo` rescue nil
|
3
|
+
if cargo_exists
|
4
|
+
sh "cargo build --release"
|
5
|
+
else
|
6
|
+
fail <<EOS
|
7
|
+
Rust environment is required to install this gem.
|
8
|
+
|
9
|
+
For Unix like system, run
|
10
|
+
|
11
|
+
% curl https://sh.rustup.rs -sSf | sh
|
12
|
+
|
13
|
+
For Windows, download installer from
|
14
|
+
|
15
|
+
https://static.rust-lang.org/rustup/dist/i686-pc-windows-gnu/rustup-init.exe
|
16
|
+
|
17
|
+
See the Rust official page below for details
|
18
|
+
|
19
|
+
https://www.rust-lang.org/tools/install
|
20
|
+
|
21
|
+
EOS
|
22
|
+
end
|
23
|
+
end
|
data/lib/whichlang.rb
ADDED
data/src/lib.rs
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
use rutie::{
|
2
|
+
methods, module, AnyException, AnyObject, Module, NilClass, Object,
|
3
|
+
RString, VM,
|
4
|
+
};
|
5
|
+
use whichlang::{detect_language, Lang};
|
6
|
+
|
7
|
+
module!(Whichlang);
|
8
|
+
|
9
|
+
methods!(
|
10
|
+
Whichlang,
|
11
|
+
_rtself,
|
12
|
+
|
13
|
+
fn wl_detect_language(text: RString) -> AnyObject {
|
14
|
+
|
15
|
+
let lang = detect_language(rstring(text).to_str());
|
16
|
+
return RString::new_utf8(lang.three_letter_code()).into();
|
17
|
+
}
|
18
|
+
|
19
|
+
);
|
20
|
+
|
21
|
+
fn rstring(rstring: Result<RString, AnyException>) -> RString {
|
22
|
+
rstring.map_err(VM::raise_ex).unwrap()
|
23
|
+
}
|
24
|
+
|
25
|
+
#[allow(non_snake_case)]
|
26
|
+
#[no_mangle]
|
27
|
+
pub extern "C" fn Init_whichlang() {
|
28
|
+
Module::new("Whichlang").define(|itself| {
|
29
|
+
itself.def_self("detect_language", wl_detect_language);
|
30
|
+
});
|
31
|
+
}
|
data/test/helper.rb
ADDED
data/test/test_whichlang.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "helper"
|
4
|
+
|
5
|
+
class TestWhichlang < Test::Unit::TestCase
|
6
|
+
def test_detect
|
7
|
+
text = "Jen la trinkejo fermitis, ni iras tra mallumo kaj pluvo."
|
8
|
+
|
9
|
+
assert_equal "spa", Whichlang.detect(text)
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_detect_blank_spaces
|
13
|
+
assert_equal Whichlang.detect(""), nil
|
14
|
+
assert_equal Whichlang.detect(" "), nil
|
15
|
+
assert_equal Whichlang.detect("\n"), nil
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_detect_nil
|
19
|
+
assert_equal Whichlang.detect(nil), nil
|
20
|
+
end
|
21
|
+
end
|
data/whichlang.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# require "tomlrb"
|
2
|
+
|
3
|
+
Gem::Specification.new do |spec|
|
4
|
+
spec.name = "whichlang"
|
5
|
+
# spec.version = Tomlrb.load_file("Cargo.toml")["package"]["version"]
|
6
|
+
spec.version = "0.1.5"
|
7
|
+
spec.license = "Ruby"
|
8
|
+
spec.authors = ["Ben D'Angelo", "Kitaiti Makoto"]
|
9
|
+
spec.email = ["ben@bendangelo.me", "KitaitiMakoto@gmail.com"]
|
10
|
+
|
11
|
+
spec.summary = "Fast natural language detection library."
|
12
|
+
spec.description = "Ruby bindings for Whichlang, a natural language detection for Rust."
|
13
|
+
spec.homepage = "https://github.com/bendangelo/whichlang-rb"
|
14
|
+
|
15
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
16
|
+
spec.metadata["source_code_uri"] = "https://github.com/bendangelo/whichlang-rb"
|
17
|
+
spec.metadata["changelog_uri"] = "https://github.com/bendangelo/whichlang-rb/blob/master/CHANGELOG.md"
|
18
|
+
|
19
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
20
|
+
`git ls-files -z`.split("\x0")
|
21
|
+
end
|
22
|
+
spec.extensions = ["ext/Rakefile"]
|
23
|
+
|
24
|
+
spec.add_runtime_dependency "rutie"
|
25
|
+
spec.add_runtime_dependency "fast_blank"
|
26
|
+
|
27
|
+
spec.add_development_dependency "tomlrb"
|
28
|
+
spec.add_development_dependency "test-unit"
|
29
|
+
spec.add_development_dependency "rake"
|
30
|
+
spec.add_development_dependency "yard"
|
31
|
+
spec.add_development_dependency "rubygems-tasks"
|
32
|
+
end
|
metadata
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: whichlang
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.5
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ben D'Angelo
|
8
|
+
- Kitaiti Makoto
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2024-01-03 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rutie
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: fast_blank
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: tomlrb
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: test-unit
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: rake
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: yard
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
name: rubygems-tasks
|
100
|
+
requirement: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
type: :development
|
106
|
+
prerelease: false
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - ">="
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
description: Ruby bindings for Whichlang, a natural language detection for Rust.
|
113
|
+
email:
|
114
|
+
- ben@bendangelo.me
|
115
|
+
- KitaitiMakoto@gmail.com
|
116
|
+
executables: []
|
117
|
+
extensions:
|
118
|
+
- ext/Rakefile
|
119
|
+
extra_rdoc_files: []
|
120
|
+
files:
|
121
|
+
- ".gitignore"
|
122
|
+
- ".gitlab-ci.yml"
|
123
|
+
- ".yardopts"
|
124
|
+
- BSDL
|
125
|
+
- CHANGELOG.md
|
126
|
+
- COPYING
|
127
|
+
- Cargo.lock
|
128
|
+
- Cargo.toml
|
129
|
+
- Gemfile
|
130
|
+
- README.md
|
131
|
+
- Rakefile
|
132
|
+
- ext/Rakefile
|
133
|
+
- lib/whichlang.rb
|
134
|
+
- src/lib.rs
|
135
|
+
- test/helper.rb
|
136
|
+
- test/test_whichlang.rb
|
137
|
+
- whichlang.gemspec
|
138
|
+
homepage: https://github.com/bendangelo/whichlang-rb
|
139
|
+
licenses:
|
140
|
+
- Ruby
|
141
|
+
metadata:
|
142
|
+
homepage_uri: https://github.com/bendangelo/whichlang-rb
|
143
|
+
source_code_uri: https://github.com/bendangelo/whichlang-rb
|
144
|
+
changelog_uri: https://github.com/bendangelo/whichlang-rb/blob/master/CHANGELOG.md
|
145
|
+
post_install_message:
|
146
|
+
rdoc_options: []
|
147
|
+
require_paths:
|
148
|
+
- lib
|
149
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
150
|
+
requirements:
|
151
|
+
- - ">="
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ">="
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
159
|
+
requirements: []
|
160
|
+
rubygems_version: 3.5.3
|
161
|
+
signing_key:
|
162
|
+
specification_version: 4
|
163
|
+
summary: Fast natural language detection library.
|
164
|
+
test_files: []
|