whichlang 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5bd76e0e787f3f21b90952817fbdb74b0c62cdeca99ed5713f50b7286cfa3d37
4
+ data.tar.gz: a8b8fc66fca53b06ac211b8da26de20826bf22a218ab56d59155103e5f26f071
5
+ SHA512:
6
+ metadata.gz: 215f5dd15c54d5cd15309c32ab2a1d057ed0294ddca7e291255013ca5b49ce09c154d4e5b2217a18620795a32752a088d0e68f6f6e6fc201eab0b2adb98ca01d
7
+ data.tar.gz: 6330cc7ae47785ff1c8cba0bd18586e52e1ee0d1831716d0c22a72e2b709aa8c6fe5d94f7f8ace675775cf76bea249674f9a18ab0e9adbfbf0241488cef52dac
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ Gemfile.lock
10
+
11
+ # Added by cargo
12
+
13
+ /target
14
+
data/.gitlab-ci.yml ADDED
@@ -0,0 +1,16 @@
1
+ default:
2
+ image: ruby:3.2.2
3
+
4
+ before_script:
5
+ - apt-get update && apt-get install -y clang
6
+ - gem update --system '3.4.22'
7
+ - gem install bundler -v 2.4.22
8
+ - gem install tomlrb
9
+ - bundle install
10
+ - curl https://sh.rustup.rs -sSf | sh -s -- -y
11
+
12
+ test:
13
+ variables:
14
+ RB_SYS_FORCE_INSTALL_RUST_TOOLCHAIN: 'true'
15
+ script:
16
+ - source "$HOME/.cargo/env" && bundle exec rake test
data/.yardopts ADDED
@@ -0,0 +1,4 @@
1
+ -
2
+ CHANGELOG.md
3
+ COPYING
4
+ BSDL
data/BSDL ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (C) 2024 Ben D'Angelo. All rights reserved.
2
+
3
+ Redistribution and use in source and binary forms, with or without
4
+ modification, are permitted provided that the following conditions
5
+ are met:
6
+ 1. Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ 2. Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+
12
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
13
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
15
+ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
16
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
17
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
18
+ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
19
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
20
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
21
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
22
+ SUCH DAMAGE.
data/CHANGELOG.md ADDED
@@ -0,0 +1,4 @@
1
+ 0.1.0
2
+ =====
3
+
4
+ * Initial release
data/COPYING ADDED
@@ -0,0 +1,56 @@
1
+ Ruby is copyrighted free software by Ben D'Angelo <ben@bendangelo.me>.
2
+ You can redistribute it and/or modify it under either the terms of the
3
+ 2-clause BSDL (see the file BSDL), or the conditions below:
4
+
5
+ 1. You may make and give away verbatim copies of the source form of the
6
+ software without restriction, provided that you duplicate all of the
7
+ original copyright notices and associated disclaimers.
8
+
9
+ 2. You may modify your copy of the software in any way, provided that
10
+ you do at least ONE of the following:
11
+
12
+ a) place your modifications in the Public Domain or otherwise
13
+ make them Freely Available, such as by posting said
14
+ modifications to Usenet or an equivalent medium, or by allowing
15
+ the author to include your modifications in the software.
16
+
17
+ b) use the modified software only within your corporation or
18
+ organization.
19
+
20
+ c) give non-standard binaries non-standard names, with
21
+ instructions on where to get the original software distribution.
22
+
23
+ d) make other distribution arrangements with the author.
24
+
25
+ 3. You may distribute the software in object code or binary form,
26
+ provided that you do at least ONE of the following:
27
+
28
+ a) distribute the binaries and library files of the software,
29
+ together with instructions (in the manual page or equivalent)
30
+ on where to get the original distribution.
31
+
32
+ b) accompany the distribution with the machine-readable source of
33
+ the software.
34
+
35
+ c) give non-standard binaries non-standard names, with
36
+ instructions on where to get the original software distribution.
37
+
38
+ d) make other distribution arrangements with the author.
39
+
40
+ 4. You may modify and include the part of the software into any other
41
+ software (possibly commercial). But some files in the distribution
42
+ are not written by the author, so that they are not under these terms.
43
+
44
+ For the list of those files and their copying conditions, see the
45
+ file LEGAL.
46
+
47
+ 5. The scripts and library files supplied as input to or produced as
48
+ output from the software do not automatically fall under the
49
+ copyright of the software, but belong to whomever generated them,
50
+ and may be sold commercially, and may be aggregated with this
51
+ software.
52
+
53
+ 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
54
+ IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
55
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56
+ PURPOSE.
data/Cargo.lock ADDED
@@ -0,0 +1,278 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 3
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "1.1.2"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "bindgen"
16
+ version = "0.69.1"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "9ffcebc3849946a7170a05992aac39da343a90676ab392c51a4280981d6379c2"
19
+ dependencies = [
20
+ "bitflags",
21
+ "cexpr",
22
+ "clang-sys",
23
+ "lazy_static",
24
+ "lazycell",
25
+ "peeking_take_while",
26
+ "proc-macro2",
27
+ "quote",
28
+ "regex",
29
+ "rustc-hash",
30
+ "shlex",
31
+ "syn",
32
+ ]
33
+
34
+ [[package]]
35
+ name = "bitflags"
36
+ version = "2.4.1"
37
+ source = "registry+https://github.com/rust-lang/crates.io-index"
38
+ checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
39
+
40
+ [[package]]
41
+ name = "cexpr"
42
+ version = "0.6.0"
43
+ source = "registry+https://github.com/rust-lang/crates.io-index"
44
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
45
+ dependencies = [
46
+ "nom",
47
+ ]
48
+
49
+ [[package]]
50
+ name = "cfg-if"
51
+ version = "1.0.0"
52
+ source = "registry+https://github.com/rust-lang/crates.io-index"
53
+ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
54
+
55
+ [[package]]
56
+ name = "clang-sys"
57
+ version = "1.6.1"
58
+ source = "registry+https://github.com/rust-lang/crates.io-index"
59
+ checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
60
+ dependencies = [
61
+ "glob",
62
+ "libc",
63
+ "libloading",
64
+ ]
65
+
66
+ [[package]]
67
+ name = "glob"
68
+ version = "0.3.1"
69
+ source = "registry+https://github.com/rust-lang/crates.io-index"
70
+ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
71
+
72
+ [[package]]
73
+ name = "lazy_static"
74
+ version = "1.4.0"
75
+ source = "registry+https://github.com/rust-lang/crates.io-index"
76
+ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
77
+
78
+ [[package]]
79
+ name = "lazycell"
80
+ version = "1.3.0"
81
+ source = "registry+https://github.com/rust-lang/crates.io-index"
82
+ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
83
+
84
+ [[package]]
85
+ name = "libc"
86
+ version = "0.2.151"
87
+ source = "registry+https://github.com/rust-lang/crates.io-index"
88
+ checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
89
+
90
+ [[package]]
91
+ name = "libloading"
92
+ version = "0.7.4"
93
+ source = "registry+https://github.com/rust-lang/crates.io-index"
94
+ checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
95
+ dependencies = [
96
+ "cfg-if",
97
+ "winapi",
98
+ ]
99
+
100
+ [[package]]
101
+ name = "memchr"
102
+ version = "2.7.1"
103
+ source = "registry+https://github.com/rust-lang/crates.io-index"
104
+ checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
105
+
106
+ [[package]]
107
+ name = "minimal-lexical"
108
+ version = "0.2.1"
109
+ source = "registry+https://github.com/rust-lang/crates.io-index"
110
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
111
+
112
+ [[package]]
113
+ name = "nom"
114
+ version = "7.1.3"
115
+ source = "registry+https://github.com/rust-lang/crates.io-index"
116
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
117
+ dependencies = [
118
+ "memchr",
119
+ "minimal-lexical",
120
+ ]
121
+
122
+ [[package]]
123
+ name = "peeking_take_while"
124
+ version = "0.1.2"
125
+ source = "registry+https://github.com/rust-lang/crates.io-index"
126
+ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
127
+
128
+ [[package]]
129
+ name = "proc-macro2"
130
+ version = "1.0.71"
131
+ source = "registry+https://github.com/rust-lang/crates.io-index"
132
+ checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8"
133
+ dependencies = [
134
+ "unicode-ident",
135
+ ]
136
+
137
+ [[package]]
138
+ name = "quote"
139
+ version = "1.0.33"
140
+ source = "registry+https://github.com/rust-lang/crates.io-index"
141
+ checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
142
+ dependencies = [
143
+ "proc-macro2",
144
+ ]
145
+
146
+ [[package]]
147
+ name = "rb-sys"
148
+ version = "0.9.85"
149
+ source = "registry+https://github.com/rust-lang/crates.io-index"
150
+ checksum = "05b780e6858b0b0eced1d55d0f097c024b77a37b41f83bd35341130f78e37c51"
151
+ dependencies = [
152
+ "rb-sys-build",
153
+ ]
154
+
155
+ [[package]]
156
+ name = "rb-sys-build"
157
+ version = "0.9.85"
158
+ source = "registry+https://github.com/rust-lang/crates.io-index"
159
+ checksum = "44957a3bc513dad1b0f20bdd0ee3b82e729a59da44086a6b40d8bc71958a6db8"
160
+ dependencies = [
161
+ "bindgen",
162
+ "lazy_static",
163
+ "proc-macro2",
164
+ "quote",
165
+ "regex",
166
+ "shell-words",
167
+ "syn",
168
+ ]
169
+
170
+ [[package]]
171
+ name = "regex"
172
+ version = "1.10.2"
173
+ source = "registry+https://github.com/rust-lang/crates.io-index"
174
+ checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
175
+ dependencies = [
176
+ "aho-corasick",
177
+ "memchr",
178
+ "regex-automata",
179
+ "regex-syntax",
180
+ ]
181
+
182
+ [[package]]
183
+ name = "regex-automata"
184
+ version = "0.4.3"
185
+ source = "registry+https://github.com/rust-lang/crates.io-index"
186
+ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
187
+ dependencies = [
188
+ "aho-corasick",
189
+ "memchr",
190
+ "regex-syntax",
191
+ ]
192
+
193
+ [[package]]
194
+ name = "regex-syntax"
195
+ version = "0.8.2"
196
+ source = "registry+https://github.com/rust-lang/crates.io-index"
197
+ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
198
+
199
+ [[package]]
200
+ name = "rustc-hash"
201
+ version = "1.1.0"
202
+ source = "registry+https://github.com/rust-lang/crates.io-index"
203
+ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
204
+
205
+ [[package]]
206
+ name = "rutie"
207
+ version = "0.8.4"
208
+ source = "git+https://github.com/MelianLabs/rutie.git?branch=segmentation_fault#929bc8b16d2a769733b900f1782e92eeb4012e0e"
209
+ dependencies = [
210
+ "lazy_static",
211
+ "libc",
212
+ "rb-sys",
213
+ ]
214
+
215
+ [[package]]
216
+ name = "shell-words"
217
+ version = "1.1.0"
218
+ source = "registry+https://github.com/rust-lang/crates.io-index"
219
+ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
220
+
221
+ [[package]]
222
+ name = "shlex"
223
+ version = "1.2.0"
224
+ source = "registry+https://github.com/rust-lang/crates.io-index"
225
+ checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380"
226
+
227
+ [[package]]
228
+ name = "syn"
229
+ version = "2.0.43"
230
+ source = "registry+https://github.com/rust-lang/crates.io-index"
231
+ checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53"
232
+ dependencies = [
233
+ "proc-macro2",
234
+ "quote",
235
+ "unicode-ident",
236
+ ]
237
+
238
+ [[package]]
239
+ name = "unicode-ident"
240
+ version = "1.0.12"
241
+ source = "registry+https://github.com/rust-lang/crates.io-index"
242
+ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
243
+
244
+ [[package]]
245
+ name = "whichlang"
246
+ version = "0.1.0"
247
+ source = "registry+https://github.com/rust-lang/crates.io-index"
248
+ checksum = "213a57fbc76ff74e9dec77cf62e47fa4e4e01dec898dc09cc6873d992eed2ef9"
249
+
250
+ [[package]]
251
+ name = "whichlang-rb"
252
+ version = "0.1.0"
253
+ dependencies = [
254
+ "rutie",
255
+ "whichlang",
256
+ ]
257
+
258
+ [[package]]
259
+ name = "winapi"
260
+ version = "0.3.9"
261
+ source = "registry+https://github.com/rust-lang/crates.io-index"
262
+ checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
263
+ dependencies = [
264
+ "winapi-i686-pc-windows-gnu",
265
+ "winapi-x86_64-pc-windows-gnu",
266
+ ]
267
+
268
+ [[package]]
269
+ name = "winapi-i686-pc-windows-gnu"
270
+ version = "0.4.0"
271
+ source = "registry+https://github.com/rust-lang/crates.io-index"
272
+ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
273
+
274
+ [[package]]
275
+ name = "winapi-x86_64-pc-windows-gnu"
276
+ version = "0.4.0"
277
+ source = "registry+https://github.com/rust-lang/crates.io-index"
278
+ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
data/Cargo.toml ADDED
@@ -0,0 +1,14 @@
1
+ [package]
2
+ name = "whichlang-rb"
3
+ version = "0.1.0"
4
+ authors = ["Ben D'Angelo <ben@bendangelo.me>", "Kitaiti Makoto <KitaitiMakoto@gmail.com>"]
5
+ edition = "2018"
6
+
7
+ # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
8
+
9
+ [dependencies]
10
+ rutie = { git = "https://github.com/MelianLabs/rutie.git", branch = "segmentation_fault", features = ["no-link"] }
11
+ whichlang = "0.1.0"
12
+
13
+ [lib]
14
+ crate-type = ["cdylib"]
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in whichlang.gemspec
6
+ gemspec
data/README.md ADDED
@@ -0,0 +1,86 @@
1
+ Whichlang
2
+ =========
3
+
4
+ Ruby bindings for [Whichlang](https://github.com/quickwit-oss/whichlang/tree/main), a natural language detection library for Rust.
5
+
6
+ This is a fork of the original [whatlang-rb](https://gitlab.com/KitaitiMakoto/whatlang-rb) but this adds an interface to the [whichlang](https://github.com/quickwit-oss/whichlang/tree/main) library. It's faster and detects languages better (although not as many languages).
7
+
8
+ Features
9
+ --------
10
+
11
+ Features are derived from the original [Whichlang][] library, which includes:
12
+
13
+ * Throughput above 100 MB/s for short and long strings.
14
+ * Good accuracy (99.5% on my validation dataset, but it really depends on the size of your input.)
15
+ * Supported languages: Arabic, Dutch, English, French, German, Hindi, Italian, Japanese, Korean, Mandarin, Portuguese, Russian, Spanish, Swedish, Turkish, and Vietnamese.
16
+
17
+ Installation
18
+ ------------
19
+
20
+ ### Requirements
21
+
22
+ You need Rust's build environment to install this gem.
23
+
24
+ For Unix-like systems, run
25
+
26
+ % curl https://sh.rustup.rs -sSf | sh
27
+
28
+ For Windows, download and run [installer][].
29
+
30
+ See [Rust official installation page][] for details.
31
+
32
+ ### Gem installation
33
+
34
+ Add this line to your application's Gemfile:
35
+
36
+ ```ruby
37
+ gem 'whichlang'
38
+ ```
39
+
40
+ And then execute:
41
+
42
+ $ bundle install
43
+
44
+ Or install it yourself as:
45
+
46
+ $ gem install whichlang
47
+
48
+ Usage
49
+ -----
50
+
51
+ require "whichlang"
52
+
53
+ text = "Благодаря Эсперанто вы обрётете друзей по всему миру!"
54
+
55
+ info = Whichlang.detect(text) # => "rus"
56
+
57
+ text = "Jen la trinkejo fermitis, ni iras tra mallumo kaj pluvo."
58
+
59
+ info = Whichlang.detect(text) # => "spa"
60
+
61
+ # blank spaces and nil are ignored
62
+ info = Whichlang.detect(" ") # => nil
62
+ info = Whichlang.detect("") # => nil
63
+ info = Whichlang.detect(nil) # => nil
65
+
66
+ Development
67
+ -----------
68
+
69
+ After checking out the repo, run `bundle config set --local path vendor/bundle && bundle install` to install dependencies. Then, run `bundle exec rake test` to run the tests. You can also run `bundle exec rake console` for an interactive prompt that will allow you to experiment.
70
+
71
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `whichlang.gemspec`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
72
+
73
+ Contributing
74
+ ------------
75
+
76
+ Bug reports and pull requests are welcome on GitHub at https://github.com/bendangelo/whichlang-rb.
77
+
78
+ License
79
+ -------
80
+
81
+ This RubyGem is distributed under Ruby's license. See the {file:COPYING} file.
82
+
83
+ [Whichlang]: https://github.com/quickwit-oss/whichlang/tree/main
84
+ [installer]: https://static.rust-lang.org/rustup/dist/i686-pc-windows-gnu/rustup-init.exe
85
+ [Rust official installation page]: https://www.rust-lang.org/tools/install
86
+ [whatlang-rb]: https://gitlab.com/KitaitiMakoto/whatlang-rb
data/Rakefile ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake/clean"
4
+ require "rubygems/tasks"
5
+ require "rake/testtask"
6
+ require "yard"
7
+
8
task default: :test

Gem::Tasks.new
YARD::Rake::YardocTask.new

# Cargo names a cdylib after the host platform (libfoo.so on Linux,
# libfoo.dylib on macOS, foo.dll on Windows). The artefact path is therefore
# derived from the running Ruby's platform instead of being pinned to the
# Linux spelling; on Linux it evaluates to the same path as before.
require "rbconfig"

RUST_LIB_PREFIX = Gem.win_platform? ? "" : "lib"
RUST_LIB_EXT = RbConfig::CONFIG["SOEXT"] || "so"

# Shared library produced by `cargo build --release`.
RUST_TARGET = "target/release/#{RUST_LIB_PREFIX}whichlang_rb.#{RUST_LIB_EXT}".freeze

# Rust sources the library is built from.
RUST_SRC = FileList["src/**/*.rs"]

# Declare each source as a file task so it can be listed as a prerequisite.
RUST_SRC.each do |path|
  file path
end

# Rebuild when any source file, the manifest or the lock file is newer than
# the library.
file RUST_TARGET => RUST_SRC + ["Cargo.toml", "Cargo.lock"] do
  sh "cargo build --release"
end
CLEAN.include RUST_TARGET

# The library is a prerequisite of the test task, so it is built first.
Rake::TestTask.new test: RUST_TARGET
data/ext/Rakefile ADDED
@@ -0,0 +1,23 @@
1
# Default task: builds the Rust library with Cargo, or aborts with
# installation instructions when Cargo cannot be used.
task :default do
  # Kernel#system returns nil when the command cannot be executed and false
  # when it exits non-zero, so a single probe covers both "cargo is missing"
  # and "cargo is present but not working". Output is discarded; only the
  # status matters.
  cargo_exists = system("cargo", "--version", out: File::NULL, err: File::NULL)
  if cargo_exists
    sh "cargo build --release"
  else
    fail <<EOS
Rust environment is required to install this gem.

For Unix like system, run

% curl https://sh.rustup.rs -sSf | sh

For Windows, download installer from

https://static.rust-lang.org/rustup/dist/i686-pc-windows-gnu/rustup-init.exe

See the Rust official page below for details

https://www.rust-lang.org/tools/install

EOS
  end
end
data/lib/whichlang.rb ADDED
@@ -0,0 +1,14 @@
1
+ require "rutie"
2
+ require "fast_blank"
3
+
4
+ Rutie.new(:whichlang_rb).init "Init_whichlang", __dir__
5
+
6
# Ruby-side entry point for language detection.
module Whichlang
  # Detects the language of +text+.
  #
  # @param text [String, nil] text to classify
  # @return [Object, nil] +nil+ when +text+ is +nil+ or +blank_as?+ reports
  #   it as blank; otherwise whatever +detect_language+ returns for +text+
  def self.detect(text)
    # Check nil first: blank_as? is only called on a non-nil receiver.
    return nil if text.nil?
    return nil if text.blank_as?

    detect_language(text)
  end
end
data/src/lib.rs ADDED
@@ -0,0 +1,31 @@
1
+ use rutie::{
2
+ methods, module, AnyException, AnyObject, Module, NilClass, Object,
3
+ RString, VM,
4
+ };
5
+ use whichlang::{detect_language, Lang};
6
+
7
+ module!(Whichlang);
8
+
9
+ methods!(
10
+ Whichlang,
11
+ _rtself,
12
+
13
+ fn wl_detect_language(text: RString) -> AnyObject {
14
+
15
+ let lang = detect_language(rstring(text).to_str());
16
+ return RString::new_utf8(lang.three_letter_code()).into();
17
+ }
18
+
19
+ );
20
+
21
+ fn rstring(rstring: Result<RString, AnyException>) -> RString {
22
+ rstring.map_err(VM::raise_ex).unwrap()
23
+ }
24
+
25
+ #[allow(non_snake_case)]
26
+ #[no_mangle]
27
+ pub extern "C" fn Init_whichlang() {
28
+ Module::new("Whichlang").define(|itself| {
29
+ itself.def_self("detect_language", wl_detect_language);
30
+ });
31
+ }
data/test/helper.rb ADDED
@@ -0,0 +1,2 @@
1
+ require "test/unit"
2
+ require "whichlang"
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "helper"
4
+
5
# Tests for Whichlang.detect.
class TestWhichlang < Test::Unit::TestCase
  # A non-blank sentence yields a language code string.
  def test_detect
    text = "Jen la trinkejo fermitis, ni iras tra mallumo kaj pluvo."

    assert_equal "spa", Whichlang.detect(text)
  end

  # Empty and whitespace-only input yields nil.
  # assert_nil replaces assert_equal(actual, nil), whose arguments were in
  # (actual, expected) order and would have produced inverted failure messages.
  def test_detect_blank_spaces
    assert_nil Whichlang.detect("")
    assert_nil Whichlang.detect(" ")
    assert_nil Whichlang.detect("\n")
  end

  # nil input yields nil.
  def test_detect_nil
    assert_nil Whichlang.detect(nil)
  end
end
data/whichlang.gemspec ADDED
@@ -0,0 +1,32 @@
1
+ # require "tomlrb"
2
+
3
# Gem specification for whichlang.
Gem::Specification.new do |gem|
  gem.name = "whichlang"
  # The version is hard-coded; loading it from Cargo.toml is disabled.
  # gem.version = Tomlrb.load_file("Cargo.toml")["package"]["version"]
  gem.version = "0.1.5"
  gem.license = "Ruby"
  gem.authors = ["Ben D'Angelo", "Kitaiti Makoto"]
  gem.email = ["ben@bendangelo.me", "KitaitiMakoto@gmail.com"]

  gem.summary = "Fast natural language detection library."
  gem.description = "Ruby bindings for Whichlang, a natural language detection for Rust."
  gem.homepage = "https://github.com/bendangelo/whichlang-rb"

  gem.metadata.update(
    "homepage_uri" => gem.homepage,
    "source_code_uri" => "https://github.com/bendangelo/whichlang-rb",
    "changelog_uri" => "https://github.com/bendangelo/whichlang-rb/blob/master/CHANGELOG.md"
  )

  # Package every file tracked by git, listed from the gemspec's directory.
  gem.files = Dir.chdir(File.expand_path(__dir__)) { `git ls-files -z`.split("\x0") }
  gem.extensions = ["ext/Rakefile"]

  %w[rutie fast_blank].each do |dependency|
    gem.add_runtime_dependency dependency
  end

  %w[tomlrb test-unit rake yard rubygems-tasks].each do |dependency|
    gem.add_development_dependency dependency
  end
end
metadata ADDED
@@ -0,0 +1,164 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: whichlang
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.5
5
+ platform: ruby
6
+ authors:
7
+ - Ben D'Angelo
8
+ - Kitaiti Makoto
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2024-01-03 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rutie
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: fast_blank
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: tomlrb
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: test-unit
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: rake
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ - !ruby/object:Gem::Dependency
85
+ name: yard
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ type: :development
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ - !ruby/object:Gem::Dependency
99
+ name: rubygems-tasks
100
+ requirement: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ type: :development
106
+ prerelease: false
107
+ version_requirements: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - ">="
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ description: Ruby bindings for Whichlang, a natural language detection for Rust.
113
+ email:
114
+ - ben@bendangelo.me
115
+ - KitaitiMakoto@gmail.com
116
+ executables: []
117
+ extensions:
118
+ - ext/Rakefile
119
+ extra_rdoc_files: []
120
+ files:
121
+ - ".gitignore"
122
+ - ".gitlab-ci.yml"
123
+ - ".yardopts"
124
+ - BSDL
125
+ - CHANGELOG.md
126
+ - COPYING
127
+ - Cargo.lock
128
+ - Cargo.toml
129
+ - Gemfile
130
+ - README.md
131
+ - Rakefile
132
+ - ext/Rakefile
133
+ - lib/whichlang.rb
134
+ - src/lib.rs
135
+ - test/helper.rb
136
+ - test/test_whichlang.rb
137
+ - whichlang.gemspec
138
+ homepage: https://github.com/bendangelo/whichlang-rb
139
+ licenses:
140
+ - Ruby
141
+ metadata:
142
+ homepage_uri: https://github.com/bendangelo/whichlang-rb
143
+ source_code_uri: https://github.com/bendangelo/whichlang-rb
144
+ changelog_uri: https://github.com/bendangelo/whichlang-rb/blob/master/CHANGELOG.md
145
+ post_install_message:
146
+ rdoc_options: []
147
+ require_paths:
148
+ - lib
149
+ required_ruby_version: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - ">="
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ required_rubygems_version: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: '0'
159
+ requirements: []
160
+ rubygems_version: 3.5.3
161
+ signing_key:
162
+ specification_version: 4
163
+ summary: Fast natural language detection library.
164
+ test_files: []