lingua_rb 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/Cargo.lock +1 -1
- data/README.md +16 -0
- data/ext/lingua/Cargo.toml +1 -1
- data/ext/lingua/src/lib.rs +73 -6
- data/lib/lingua/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b52fd46953ea949cc49052d39d9bb3db51bedbb0d00ef67b15c2ff719f87d33
|
4
|
+
data.tar.gz: c301b18948807f2280511addc927112a92f6f54f78bb5b80f76076284e8dae64
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8dcbcd7372dcfa9b8946e5ece99ccca4043313ed718bfcaeac31667377272b6ad344c6556153a7854ee93cb1cbf169327cbdeb10982f6ec3dd35365693a4dc8a
|
7
|
+
data.tar.gz: 66ea01be4ea8a87ea088f4209e0babf86487dee721121688fd8432111579b1c4d1431591c6c046e9b517164214f93bba8bfee2b84b12417ce7f3ab6a6d245e6f
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
# Lingua
|
2
2
|
|
3
|
+
[Gem Version](https://badge.fury.io/rb/lingua_rb)
|
4
|
+

|
5
|
+

|
6
|
+

|
7
|
+
|
3
8
|
Lingua is a [Ruby][0] wrapper for the [Rust][1] [lingua][2] crate with [magnus][3].
|
4
9
|
|
5
10
|
## Installation
|
@@ -17,8 +22,19 @@ If bundler is not being used to manage dependencies, install the gem by executin
|
|
17
22
|
```ruby
|
18
23
|
irb> Lingua.detect("this is definitely English")
|
19
24
|
=> "English"
|
25
|
+
|
26
|
+
irb> Lingua.detect("וזה בעברית")
|
27
|
+
=> "Hebrew"
|
28
|
+
|
29
|
+
irb> Lingua.detect("państwowych", languages: %w[english russian polish])
|
30
|
+
=> "Polish"
|
31
|
+
|
32
|
+
irb> Lingua.detect("כלב", languages: %w[english russian polish])
|
33
|
+
=> nil
|
20
34
|
```
|
21
35
|
|
36
|
+
**Note:** The value of the `languages` option should be an array of Strings. An array of Symbols will be ignored.
|
37
|
+
|
22
38
|
## Development
|
23
39
|
|
24
40
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/ext/lingua/Cargo.toml
CHANGED
data/ext/lingua/src/lib.rs
CHANGED
@@ -1,16 +1,83 @@
|
|
1
|
+
use std::str::FromStr;
|
2
|
+
|
1
3
|
use lingua::{Language, LanguageDetector, LanguageDetectorBuilder};
|
2
|
-
use magnus::{function, prelude::*, Error, Ruby};
|
4
|
+
use magnus::{function, prelude::*, Error, Ruby, Symbol};
|
5
|
+
|
6
|
+
fn detect(arguments: magnus::RArray) -> Option<String> {
|
7
|
+
match arguments.len() {
|
8
|
+
1 => {
|
9
|
+
let subject = arguments.shift::<String>().unwrap();
|
10
|
+
let detector: LanguageDetector = LanguageDetectorBuilder::from_all_languages().build();
|
11
|
+
let detected_language: Option<Language> = detector.detect_language_of(subject);
|
3
12
|
|
4
|
-
|
5
|
-
|
6
|
-
|
13
|
+
detected_language.map(|language| language.to_string())
|
14
|
+
}
|
15
|
+
2 => {
|
16
|
+
let subject = arguments.shift::<String>().unwrap();
|
17
|
+
let options = arguments.shift::<magnus::RHash>().unwrap();
|
18
|
+
let mut builder = match options.fetch::<Symbol, Vec<String>>(Symbol::new("languages")) {
|
19
|
+
Ok(languages) => {
|
20
|
+
let languages: Vec<Language> = languages
|
21
|
+
.into_iter()
|
22
|
+
.filter_map(|l| Language::from_str(&l).ok())
|
23
|
+
.collect();
|
24
|
+
Some(LanguageDetectorBuilder::from_languages(&languages))
|
25
|
+
}
|
26
|
+
Err(_) => None,
|
27
|
+
};
|
28
|
+
if builder.is_none() {
|
29
|
+
builder = match options.fetch::<&str, Vec<String>>("languages") {
|
30
|
+
Ok(languages) => {
|
31
|
+
let languages: Vec<Language> = languages
|
32
|
+
.into_iter()
|
33
|
+
.filter_map(|l| Language::from_str(&l).ok())
|
34
|
+
.collect();
|
35
|
+
Some(LanguageDetectorBuilder::from_languages(&languages))
|
36
|
+
}
|
37
|
+
Err(_) => None,
|
38
|
+
};
|
39
|
+
}
|
40
|
+
let mut builder = builder.unwrap_or_else(LanguageDetectorBuilder::from_all_languages);
|
41
|
+
if let Ok(minimum_relative_distance) =
|
42
|
+
options.fetch::<&str, f64>("minimum_relative_distance")
|
43
|
+
{
|
44
|
+
builder.with_minimum_relative_distance(minimum_relative_distance);
|
45
|
+
};
|
46
|
+
if let Ok(minimum_relative_distance) =
|
47
|
+
options.fetch::<Symbol, f64>(Symbol::new("minimum_relative_distance"))
|
48
|
+
{
|
49
|
+
builder.with_minimum_relative_distance(minimum_relative_distance);
|
50
|
+
};
|
51
|
+
if options
|
52
|
+
.fetch::<&str, bool>("is_every_language_model_preloaded")
|
53
|
+
.unwrap_or(false)
|
54
|
+
|| options
|
55
|
+
.fetch::<Symbol, bool>(Symbol::new("is_every_language_model_preloaded"))
|
56
|
+
.unwrap_or(false)
|
57
|
+
{
|
58
|
+
builder.with_preloaded_language_models();
|
59
|
+
};
|
60
|
+
if options
|
61
|
+
.fetch::<&str, bool>("is_low_accuracy_mode_enabled")
|
62
|
+
.unwrap_or(false)
|
63
|
+
|| options
|
64
|
+
.fetch::<Symbol, bool>(Symbol::new("is_low_accuracy_mode_enabled"))
|
65
|
+
.unwrap_or(false)
|
66
|
+
{
|
67
|
+
builder.with_low_accuracy_mode();
|
68
|
+
};
|
69
|
+
let detector = builder.build();
|
70
|
+
let detected_language: Option<Language> = detector.detect_language_of(subject);
|
7
71
|
|
8
|
-
|
72
|
+
detected_language.map(|language| language.to_string())
|
73
|
+
}
|
74
|
+
_ => None,
|
75
|
+
}
|
9
76
|
}
|
10
77
|
|
11
78
|
#[magnus::init]
|
12
79
|
fn init(ruby: &Ruby) -> Result<(), Error> {
|
13
80
|
let module = ruby.define_module("Lingua")?;
|
14
|
-
module.define_singleton_method("detect", function!(detect,
|
81
|
+
module.define_singleton_method("detect", function!(detect, -2))?;
|
15
82
|
Ok(())
|
16
83
|
}
|
data/lib/lingua/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lingua_rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ShallmentMo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-05-
|
11
|
+
date: 2024-05-24 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: An Ruby wrapper around the Rust Lingua language detection library.
|
14
14
|
email:
|