lingua_rs 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -2
- data/ext/lingua/src/confidence_result.rs +11 -0
- data/ext/lingua/src/detector.rs +11 -1
- data/ext/lingua/src/language.rs +62 -2
- data/ext/lingua/src/lib.rs +7 -45
- data/ext/lingua/src/segment.rs +14 -0
- data/lib/lingua/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e301297292f2de86fd62416f6f06765c7ccd373c81020e2ad365e0c0151a55bf
|
|
4
|
+
data.tar.gz: fe961910a3e44fa7df10580100b4aecef6f2552c08a7faba278a84eaf7ae89fc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 31e45f89765b5feb782a8b47f9574891c9ce826b5883f840b5f0d06c8715f26c9d9caa999a8e08349f475e307b2ccd28ecb303677ae8e51caa79920d94cc91fc
|
|
7
|
+
data.tar.gz: 6ace3072e3b7e8e044c55712879ea398dc7f009475cb058444070cd22834bb7f4fa6525ab4a66bab95417d57b1f57b9599cda02dd159be80603280c0646b6414
|
data/README.md
CHANGED
|
@@ -140,11 +140,19 @@ detector.detect_multiple(text)
|
|
|
140
140
|
|
|
141
141
|
### `Lingua::Language` methods
|
|
142
142
|
|
|
143
|
-
`Lingua::Language` objects support equality (`==`) and can be used as Hash keys.
|
|
143
|
+
`Lingua::Language` objects support equality (`==`) and can be used as Hash keys. You can look up a language by name, ISO 639-1 code, or ISO 639-3 code using `[]`:
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
Lingua::Language['French'] # => #<Lingua::Language French>
|
|
147
|
+
Lingua::Language[:fr] # => #<Lingua::Language French>
|
|
148
|
+
Lingua::Language['fra'] # => #<Lingua::Language French>
|
|
149
|
+
Lingua::Language['xxx'] # => nil
|
|
150
|
+
```
|
|
144
151
|
|
|
145
152
|
| Method | Return type | Example |
|
|
146
153
|
|---|---|---|
|
|
147
|
-
| `
|
|
154
|
+
| `name` | `String` | `'French'` |
|
|
155
|
+
| `to_s` | `String` | `'French'` (alias for `name`) |
|
|
148
156
|
| `to_sym` | `Symbol` | `:french` |
|
|
149
157
|
| `to_iso` | `String` | `'fr'` (alias for `to_iso6391`) |
|
|
150
158
|
| `to_iso6391` | `String` | `'fr'` |
|
|
@@ -153,6 +161,14 @@ detector.detect_multiple(text)
|
|
|
153
161
|
| `==` | `Boolean` | Compare two languages |
|
|
154
162
|
| `hash` | `Integer` | Hash value (usable as Hash key) |
|
|
155
163
|
|
|
164
|
+
Class methods:
|
|
165
|
+
|
|
166
|
+
| Method | Return type | Description |
|
|
167
|
+
|---|---|---|
|
|
168
|
+
| `Lingua::Language.all` | `Array<Lingua::Language>` | All supported languages |
|
|
169
|
+
| `Lingua::Language.names` | `Array<String>` | All language names (e.g. `'French'`) |
|
|
170
|
+
| `Lingua::Language.iso_codes` | `Array<String>` | All ISO 639-1 codes (e.g. `'fr'`) |
|
|
171
|
+
|
|
156
172
|
### `Lingua::ConfidenceResult` methods
|
|
157
173
|
|
|
158
174
|
Returned by `confidence_values`.
|
data/ext/lingua/src/confidence_result.rs
CHANGED
|
@@ -1,7 +1,18 @@
|
|
|
1
1
|
use lingua::Language;
|
|
2
|
+
use magnus::{Error, RModule, Ruby, method, prelude::*};
|
|
2
3
|
|
|
3
4
|
use crate::language::WrappedLanguage;
|
|
4
5
|
|
|
6
|
+
pub fn define(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
|
|
7
|
+
let class = module.define_class("ConfidenceResult", ruby.class_object())?;
|
|
8
|
+
class.undef_default_alloc_func();
|
|
9
|
+
class.define_method("language", method!(ConfidenceResult::language, 0))?;
|
|
10
|
+
class.define_method("confidence", method!(ConfidenceResult::confidence, 0))?;
|
|
11
|
+
class.define_method("to_s", method!(ConfidenceResult::to_s, 0))?;
|
|
12
|
+
class.define_method("inspect", method!(ConfidenceResult::inspect, 0))?;
|
|
13
|
+
Ok(())
|
|
14
|
+
}
|
|
15
|
+
|
|
5
16
|
#[magnus::wrap(class = "Lingua::ConfidenceResult")]
|
|
6
17
|
pub struct ConfidenceResult {
|
|
7
18
|
pub language: Language,
|
data/ext/lingua/src/detector.rs
CHANGED
|
@@ -1,11 +1,21 @@
|
|
|
1
1
|
use lingua::{LanguageDetector, LanguageDetectorBuilder};
|
|
2
|
-
use magnus::{Error, RArray, RHash, Ruby};
|
|
2
|
+
use magnus::{Error, RArray, RHash, RModule, Ruby, function, method, prelude::*};
|
|
3
3
|
|
|
4
4
|
use crate::confidence_result::ConfidenceResult;
|
|
5
5
|
use crate::segment::Segment;
|
|
6
6
|
use crate::helpers::{fetch_option, parse_language, value_to_string};
|
|
7
7
|
use crate::language::WrappedLanguage;
|
|
8
8
|
|
|
9
|
+
pub fn define(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
|
|
10
|
+
let class = module.define_class("Detector", ruby.class_object())?;
|
|
11
|
+
class.define_singleton_method("new", function!(RubyDetector::new, -1))?;
|
|
12
|
+
class.define_method("detect", method!(RubyDetector::detect, 1))?;
|
|
13
|
+
class.define_method("confidence", method!(RubyDetector::confidence, 2))?;
|
|
14
|
+
class.define_method("confidence_values", method!(RubyDetector::confidence_values, 1))?;
|
|
15
|
+
class.define_method("detect_multiple", method!(RubyDetector::detect_multiple, 1))?;
|
|
16
|
+
Ok(())
|
|
17
|
+
}
|
|
18
|
+
|
|
9
19
|
pub fn compute_confidence(
|
|
10
20
|
detector: &LanguageDetector,
|
|
11
21
|
subject: String,
|
data/ext/lingua/src/language.rs
CHANGED
|
@@ -1,11 +1,71 @@
|
|
|
1
1
|
use lingua::Language;
|
|
2
|
-
use magnus::{Ruby, Symbol};
|
|
2
|
+
use magnus::{Error, RArray, RModule, Ruby, Symbol, function, method, prelude::*};
|
|
3
|
+
|
|
4
|
+
use crate::helpers::value_to_string;
|
|
5
|
+
|
|
6
|
+
pub fn define(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
|
|
7
|
+
let class = module.define_class("Language", ruby.class_object())?;
|
|
8
|
+
class.undef_default_alloc_func();
|
|
9
|
+
class.define_method("name", method!(WrappedLanguage::name, 0))?;
|
|
10
|
+
class.define_method("to_s", method!(WrappedLanguage::name, 0))?;
|
|
11
|
+
class.define_method("to_iso6391", method!(WrappedLanguage::to_iso6391, 0))?;
|
|
12
|
+
class.define_method("to_iso", method!(WrappedLanguage::to_iso6391, 0))?;
|
|
13
|
+
class.define_method("to_iso6393", method!(WrappedLanguage::to_iso6393, 0))?;
|
|
14
|
+
class.define_method("to_sym", method!(WrappedLanguage::to_sym, 0))?;
|
|
15
|
+
class.define_method("inspect", method!(WrappedLanguage::inspect, 0))?;
|
|
16
|
+
class.define_method("==", method!(WrappedLanguage::eq, 1))?;
|
|
17
|
+
class.define_method("eql?", method!(WrappedLanguage::eq, 1))?;
|
|
18
|
+
class.define_method("hash", method!(WrappedLanguage::hash, 0))?;
|
|
19
|
+
class.define_singleton_method("[]", function!(WrappedLanguage::lookup, 1))?;
|
|
20
|
+
class.define_singleton_method("all", function!(WrappedLanguage::all, 0))?;
|
|
21
|
+
class.define_singleton_method("names", function!(WrappedLanguage::names, 0))?;
|
|
22
|
+
class.define_singleton_method("iso_codes", function!(WrappedLanguage::iso_codes, 0))?;
|
|
23
|
+
Ok(())
|
|
24
|
+
}
|
|
3
25
|
|
|
4
26
|
#[magnus::wrap(class = "Lingua::Language")]
|
|
5
27
|
pub struct WrappedLanguage(pub Language);
|
|
6
28
|
|
|
7
29
|
impl WrappedLanguage {
|
|
8
|
-
pub fn
|
|
30
|
+
pub fn lookup(value: magnus::Value) -> Result<Option<WrappedLanguage>, Error> {
|
|
31
|
+
let input = value_to_string(value)?;
|
|
32
|
+
Ok(crate::helpers::parse_language(&input).map(WrappedLanguage))
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
pub fn all() -> Result<RArray, Error> {
|
|
36
|
+
let ruby = Ruby::get().unwrap();
|
|
37
|
+
let mut langs: Vec<Language> = Language::all().into_iter().collect();
|
|
38
|
+
langs.sort_by(|a, b| a.to_string().cmp(&b.to_string()));
|
|
39
|
+
let array = ruby.ary_new_capa(langs.len());
|
|
40
|
+
for lang in langs {
|
|
41
|
+
array.push(WrappedLanguage(lang))?;
|
|
42
|
+
}
|
|
43
|
+
Ok(array)
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
pub fn names() -> Result<RArray, Error> {
|
|
47
|
+
let ruby = Ruby::get().unwrap();
|
|
48
|
+
let mut langs: Vec<Language> = Language::all().into_iter().collect();
|
|
49
|
+
langs.sort_by(|a, b| a.to_string().cmp(&b.to_string()));
|
|
50
|
+
let array = ruby.ary_new_capa(langs.len());
|
|
51
|
+
for lang in langs {
|
|
52
|
+
array.push(lang.to_string())?;
|
|
53
|
+
}
|
|
54
|
+
Ok(array)
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
pub fn iso_codes() -> Result<RArray, Error> {
|
|
58
|
+
let ruby = Ruby::get().unwrap();
|
|
59
|
+
let mut langs: Vec<Language> = Language::all().into_iter().collect();
|
|
60
|
+
langs.sort_by(|a, b| a.to_string().cmp(&b.to_string()));
|
|
61
|
+
let array = ruby.ary_new_capa(langs.len());
|
|
62
|
+
for lang in langs {
|
|
63
|
+
array.push(lang.iso_code_639_1().to_string())?;
|
|
64
|
+
}
|
|
65
|
+
Ok(array)
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
pub fn name(&self) -> String {
|
|
9
69
|
self.0.to_string()
|
|
10
70
|
}
|
|
11
71
|
|
data/ext/lingua/src/lib.rs
CHANGED
|
@@ -1,14 +1,12 @@
|
|
|
1
1
|
mod confidence_result;
|
|
2
|
-
mod segment;
|
|
3
2
|
mod detector;
|
|
4
3
|
mod helpers;
|
|
5
4
|
mod language;
|
|
5
|
+
mod segment;
|
|
6
6
|
|
|
7
|
-
use magnus::{Error, RArray, RHash, Ruby, function,
|
|
7
|
+
use magnus::{Error, RArray, RHash, Ruby, function, prelude::*};
|
|
8
8
|
|
|
9
|
-
use
|
|
10
|
-
use segment::Segment;
|
|
11
|
-
use detector::{RubyDetector, build_detector_from_options, compute_confidence, compute_confidence_values, compute_detect_multiple};
|
|
9
|
+
use detector::{build_detector_from_options, compute_confidence, compute_confidence_values, compute_detect_multiple};
|
|
12
10
|
use language::WrappedLanguage;
|
|
13
11
|
|
|
14
12
|
fn detect(ruby: &Ruby, arguments: RArray) -> Result<Option<WrappedLanguage>, Error> {
|
|
@@ -47,47 +45,11 @@ fn detect_multiple(ruby: &Ruby, arguments: RArray) -> Result<RArray, Error> {
|
|
|
47
45
|
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
48
46
|
let module = ruby.define_module("Lingua")?;
|
|
49
47
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
language_class.define_method("to_iso6391", method!(WrappedLanguage::to_iso6391, 0))?;
|
|
55
|
-
language_class.define_method("to_iso", method!(WrappedLanguage::to_iso6391, 0))?;
|
|
56
|
-
language_class.define_method("to_iso6393", method!(WrappedLanguage::to_iso6393, 0))?;
|
|
57
|
-
language_class.define_method("to_sym", method!(WrappedLanguage::to_sym, 0))?;
|
|
58
|
-
language_class.define_method("inspect", method!(WrappedLanguage::inspect, 0))?;
|
|
59
|
-
language_class.define_method("==", method!(WrappedLanguage::eq, 1))?;
|
|
60
|
-
language_class.define_method("eql?", method!(WrappedLanguage::eq, 1))?;
|
|
61
|
-
language_class.define_method("hash", method!(WrappedLanguage::hash, 0))?;
|
|
62
|
-
|
|
63
|
-
// Lingua::ConfidenceResult
|
|
64
|
-
let confidence_class = module.define_class("ConfidenceResult", ruby.class_object())?;
|
|
65
|
-
confidence_class.undef_default_alloc_func();
|
|
66
|
-
confidence_class.define_method("language", method!(ConfidenceResult::language, 0))?;
|
|
67
|
-
confidence_class.define_method("confidence", method!(ConfidenceResult::confidence, 0))?;
|
|
68
|
-
confidence_class.define_method("to_s", method!(ConfidenceResult::to_s, 0))?;
|
|
69
|
-
confidence_class.define_method("inspect", method!(ConfidenceResult::inspect, 0))?;
|
|
70
|
-
|
|
71
|
-
// Lingua::Segment
|
|
72
|
-
let segment_class = module.define_class("Segment", ruby.class_object())?;
|
|
73
|
-
segment_class.undef_default_alloc_func();
|
|
74
|
-
segment_class.define_method("language", method!(Segment::language, 0))?;
|
|
75
|
-
segment_class.define_method("start_index", method!(Segment::start_index, 0))?;
|
|
76
|
-
segment_class.define_method("end_index", method!(Segment::end_index, 0))?;
|
|
77
|
-
segment_class.define_method("word_count", method!(Segment::word_count, 0))?;
|
|
78
|
-
segment_class.define_method("text", method!(Segment::text, 0))?;
|
|
79
|
-
segment_class.define_method("to_s", method!(Segment::to_s, 0))?;
|
|
80
|
-
segment_class.define_method("inspect", method!(Segment::inspect, 0))?;
|
|
81
|
-
|
|
82
|
-
// Lingua::Detector
|
|
83
|
-
let detector_class = module.define_class("Detector", ruby.class_object())?;
|
|
84
|
-
detector_class.define_singleton_method("new", function!(RubyDetector::new, -1))?;
|
|
85
|
-
detector_class.define_method("detect", method!(RubyDetector::detect, 1))?;
|
|
86
|
-
detector_class.define_method("confidence", method!(RubyDetector::confidence, 2))?;
|
|
87
|
-
detector_class.define_method("confidence_values", method!(RubyDetector::confidence_values, 1))?;
|
|
88
|
-
detector_class.define_method("detect_multiple", method!(RubyDetector::detect_multiple, 1))?;
|
|
48
|
+
language::define(ruby, &module)?;
|
|
49
|
+
confidence_result::define(ruby, &module)?;
|
|
50
|
+
segment::define(ruby, &module)?;
|
|
51
|
+
detector::define(ruby, &module)?;
|
|
89
52
|
|
|
90
|
-
// Functional API (module methods)
|
|
91
53
|
module.define_singleton_method("detect", function!(detect, -2))?;
|
|
92
54
|
module.define_singleton_method("confidence", function!(confidence, 2))?;
|
|
93
55
|
module.define_singleton_method("confidence_values", function!(confidence_values, -2))?;
|
data/ext/lingua/src/segment.rs
CHANGED
|
@@ -1,7 +1,21 @@
|
|
|
1
1
|
use lingua::Language;
|
|
2
|
+
use magnus::{Error, RModule, Ruby, method, prelude::*};
|
|
2
3
|
|
|
3
4
|
use crate::language::WrappedLanguage;
|
|
4
5
|
|
|
6
|
+
pub fn define(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
|
|
7
|
+
let class = module.define_class("Segment", ruby.class_object())?;
|
|
8
|
+
class.undef_default_alloc_func();
|
|
9
|
+
class.define_method("language", method!(Segment::language, 0))?;
|
|
10
|
+
class.define_method("start_index", method!(Segment::start_index, 0))?;
|
|
11
|
+
class.define_method("end_index", method!(Segment::end_index, 0))?;
|
|
12
|
+
class.define_method("word_count", method!(Segment::word_count, 0))?;
|
|
13
|
+
class.define_method("text", method!(Segment::text, 0))?;
|
|
14
|
+
class.define_method("to_s", method!(Segment::to_s, 0))?;
|
|
15
|
+
class.define_method("inspect", method!(Segment::inspect, 0))?;
|
|
16
|
+
Ok(())
|
|
17
|
+
}
|
|
18
|
+
|
|
5
19
|
#[magnus::wrap(class = "Lingua::Segment")]
|
|
6
20
|
pub struct Segment {
|
|
7
21
|
pub language: Language,
|
data/lib/lingua/version.rb
CHANGED