lingua_rs 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +62 -3
- data/ext/lingua/src/confidence_result.rs +11 -0
- data/ext/lingua/src/detector.rs +38 -1
- data/ext/lingua/src/language.rs +62 -2
- data/ext/lingua/src/lib.rs +17 -31
- data/ext/lingua/src/segment.rs +59 -0
- data/lib/lingua/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e301297292f2de86fd62416f6f06765c7ccd373c81020e2ad365e0c0151a55bf
|
|
4
|
+
data.tar.gz: fe961910a3e44fa7df10580100b4aecef6f2552c08a7faba278a84eaf7ae89fc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 31e45f89765b5feb782a8b47f9574891c9ce826b5883f840b5f0d06c8715f26c9d9caa999a8e08349f475e307b2ccd28ecb303677ae8e51caa79920d94cc91fc
|
|
7
|
+
data.tar.gz: 6ace3072e3b7e8e044c55712879ea398dc7f009475cb058444070cd22834bb7f4fa6525ab4a66bab95417d57b1f57b9599cda02dd159be80603280c0646b6414
|
data/README.md
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# LinguaRs
|
|
2
2
|
|
|
3
|
+
[![CI](https://github.com/kochka/lingua_rs/actions/workflows/ci.yml/badge.svg)](https://github.com/kochka/lingua_rs/actions/workflows/ci.yml)
|
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/lingua_rs.svg)](https://badge.fury.io/rb/lingua_rs)
|
|
5
|
+
[![Downloads](https://img.shields.io/gem/dt/lingua_rs)](https://rubygems.org/gems/lingua_rs)
|
|
6
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
7
|
+
|
|
3
8
|
A Ruby gem wrapping the [Lingua](https://github.com/pemistahl/lingua-rs) Rust library for language detection.
|
|
4
9
|
|
|
5
10
|
## Installation
|
|
@@ -104,7 +109,7 @@ detector.detect('Hello world') # => #<Lingua::Language English>
|
|
|
104
109
|
|
|
105
110
|
```ruby
|
|
106
111
|
detector.confidence('Bonjour le monde', :fr) # => 0.8217
|
|
107
|
-
Lingua.confidence('Bonjour le monde', 'fr') # => 0.
|
|
112
|
+
Lingua.confidence('Bonjour le monde', 'fr') # => 0.8217
|
|
108
113
|
|
|
109
114
|
results = detector.confidence_values('Bonjour le monde')
|
|
110
115
|
results.first.language # => #<Lingua::Language French>
|
|
@@ -113,13 +118,41 @@ results.first.to_s # => "French (0.82)"
|
|
|
113
118
|
results.sum(&:confidence) # => 1.0
|
|
114
119
|
```
|
|
115
120
|
|
|
121
|
+
### Mixed-language detection
|
|
122
|
+
|
|
123
|
+
`detect_multiple` identifies multiple languages within a single text and returns an array of `Lingua::Segment` objects. Available on both `Lingua::Detector` and as a module method on `Lingua`.
|
|
124
|
+
|
|
125
|
+
```ruby
|
|
126
|
+
text = "Parlez-vous français? Ich spreche Französisch nur ein bisschen. A little bit is better than nothing."
|
|
127
|
+
|
|
128
|
+
segments = Lingua.detect_multiple(text, languages: %w[en fr de])
|
|
129
|
+
segments.each do |s|
|
|
130
|
+
puts "#{s.language} (#{s.start_index}..#{s.end_index}): #{s.text}"
|
|
131
|
+
end
|
|
132
|
+
# French (0..22): Parlez-vous français?
|
|
133
|
+
# German (22..64): Ich spreche Französisch nur ein bisschen.
|
|
134
|
+
# English (64..100): A little bit is better than nothing.
|
|
135
|
+
|
|
136
|
+
# With a persistent detector
|
|
137
|
+
detector = Lingua::Detector.new(languages: %w[en fr de])
|
|
138
|
+
detector.detect_multiple(text)
|
|
139
|
+
```
|
|
140
|
+
|
|
116
141
|
### `Lingua::Language` methods
|
|
117
142
|
|
|
118
|
-
`Lingua::Language` objects support equality (`==`) and can be used as Hash keys.
|
|
143
|
+
`Lingua::Language` objects support equality (`==`) and can be used as Hash keys. You can look up a language by name, ISO 639-1 code, or ISO 639-3 code using `[]`:
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
Lingua::Language['French'] # => #<Lingua::Language French>
|
|
147
|
+
Lingua::Language[:fr] # => #<Lingua::Language French>
|
|
148
|
+
Lingua::Language['fra'] # => #<Lingua::Language French>
|
|
149
|
+
Lingua::Language['xxx'] # => nil
|
|
150
|
+
```
|
|
119
151
|
|
|
120
152
|
| Method | Return type | Example |
|
|
121
153
|
|---|---|---|
|
|
122
|
-
| `
|
|
154
|
+
| `name` | `String` | `'French'` |
|
|
155
|
+
| `to_s` | `String` | `'French'` (alias for `name`) |
|
|
123
156
|
| `to_sym` | `Symbol` | `:french` |
|
|
124
157
|
| `to_iso` | `String` | `'fr'` (alias for `to_iso6391`) |
|
|
125
158
|
| `to_iso6391` | `String` | `'fr'` |
|
|
@@ -128,6 +161,14 @@ results.sum(&:confidence) # => 1.0
|
|
|
128
161
|
| `==` | `Boolean` | Compare two languages |
|
|
129
162
|
| `hash` | `Integer` | Hash value (usable as Hash key) |
|
|
130
163
|
|
|
164
|
+
Class methods:
|
|
165
|
+
|
|
166
|
+
| Method | Return type | Description |
|
|
167
|
+
|---|---|---|
|
|
168
|
+
| `Lingua::Language.all` | `Array<Lingua::Language>` | All supported languages |
|
|
169
|
+
| `Lingua::Language.names` | `Array<String>` | All language names (e.g. `'French'`) |
|
|
170
|
+
| `Lingua::Language.iso_codes` | `Array<String>` | All ISO 639-1 codes (e.g. `'fr'`) |
|
|
171
|
+
|
|
131
172
|
### `Lingua::ConfidenceResult` methods
|
|
132
173
|
|
|
133
174
|
Returned by `confidence_values`.
|
|
@@ -139,10 +180,28 @@ Returned by `confidence_values`.
|
|
|
139
180
|
| `to_s` | `String` | `'French (0.82)'` |
|
|
140
181
|
| `inspect` | `String` | `'#<Lingua::ConfidenceResult French (0.8217)>'` |
|
|
141
182
|
|
|
183
|
+
### `Lingua::Segment` methods
|
|
184
|
+
|
|
185
|
+
Returned by `detect_multiple`.
|
|
186
|
+
|
|
187
|
+
| Method | Return type | Example |
|
|
188
|
+
|---|---|---|
|
|
189
|
+
| `language` | `Lingua::Language` | `#<Lingua::Language French>` |
|
|
190
|
+
| `start_index` | `Integer` | `0` |
|
|
191
|
+
| `end_index` | `Integer` | `22` |
|
|
192
|
+
| `word_count` | `Integer` | `3` |
|
|
193
|
+
| `text` | `String` | `'Parlez-vous français? '` |
|
|
194
|
+
| `to_s` | `String` | `'French (0-22): Parlez-vous français? '` |
|
|
195
|
+
| `inspect` | `String` | `'#<Lingua::Segment French (0-22) "Parlez-vous français? ">'` |
|
|
196
|
+
|
|
142
197
|
## Development
|
|
143
198
|
|
|
144
199
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake compile` to build the native extension and `rake test` to run the tests.
|
|
145
200
|
|
|
201
|
+
## Acknowledgements
|
|
202
|
+
|
|
203
|
+
This gem is built on top of [Lingua](https://github.com/pemistahl/lingua-rs) by [Peter M. Stahl](https://github.com/pemistahl), a highly accurate natural language detection library written in Rust.
|
|
204
|
+
|
|
146
205
|
## License
|
|
147
206
|
|
|
148
207
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/ext/lingua/src/confidence_result.rs
CHANGED
|
@@ -1,7 +1,18 @@
|
|
|
1
1
|
use lingua::Language;
|
|
2
|
+
use magnus::{Error, RModule, Ruby, method, prelude::*};
|
|
2
3
|
|
|
3
4
|
use crate::language::WrappedLanguage;
|
|
4
5
|
|
|
6
|
+
pub fn define(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
|
|
7
|
+
let class = module.define_class("ConfidenceResult", ruby.class_object())?;
|
|
8
|
+
class.undef_default_alloc_func();
|
|
9
|
+
class.define_method("language", method!(ConfidenceResult::language, 0))?;
|
|
10
|
+
class.define_method("confidence", method!(ConfidenceResult::confidence, 0))?;
|
|
11
|
+
class.define_method("to_s", method!(ConfidenceResult::to_s, 0))?;
|
|
12
|
+
class.define_method("inspect", method!(ConfidenceResult::inspect, 0))?;
|
|
13
|
+
Ok(())
|
|
14
|
+
}
|
|
15
|
+
|
|
5
16
|
#[magnus::wrap(class = "Lingua::ConfidenceResult")]
|
|
6
17
|
pub struct ConfidenceResult {
|
|
7
18
|
pub language: Language,
|
data/ext/lingua/src/detector.rs
CHANGED
|
@@ -1,10 +1,21 @@
|
|
|
1
1
|
use lingua::{LanguageDetector, LanguageDetectorBuilder};
|
|
2
|
-
use magnus::{Error, RArray, RHash, Ruby};
|
|
2
|
+
use magnus::{Error, RArray, RHash, RModule, Ruby, function, method, prelude::*};
|
|
3
3
|
|
|
4
4
|
use crate::confidence_result::ConfidenceResult;
|
|
5
|
+
use crate::segment::Segment;
|
|
5
6
|
use crate::helpers::{fetch_option, parse_language, value_to_string};
|
|
6
7
|
use crate::language::WrappedLanguage;
|
|
7
8
|
|
|
9
|
+
pub fn define(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
|
|
10
|
+
let class = module.define_class("Detector", ruby.class_object())?;
|
|
11
|
+
class.define_singleton_method("new", function!(RubyDetector::new, -1))?;
|
|
12
|
+
class.define_method("detect", method!(RubyDetector::detect, 1))?;
|
|
13
|
+
class.define_method("confidence", method!(RubyDetector::confidence, 2))?;
|
|
14
|
+
class.define_method("confidence_values", method!(RubyDetector::confidence_values, 1))?;
|
|
15
|
+
class.define_method("detect_multiple", method!(RubyDetector::detect_multiple, 1))?;
|
|
16
|
+
Ok(())
|
|
17
|
+
}
|
|
18
|
+
|
|
8
19
|
pub fn compute_confidence(
|
|
9
20
|
detector: &LanguageDetector,
|
|
10
21
|
subject: String,
|
|
@@ -66,6 +77,32 @@ impl RubyDetector {
|
|
|
66
77
|
pub fn confidence_values(&self, subject: String) -> Result<RArray, Error> {
|
|
67
78
|
compute_confidence_values(&self.detector, subject)
|
|
68
79
|
}
|
|
80
|
+
|
|
81
|
+
pub fn detect_multiple(&self, subject: String) -> Result<RArray, Error> {
|
|
82
|
+
compute_detect_multiple(&self.detector, &subject)
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
pub fn compute_detect_multiple(
|
|
87
|
+
detector: &LanguageDetector,
|
|
88
|
+
subject: &str,
|
|
89
|
+
) -> Result<RArray, Error> {
|
|
90
|
+
let ruby = Ruby::get().unwrap();
|
|
91
|
+
let results = detector.detect_multiple_languages_of(subject);
|
|
92
|
+
let array = ruby.ary_new_capa(results.len());
|
|
93
|
+
for r in results {
|
|
94
|
+
let text = subject[r.start_index()..r.end_index()].to_string();
|
|
95
|
+
let start_index = subject[..r.start_index()].chars().count();
|
|
96
|
+
let end_index = start_index + text.chars().count();
|
|
97
|
+
array.push(Segment {
|
|
98
|
+
language: r.language(),
|
|
99
|
+
start_index,
|
|
100
|
+
end_index,
|
|
101
|
+
word_count: r.word_count(),
|
|
102
|
+
text,
|
|
103
|
+
})?;
|
|
104
|
+
}
|
|
105
|
+
Ok(array)
|
|
69
106
|
}
|
|
70
107
|
|
|
71
108
|
pub fn build_detector_from_options(
|
data/ext/lingua/src/language.rs
CHANGED
|
@@ -1,11 +1,71 @@
|
|
|
1
1
|
use lingua::Language;
|
|
2
|
-
use magnus::{Ruby, Symbol};
|
|
2
|
+
use magnus::{Error, RArray, RModule, Ruby, Symbol, function, method, prelude::*};
|
|
3
|
+
|
|
4
|
+
use crate::helpers::value_to_string;
|
|
5
|
+
|
|
6
|
+
pub fn define(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
|
|
7
|
+
let class = module.define_class("Language", ruby.class_object())?;
|
|
8
|
+
class.undef_default_alloc_func();
|
|
9
|
+
class.define_method("name", method!(WrappedLanguage::name, 0))?;
|
|
10
|
+
class.define_method("to_s", method!(WrappedLanguage::name, 0))?;
|
|
11
|
+
class.define_method("to_iso6391", method!(WrappedLanguage::to_iso6391, 0))?;
|
|
12
|
+
class.define_method("to_iso", method!(WrappedLanguage::to_iso6391, 0))?;
|
|
13
|
+
class.define_method("to_iso6393", method!(WrappedLanguage::to_iso6393, 0))?;
|
|
14
|
+
class.define_method("to_sym", method!(WrappedLanguage::to_sym, 0))?;
|
|
15
|
+
class.define_method("inspect", method!(WrappedLanguage::inspect, 0))?;
|
|
16
|
+
class.define_method("==", method!(WrappedLanguage::eq, 1))?;
|
|
17
|
+
class.define_method("eql?", method!(WrappedLanguage::eq, 1))?;
|
|
18
|
+
class.define_method("hash", method!(WrappedLanguage::hash, 0))?;
|
|
19
|
+
class.define_singleton_method("[]", function!(WrappedLanguage::lookup, 1))?;
|
|
20
|
+
class.define_singleton_method("all", function!(WrappedLanguage::all, 0))?;
|
|
21
|
+
class.define_singleton_method("names", function!(WrappedLanguage::names, 0))?;
|
|
22
|
+
class.define_singleton_method("iso_codes", function!(WrappedLanguage::iso_codes, 0))?;
|
|
23
|
+
Ok(())
|
|
24
|
+
}
|
|
3
25
|
|
|
4
26
|
#[magnus::wrap(class = "Lingua::Language")]
|
|
5
27
|
pub struct WrappedLanguage(pub Language);
|
|
6
28
|
|
|
7
29
|
impl WrappedLanguage {
|
|
8
|
-
pub fn
|
|
30
|
+
pub fn lookup(value: magnus::Value) -> Result<Option<WrappedLanguage>, Error> {
|
|
31
|
+
let input = value_to_string(value)?;
|
|
32
|
+
Ok(crate::helpers::parse_language(&input).map(WrappedLanguage))
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
pub fn all() -> Result<RArray, Error> {
|
|
36
|
+
let ruby = Ruby::get().unwrap();
|
|
37
|
+
let mut langs: Vec<Language> = Language::all().into_iter().collect();
|
|
38
|
+
langs.sort_by(|a, b| a.to_string().cmp(&b.to_string()));
|
|
39
|
+
let array = ruby.ary_new_capa(langs.len());
|
|
40
|
+
for lang in langs {
|
|
41
|
+
array.push(WrappedLanguage(lang))?;
|
|
42
|
+
}
|
|
43
|
+
Ok(array)
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
pub fn names() -> Result<RArray, Error> {
|
|
47
|
+
let ruby = Ruby::get().unwrap();
|
|
48
|
+
let mut langs: Vec<Language> = Language::all().into_iter().collect();
|
|
49
|
+
langs.sort_by(|a, b| a.to_string().cmp(&b.to_string()));
|
|
50
|
+
let array = ruby.ary_new_capa(langs.len());
|
|
51
|
+
for lang in langs {
|
|
52
|
+
array.push(lang.to_string())?;
|
|
53
|
+
}
|
|
54
|
+
Ok(array)
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
pub fn iso_codes() -> Result<RArray, Error> {
|
|
58
|
+
let ruby = Ruby::get().unwrap();
|
|
59
|
+
let mut langs: Vec<Language> = Language::all().into_iter().collect();
|
|
60
|
+
langs.sort_by(|a, b| a.to_string().cmp(&b.to_string()));
|
|
61
|
+
let array = ruby.ary_new_capa(langs.len());
|
|
62
|
+
for lang in langs {
|
|
63
|
+
array.push(lang.iso_code_639_1().to_string())?;
|
|
64
|
+
}
|
|
65
|
+
Ok(array)
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
pub fn name(&self) -> String {
|
|
9
69
|
self.0.to_string()
|
|
10
70
|
}
|
|
11
71
|
|
data/ext/lingua/src/lib.rs
CHANGED
|
@@ -2,11 +2,11 @@ mod confidence_result;
|
|
|
2
2
|
mod detector;
|
|
3
3
|
mod helpers;
|
|
4
4
|
mod language;
|
|
5
|
+
mod segment;
|
|
5
6
|
|
|
6
|
-
use magnus::{Error, RArray, RHash, Ruby, function,
|
|
7
|
+
use magnus::{Error, RArray, RHash, Ruby, function, prelude::*};
|
|
7
8
|
|
|
8
|
-
use
|
|
9
|
-
use detector::{RubyDetector, build_detector_from_options, compute_confidence, compute_confidence_values};
|
|
9
|
+
use detector::{build_detector_from_options, compute_confidence, compute_confidence_values, compute_detect_multiple};
|
|
10
10
|
use language::WrappedLanguage;
|
|
11
11
|
|
|
12
12
|
fn detect(ruby: &Ruby, arguments: RArray) -> Result<Option<WrappedLanguage>, Error> {
|
|
@@ -32,42 +32,28 @@ fn confidence_values(ruby: &Ruby, arguments: RArray) -> Result<RArray, Error> {
|
|
|
32
32
|
compute_confidence_values(&detector, subject)
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
+
fn detect_multiple(ruby: &Ruby, arguments: RArray) -> Result<RArray, Error> {
|
|
36
|
+
let subject = arguments
|
|
37
|
+
.shift::<String>()
|
|
38
|
+
.map_err(|_| Error::new(ruby.exception_arg_error(), "expected a string as first argument"))?;
|
|
39
|
+
let options = arguments.shift::<RHash>().ok();
|
|
40
|
+
let detector = build_detector_from_options(ruby, options.as_ref())?;
|
|
41
|
+
compute_detect_multiple(&detector, &subject)
|
|
42
|
+
}
|
|
43
|
+
|
|
35
44
|
#[magnus::init]
|
|
36
45
|
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
37
46
|
let module = ruby.define_module("Lingua")?;
|
|
38
47
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
language_class.define_method("to_iso6391", method!(WrappedLanguage::to_iso6391, 0))?;
|
|
44
|
-
language_class.define_method("to_iso", method!(WrappedLanguage::to_iso6391, 0))?;
|
|
45
|
-
language_class.define_method("to_iso6393", method!(WrappedLanguage::to_iso6393, 0))?;
|
|
46
|
-
language_class.define_method("to_sym", method!(WrappedLanguage::to_sym, 0))?;
|
|
47
|
-
language_class.define_method("inspect", method!(WrappedLanguage::inspect, 0))?;
|
|
48
|
-
language_class.define_method("==", method!(WrappedLanguage::eq, 1))?;
|
|
49
|
-
language_class.define_method("eql?", method!(WrappedLanguage::eq, 1))?;
|
|
50
|
-
language_class.define_method("hash", method!(WrappedLanguage::hash, 0))?;
|
|
51
|
-
|
|
52
|
-
// Lingua::ConfidenceResult
|
|
53
|
-
let confidence_class = module.define_class("ConfidenceResult", ruby.class_object())?;
|
|
54
|
-
confidence_class.undef_default_alloc_func();
|
|
55
|
-
confidence_class.define_method("language", method!(ConfidenceResult::language, 0))?;
|
|
56
|
-
confidence_class.define_method("confidence", method!(ConfidenceResult::confidence, 0))?;
|
|
57
|
-
confidence_class.define_method("to_s", method!(ConfidenceResult::to_s, 0))?;
|
|
58
|
-
confidence_class.define_method("inspect", method!(ConfidenceResult::inspect, 0))?;
|
|
59
|
-
|
|
60
|
-
// Lingua::Detector
|
|
61
|
-
let detector_class = module.define_class("Detector", ruby.class_object())?;
|
|
62
|
-
detector_class.define_singleton_method("new", function!(RubyDetector::new, -1))?;
|
|
63
|
-
detector_class.define_method("detect", method!(RubyDetector::detect, 1))?;
|
|
64
|
-
detector_class.define_method("confidence", method!(RubyDetector::confidence, 2))?;
|
|
65
|
-
detector_class.define_method("confidence_values", method!(RubyDetector::confidence_values, 1))?;
|
|
48
|
+
language::define(ruby, &module)?;
|
|
49
|
+
confidence_result::define(ruby, &module)?;
|
|
50
|
+
segment::define(ruby, &module)?;
|
|
51
|
+
detector::define(ruby, &module)?;
|
|
66
52
|
|
|
67
|
-
// Functional API (module methods)
|
|
68
53
|
module.define_singleton_method("detect", function!(detect, -2))?;
|
|
69
54
|
module.define_singleton_method("confidence", function!(confidence, 2))?;
|
|
70
55
|
module.define_singleton_method("confidence_values", function!(confidence_values, -2))?;
|
|
56
|
+
module.define_singleton_method("detect_multiple", function!(detect_multiple, -2))?;
|
|
71
57
|
|
|
72
58
|
Ok(())
|
|
73
59
|
}
|
data/ext/lingua/src/segment.rs
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
use lingua::Language;
|
|
2
|
+
use magnus::{Error, RModule, Ruby, method, prelude::*};
|
|
3
|
+
|
|
4
|
+
use crate::language::WrappedLanguage;
|
|
5
|
+
|
|
6
|
+
pub fn define(ruby: &Ruby, module: &RModule) -> Result<(), Error> {
|
|
7
|
+
let class = module.define_class("Segment", ruby.class_object())?;
|
|
8
|
+
class.undef_default_alloc_func();
|
|
9
|
+
class.define_method("language", method!(Segment::language, 0))?;
|
|
10
|
+
class.define_method("start_index", method!(Segment::start_index, 0))?;
|
|
11
|
+
class.define_method("end_index", method!(Segment::end_index, 0))?;
|
|
12
|
+
class.define_method("word_count", method!(Segment::word_count, 0))?;
|
|
13
|
+
class.define_method("text", method!(Segment::text, 0))?;
|
|
14
|
+
class.define_method("to_s", method!(Segment::to_s, 0))?;
|
|
15
|
+
class.define_method("inspect", method!(Segment::inspect, 0))?;
|
|
16
|
+
Ok(())
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
#[magnus::wrap(class = "Lingua::Segment")]
|
|
20
|
+
pub struct Segment {
|
|
21
|
+
pub language: Language,
|
|
22
|
+
pub start_index: usize,
|
|
23
|
+
pub end_index: usize,
|
|
24
|
+
pub word_count: usize,
|
|
25
|
+
pub text: String,
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
impl Segment {
|
|
29
|
+
pub fn language(&self) -> WrappedLanguage {
|
|
30
|
+
WrappedLanguage(self.language)
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
pub fn start_index(&self) -> usize {
|
|
34
|
+
self.start_index
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
pub fn end_index(&self) -> usize {
|
|
38
|
+
self.end_index
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
pub fn word_count(&self) -> usize {
|
|
42
|
+
self.word_count
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
pub fn text(&self) -> String {
|
|
46
|
+
self.text.clone()
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
pub fn to_s(&self) -> String {
|
|
50
|
+
format!("{} ({}-{}): {}", self.language, self.start_index, self.end_index, self.text)
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
pub fn inspect(&self) -> String {
|
|
54
|
+
format!(
|
|
55
|
+
"#<Lingua::Segment {} ({}-{}) \"{}\">",
|
|
56
|
+
self.language, self.start_index, self.end_index, self.text
|
|
57
|
+
)
|
|
58
|
+
}
|
|
59
|
+
}
|
data/lib/lingua/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lingua_rs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sébastien Vrillaud
|
|
@@ -59,6 +59,7 @@ files:
|
|
|
59
59
|
- ext/lingua/src/helpers.rs
|
|
60
60
|
- ext/lingua/src/language.rs
|
|
61
61
|
- ext/lingua/src/lib.rs
|
|
62
|
+
- ext/lingua/src/segment.rs
|
|
62
63
|
- lib/lingua.rb
|
|
63
64
|
- lib/lingua/version.rb
|
|
64
65
|
- sig/lingua.rbs
|
|
@@ -66,9 +67,10 @@ homepage: https://github.com/kochka/lingua_rs
|
|
|
66
67
|
licenses:
|
|
67
68
|
- MIT
|
|
68
69
|
metadata:
|
|
69
|
-
homepage_uri: https://github.com/kochka/lingua_rs
|
|
70
70
|
source_code_uri: https://github.com/kochka/lingua_rs
|
|
71
71
|
changelog_uri: https://github.com/kochka/lingua_rs/blob/main/CHANGELOG.md
|
|
72
|
+
documentation_uri: https://github.com/kochka/lingua_rs#readme
|
|
73
|
+
bug_tracker_uri: https://github.com/kochka/lingua_rs/issues
|
|
72
74
|
rdoc_options: []
|
|
73
75
|
require_paths:
|
|
74
76
|
- lib
|