spellkit 0.2.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +697 -0
- data/ext/spellkit/Cargo.toml +19 -0
- data/ext/spellkit/extconf.rb +4 -0
- data/ext/spellkit/src/guards.rs +75 -0
- data/ext/spellkit/src/lib.rs +393 -0
- data/ext/spellkit/src/symspell.rs +349 -0
- data/lib/spellkit/3.1/spellkit.so +0 -0
- data/lib/spellkit/3.2/spellkit.so +0 -0
- data/lib/spellkit/3.3/spellkit.so +0 -0
- data/lib/spellkit/3.4/spellkit.so +0 -0
- data/lib/spellkit/version.rb +5 -0
- data/lib/spellkit.rb +368 -0
- metadata +202 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "spellkit"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
authors = ["Chris Petersen <chris@petersen.io>"]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
description = "Fast, safe typo correction for search-term extraction"
|
|
8
|
+
|
|
9
|
+
[lib]
|
|
10
|
+
name = "spellkit"
|
|
11
|
+
crate-type = ["cdylib"]
|
|
12
|
+
|
|
13
|
+
[dependencies]
|
|
14
|
+
magnus = { version = "0.7", features = ["rb-sys"] }
|
|
15
|
+
hashbrown = "0.15"
|
|
16
|
+
unicode-normalization = "0.1"
|
|
17
|
+
regex = "1.11"
|
|
18
|
+
|
|
19
|
+
[dev-dependencies]
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
use hashbrown::HashSet;
|
|
2
|
+
use regex::{Regex, RegexBuilder};
|
|
3
|
+
use crate::symspell::SymSpell;
|
|
4
|
+
|
|
5
|
+
#[derive(Debug, Clone)]
|
|
6
|
+
pub struct Guards {
|
|
7
|
+
protected_set: HashSet<String>,
|
|
8
|
+
protected_patterns: Vec<Regex>,
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
impl Guards {
|
|
12
|
+
pub fn new() -> Self {
|
|
13
|
+
Self {
|
|
14
|
+
protected_set: HashSet::new(),
|
|
15
|
+
protected_patterns: Vec::new(),
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
pub fn load_protected(&mut self, content: &str) {
|
|
20
|
+
for line in content.lines() {
|
|
21
|
+
let trimmed = line.trim();
|
|
22
|
+
if !trimmed.is_empty() && !trimmed.starts_with('#') {
|
|
23
|
+
// Store literal form
|
|
24
|
+
self.protected_set.insert(trimmed.to_string());
|
|
25
|
+
// Store lowercase form
|
|
26
|
+
self.protected_set.insert(trimmed.to_lowercase());
|
|
27
|
+
// Store normalized form (strips whitespace, converts to lowercase)
|
|
28
|
+
// This ensures variants like "newyork" are protected if "New York" is in the list
|
|
29
|
+
let normalized = SymSpell::normalize_word(trimmed);
|
|
30
|
+
self.protected_set.insert(normalized);
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
pub fn add_pattern_with_flags(
|
|
36
|
+
&mut self,
|
|
37
|
+
pattern: &str,
|
|
38
|
+
case_insensitive: bool,
|
|
39
|
+
multiline: bool,
|
|
40
|
+
extended: bool,
|
|
41
|
+
) -> Result<(), String> {
|
|
42
|
+
match RegexBuilder::new(pattern)
|
|
43
|
+
.case_insensitive(case_insensitive)
|
|
44
|
+
.multi_line(multiline)
|
|
45
|
+
.ignore_whitespace(extended)
|
|
46
|
+
.build()
|
|
47
|
+
{
|
|
48
|
+
Ok(regex) => {
|
|
49
|
+
self.protected_patterns.push(regex);
|
|
50
|
+
Ok(())
|
|
51
|
+
}
|
|
52
|
+
Err(e) => Err(format!("Invalid regex pattern: {}", e)),
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
pub fn is_protected(&self, word: &str) -> bool {
|
|
57
|
+
let lower = word.to_lowercase();
|
|
58
|
+
|
|
59
|
+
if self.protected_set.contains(word) || self.protected_set.contains(&lower) {
|
|
60
|
+
return true;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
for pattern in &self.protected_patterns {
|
|
64
|
+
if pattern.is_match(word) {
|
|
65
|
+
return true;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
false
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
pub fn is_protected_normalized(&self, word: &str, normalized: &str) -> bool {
|
|
73
|
+
self.is_protected(word) || self.is_protected(normalized)
|
|
74
|
+
}
|
|
75
|
+
}
|
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
mod symspell;
|
|
2
|
+
mod guards;
|
|
3
|
+
|
|
4
|
+
use magnus::{class, define_module, function, method, prelude::*, Error, RArray, RHash, Ruby, Value, TryConvert};
|
|
5
|
+
use std::sync::{Arc, RwLock};
|
|
6
|
+
use symspell::SymSpell;
|
|
7
|
+
use guards::Guards;
|
|
8
|
+
|
|
9
|
+
use std::time::{SystemTime, UNIX_EPOCH};
|
|
10
|
+
|
|
11
|
+
#[derive(Clone)]
|
|
12
|
+
#[magnus::wrap(class = "SpellKit::Checker", free_immediately, size)]
|
|
13
|
+
struct Checker {
|
|
14
|
+
state: Arc<RwLock<CheckerState>>,
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
struct CheckerState {
|
|
18
|
+
symspell: Option<SymSpell>,
|
|
19
|
+
guards: Guards,
|
|
20
|
+
loaded: bool,
|
|
21
|
+
frequency_threshold: f64,
|
|
22
|
+
loaded_at: Option<u64>,
|
|
23
|
+
dictionary_size: usize,
|
|
24
|
+
edit_distance: usize,
|
|
25
|
+
skipped_malformed: usize,
|
|
26
|
+
skipped_multiword: usize,
|
|
27
|
+
skipped_invalid_freq: usize,
|
|
28
|
+
skipped_duplicates: usize,
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
impl CheckerState {
|
|
32
|
+
fn new() -> Self {
|
|
33
|
+
Self {
|
|
34
|
+
symspell: None,
|
|
35
|
+
guards: Guards::new(),
|
|
36
|
+
loaded: false,
|
|
37
|
+
frequency_threshold: 10.0,
|
|
38
|
+
loaded_at: None,
|
|
39
|
+
dictionary_size: 0,
|
|
40
|
+
edit_distance: 1,
|
|
41
|
+
skipped_malformed: 0,
|
|
42
|
+
skipped_multiword: 0,
|
|
43
|
+
skipped_invalid_freq: 0,
|
|
44
|
+
skipped_duplicates: 0,
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// Helper function to correct a single word
|
|
50
|
+
// Returns the corrected word or the original if no correction is appropriate
|
|
51
|
+
fn correct_word(
|
|
52
|
+
state: &CheckerState,
|
|
53
|
+
symspell: &SymSpell,
|
|
54
|
+
word: &str,
|
|
55
|
+
) -> String {
|
|
56
|
+
// Always check if word is protected
|
|
57
|
+
let normalized = SymSpell::normalize_word(word);
|
|
58
|
+
if state.guards.is_protected_normalized(word, &normalized) {
|
|
59
|
+
return word.to_string();
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
let suggestions = symspell.suggestions(word, 5);
|
|
63
|
+
|
|
64
|
+
// If exact match exists, return canonical form from dictionary
|
|
65
|
+
if !suggestions.is_empty() && suggestions[0].distance == 0 {
|
|
66
|
+
return suggestions[0].term.clone();
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// Get original word's frequency (if it exists in dictionary)
|
|
70
|
+
let original_freq = symspell.get_frequency(word);
|
|
71
|
+
|
|
72
|
+
// Find best correction with frequency threshold
|
|
73
|
+
for suggestion in &suggestions {
|
|
74
|
+
if suggestion.distance <= state.edit_distance {
|
|
75
|
+
// Apply frequency threshold
|
|
76
|
+
let passes_threshold = match original_freq {
|
|
77
|
+
// Word not in dictionary: require suggestion frequency >= absolute threshold
|
|
78
|
+
None => suggestion.frequency as f64 >= state.frequency_threshold,
|
|
79
|
+
// Word in dictionary: require suggestion frequency >= threshold * original frequency
|
|
80
|
+
Some(orig_freq) => {
|
|
81
|
+
suggestion.frequency as f64 >= state.frequency_threshold * orig_freq as f64
|
|
82
|
+
}
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
if passes_threshold {
|
|
86
|
+
return suggestion.term.clone();
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// No suggestions passed the threshold
|
|
92
|
+
word.to_string()
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
impl Checker {
|
|
96
|
+
fn new() -> Self {
|
|
97
|
+
Self {
|
|
98
|
+
state: Arc::new(RwLock::new(CheckerState::new())),
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
fn load_full(&self, config: RHash) -> Result<(), Error> {
|
|
103
|
+
let ruby = Ruby::get().unwrap();
|
|
104
|
+
|
|
105
|
+
// Required: dictionary path
|
|
106
|
+
let dictionary_path: String = TryConvert::try_convert(
|
|
107
|
+
config.fetch::<_, Value>("dictionary_path")
|
|
108
|
+
.map_err(|_| Error::new(ruby.exception_arg_error(), "dictionary_path is required"))?
|
|
109
|
+
)?;
|
|
110
|
+
|
|
111
|
+
// Optional: edit distance
|
|
112
|
+
let edit_dist: usize = config.get("edit_distance")
|
|
113
|
+
.and_then(|v: Value| TryConvert::try_convert(v).ok())
|
|
114
|
+
.unwrap_or(1);
|
|
115
|
+
|
|
116
|
+
if edit_dist > 2 {
|
|
117
|
+
return Err(Error::new(ruby.exception_arg_error(), "edit_distance must be 1 or 2"));
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// Stream dictionary loading: read line-by-line and add directly to SymSpell
|
|
121
|
+
// This avoids buffering the entire file and intermediate Vec allocation
|
|
122
|
+
let file = std::fs::File::open(&dictionary_path)
|
|
123
|
+
.map_err(|e| Error::new(ruby.exception_runtime_error(), format!("Failed to open dictionary file: {}", e)))?;
|
|
124
|
+
|
|
125
|
+
let reader = std::io::BufReader::new(file);
|
|
126
|
+
let mut symspell = SymSpell::new(edit_dist);
|
|
127
|
+
let mut dictionary_size = 0;
|
|
128
|
+
let mut skipped_malformed = 0;
|
|
129
|
+
let mut skipped_multiword = 0;
|
|
130
|
+
let mut skipped_invalid_freq = 0;
|
|
131
|
+
let mut skipped_duplicates = 0;
|
|
132
|
+
|
|
133
|
+
use std::io::BufRead;
|
|
134
|
+
for line in reader.lines() {
|
|
135
|
+
let line = line.map_err(|e| Error::new(ruby.exception_runtime_error(), format!("Failed to read line: {}", e)))?;
|
|
136
|
+
|
|
137
|
+
// Try tab-separated first (allows multi-word terms), then space-separated (SymSpell format)
|
|
138
|
+
let parts: Vec<&str> = if line.contains('\t') {
|
|
139
|
+
line.split('\t').collect()
|
|
140
|
+
} else {
|
|
141
|
+
line.split_whitespace().collect()
|
|
142
|
+
};
|
|
143
|
+
|
|
144
|
+
// Validate we have exactly 2 columns (term and frequency)
|
|
145
|
+
if parts.len() != 2 {
|
|
146
|
+
skipped_malformed += 1;
|
|
147
|
+
continue;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
let term = parts[0].trim();
|
|
151
|
+
let freq_str = parts[1].trim();
|
|
152
|
+
|
|
153
|
+
// Skip empty terms or frequencies
|
|
154
|
+
if term.is_empty() || freq_str.is_empty() {
|
|
155
|
+
skipped_malformed += 1;
|
|
156
|
+
continue;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// Check for multi-word terms (SymSpell algorithm doesn't support phrases)
|
|
160
|
+
if term.contains(char::is_whitespace) {
|
|
161
|
+
skipped_multiword += 1;
|
|
162
|
+
continue;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// Parse frequency
|
|
166
|
+
match freq_str.parse::<u64>() {
|
|
167
|
+
Ok(freq) => {
|
|
168
|
+
let normalized = SymSpell::normalize_word(term);
|
|
169
|
+
let was_new = symspell.add_word(&normalized, term, freq);
|
|
170
|
+
if was_new {
|
|
171
|
+
dictionary_size += 1;
|
|
172
|
+
} else {
|
|
173
|
+
skipped_duplicates += 1;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
Err(_) => {
|
|
177
|
+
skipped_invalid_freq += 1;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
let mut guards = Guards::new();
|
|
183
|
+
|
|
184
|
+
// Load optional protected terms file
|
|
185
|
+
if let Some(protected_path) = config.get("protected_path") {
|
|
186
|
+
let path: String = TryConvert::try_convert(protected_path)?;
|
|
187
|
+
let content = std::fs::read_to_string(&path)
|
|
188
|
+
.map_err(|e| Error::new(ruby.exception_runtime_error(),
|
|
189
|
+
format!("Failed to read protected terms file '{}': {}", path, e)))?;
|
|
190
|
+
guards.load_protected(&content);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
// Load optional protected patterns
|
|
194
|
+
if let Some(patterns_value) = config.get("protected_patterns") {
|
|
195
|
+
let patterns: RArray = TryConvert::try_convert(patterns_value)?;
|
|
196
|
+
for pattern_value in patterns.into_iter() {
|
|
197
|
+
let pattern_hash: RHash = TryConvert::try_convert(pattern_value)?;
|
|
198
|
+
|
|
199
|
+
let source: String = TryConvert::try_convert(
|
|
200
|
+
pattern_hash.fetch::<_, Value>("source")
|
|
201
|
+
.map_err(|_| Error::new(ruby.exception_arg_error(), "pattern hash missing 'source' key"))?
|
|
202
|
+
)?;
|
|
203
|
+
|
|
204
|
+
let case_insensitive: bool = pattern_hash.get("case_insensitive")
|
|
205
|
+
.and_then(|v: Value| TryConvert::try_convert(v).ok())
|
|
206
|
+
.unwrap_or(false);
|
|
207
|
+
|
|
208
|
+
let multiline: bool = pattern_hash.get("multiline")
|
|
209
|
+
.and_then(|v: Value| TryConvert::try_convert(v).ok())
|
|
210
|
+
.unwrap_or(false);
|
|
211
|
+
|
|
212
|
+
let extended: bool = pattern_hash.get("extended")
|
|
213
|
+
.and_then(|v: Value| TryConvert::try_convert(v).ok())
|
|
214
|
+
.unwrap_or(false);
|
|
215
|
+
|
|
216
|
+
guards.add_pattern_with_flags(&source, case_insensitive, multiline, extended)
|
|
217
|
+
.map_err(|e| Error::new(ruby.exception_arg_error(), e))?;
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
// Optional frequency threshold
|
|
222
|
+
let frequency_threshold: f64 = config.get("frequency_threshold")
|
|
223
|
+
.and_then(|v: Value| TryConvert::try_convert(v).ok())
|
|
224
|
+
.unwrap_or(10.0);
|
|
225
|
+
|
|
226
|
+
// Validate frequency threshold
|
|
227
|
+
if !frequency_threshold.is_finite() {
|
|
228
|
+
return Err(Error::new(ruby.exception_arg_error(), "frequency_threshold must be finite (not NaN or Infinity)"));
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
if frequency_threshold < 0.0 {
|
|
232
|
+
return Err(Error::new(ruby.exception_arg_error(), format!("frequency_threshold must be non-negative, got: {}", frequency_threshold)));
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
let loaded_at = SystemTime::now()
|
|
236
|
+
.duration_since(UNIX_EPOCH)
|
|
237
|
+
.ok()
|
|
238
|
+
.map(|d| d.as_secs());
|
|
239
|
+
|
|
240
|
+
let mut state = self.state.write().unwrap();
|
|
241
|
+
state.symspell = Some(symspell);
|
|
242
|
+
state.guards = guards;
|
|
243
|
+
state.frequency_threshold = frequency_threshold;
|
|
244
|
+
state.loaded = true;
|
|
245
|
+
state.loaded_at = loaded_at;
|
|
246
|
+
state.dictionary_size = dictionary_size;
|
|
247
|
+
state.edit_distance = edit_dist;
|
|
248
|
+
state.skipped_malformed = skipped_malformed;
|
|
249
|
+
state.skipped_multiword = skipped_multiword;
|
|
250
|
+
state.skipped_invalid_freq = skipped_invalid_freq;
|
|
251
|
+
state.skipped_duplicates = skipped_duplicates;
|
|
252
|
+
|
|
253
|
+
Ok(())
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
fn suggestions(&self, word: String, max: Option<usize>) -> Result<RArray, Error> {
|
|
257
|
+
let ruby = Ruby::get().unwrap();
|
|
258
|
+
let max_suggestions = max.unwrap_or(5);
|
|
259
|
+
let state = self.state.read().unwrap();
|
|
260
|
+
|
|
261
|
+
if !state.loaded {
|
|
262
|
+
return Err(Error::new(ruby.exception_runtime_error(), "Dictionary not loaded. Call load! first"));
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
if let Some(ref symspell) = state.symspell {
|
|
266
|
+
let suggestions = symspell.suggestions(&word, max_suggestions);
|
|
267
|
+
let result = RArray::new();
|
|
268
|
+
|
|
269
|
+
for suggestion in suggestions {
|
|
270
|
+
let hash = RHash::new();
|
|
271
|
+
hash.aset("term", suggestion.term)?;
|
|
272
|
+
hash.aset("distance", suggestion.distance)?;
|
|
273
|
+
hash.aset("freq", suggestion.frequency)?;
|
|
274
|
+
result.push(hash)?;
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
Ok(result)
|
|
278
|
+
} else {
|
|
279
|
+
Err(Error::new(ruby.exception_runtime_error(), "SymSpell not initialized"))
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
fn correct(&self, word: String) -> Result<bool, Error> {
|
|
284
|
+
let ruby = Ruby::get().unwrap();
|
|
285
|
+
let state = self.state.read().unwrap();
|
|
286
|
+
|
|
287
|
+
if !state.loaded {
|
|
288
|
+
return Err(Error::new(ruby.exception_runtime_error(), "Dictionary not loaded. Call load! first"));
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
if let Some(ref symspell) = state.symspell {
|
|
292
|
+
Ok(symspell.contains(&word))
|
|
293
|
+
} else {
|
|
294
|
+
Err(Error::new(ruby.exception_runtime_error(), "SymSpell not initialized"))
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
fn correct_if_unknown(&self, word: String) -> Result<String, Error> {
|
|
299
|
+
let ruby = Ruby::get().unwrap();
|
|
300
|
+
let state = self.state.read().unwrap();
|
|
301
|
+
|
|
302
|
+
if !state.loaded {
|
|
303
|
+
return Err(Error::new(ruby.exception_runtime_error(), "Dictionary not loaded. Call load! first"));
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
if let Some(ref symspell) = state.symspell {
|
|
307
|
+
Ok(correct_word(&state, symspell, &word))
|
|
308
|
+
} else {
|
|
309
|
+
Err(Error::new(ruby.exception_runtime_error(), "SymSpell not initialized"))
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
fn correct_tokens(&self, tokens: RArray) -> Result<RArray, Error> {
|
|
314
|
+
// Optimize batch correction by acquiring lock once for all tokens
|
|
315
|
+
// instead of calling correct_if_unknown per token (which re-locks each time)
|
|
316
|
+
let ruby = Ruby::get().unwrap();
|
|
317
|
+
let state = self.state.read().unwrap();
|
|
318
|
+
|
|
319
|
+
if !state.loaded {
|
|
320
|
+
return Err(Error::new(ruby.exception_runtime_error(), "Dictionary not loaded. Call load! first"));
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
let result = RArray::new();
|
|
324
|
+
|
|
325
|
+
if let Some(ref symspell) = state.symspell {
|
|
326
|
+
for token in tokens.into_iter() {
|
|
327
|
+
let word: String = TryConvert::try_convert(token)?;
|
|
328
|
+
let corrected = correct_word(&state, symspell, &word);
|
|
329
|
+
result.push(corrected)?;
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
Ok(result)
|
|
333
|
+
} else {
|
|
334
|
+
Err(Error::new(ruby.exception_runtime_error(), "SymSpell not initialized"))
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
fn stats(&self) -> Result<RHash, Error> {
|
|
339
|
+
let state = self.state.read().unwrap();
|
|
340
|
+
let stats = RHash::new();
|
|
341
|
+
|
|
342
|
+
if !state.loaded {
|
|
343
|
+
stats.aset("loaded", false)?;
|
|
344
|
+
return Ok(stats);
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
stats.aset("loaded", true)?;
|
|
348
|
+
stats.aset("dictionary_size", state.dictionary_size)?;
|
|
349
|
+
stats.aset("edit_distance", state.edit_distance)?;
|
|
350
|
+
stats.aset("skipped_malformed", state.skipped_malformed)?;
|
|
351
|
+
stats.aset("skipped_multiword", state.skipped_multiword)?;
|
|
352
|
+
stats.aset("skipped_invalid_freq", state.skipped_invalid_freq)?;
|
|
353
|
+
stats.aset("skipped_duplicates", state.skipped_duplicates)?;
|
|
354
|
+
|
|
355
|
+
if let Some(loaded_at) = state.loaded_at {
|
|
356
|
+
stats.aset("loaded_at", loaded_at)?;
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
Ok(stats)
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
fn healthcheck(&self) -> Result<(), Error> {
|
|
363
|
+
let ruby = Ruby::get().unwrap();
|
|
364
|
+
let state = self.state.read().unwrap();
|
|
365
|
+
|
|
366
|
+
if !state.loaded {
|
|
367
|
+
return Err(Error::new(ruby.exception_runtime_error(), "Dictionary not loaded"));
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
if state.symspell.is_none() {
|
|
371
|
+
return Err(Error::new(ruby.exception_runtime_error(), "SymSpell not initialized"));
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
Ok(())
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
#[magnus::init]
|
|
379
|
+
fn init(_ruby: &Ruby) -> Result<(), Error> {
|
|
380
|
+
let module = define_module("SpellKit")?;
|
|
381
|
+
let checker_class = module.define_class("Checker", class::object())?;
|
|
382
|
+
|
|
383
|
+
checker_class.define_singleton_method("new", function!(Checker::new, 0))?;
|
|
384
|
+
checker_class.define_method("load!", method!(Checker::load_full, 1))?;
|
|
385
|
+
checker_class.define_method("suggestions", method!(Checker::suggestions, 2))?;
|
|
386
|
+
checker_class.define_method("correct?", method!(Checker::correct, 1))?;
|
|
387
|
+
checker_class.define_method("correct", method!(Checker::correct_if_unknown, 1))?;
|
|
388
|
+
checker_class.define_method("correct_tokens", method!(Checker::correct_tokens, 1))?;
|
|
389
|
+
checker_class.define_method("stats", method!(Checker::stats, 0))?;
|
|
390
|
+
checker_class.define_method("healthcheck", method!(Checker::healthcheck, 0))?;
|
|
391
|
+
|
|
392
|
+
Ok(())
|
|
393
|
+
}
|