spellkit 0.1.0.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,402 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "1.1.3"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "allocator-api2"
16
+ version = "0.2.21"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
19
+
20
+ [[package]]
21
+ name = "bindgen"
22
+ version = "0.69.5"
23
+ source = "registry+https://github.com/rust-lang/crates.io-index"
24
+ checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
25
+ dependencies = [
26
+ "bitflags",
27
+ "cexpr",
28
+ "clang-sys",
29
+ "itertools",
30
+ "lazy_static",
31
+ "lazycell",
32
+ "proc-macro2",
33
+ "quote",
34
+ "regex",
35
+ "rustc-hash",
36
+ "shlex",
37
+ "syn",
38
+ ]
39
+
40
+ [[package]]
41
+ name = "bitflags"
42
+ version = "2.9.4"
43
+ source = "registry+https://github.com/rust-lang/crates.io-index"
44
+ checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
45
+
46
+ [[package]]
47
+ name = "cexpr"
48
+ version = "0.6.0"
49
+ source = "registry+https://github.com/rust-lang/crates.io-index"
50
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
51
+ dependencies = [
52
+ "nom",
53
+ ]
54
+
55
+ [[package]]
56
+ name = "cfg-if"
57
+ version = "1.0.3"
58
+ source = "registry+https://github.com/rust-lang/crates.io-index"
59
+ checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
60
+
61
+ [[package]]
62
+ name = "clang-sys"
63
+ version = "1.8.1"
64
+ source = "registry+https://github.com/rust-lang/crates.io-index"
65
+ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
66
+ dependencies = [
67
+ "glob",
68
+ "libc",
69
+ "libloading",
70
+ ]
71
+
72
+ [[package]]
73
+ name = "either"
74
+ version = "1.15.0"
75
+ source = "registry+https://github.com/rust-lang/crates.io-index"
76
+ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
77
+
78
+ [[package]]
79
+ name = "equivalent"
80
+ version = "1.0.2"
81
+ source = "registry+https://github.com/rust-lang/crates.io-index"
82
+ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
83
+
84
+ [[package]]
85
+ name = "foldhash"
86
+ version = "0.1.5"
87
+ source = "registry+https://github.com/rust-lang/crates.io-index"
88
+ checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
89
+
90
+ [[package]]
91
+ name = "glob"
92
+ version = "0.3.3"
93
+ source = "registry+https://github.com/rust-lang/crates.io-index"
94
+ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
95
+
96
+ [[package]]
97
+ name = "hashbrown"
98
+ version = "0.15.5"
99
+ source = "registry+https://github.com/rust-lang/crates.io-index"
100
+ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
101
+ dependencies = [
102
+ "allocator-api2",
103
+ "equivalent",
104
+ "foldhash",
105
+ ]
106
+
107
+ [[package]]
108
+ name = "itertools"
109
+ version = "0.12.1"
110
+ source = "registry+https://github.com/rust-lang/crates.io-index"
111
+ checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
112
+ dependencies = [
113
+ "either",
114
+ ]
115
+
116
+ [[package]]
117
+ name = "itoa"
118
+ version = "1.0.15"
119
+ source = "registry+https://github.com/rust-lang/crates.io-index"
120
+ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
121
+
122
+ [[package]]
123
+ name = "lazy_static"
124
+ version = "1.5.0"
125
+ source = "registry+https://github.com/rust-lang/crates.io-index"
126
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
127
+
128
+ [[package]]
129
+ name = "lazycell"
130
+ version = "1.3.0"
131
+ source = "registry+https://github.com/rust-lang/crates.io-index"
132
+ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
133
+
134
+ [[package]]
135
+ name = "libc"
136
+ version = "0.2.176"
137
+ source = "registry+https://github.com/rust-lang/crates.io-index"
138
+ checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
139
+
140
+ [[package]]
141
+ name = "libloading"
142
+ version = "0.8.9"
143
+ source = "registry+https://github.com/rust-lang/crates.io-index"
144
+ checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
145
+ dependencies = [
146
+ "cfg-if",
147
+ "windows-link",
148
+ ]
149
+
150
+ [[package]]
151
+ name = "magnus"
152
+ version = "0.7.1"
153
+ source = "registry+https://github.com/rust-lang/crates.io-index"
154
+ checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
155
+ dependencies = [
156
+ "magnus-macros",
157
+ "rb-sys",
158
+ "rb-sys-env",
159
+ "seq-macro",
160
+ ]
161
+
162
+ [[package]]
163
+ name = "magnus-macros"
164
+ version = "0.6.0"
165
+ source = "registry+https://github.com/rust-lang/crates.io-index"
166
+ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
167
+ dependencies = [
168
+ "proc-macro2",
169
+ "quote",
170
+ "syn",
171
+ ]
172
+
173
+ [[package]]
174
+ name = "memchr"
175
+ version = "2.7.6"
176
+ source = "registry+https://github.com/rust-lang/crates.io-index"
177
+ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
178
+
179
+ [[package]]
180
+ name = "minimal-lexical"
181
+ version = "0.2.1"
182
+ source = "registry+https://github.com/rust-lang/crates.io-index"
183
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
184
+
185
+ [[package]]
186
+ name = "nom"
187
+ version = "7.1.3"
188
+ source = "registry+https://github.com/rust-lang/crates.io-index"
189
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
190
+ dependencies = [
191
+ "memchr",
192
+ "minimal-lexical",
193
+ ]
194
+
195
+ [[package]]
196
+ name = "proc-macro2"
197
+ version = "1.0.101"
198
+ source = "registry+https://github.com/rust-lang/crates.io-index"
199
+ checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
200
+ dependencies = [
201
+ "unicode-ident",
202
+ ]
203
+
204
+ [[package]]
205
+ name = "quote"
206
+ version = "1.0.40"
207
+ source = "registry+https://github.com/rust-lang/crates.io-index"
208
+ checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
209
+ dependencies = [
210
+ "proc-macro2",
211
+ ]
212
+
213
+ [[package]]
214
+ name = "rb-sys"
215
+ version = "0.9.117"
216
+ source = "registry+https://github.com/rust-lang/crates.io-index"
217
+ checksum = "f900d1ce4629a2ebffaf5de74bd8f9c1188d4c5ed406df02f97e22f77a006f44"
218
+ dependencies = [
219
+ "rb-sys-build",
220
+ ]
221
+
222
+ [[package]]
223
+ name = "rb-sys-build"
224
+ version = "0.9.117"
225
+ source = "registry+https://github.com/rust-lang/crates.io-index"
226
+ checksum = "ef1e9c857028f631056bcd6d88cec390c751e343ce2223ddb26d23eb4a151d59"
227
+ dependencies = [
228
+ "bindgen",
229
+ "lazy_static",
230
+ "proc-macro2",
231
+ "quote",
232
+ "regex",
233
+ "shell-words",
234
+ "syn",
235
+ ]
236
+
237
+ [[package]]
238
+ name = "rb-sys-env"
239
+ version = "0.1.2"
240
+ source = "registry+https://github.com/rust-lang/crates.io-index"
241
+ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
242
+
243
+ [[package]]
244
+ name = "regex"
245
+ version = "1.11.3"
246
+ source = "registry+https://github.com/rust-lang/crates.io-index"
247
+ checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c"
248
+ dependencies = [
249
+ "aho-corasick",
250
+ "memchr",
251
+ "regex-automata",
252
+ "regex-syntax",
253
+ ]
254
+
255
+ [[package]]
256
+ name = "regex-automata"
257
+ version = "0.4.11"
258
+ source = "registry+https://github.com/rust-lang/crates.io-index"
259
+ checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad"
260
+ dependencies = [
261
+ "aho-corasick",
262
+ "memchr",
263
+ "regex-syntax",
264
+ ]
265
+
266
+ [[package]]
267
+ name = "regex-syntax"
268
+ version = "0.8.6"
269
+ source = "registry+https://github.com/rust-lang/crates.io-index"
270
+ checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
271
+
272
+ [[package]]
273
+ name = "rustc-hash"
274
+ version = "1.1.0"
275
+ source = "registry+https://github.com/rust-lang/crates.io-index"
276
+ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
277
+
278
+ [[package]]
279
+ name = "ryu"
280
+ version = "1.0.20"
281
+ source = "registry+https://github.com/rust-lang/crates.io-index"
282
+ checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
283
+
284
+ [[package]]
285
+ name = "seq-macro"
286
+ version = "0.3.6"
287
+ source = "registry+https://github.com/rust-lang/crates.io-index"
288
+ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
289
+
290
+ [[package]]
291
+ name = "serde"
292
+ version = "1.0.227"
293
+ source = "registry+https://github.com/rust-lang/crates.io-index"
294
+ checksum = "80ece43fc6fbed4eb5392ab50c07334d3e577cbf40997ee896fe7af40bba4245"
295
+ dependencies = [
296
+ "serde_core",
297
+ "serde_derive",
298
+ ]
299
+
300
+ [[package]]
301
+ name = "serde_core"
302
+ version = "1.0.227"
303
+ source = "registry+https://github.com/rust-lang/crates.io-index"
304
+ checksum = "7a576275b607a2c86ea29e410193df32bc680303c82f31e275bbfcafe8b33be5"
305
+ dependencies = [
306
+ "serde_derive",
307
+ ]
308
+
309
+ [[package]]
310
+ name = "serde_derive"
311
+ version = "1.0.227"
312
+ source = "registry+https://github.com/rust-lang/crates.io-index"
313
+ checksum = "51e694923b8824cf0e9b382adf0f60d4e05f348f357b38833a3fa5ed7c2ede04"
314
+ dependencies = [
315
+ "proc-macro2",
316
+ "quote",
317
+ "syn",
318
+ ]
319
+
320
+ [[package]]
321
+ name = "serde_json"
322
+ version = "1.0.145"
323
+ source = "registry+https://github.com/rust-lang/crates.io-index"
324
+ checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
325
+ dependencies = [
326
+ "itoa",
327
+ "memchr",
328
+ "ryu",
329
+ "serde",
330
+ "serde_core",
331
+ ]
332
+
333
+ [[package]]
334
+ name = "shell-words"
335
+ version = "1.1.0"
336
+ source = "registry+https://github.com/rust-lang/crates.io-index"
337
+ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
338
+
339
+ [[package]]
340
+ name = "shlex"
341
+ version = "1.3.0"
342
+ source = "registry+https://github.com/rust-lang/crates.io-index"
343
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
344
+
345
+ [[package]]
346
+ name = "spellkit"
347
+ version = "0.1.0"
348
+ dependencies = [
349
+ "hashbrown",
350
+ "magnus",
351
+ "regex",
352
+ "serde",
353
+ "serde_json",
354
+ "unicode-normalization",
355
+ ]
356
+
357
+ [[package]]
358
+ name = "syn"
359
+ version = "2.0.106"
360
+ source = "registry+https://github.com/rust-lang/crates.io-index"
361
+ checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
362
+ dependencies = [
363
+ "proc-macro2",
364
+ "quote",
365
+ "unicode-ident",
366
+ ]
367
+
368
+ [[package]]
369
+ name = "tinyvec"
370
+ version = "1.10.0"
371
+ source = "registry+https://github.com/rust-lang/crates.io-index"
372
+ checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
373
+ dependencies = [
374
+ "tinyvec_macros",
375
+ ]
376
+
377
+ [[package]]
378
+ name = "tinyvec_macros"
379
+ version = "0.1.1"
380
+ source = "registry+https://github.com/rust-lang/crates.io-index"
381
+ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
382
+
383
+ [[package]]
384
+ name = "unicode-ident"
385
+ version = "1.0.19"
386
+ source = "registry+https://github.com/rust-lang/crates.io-index"
387
+ checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
388
+
389
+ [[package]]
390
+ name = "unicode-normalization"
391
+ version = "0.1.24"
392
+ source = "registry+https://github.com/rust-lang/crates.io-index"
393
+ checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
394
+ dependencies = [
395
+ "tinyvec",
396
+ ]
397
+
398
+ [[package]]
399
+ name = "windows-link"
400
+ version = "0.2.0"
401
+ source = "registry+https://github.com/rust-lang/crates.io-index"
402
+ checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
@@ -0,0 +1,21 @@
1
+ [package]
2
+ name = "spellkit"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ authors = ["Chris Petersen <chris@petersen.io>"]
6
+ license = "MIT"
7
+ description = "Fast, safe typo correction for search-term extraction"
8
+
9
+ [lib]
10
+ name = "spellkit"
11
+ crate-type = ["cdylib"]
12
+
13
+ [dependencies]
14
+ magnus = { version = "0.7", features = ["rb-sys"] }
15
+ serde = { version = "1.0", features = ["derive"] }
16
+ serde_json = "1.0"
17
+ hashbrown = "0.15"
18
+ unicode-normalization = "0.1"
19
+ regex = "1.11"
20
+
21
+ [dev-dependencies]
@@ -0,0 +1,4 @@
1
+ require "mkmf"
2
+ require "rb_sys/mkmf"
3
+
4
+ create_rust_makefile("spellkit/spellkit")
@@ -0,0 +1,57 @@
1
+ use hashbrown::HashSet;
2
+ use regex::Regex;
3
+
4
+ #[derive(Debug, Clone)]
5
+ pub struct Guards {
6
+ protected_set: HashSet<String>,
7
+ protected_patterns: Vec<Regex>,
8
+ }
9
+
10
+ impl Guards {
11
+ pub fn new() -> Self {
12
+ Self {
13
+ protected_set: HashSet::new(),
14
+ protected_patterns: Vec::new(),
15
+ }
16
+ }
17
+
18
+ pub fn load_protected(&mut self, content: &str) {
19
+ for line in content.lines() {
20
+ let trimmed = line.trim();
21
+ if !trimmed.is_empty() && !trimmed.starts_with('#') {
22
+ self.protected_set.insert(trimmed.to_string());
23
+ self.protected_set.insert(trimmed.to_lowercase());
24
+ }
25
+ }
26
+ }
27
+
28
+ pub fn add_pattern(&mut self, pattern: &str) -> Result<(), String> {
29
+ match Regex::new(pattern) {
30
+ Ok(regex) => {
31
+ self.protected_patterns.push(regex);
32
+ Ok(())
33
+ }
34
+ Err(e) => Err(format!("Invalid regex pattern: {}", e)),
35
+ }
36
+ }
37
+
38
+ pub fn is_protected(&self, word: &str) -> bool {
39
+ let lower = word.to_lowercase();
40
+
41
+ if self.protected_set.contains(word) || self.protected_set.contains(&lower) {
42
+ return true;
43
+ }
44
+
45
+ for pattern in &self.protected_patterns {
46
+ if pattern.is_match(word) {
47
+ return true;
48
+ }
49
+ }
50
+
51
+ false
52
+ }
53
+
54
+ pub fn is_protected_normalized(&self, word: &str, normalized: &str) -> bool {
55
+ self.is_protected(word) || self.is_protected(normalized)
56
+ }
57
+ }