ruby-fst 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Cargo.lock +298 -0
- data/Cargo.toml +3 -0
- data/Gemfile +9 -0
- data/LICENSE.txt +46 -0
- data/README.md +126 -0
- data/Rakefile +44 -0
- data/ext/ruby_fst/Cargo.toml +13 -0
- data/ext/ruby_fst/extconf.rb +6 -0
- data/ext/ruby_fst/src/lib.rs +405 -0
- data/lib/ruby_fst/version.rb +5 -0
- data/lib/ruby_fst.rb +14 -0
- data/ruby_fst.gemspec +27 -0
- metadata +71 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 82d0d6d69fc9c3c580ecd104cd588bf669d2d8d269e6e73ccdb3650e571ece14
|
|
4
|
+
data.tar.gz: 300c838ff26a9e6fc790ead10d403cfd19c419a40b329f684221bd29b60e7607
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: b5d38c9b3cb260f1632929bd2adce4d5b6f13e6fb7273d2a8af4e69667fe0a36ddf60a1524e48775f6bad0b5954b2e981d0e3bb6428b014ea2ce187e21d10ffb
|
|
7
|
+
data.tar.gz: 6fb9e1d99c4888032eb5f3c0a4657dd0ea5498ad85d6aec28c5bc2da727975006331d7007706a9d3ccfb141b9884545a723ba4e070228ce482eeb5ac3a5abbf6
|
data/Cargo.lock
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
|
2
|
+
# It is not intended for manual editing.
|
|
3
|
+
version = 4
|
|
4
|
+
|
|
5
|
+
[[package]]
|
|
6
|
+
name = "aho-corasick"
|
|
7
|
+
version = "1.1.4"
|
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
+
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"memchr",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[[package]]
|
|
15
|
+
name = "bindgen"
|
|
16
|
+
version = "0.72.1"
|
|
17
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
18
|
+
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
|
19
|
+
dependencies = [
|
|
20
|
+
"bitflags",
|
|
21
|
+
"cexpr",
|
|
22
|
+
"clang-sys",
|
|
23
|
+
"itertools",
|
|
24
|
+
"proc-macro2",
|
|
25
|
+
"quote",
|
|
26
|
+
"regex",
|
|
27
|
+
"rustc-hash",
|
|
28
|
+
"shlex",
|
|
29
|
+
"syn",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[[package]]
|
|
33
|
+
name = "bitflags"
|
|
34
|
+
version = "2.11.1"
|
|
35
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
36
|
+
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
|
|
37
|
+
|
|
38
|
+
[[package]]
|
|
39
|
+
name = "cexpr"
|
|
40
|
+
version = "0.6.0"
|
|
41
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
42
|
+
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
|
43
|
+
dependencies = [
|
|
44
|
+
"nom",
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
[[package]]
|
|
48
|
+
name = "cfg-if"
|
|
49
|
+
version = "1.0.4"
|
|
50
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
51
|
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
52
|
+
|
|
53
|
+
[[package]]
|
|
54
|
+
name = "clang-sys"
|
|
55
|
+
version = "1.8.1"
|
|
56
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
57
|
+
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
|
58
|
+
dependencies = [
|
|
59
|
+
"glob",
|
|
60
|
+
"libc",
|
|
61
|
+
"libloading",
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
[[package]]
|
|
65
|
+
name = "either"
|
|
66
|
+
version = "1.15.0"
|
|
67
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
68
|
+
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
69
|
+
|
|
70
|
+
[[package]]
|
|
71
|
+
name = "fst"
|
|
72
|
+
version = "0.4.7"
|
|
73
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
74
|
+
checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a"
|
|
75
|
+
dependencies = [
|
|
76
|
+
"utf8-ranges",
|
|
77
|
+
]
|
|
78
|
+
|
|
79
|
+
[[package]]
|
|
80
|
+
name = "glob"
|
|
81
|
+
version = "0.3.3"
|
|
82
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
83
|
+
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
|
84
|
+
|
|
85
|
+
[[package]]
|
|
86
|
+
name = "itertools"
|
|
87
|
+
version = "0.13.0"
|
|
88
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
89
|
+
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
|
90
|
+
dependencies = [
|
|
91
|
+
"either",
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
[[package]]
|
|
95
|
+
name = "lazy_static"
|
|
96
|
+
version = "1.5.0"
|
|
97
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
98
|
+
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
|
99
|
+
|
|
100
|
+
[[package]]
|
|
101
|
+
name = "libc"
|
|
102
|
+
version = "0.2.186"
|
|
103
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
104
|
+
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
|
|
105
|
+
|
|
106
|
+
[[package]]
|
|
107
|
+
name = "libloading"
|
|
108
|
+
version = "0.8.9"
|
|
109
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
110
|
+
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
|
111
|
+
dependencies = [
|
|
112
|
+
"cfg-if",
|
|
113
|
+
"windows-link",
|
|
114
|
+
]
|
|
115
|
+
|
|
116
|
+
[[package]]
|
|
117
|
+
name = "magnus"
|
|
118
|
+
version = "0.7.1"
|
|
119
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
120
|
+
checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
|
|
121
|
+
dependencies = [
|
|
122
|
+
"magnus-macros",
|
|
123
|
+
"rb-sys",
|
|
124
|
+
"rb-sys-env",
|
|
125
|
+
"seq-macro",
|
|
126
|
+
]
|
|
127
|
+
|
|
128
|
+
[[package]]
|
|
129
|
+
name = "magnus-macros"
|
|
130
|
+
version = "0.6.0"
|
|
131
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
132
|
+
checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
|
|
133
|
+
dependencies = [
|
|
134
|
+
"proc-macro2",
|
|
135
|
+
"quote",
|
|
136
|
+
"syn",
|
|
137
|
+
]
|
|
138
|
+
|
|
139
|
+
[[package]]
|
|
140
|
+
name = "memchr"
|
|
141
|
+
version = "2.8.0"
|
|
142
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
143
|
+
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
|
144
|
+
|
|
145
|
+
[[package]]
|
|
146
|
+
name = "minimal-lexical"
|
|
147
|
+
version = "0.2.1"
|
|
148
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
149
|
+
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
|
150
|
+
|
|
151
|
+
[[package]]
|
|
152
|
+
name = "nom"
|
|
153
|
+
version = "7.1.3"
|
|
154
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
155
|
+
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
|
156
|
+
dependencies = [
|
|
157
|
+
"memchr",
|
|
158
|
+
"minimal-lexical",
|
|
159
|
+
]
|
|
160
|
+
|
|
161
|
+
[[package]]
|
|
162
|
+
name = "proc-macro2"
|
|
163
|
+
version = "1.0.106"
|
|
164
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
165
|
+
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
|
166
|
+
dependencies = [
|
|
167
|
+
"unicode-ident",
|
|
168
|
+
]
|
|
169
|
+
|
|
170
|
+
[[package]]
|
|
171
|
+
name = "quote"
|
|
172
|
+
version = "1.0.45"
|
|
173
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
174
|
+
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
|
175
|
+
dependencies = [
|
|
176
|
+
"proc-macro2",
|
|
177
|
+
]
|
|
178
|
+
|
|
179
|
+
[[package]]
|
|
180
|
+
name = "rb-sys"
|
|
181
|
+
version = "0.9.128"
|
|
182
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
183
|
+
checksum = "45ca28513560e56cfb79a62b1fce363c73af170a182024ce880c77ee9429920a"
|
|
184
|
+
dependencies = [
|
|
185
|
+
"rb-sys-build",
|
|
186
|
+
]
|
|
187
|
+
|
|
188
|
+
[[package]]
|
|
189
|
+
name = "rb-sys-build"
|
|
190
|
+
version = "0.9.128"
|
|
191
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
192
|
+
checksum = "ce04b2c55eff3a21aaa623fcc655d94373238e72cac6b3e1a3641ff31649f99a"
|
|
193
|
+
dependencies = [
|
|
194
|
+
"bindgen",
|
|
195
|
+
"lazy_static",
|
|
196
|
+
"proc-macro2",
|
|
197
|
+
"quote",
|
|
198
|
+
"regex",
|
|
199
|
+
"shell-words",
|
|
200
|
+
"syn",
|
|
201
|
+
]
|
|
202
|
+
|
|
203
|
+
[[package]]
|
|
204
|
+
name = "rb-sys-env"
|
|
205
|
+
version = "0.1.2"
|
|
206
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
207
|
+
checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
208
|
+
|
|
209
|
+
[[package]]
|
|
210
|
+
name = "regex"
|
|
211
|
+
version = "1.12.3"
|
|
212
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
213
|
+
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
|
|
214
|
+
dependencies = [
|
|
215
|
+
"aho-corasick",
|
|
216
|
+
"memchr",
|
|
217
|
+
"regex-automata",
|
|
218
|
+
"regex-syntax",
|
|
219
|
+
]
|
|
220
|
+
|
|
221
|
+
[[package]]
|
|
222
|
+
name = "regex-automata"
|
|
223
|
+
version = "0.4.14"
|
|
224
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
225
|
+
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
|
226
|
+
dependencies = [
|
|
227
|
+
"aho-corasick",
|
|
228
|
+
"memchr",
|
|
229
|
+
"regex-syntax",
|
|
230
|
+
]
|
|
231
|
+
|
|
232
|
+
[[package]]
|
|
233
|
+
name = "regex-syntax"
|
|
234
|
+
version = "0.8.10"
|
|
235
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
236
|
+
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
|
|
237
|
+
|
|
238
|
+
[[package]]
|
|
239
|
+
name = "ruby_fst"
|
|
240
|
+
version = "0.1.0"
|
|
241
|
+
dependencies = [
|
|
242
|
+
"fst",
|
|
243
|
+
"magnus",
|
|
244
|
+
"rb-sys",
|
|
245
|
+
]
|
|
246
|
+
|
|
247
|
+
[[package]]
|
|
248
|
+
name = "rustc-hash"
|
|
249
|
+
version = "2.1.2"
|
|
250
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
251
|
+
checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
|
|
252
|
+
|
|
253
|
+
[[package]]
|
|
254
|
+
name = "seq-macro"
|
|
255
|
+
version = "0.3.6"
|
|
256
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
257
|
+
checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
|
|
258
|
+
|
|
259
|
+
[[package]]
|
|
260
|
+
name = "shell-words"
|
|
261
|
+
version = "1.1.1"
|
|
262
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
263
|
+
checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
|
|
264
|
+
|
|
265
|
+
[[package]]
|
|
266
|
+
name = "shlex"
|
|
267
|
+
version = "1.3.0"
|
|
268
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
269
|
+
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
|
270
|
+
|
|
271
|
+
[[package]]
|
|
272
|
+
name = "syn"
|
|
273
|
+
version = "2.0.117"
|
|
274
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
275
|
+
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
|
276
|
+
dependencies = [
|
|
277
|
+
"proc-macro2",
|
|
278
|
+
"quote",
|
|
279
|
+
"unicode-ident",
|
|
280
|
+
]
|
|
281
|
+
|
|
282
|
+
[[package]]
|
|
283
|
+
name = "unicode-ident"
|
|
284
|
+
version = "1.0.24"
|
|
285
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
286
|
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
287
|
+
|
|
288
|
+
[[package]]
|
|
289
|
+
name = "utf8-ranges"
|
|
290
|
+
version = "1.0.5"
|
|
291
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
292
|
+
checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba"
|
|
293
|
+
|
|
294
|
+
[[package]]
|
|
295
|
+
name = "windows-link"
|
|
296
|
+
version = "0.2.1"
|
|
297
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
298
|
+
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
data/Cargo.toml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Denis Sablic
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
This gem wraps the fst crate (https://github.com/BurntSushi/fst) by
|
|
26
|
+
Andrew Gallant, which is licensed under the MIT License:
|
|
27
|
+
|
|
28
|
+
Copyright (c) 2015 Andrew Gallant
|
|
29
|
+
|
|
30
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
31
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
32
|
+
in the Software without restriction, including without limitation the rights
|
|
33
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
34
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
35
|
+
furnished to do so, subject to the following conditions:
|
|
36
|
+
|
|
37
|
+
The above copyright notice and this permission notice shall be included in
|
|
38
|
+
all copies or substantial portions of the Software.
|
|
39
|
+
|
|
40
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
41
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
42
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
43
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
44
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
45
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
46
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# ruby-fst
|
|
2
|
+
|
|
3
|
+
Ruby bindings for the [fst](https://github.com/BurntSushi/fst) crate by Andrew Gallant. Provides ordered maps and sets backed by finite state transducers, with fast lookup, range queries, and fuzzy search.
|
|
4
|
+
|
|
5
|
+
## Requirements
|
|
6
|
+
|
|
7
|
+
- Ruby >= 3.0
|
|
8
|
+
- Rust toolchain (for compilation)
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```ruby
|
|
13
|
+
gem 'ruby-fst'
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Usage
|
|
17
|
+
|
|
18
|
+
### Map
|
|
19
|
+
|
|
20
|
+
Ordered map from byte string keys to unsigned 64-bit integer values. Keys must be inserted in lexicographic order.
|
|
21
|
+
|
|
22
|
+
```ruby
|
|
23
|
+
require 'ruby_fst'
|
|
24
|
+
|
|
25
|
+
builder = RubyFst::MapBuilder.new
|
|
26
|
+
builder.insert('bar', 2)
|
|
27
|
+
builder.insert('baz', 3)
|
|
28
|
+
builder.insert('foo', 1)
|
|
29
|
+
map = RubyFst::Map.new(builder.finish)
|
|
30
|
+
|
|
31
|
+
map['foo'] # => 1
|
|
32
|
+
map.get('missing') # => nil
|
|
33
|
+
map.contains?('bar') # => true
|
|
34
|
+
map.length # => 3
|
|
35
|
+
|
|
36
|
+
map.each { |key, value| puts "#{key}: #{value}" }
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Set
|
|
40
|
+
|
|
41
|
+
Ordered set of byte string keys.
|
|
42
|
+
|
|
43
|
+
```ruby
|
|
44
|
+
builder = RubyFst::SetBuilder.new
|
|
45
|
+
builder.insert('bar')
|
|
46
|
+
builder.insert('baz')
|
|
47
|
+
builder.insert('foo')
|
|
48
|
+
set = RubyFst::Set.new(builder.finish)
|
|
49
|
+
|
|
50
|
+
set.contains?('foo') # => true
|
|
51
|
+
set.length # => 3
|
|
52
|
+
|
|
53
|
+
set.each { |key| puts key }
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Floor and ceiling lookups
|
|
57
|
+
|
|
58
|
+
`get_le` returns the greatest key less than or equal to the query. `get_ge` returns the smallest key greater than or equal to the query. Both return `[key, value]` or `nil`.
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
builder = RubyFst::MapBuilder.new
|
|
62
|
+
builder.insert('bar', 1)
|
|
63
|
+
builder.insert('foo', 2)
|
|
64
|
+
builder.insert('qux', 3)
|
|
65
|
+
map = RubyFst::Map.new(builder.finish)
|
|
66
|
+
|
|
67
|
+
map.get_le('dog') # => ["bar", 1]
|
|
68
|
+
map.get_ge('dog') # => ["foo", 2]
|
|
69
|
+
map.get_le('aaa') # => nil
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
This is useful for IP range lookups. Encode range starts as 4-byte big-endian keys and use `get_le` to find which range an IP falls into:
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
builder = RubyFst::MapBuilder.new
|
|
76
|
+
builder.insert([167_772_160].pack('N'), 1) # 10.0.0.0
|
|
77
|
+
builder.insert([3_232_235_520].pack('N'), 2) # 192.168.0.0
|
|
78
|
+
map = RubyFst::Map.new(builder.finish)
|
|
79
|
+
|
|
80
|
+
ip = IPAddr.new('10.0.0.100').to_i # IPAddr comes from the standard library: require 'ipaddr'
|
|
81
|
+
key, label_id = map.get_le([ip].pack('N'))
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Levenshtein search
|
|
85
|
+
|
|
86
|
+
Find all keys within a given edit distance. The search runs as an automaton intersection with the FST, visiting only reachable states.
|
|
87
|
+
|
|
88
|
+
```ruby
|
|
89
|
+
builder = RubyFst::MapBuilder.new
|
|
90
|
+
%w(bar baz cat foo fun).each_with_index { |w, i| builder.insert(w, i) }
|
|
91
|
+
map = RubyFst::Map.new(builder.finish)
|
|
92
|
+
|
|
93
|
+
map.search_levenshtein('far', 1) { |key, value| puts key }
|
|
94
|
+
# => bar
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Works on sets too:
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
set.search_levenshtein('university', 2) { |key| puts key }
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Serialization
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
# To/from bytes
|
|
107
|
+
bytes = map.to_bytes
|
|
108
|
+
map = RubyFst::Map.new(bytes)
|
|
109
|
+
|
|
110
|
+
# To/from file
|
|
111
|
+
map.save('/path/to/file.fst')
|
|
112
|
+
map = RubyFst::Map.from_path('/path/to/file.fst')
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Development
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
bundle install
|
|
119
|
+
bundle exec rake compile test
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## License
|
|
123
|
+
|
|
124
|
+
MIT. See [LICENSE.txt](LICENSE.txt) for details.
|
|
125
|
+
|
|
126
|
+
This gem wraps the [fst](https://github.com/BurntSushi/fst) crate by Andrew Gallant, also MIT licensed.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true

require 'rake/testtask'
require 'rb_sys/extensiontask'

GEMSPEC = Gem::Specification.load('ruby_fst.gemspec')

# Compile the native extension and place the built library under lib/ruby_fst.
RbSys::ExtensionTask.new('ruby_fst', GEMSPEC) do |ext|
  ext.lib_dir = 'lib/ruby_fst'
end

# Run every test/**/*_test.rb file with test/ on the load path.
Rake::TestTask.new do |t|
  t.libs << 'test'
  t.test_files = FileList['test/**/*_test.rb']
end

task default: %i(compile test)

# Bumps the gem version in lib/ruby_fst/version.rb and mirrors it into
# ext/ruby_fst/Cargo.toml so both files carry the same number.
#
# Both files are read and checked before anything is written, so a missing
# file or an unrecognised format cannot leave one bumped and the other not.
desc 'Bump version (rake bump[patch], rake bump[minor], rake bump[major])'
task :bump, [:level] do |_, args|
  level = args[:level] || 'patch'
  version_file = File.join(__dir__, 'lib', 'ruby_fst', 'version.rb')
  cargo_file = File.join(__dir__, 'ext', 'ruby_fst', 'Cargo.toml')

  # Only the first line starting with `version = "` is rewritten.
  cargo_pattern = /^version = ".+"/

  content = File.read(version_file)
  cargo = File.read(cargo_file)

  current = content[/VERSION = '(.+)'/, 1]
  abort("No VERSION constant found in #{version_file}") unless current
  abort("No package version found in #{cargo_file}") unless cargo.match?(cargo_pattern)

  major, minor, patch = current.split('.').map(&:to_i)
  # Tolerate short versions such as '1' or '1.2' by treating the rest as 0.
  minor ||= 0
  patch ||= 0

  case level
  when 'major' then major += 1; minor = 0; patch = 0
  when 'minor' then minor += 1; patch = 0
  when 'patch' then patch += 1
  else abort("Unknown level: #{level}. Use major, minor, or patch.")
  end

  new_version = "#{major}.#{minor}.#{patch}"

  File.write(version_file, content.sub(/VERSION = '.+'/, "VERSION = '#{new_version}'"))
  File.write(cargo_file, cargo.sub(cargo_pattern, "version = \"#{new_version}\""))

  puts("#{current} -> #{new_version}")
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "ruby_fst"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
publish = false
|
|
6
|
+
|
|
7
|
+
[lib]
|
|
8
|
+
crate-type = ["cdylib"]
|
|
9
|
+
|
|
10
|
+
[dependencies]
|
|
11
|
+
magnus = { version = "0.7", features = ["rb-sys"] }
|
|
12
|
+
rb-sys = { version = "0.9", features = ["stable-api-compiled-fallback"] }
|
|
13
|
+
fst = { version = "0.4", features = ["levenshtein"] }
|
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
use std::cell::RefCell;
|
|
2
|
+
use std::fs;
|
|
3
|
+
|
|
4
|
+
use fst::automaton::Levenshtein;
|
|
5
|
+
use fst::raw::{CompiledAddr, Fst, Node, Output};
|
|
6
|
+
use fst::{IntoStreamer, Streamer};
|
|
7
|
+
use magnus::prelude::*;
|
|
8
|
+
use magnus::{block, exception, function, method, Error, RArray, RString, Ruby, Value};
|
|
9
|
+
|
|
10
|
+
fn err(msg: impl std::fmt::Display) -> Error {
|
|
11
|
+
Error::new(exception::runtime_error(), msg.to_string())
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
fn ruby() -> Ruby {
|
|
15
|
+
unsafe { Ruby::get_unchecked() }
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Map
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
#[magnus::wrap(class = "RubyFst::Map", free_immediately, size)]
|
|
23
|
+
struct FstMap {
|
|
24
|
+
inner: fst::Map<Vec<u8>>,
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
impl FstMap {
|
|
28
|
+
fn new(bytes: RString) -> Result<Self, Error> {
|
|
29
|
+
let data = unsafe { bytes.as_slice() }.to_vec();
|
|
30
|
+
let inner = fst::Map::new(data).map_err(err)?;
|
|
31
|
+
Ok(Self { inner })
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
fn from_path(path: String) -> Result<Self, Error> {
|
|
35
|
+
let data = fs::read(&path).map_err(err)?;
|
|
36
|
+
let inner = fst::Map::new(data).map_err(err)?;
|
|
37
|
+
Ok(Self { inner })
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
fn get(&self, key: RString) -> Option<u64> {
|
|
41
|
+
let key = unsafe { key.as_slice() };
|
|
42
|
+
self.inner.get(key)
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
fn contains(&self, key: RString) -> bool {
|
|
46
|
+
let key = unsafe { key.as_slice() };
|
|
47
|
+
self.inner.contains_key(key)
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
fn len(&self) -> usize {
|
|
51
|
+
self.inner.len()
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
fn is_empty(&self) -> bool {
|
|
55
|
+
self.inner.is_empty()
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
fn to_bytes(&self) -> RString {
|
|
59
|
+
ruby().str_from_slice(self.inner.as_fst().as_bytes())
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
fn save(&self, path: String) -> Result<(), Error> {
|
|
63
|
+
fs::write(&path, self.inner.as_fst().as_bytes()).map_err(err)
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
fn get_le(&self, key: RString) -> Result<Option<RArray>, Error> {
|
|
67
|
+
let r = ruby();
|
|
68
|
+
let key = unsafe { key.as_slice() };
|
|
69
|
+
match floor_lookup(self.inner.as_fst(), key) {
|
|
70
|
+
Some((found_key, value)) => {
|
|
71
|
+
let arr = r.ary_new_capa(2);
|
|
72
|
+
arr.push(r.str_from_slice(&found_key))?;
|
|
73
|
+
arr.push(value)?;
|
|
74
|
+
Ok(Some(arr))
|
|
75
|
+
}
|
|
76
|
+
None => Ok(None),
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
fn get_ge(&self, key: RString) -> Result<Option<RArray>, Error> {
|
|
81
|
+
let r = ruby();
|
|
82
|
+
let key = unsafe { key.as_slice() };
|
|
83
|
+
let mut stream = self.inner.range().ge(key).into_stream();
|
|
84
|
+
match stream.next() {
|
|
85
|
+
Some((k, v)) => {
|
|
86
|
+
let arr = r.ary_new_capa(2);
|
|
87
|
+
arr.push(r.str_from_slice(k))?;
|
|
88
|
+
arr.push(v)?;
|
|
89
|
+
Ok(Some(arr))
|
|
90
|
+
}
|
|
91
|
+
None => Ok(None),
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
fn each(&self) -> Result<(), Error> {
|
|
96
|
+
let r = ruby();
|
|
97
|
+
let mut stream = (&self.inner).into_stream();
|
|
98
|
+
while let Some((key, value)) = stream.next() {
|
|
99
|
+
let rb_key = r.str_from_slice(key);
|
|
100
|
+
let _: Value = block::yield_values((rb_key, value))?;
|
|
101
|
+
}
|
|
102
|
+
Ok(())
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
fn search_levenshtein(&self, query: String, distance: u32) -> Result<(), Error> {
|
|
106
|
+
let r = ruby();
|
|
107
|
+
let lev = Levenshtein::new(&query, distance).map_err(err)?;
|
|
108
|
+
let mut stream = self.inner.search(lev).into_stream();
|
|
109
|
+
while let Some((key, value)) = stream.next() {
|
|
110
|
+
let rb_key = r.str_from_slice(key);
|
|
111
|
+
let _: Value = block::yield_values((rb_key, value))?;
|
|
112
|
+
}
|
|
113
|
+
Ok(())
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// ---------------------------------------------------------------------------
|
|
118
|
+
// MapBuilder
|
|
119
|
+
// ---------------------------------------------------------------------------
|
|
120
|
+
|
|
121
|
+
#[magnus::wrap(class = "RubyFst::MapBuilder", free_immediately, size)]
|
|
122
|
+
struct FstMapBuilder {
|
|
123
|
+
inner: RefCell<Option<fst::MapBuilder<Vec<u8>>>>,
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
impl FstMapBuilder {
|
|
127
|
+
fn new() -> Self {
|
|
128
|
+
Self {
|
|
129
|
+
inner: RefCell::new(Some(fst::MapBuilder::memory())),
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
fn insert(&self, key: RString, value: u64) -> Result<(), Error> {
|
|
134
|
+
let key = unsafe { key.as_slice() }.to_vec();
|
|
135
|
+
let mut guard = self.inner.borrow_mut();
|
|
136
|
+
let b = guard.as_mut().ok_or_else(|| err("builder already finished"))?;
|
|
137
|
+
b.insert(&key, value).map_err(err)
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
fn finish(&self) -> Result<RString, Error> {
|
|
141
|
+
let mut guard = self.inner.borrow_mut();
|
|
142
|
+
let b = guard.take().ok_or_else(|| err("builder already finished"))?;
|
|
143
|
+
let bytes = b.into_inner().map_err(err)?;
|
|
144
|
+
Ok(ruby().str_from_slice(&bytes))
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// ---------------------------------------------------------------------------
|
|
149
|
+
// Set
|
|
150
|
+
// ---------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
#[magnus::wrap(class = "RubyFst::Set", free_immediately, size)]
|
|
153
|
+
struct FstSet {
|
|
154
|
+
inner: fst::Set<Vec<u8>>,
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
impl FstSet {
|
|
158
|
+
fn new(bytes: RString) -> Result<Self, Error> {
|
|
159
|
+
let data = unsafe { bytes.as_slice() }.to_vec();
|
|
160
|
+
let inner = fst::Set::new(data).map_err(err)?;
|
|
161
|
+
Ok(Self { inner })
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
fn from_path(path: String) -> Result<Self, Error> {
|
|
165
|
+
let data = fs::read(&path).map_err(err)?;
|
|
166
|
+
let inner = fst::Set::new(data).map_err(err)?;
|
|
167
|
+
Ok(Self { inner })
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
fn contains(&self, key: RString) -> bool {
|
|
171
|
+
let key = unsafe { key.as_slice() };
|
|
172
|
+
self.inner.contains(key)
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
fn len(&self) -> usize {
|
|
176
|
+
self.inner.len()
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
fn is_empty(&self) -> bool {
|
|
180
|
+
self.inner.is_empty()
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
fn to_bytes(&self) -> RString {
|
|
184
|
+
ruby().str_from_slice(self.inner.as_fst().as_bytes())
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
fn save(&self, path: String) -> Result<(), Error> {
|
|
188
|
+
fs::write(&path, self.inner.as_fst().as_bytes()).map_err(err)
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
fn each(&self) -> Result<(), Error> {
|
|
192
|
+
let r = ruby();
|
|
193
|
+
let mut stream = (&self.inner).into_stream();
|
|
194
|
+
while let Some(key) = stream.next() {
|
|
195
|
+
let rb_key = r.str_from_slice(key);
|
|
196
|
+
let _: Value = block::yield_value(rb_key)?;
|
|
197
|
+
}
|
|
198
|
+
Ok(())
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
fn search_levenshtein(&self, query: String, distance: u32) -> Result<(), Error> {
|
|
202
|
+
let r = ruby();
|
|
203
|
+
let lev = Levenshtein::new(&query, distance).map_err(err)?;
|
|
204
|
+
let mut stream = self.inner.search(lev).into_stream();
|
|
205
|
+
while let Some(key) = stream.next() {
|
|
206
|
+
let rb_key = r.str_from_slice(key);
|
|
207
|
+
let _: Value = block::yield_value(rb_key)?;
|
|
208
|
+
}
|
|
209
|
+
Ok(())
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
// ---------------------------------------------------------------------------
|
|
214
|
+
// SetBuilder
|
|
215
|
+
// ---------------------------------------------------------------------------
|
|
216
|
+
|
|
217
|
+
#[magnus::wrap(class = "RubyFst::SetBuilder", free_immediately, size)]
|
|
218
|
+
struct FstSetBuilder {
|
|
219
|
+
inner: RefCell<Option<fst::SetBuilder<Vec<u8>>>>,
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
impl FstSetBuilder {
|
|
223
|
+
fn new() -> Self {
|
|
224
|
+
Self {
|
|
225
|
+
inner: RefCell::new(Some(fst::SetBuilder::memory())),
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
fn insert(&self, key: RString) -> Result<(), Error> {
|
|
230
|
+
let key = unsafe { key.as_slice() }.to_vec();
|
|
231
|
+
let mut guard = self.inner.borrow_mut();
|
|
232
|
+
let b = guard.as_mut().ok_or_else(|| err("builder already finished"))?;
|
|
233
|
+
b.insert(&key).map_err(err)
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
fn finish(&self) -> Result<RString, Error> {
|
|
237
|
+
let mut guard = self.inner.borrow_mut();
|
|
238
|
+
let b = guard.take().ok_or_else(|| err("builder already finished"))?;
|
|
239
|
+
let bytes = b.into_inner().map_err(err)?;
|
|
240
|
+
Ok(ruby().str_from_slice(&bytes))
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// ---------------------------------------------------------------------------
|
|
245
|
+
// Floor lookup (get_le): greatest key <= query
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
|
|
248
|
+
struct Frame {
|
|
249
|
+
node_addr: CompiledAddr,
|
|
250
|
+
output: Output,
|
|
251
|
+
prefix_len: usize,
|
|
252
|
+
max_lesser_idx: Option<usize>,
|
|
253
|
+
is_final: bool,
|
|
254
|
+
final_value: u64,
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
fn find_max_lesser(node: &Node, byte: u8) -> Option<usize> {
|
|
258
|
+
let n = node.len();
|
|
259
|
+
if n == 0 {
|
|
260
|
+
return None;
|
|
261
|
+
}
|
|
262
|
+
let mut lo: usize = 0;
|
|
263
|
+
let mut hi: usize = n;
|
|
264
|
+
while lo < hi {
|
|
265
|
+
let mid = lo + (hi - lo) / 2;
|
|
266
|
+
if node.transition(mid).inp < byte {
|
|
267
|
+
lo = mid + 1;
|
|
268
|
+
} else {
|
|
269
|
+
hi = mid;
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
lo.checked_sub(1)
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
fn rightmost_to_leaf<D: AsRef<[u8]>>(
|
|
276
|
+
fst: &Fst<D>,
|
|
277
|
+
addr: CompiledAddr,
|
|
278
|
+
output: Output,
|
|
279
|
+
) -> (Vec<u8>, u64) {
|
|
280
|
+
let mut node = fst.node(addr);
|
|
281
|
+
let mut out = output;
|
|
282
|
+
let mut suffix = Vec::new();
|
|
283
|
+
|
|
284
|
+
while node.len() > 0 {
|
|
285
|
+
let last = node.len() - 1;
|
|
286
|
+
let t = node.transition(last);
|
|
287
|
+
suffix.push(t.inp);
|
|
288
|
+
out = out.cat(t.out);
|
|
289
|
+
node = fst.node(t.addr);
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
(suffix, out.cat(node.final_output()).value())
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
fn floor_lookup<D: AsRef<[u8]>>(fst: &Fst<D>, key: &[u8]) -> Option<(Vec<u8>, u64)> {
|
|
296
|
+
let root = fst.root();
|
|
297
|
+
|
|
298
|
+
if key.is_empty() {
|
|
299
|
+
return if root.is_final() {
|
|
300
|
+
Some((Vec::new(), root.final_output().value()))
|
|
301
|
+
} else {
|
|
302
|
+
None
|
|
303
|
+
};
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
let mut node = root;
|
|
307
|
+
let mut output = Output::zero();
|
|
308
|
+
let mut stack: Vec<Frame> = Vec::with_capacity(key.len());
|
|
309
|
+
let mut matched: usize = 0;
|
|
310
|
+
|
|
311
|
+
for &byte in key.iter() {
|
|
312
|
+
let lesser = find_max_lesser(&node, byte);
|
|
313
|
+
|
|
314
|
+
stack.push(Frame {
|
|
315
|
+
node_addr: node.addr(),
|
|
316
|
+
output,
|
|
317
|
+
prefix_len: matched,
|
|
318
|
+
max_lesser_idx: lesser,
|
|
319
|
+
is_final: node.is_final(),
|
|
320
|
+
final_value: output.cat(node.final_output()).value(),
|
|
321
|
+
});
|
|
322
|
+
|
|
323
|
+
match node.find_input(byte) {
|
|
324
|
+
Some(idx) => {
|
|
325
|
+
let t = node.transition(idx);
|
|
326
|
+
output = output.cat(t.out);
|
|
327
|
+
node = fst.node(t.addr);
|
|
328
|
+
matched += 1;
|
|
329
|
+
}
|
|
330
|
+
None => break,
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
if matched == key.len() && node.is_final() {
|
|
335
|
+
return Some((key.to_vec(), output.cat(node.final_output()).value()));
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
while let Some(frame) = stack.pop() {
|
|
339
|
+
if let Some(j) = frame.max_lesser_idx {
|
|
340
|
+
let frame_node = fst.node(frame.node_addr);
|
|
341
|
+
let t = frame_node.transition(j);
|
|
342
|
+
let mut result = key[..frame.prefix_len].to_vec();
|
|
343
|
+
result.push(t.inp);
|
|
344
|
+
let branch_output = frame.output.cat(t.out);
|
|
345
|
+
let (suffix, val) = rightmost_to_leaf(fst, t.addr, branch_output);
|
|
346
|
+
result.extend(suffix);
|
|
347
|
+
return Some((result, val));
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
if frame.is_final {
|
|
351
|
+
return Some((key[..frame.prefix_len].to_vec(), frame.final_value));
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
None
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// ---------------------------------------------------------------------------
|
|
359
|
+
// Init
|
|
360
|
+
// ---------------------------------------------------------------------------
|
|
361
|
+
|
|
362
|
+
#[magnus::init]
|
|
363
|
+
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
364
|
+
let module = ruby.define_module("RubyFst")?;
|
|
365
|
+
|
|
366
|
+
let map_class = module.define_class("Map", ruby.class_object())?;
|
|
367
|
+
map_class.define_singleton_method("new", function!(FstMap::new, 1))?;
|
|
368
|
+
map_class.define_singleton_method("from_path", function!(FstMap::from_path, 1))?;
|
|
369
|
+
map_class.define_method("get", method!(FstMap::get, 1))?;
|
|
370
|
+
map_class.define_method("[]", method!(FstMap::get, 1))?;
|
|
371
|
+
map_class.define_method("contains?", method!(FstMap::contains, 1))?;
|
|
372
|
+
map_class.define_method("length", method!(FstMap::len, 0))?;
|
|
373
|
+
map_class.define_method("size", method!(FstMap::len, 0))?;
|
|
374
|
+
map_class.define_method("empty?", method!(FstMap::is_empty, 0))?;
|
|
375
|
+
map_class.define_method("to_bytes", method!(FstMap::to_bytes, 0))?;
|
|
376
|
+
map_class.define_method("save", method!(FstMap::save, 1))?;
|
|
377
|
+
map_class.define_method("get_le", method!(FstMap::get_le, 1))?;
|
|
378
|
+
map_class.define_method("get_ge", method!(FstMap::get_ge, 1))?;
|
|
379
|
+
map_class.define_method("each", method!(FstMap::each, 0))?;
|
|
380
|
+
map_class.define_method("search_levenshtein", method!(FstMap::search_levenshtein, 2))?;
|
|
381
|
+
|
|
382
|
+
let map_builder = module.define_class("MapBuilder", ruby.class_object())?;
|
|
383
|
+
map_builder.define_singleton_method("new", function!(FstMapBuilder::new, 0))?;
|
|
384
|
+
map_builder.define_method("insert", method!(FstMapBuilder::insert, 2))?;
|
|
385
|
+
map_builder.define_method("finish", method!(FstMapBuilder::finish, 0))?;
|
|
386
|
+
|
|
387
|
+
let set_class = module.define_class("Set", ruby.class_object())?;
|
|
388
|
+
set_class.define_singleton_method("new", function!(FstSet::new, 1))?;
|
|
389
|
+
set_class.define_singleton_method("from_path", function!(FstSet::from_path, 1))?;
|
|
390
|
+
set_class.define_method("contains?", method!(FstSet::contains, 1))?;
|
|
391
|
+
set_class.define_method("length", method!(FstSet::len, 0))?;
|
|
392
|
+
set_class.define_method("size", method!(FstSet::len, 0))?;
|
|
393
|
+
set_class.define_method("empty?", method!(FstSet::is_empty, 0))?;
|
|
394
|
+
set_class.define_method("to_bytes", method!(FstSet::to_bytes, 0))?;
|
|
395
|
+
set_class.define_method("save", method!(FstSet::save, 1))?;
|
|
396
|
+
set_class.define_method("each", method!(FstSet::each, 0))?;
|
|
397
|
+
set_class.define_method("search_levenshtein", method!(FstSet::search_levenshtein, 2))?;
|
|
398
|
+
|
|
399
|
+
let set_builder = module.define_class("SetBuilder", ruby.class_object())?;
|
|
400
|
+
set_builder.define_singleton_method("new", function!(FstSetBuilder::new, 0))?;
|
|
401
|
+
set_builder.define_method("insert", method!(FstSetBuilder::insert, 1))?;
|
|
402
|
+
set_builder.define_method("finish", method!(FstSetBuilder::finish, 0))?;
|
|
403
|
+
|
|
404
|
+
Ok(())
|
|
405
|
+
}
|
data/lib/ruby_fst.rb
ADDED
data/ruby_fst.gemspec
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true

require_relative 'lib/ruby_fst/version'

# Gem specification for ruby-fst. The gem declares a native extension that is
# configured through ext/ruby_fst/extconf.rb.
Gem::Specification.new do |s|
  s.name        = 'ruby-fst'
  s.version     = RubyFst::VERSION
  s.authors     = ['Denis Sablic']
  s.email       = ['denis.sablic@gmail.com']
  s.license     = 'MIT'
  s.homepage    = 'https://github.com/dsablic/ruby-fst'
  s.summary     = 'Ruby bindings for the Rust fst crate'
  s.description = 'Finite state transducer backed ordered sets and maps via the Rust fst crate by BurntSushi'

  s.required_ruby_version = '>= 3.0'

  s.metadata.merge!(
    'rubygems_mfa_required' => 'true',
    'source_code_uri' => 'https://github.com/dsablic/ruby-fst',
    'changelog_uri' => 'https://github.com/dsablic/ruby-fst/releases'
  )

  # Package every git-tracked file except paths starting with 'test/' or '.git'.
  s.files = Dir.chdir(__dir__) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.start_with?('test/', '.git') }
  end

  s.extensions    = ['ext/ruby_fst/extconf.rb']
  s.require_paths = ['lib']

  s.add_dependency 'rb_sys', '~> 0.9'
end
|
metadata
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: ruby-fst
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Denis Sablic
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: rb_sys
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0.9'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0.9'
|
|
26
|
+
description: Finite state transducer backed ordered sets and maps via the Rust fst
|
|
27
|
+
crate by BurntSushi
|
|
28
|
+
email:
|
|
29
|
+
- denis.sablic@gmail.com
|
|
30
|
+
executables: []
|
|
31
|
+
extensions:
|
|
32
|
+
- ext/ruby_fst/extconf.rb
|
|
33
|
+
extra_rdoc_files: []
|
|
34
|
+
files:
|
|
35
|
+
- Cargo.lock
|
|
36
|
+
- Cargo.toml
|
|
37
|
+
- Gemfile
|
|
38
|
+
- LICENSE.txt
|
|
39
|
+
- README.md
|
|
40
|
+
- Rakefile
|
|
41
|
+
- ext/ruby_fst/Cargo.toml
|
|
42
|
+
- ext/ruby_fst/extconf.rb
|
|
43
|
+
- ext/ruby_fst/src/lib.rs
|
|
44
|
+
- lib/ruby_fst.rb
|
|
45
|
+
- lib/ruby_fst/version.rb
|
|
46
|
+
- ruby_fst.gemspec
|
|
47
|
+
homepage: https://github.com/dsablic/ruby-fst
|
|
48
|
+
licenses:
|
|
49
|
+
- MIT
|
|
50
|
+
metadata:
|
|
51
|
+
rubygems_mfa_required: 'true'
|
|
52
|
+
source_code_uri: https://github.com/dsablic/ruby-fst
|
|
53
|
+
changelog_uri: https://github.com/dsablic/ruby-fst/releases
|
|
54
|
+
rdoc_options: []
|
|
55
|
+
require_paths:
|
|
56
|
+
- lib
|
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '3.0'
|
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
|
+
requirements:
|
|
64
|
+
- - ">="
|
|
65
|
+
- !ruby/object:Gem::Version
|
|
66
|
+
version: '0'
|
|
67
|
+
requirements: []
|
|
68
|
+
rubygems_version: 3.6.9
|
|
69
|
+
specification_version: 4
|
|
70
|
+
summary: Ruby bindings for the Rust fst crate
|
|
71
|
+
test_files: []
|