ruby-fst 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 82d0d6d69fc9c3c580ecd104cd588bf669d2d8d269e6e73ccdb3650e571ece14
4
+ data.tar.gz: 300c838ff26a9e6fc790ead10d403cfd19c419a40b329f684221bd29b60e7607
5
+ SHA512:
6
+ metadata.gz: b5d38c9b3cb260f1632929bd2adce4d5b6f13e6fb7273d2a8af4e69667fe0a36ddf60a1524e48775f6bad0b5954b2e981d0e3bb6428b014ea2ce187e21d10ffb
7
+ data.tar.gz: 6fb9e1d99c4888032eb5f3c0a4657dd0ea5498ad85d6aec28c5bc2da727975006331d7007706a9d3ccfb141b9884545a723ba4e070228ce482eeb5ac3a5abbf6
data/Cargo.lock ADDED
@@ -0,0 +1,298 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "1.1.4"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "bindgen"
16
+ version = "0.72.1"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
19
+ dependencies = [
20
+ "bitflags",
21
+ "cexpr",
22
+ "clang-sys",
23
+ "itertools",
24
+ "proc-macro2",
25
+ "quote",
26
+ "regex",
27
+ "rustc-hash",
28
+ "shlex",
29
+ "syn",
30
+ ]
31
+
32
+ [[package]]
33
+ name = "bitflags"
34
+ version = "2.11.1"
35
+ source = "registry+https://github.com/rust-lang/crates.io-index"
36
+ checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
37
+
38
+ [[package]]
39
+ name = "cexpr"
40
+ version = "0.6.0"
41
+ source = "registry+https://github.com/rust-lang/crates.io-index"
42
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
43
+ dependencies = [
44
+ "nom",
45
+ ]
46
+
47
+ [[package]]
48
+ name = "cfg-if"
49
+ version = "1.0.4"
50
+ source = "registry+https://github.com/rust-lang/crates.io-index"
51
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
52
+
53
+ [[package]]
54
+ name = "clang-sys"
55
+ version = "1.8.1"
56
+ source = "registry+https://github.com/rust-lang/crates.io-index"
57
+ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
58
+ dependencies = [
59
+ "glob",
60
+ "libc",
61
+ "libloading",
62
+ ]
63
+
64
+ [[package]]
65
+ name = "either"
66
+ version = "1.15.0"
67
+ source = "registry+https://github.com/rust-lang/crates.io-index"
68
+ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
69
+
70
+ [[package]]
71
+ name = "fst"
72
+ version = "0.4.7"
73
+ source = "registry+https://github.com/rust-lang/crates.io-index"
74
+ checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a"
75
+ dependencies = [
76
+ "utf8-ranges",
77
+ ]
78
+
79
+ [[package]]
80
+ name = "glob"
81
+ version = "0.3.3"
82
+ source = "registry+https://github.com/rust-lang/crates.io-index"
83
+ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
84
+
85
+ [[package]]
86
+ name = "itertools"
87
+ version = "0.13.0"
88
+ source = "registry+https://github.com/rust-lang/crates.io-index"
89
+ checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
90
+ dependencies = [
91
+ "either",
92
+ ]
93
+
94
+ [[package]]
95
+ name = "lazy_static"
96
+ version = "1.5.0"
97
+ source = "registry+https://github.com/rust-lang/crates.io-index"
98
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
99
+
100
+ [[package]]
101
+ name = "libc"
102
+ version = "0.2.186"
103
+ source = "registry+https://github.com/rust-lang/crates.io-index"
104
+ checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
105
+
106
+ [[package]]
107
+ name = "libloading"
108
+ version = "0.8.9"
109
+ source = "registry+https://github.com/rust-lang/crates.io-index"
110
+ checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
111
+ dependencies = [
112
+ "cfg-if",
113
+ "windows-link",
114
+ ]
115
+
116
+ [[package]]
117
+ name = "magnus"
118
+ version = "0.7.1"
119
+ source = "registry+https://github.com/rust-lang/crates.io-index"
120
+ checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
121
+ dependencies = [
122
+ "magnus-macros",
123
+ "rb-sys",
124
+ "rb-sys-env",
125
+ "seq-macro",
126
+ ]
127
+
128
+ [[package]]
129
+ name = "magnus-macros"
130
+ version = "0.6.0"
131
+ source = "registry+https://github.com/rust-lang/crates.io-index"
132
+ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
133
+ dependencies = [
134
+ "proc-macro2",
135
+ "quote",
136
+ "syn",
137
+ ]
138
+
139
+ [[package]]
140
+ name = "memchr"
141
+ version = "2.8.0"
142
+ source = "registry+https://github.com/rust-lang/crates.io-index"
143
+ checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
144
+
145
+ [[package]]
146
+ name = "minimal-lexical"
147
+ version = "0.2.1"
148
+ source = "registry+https://github.com/rust-lang/crates.io-index"
149
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
150
+
151
+ [[package]]
152
+ name = "nom"
153
+ version = "7.1.3"
154
+ source = "registry+https://github.com/rust-lang/crates.io-index"
155
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
156
+ dependencies = [
157
+ "memchr",
158
+ "minimal-lexical",
159
+ ]
160
+
161
+ [[package]]
162
+ name = "proc-macro2"
163
+ version = "1.0.106"
164
+ source = "registry+https://github.com/rust-lang/crates.io-index"
165
+ checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
166
+ dependencies = [
167
+ "unicode-ident",
168
+ ]
169
+
170
+ [[package]]
171
+ name = "quote"
172
+ version = "1.0.45"
173
+ source = "registry+https://github.com/rust-lang/crates.io-index"
174
+ checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
175
+ dependencies = [
176
+ "proc-macro2",
177
+ ]
178
+
179
+ [[package]]
180
+ name = "rb-sys"
181
+ version = "0.9.128"
182
+ source = "registry+https://github.com/rust-lang/crates.io-index"
183
+ checksum = "45ca28513560e56cfb79a62b1fce363c73af170a182024ce880c77ee9429920a"
184
+ dependencies = [
185
+ "rb-sys-build",
186
+ ]
187
+
188
+ [[package]]
189
+ name = "rb-sys-build"
190
+ version = "0.9.128"
191
+ source = "registry+https://github.com/rust-lang/crates.io-index"
192
+ checksum = "ce04b2c55eff3a21aaa623fcc655d94373238e72cac6b3e1a3641ff31649f99a"
193
+ dependencies = [
194
+ "bindgen",
195
+ "lazy_static",
196
+ "proc-macro2",
197
+ "quote",
198
+ "regex",
199
+ "shell-words",
200
+ "syn",
201
+ ]
202
+
203
+ [[package]]
204
+ name = "rb-sys-env"
205
+ version = "0.1.2"
206
+ source = "registry+https://github.com/rust-lang/crates.io-index"
207
+ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
208
+
209
+ [[package]]
210
+ name = "regex"
211
+ version = "1.12.3"
212
+ source = "registry+https://github.com/rust-lang/crates.io-index"
213
+ checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
214
+ dependencies = [
215
+ "aho-corasick",
216
+ "memchr",
217
+ "regex-automata",
218
+ "regex-syntax",
219
+ ]
220
+
221
+ [[package]]
222
+ name = "regex-automata"
223
+ version = "0.4.14"
224
+ source = "registry+https://github.com/rust-lang/crates.io-index"
225
+ checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
226
+ dependencies = [
227
+ "aho-corasick",
228
+ "memchr",
229
+ "regex-syntax",
230
+ ]
231
+
232
+ [[package]]
233
+ name = "regex-syntax"
234
+ version = "0.8.10"
235
+ source = "registry+https://github.com/rust-lang/crates.io-index"
236
+ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
237
+
238
+ [[package]]
239
+ name = "ruby_fst"
240
+ version = "0.1.0"
241
+ dependencies = [
242
+ "fst",
243
+ "magnus",
244
+ "rb-sys",
245
+ ]
246
+
247
+ [[package]]
248
+ name = "rustc-hash"
249
+ version = "2.1.2"
250
+ source = "registry+https://github.com/rust-lang/crates.io-index"
251
+ checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
252
+
253
+ [[package]]
254
+ name = "seq-macro"
255
+ version = "0.3.6"
256
+ source = "registry+https://github.com/rust-lang/crates.io-index"
257
+ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
258
+
259
+ [[package]]
260
+ name = "shell-words"
261
+ version = "1.1.1"
262
+ source = "registry+https://github.com/rust-lang/crates.io-index"
263
+ checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
264
+
265
+ [[package]]
266
+ name = "shlex"
267
+ version = "1.3.0"
268
+ source = "registry+https://github.com/rust-lang/crates.io-index"
269
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
270
+
271
+ [[package]]
272
+ name = "syn"
273
+ version = "2.0.117"
274
+ source = "registry+https://github.com/rust-lang/crates.io-index"
275
+ checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
276
+ dependencies = [
277
+ "proc-macro2",
278
+ "quote",
279
+ "unicode-ident",
280
+ ]
281
+
282
+ [[package]]
283
+ name = "unicode-ident"
284
+ version = "1.0.24"
285
+ source = "registry+https://github.com/rust-lang/crates.io-index"
286
+ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
287
+
288
+ [[package]]
289
+ name = "utf8-ranges"
290
+ version = "1.0.5"
291
+ source = "registry+https://github.com/rust-lang/crates.io-index"
292
+ checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba"
293
+
294
+ [[package]]
295
+ name = "windows-link"
296
+ version = "0.2.1"
297
+ source = "registry+https://github.com/rust-lang/crates.io-index"
298
+ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
data/Cargo.toml ADDED
@@ -0,0 +1,3 @@
1
+ [workspace]
2
+ resolver = "2"
3
+ members = ["ext/ruby_fst"]
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
# All gems below are resolved from rubygems.org.
source 'https://rubygems.org'

# Pull in the dependencies declared in ruby_fst.gemspec.
gemspec

# Gems listed only here (not in the gemspec) are needed for working on this
# repository, not for using the packaged gem.
gem 'minitest', '~> 5.0'
gem 'rake', '~> 13.0'
gem 'rake-compiler', '~> 1.2'
data/LICENSE.txt ADDED
@@ -0,0 +1,46 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 Denis Sablic
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
22
+
23
+ ---
24
+
25
+ This gem wraps the fst crate (https://github.com/BurntSushi/fst) by
26
+ Andrew Gallant, which is licensed under the MIT License:
27
+
28
+ Copyright (c) 2015 Andrew Gallant
29
+
30
+ Permission is hereby granted, free of charge, to any person obtaining a copy
31
+ of this software and associated documentation files (the "Software"), to deal
32
+ in the Software without restriction, including without limitation the rights
33
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
34
+ copies of the Software, and to permit persons to whom the Software is
35
+ furnished to do so, subject to the following conditions:
36
+
37
+ The above copyright notice and this permission notice shall be included in
38
+ all copies or substantial portions of the Software.
39
+
40
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
41
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
42
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
43
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
44
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
45
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
46
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,126 @@
1
+ # ruby-fst
2
+
3
+ Ruby bindings for the [fst](https://github.com/BurntSushi/fst) crate by Andrew Gallant. Provides finite state transducer backed ordered maps and sets with fast lookup, range queries, and fuzzy search.
4
+
5
+ ## Requirements
6
+
7
+ - Ruby >= 3.0
8
+ - Rust toolchain (for compilation)
9
+
10
+ ## Installation
11
+
12
+ ```ruby
13
+ gem 'ruby-fst'
14
+ ```
15
+
16
+ ## Usage
17
+
18
+ ### Map
19
+
20
+ Ordered map from byte string keys to unsigned 64-bit integer values. Keys must be inserted in strictly increasing lexicographic (byte-wise) order; inserting a duplicate or out-of-order key raises an error.
21
+
22
+ ```ruby
23
+ require 'ruby_fst'
24
+
25
+ builder = RubyFst::MapBuilder.new
26
+ builder.insert('bar', 2)
27
+ builder.insert('baz', 3)
28
+ builder.insert('foo', 1)
29
+ map = RubyFst::Map.new(builder.finish)
30
+
31
+ map['foo'] # => 1
32
+ map.get('missing') # => nil
33
+ map.contains?('bar') # => true
34
+ map.length # => 3
35
+
36
+ map.each { |key, value| puts "#{key}: #{value}" }
37
+ ```
38
+
39
+ ### Set
40
+
41
+ Ordered set of byte string keys.
42
+
43
+ ```ruby
44
+ builder = RubyFst::SetBuilder.new
45
+ builder.insert('bar')
46
+ builder.insert('baz')
47
+ builder.insert('foo')
48
+ set = RubyFst::Set.new(builder.finish)
49
+
50
+ set.contains?('foo') # => true
51
+ set.length # => 3
52
+
53
+ set.each { |key| puts key }
54
+ ```
55
+
56
+ ### Floor and ceiling lookups
57
+
58
+ `get_le` returns the entry with the greatest key less than or equal to the query. `get_ge` returns the entry with the smallest key greater than or equal to the query. Both are available on `Map` only and return `[key, value]`, or `nil` when no such key exists.
59
+
60
+ ```ruby
61
+ builder = RubyFst::MapBuilder.new
62
+ builder.insert('bar', 1)
63
+ builder.insert('foo', 2)
64
+ builder.insert('qux', 3)
65
+ map = RubyFst::Map.new(builder.finish)
66
+
67
+ map.get_le('dog') # => ["bar", 1]
68
+ map.get_ge('dog') # => ["foo", 2]
69
+ map.get_le('aaa') # => nil
70
+ ```
71
+
72
+ This is useful for IP range lookups. Encode range starts as 4-byte big-endian keys and use `get_le` to find which range an IP falls into (the example uses `IPAddr`, so add `require 'ipaddr'` first):
73
+
74
+ ```ruby
75
+ builder = RubyFst::MapBuilder.new
76
+ builder.insert([167_772_160].pack('N'), 1) # 10.0.0.0
77
+ builder.insert([3_232_235_520].pack('N'), 2) # 192.168.0.0
78
+ map = RubyFst::Map.new(builder.finish)
79
+
80
+ ip = IPAddr.new('10.0.0.100').to_i
81
+ key, label_id = map.get_le([ip].pack('N'))
82
+ ```
83
+
84
+ ### Levenshtein search
85
+
86
+ Find all keys within a given edit distance. The search runs as an automaton intersection with the FST, visiting only reachable states.
87
+
88
+ ```ruby
89
+ builder = RubyFst::MapBuilder.new
90
+ %w(bar baz cat foo fun).each_with_index { |w, i| builder.insert(w, i) }
91
+ map = RubyFst::Map.new(builder.finish)
92
+
93
+ map.search_levenshtein('far', 1) { |key, value| puts key }
94
+ # => bar
95
+ ```
96
+
97
+ Works on sets too:
98
+
99
+ ```ruby
100
+ set.search_levenshtein('university', 2) { |key| puts key }
101
+ ```
102
+
103
+ ### Serialization
104
+
105
+ ```ruby
106
+ # To/from bytes
107
+ bytes = map.to_bytes
108
+ map = RubyFst::Map.new(bytes)
109
+
110
+ # To/from file
111
+ map.save('/path/to/file.fst')
112
+ map = RubyFst::Map.from_path('/path/to/file.fst')
113
+ ```
114
+
115
+ ## Development
116
+
117
+ ```
118
+ bundle install
119
+ bundle exec rake compile test
120
+ ```
121
+
122
+ ## License
123
+
124
+ MIT. See [LICENSE.txt](LICENSE.txt) for details.
125
+
126
+ This gem wraps the [fst](https://github.com/BurntSushi/fst) crate by Andrew Gallant, also MIT licensed.
data/Rakefile ADDED
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rake/testtask'
4
+ require 'rb_sys/extensiontask'
5
+
6
# Gem specification loaded from disk and handed to the extension task below.
GEMSPEC = Gem::Specification.load('ruby_fst.gemspec')

# Tasks for the `ruby_fst` native extension. `lib_dir` is lib/ruby_fst, the
# directory lib/ruby_fst.rb loads the extension from via
# `require_relative 'ruby_fst/ruby_fst'`.
RbSys::ExtensionTask.new('ruby_fst', GEMSPEC) do |ext|
  ext.lib_dir = 'lib/ruby_fst'
end

# Test task: runs every file matching test/**/*_test.rb with test/ added to
# the load path.
Rake::TestTask.new do |t|
  t.libs << 'test'
  t.test_files = FileList['test/**/*_test.rb']
end

# Plain `rake` runs the compile task and then the test task.
task default: %i(compile test)
18
+
19
# Increments the gem version and keeps lib/ruby_fst/version.rb and
# ext/ruby_fst/Cargo.toml in step.
#
# The optional task argument selects the component to bump ('major', 'minor'
# or 'patch', defaulting to 'patch'). Both files are read and validated
# before either one is written, so a file with an unexpected layout aborts
# the task instead of leaving the two versions out of sync.
desc 'Bump version (rake bump[patch], rake bump[minor], rake bump[major])'
task :bump, [:level] do |_, args|
  level = args[:level] || 'patch'
  version_file = File.join(__dir__, 'lib', 'ruby_fst', 'version.rb')
  cargo_file = File.join(__dir__, 'ext', 'ruby_fst', 'Cargo.toml')

  content = File.read(version_file)
  current = content[/VERSION = '(.+)'/, 1]
  abort("No VERSION = '...' line found in #{version_file}") unless current

  # values_at pads missing components with nil, and nil.to_i is 0, so a
  # short version such as '1.2' is treated as 1.2.0 instead of crashing.
  major, minor, patch = current.split('.').values_at(0, 1, 2).map(&:to_i)

  case level
  when 'major' then major += 1; minor = 0; patch = 0
  when 'minor' then minor += 1; patch = 0
  when 'patch' then patch += 1
  else abort("Unknown level: #{level}. Use major, minor, or patch.")
  end

  new_version = "#{major}.#{minor}.#{patch}"

  cargo = File.read(cargo_file)
  # Only the first line-leading `version = "..."` is rewritten below; make
  # sure it exists before touching either file.
  abort("No version = \"...\" line found in #{cargo_file}") unless cargo.match?(/^version = ".+"/)

  File.write(version_file, content.sub(/VERSION = '.+'/, "VERSION = '#{new_version}'"))
  File.write(cargo_file, cargo.sub(/^version = ".+"/, "version = \"#{new_version}\""))

  puts("#{current} -> #{new_version}")
end
@@ -0,0 +1,13 @@
1
+ [package]
2
+ name = "ruby_fst"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ publish = false
6
+
7
+ [lib]
8
+ crate-type = ["cdylib"]
9
+
10
+ [dependencies]
11
+ magnus = { version = "0.7", features = ["rb-sys"] }
12
+ rb-sys = { version = "0.9", features = ["stable-api-compiled-fallback"] }
13
+ fst = { version = "0.4", features = ["levenshtein"] }
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+ require 'rb_sys/mkmf'
5
+
6
+ create_rust_makefile('ruby_fst/ruby_fst')
@@ -0,0 +1,405 @@
1
+ use std::cell::RefCell;
2
+ use std::fs;
3
+
4
+ use fst::automaton::Levenshtein;
5
+ use fst::raw::{CompiledAddr, Fst, Node, Output};
6
+ use fst::{IntoStreamer, Streamer};
7
+ use magnus::prelude::*;
8
+ use magnus::{block, exception, function, method, Error, RArray, RString, Ruby, Value};
9
+
10
+ fn err(msg: impl std::fmt::Display) -> Error {
11
+ Error::new(exception::runtime_error(), msg.to_string())
12
+ }
13
+
14
/// Returns a handle to the Ruby VM without checking the calling context.
///
/// NOTE(review): `Ruby::get_unchecked` is presumably only sound on a Ruby
/// thread; the callers in this file are all methods registered in `init`,
/// i.e. invoked by Ruby - confirm before calling this from anywhere else.
fn ruby() -> Ruby {
    unsafe { Ruby::get_unchecked() }
}
17
+
18
+ // ---------------------------------------------------------------------------
19
+ // Map
20
+ // ---------------------------------------------------------------------------
21
+
22
+ #[magnus::wrap(class = "RubyFst::Map", free_immediately, size)]
23
+ struct FstMap {
24
+ inner: fst::Map<Vec<u8>>,
25
+ }
26
+
27
+ impl FstMap {
28
+ fn new(bytes: RString) -> Result<Self, Error> {
29
+ let data = unsafe { bytes.as_slice() }.to_vec();
30
+ let inner = fst::Map::new(data).map_err(err)?;
31
+ Ok(Self { inner })
32
+ }
33
+
34
+ fn from_path(path: String) -> Result<Self, Error> {
35
+ let data = fs::read(&path).map_err(err)?;
36
+ let inner = fst::Map::new(data).map_err(err)?;
37
+ Ok(Self { inner })
38
+ }
39
+
40
+ fn get(&self, key: RString) -> Option<u64> {
41
+ let key = unsafe { key.as_slice() };
42
+ self.inner.get(key)
43
+ }
44
+
45
+ fn contains(&self, key: RString) -> bool {
46
+ let key = unsafe { key.as_slice() };
47
+ self.inner.contains_key(key)
48
+ }
49
+
50
+ fn len(&self) -> usize {
51
+ self.inner.len()
52
+ }
53
+
54
+ fn is_empty(&self) -> bool {
55
+ self.inner.is_empty()
56
+ }
57
+
58
+ fn to_bytes(&self) -> RString {
59
+ ruby().str_from_slice(self.inner.as_fst().as_bytes())
60
+ }
61
+
62
+ fn save(&self, path: String) -> Result<(), Error> {
63
+ fs::write(&path, self.inner.as_fst().as_bytes()).map_err(err)
64
+ }
65
+
66
+ fn get_le(&self, key: RString) -> Result<Option<RArray>, Error> {
67
+ let r = ruby();
68
+ let key = unsafe { key.as_slice() };
69
+ match floor_lookup(self.inner.as_fst(), key) {
70
+ Some((found_key, value)) => {
71
+ let arr = r.ary_new_capa(2);
72
+ arr.push(r.str_from_slice(&found_key))?;
73
+ arr.push(value)?;
74
+ Ok(Some(arr))
75
+ }
76
+ None => Ok(None),
77
+ }
78
+ }
79
+
80
+ fn get_ge(&self, key: RString) -> Result<Option<RArray>, Error> {
81
+ let r = ruby();
82
+ let key = unsafe { key.as_slice() };
83
+ let mut stream = self.inner.range().ge(key).into_stream();
84
+ match stream.next() {
85
+ Some((k, v)) => {
86
+ let arr = r.ary_new_capa(2);
87
+ arr.push(r.str_from_slice(k))?;
88
+ arr.push(v)?;
89
+ Ok(Some(arr))
90
+ }
91
+ None => Ok(None),
92
+ }
93
+ }
94
+
95
+ fn each(&self) -> Result<(), Error> {
96
+ let r = ruby();
97
+ let mut stream = (&self.inner).into_stream();
98
+ while let Some((key, value)) = stream.next() {
99
+ let rb_key = r.str_from_slice(key);
100
+ let _: Value = block::yield_values((rb_key, value))?;
101
+ }
102
+ Ok(())
103
+ }
104
+
105
+ fn search_levenshtein(&self, query: String, distance: u32) -> Result<(), Error> {
106
+ let r = ruby();
107
+ let lev = Levenshtein::new(&query, distance).map_err(err)?;
108
+ let mut stream = self.inner.search(lev).into_stream();
109
+ while let Some((key, value)) = stream.next() {
110
+ let rb_key = r.str_from_slice(key);
111
+ let _: Value = block::yield_values((rb_key, value))?;
112
+ }
113
+ Ok(())
114
+ }
115
+ }
116
+
117
+ // ---------------------------------------------------------------------------
118
+ // MapBuilder
119
+ // ---------------------------------------------------------------------------
120
+
121
+ #[magnus::wrap(class = "RubyFst::MapBuilder", free_immediately, size)]
122
+ struct FstMapBuilder {
123
+ inner: RefCell<Option<fst::MapBuilder<Vec<u8>>>>,
124
+ }
125
+
126
+ impl FstMapBuilder {
127
+ fn new() -> Self {
128
+ Self {
129
+ inner: RefCell::new(Some(fst::MapBuilder::memory())),
130
+ }
131
+ }
132
+
133
+ fn insert(&self, key: RString, value: u64) -> Result<(), Error> {
134
+ let key = unsafe { key.as_slice() }.to_vec();
135
+ let mut guard = self.inner.borrow_mut();
136
+ let b = guard.as_mut().ok_or_else(|| err("builder already finished"))?;
137
+ b.insert(&key, value).map_err(err)
138
+ }
139
+
140
+ fn finish(&self) -> Result<RString, Error> {
141
+ let mut guard = self.inner.borrow_mut();
142
+ let b = guard.take().ok_or_else(|| err("builder already finished"))?;
143
+ let bytes = b.into_inner().map_err(err)?;
144
+ Ok(ruby().str_from_slice(&bytes))
145
+ }
146
+ }
147
+
148
+ // ---------------------------------------------------------------------------
149
+ // Set
150
+ // ---------------------------------------------------------------------------
151
+
152
+ #[magnus::wrap(class = "RubyFst::Set", free_immediately, size)]
153
+ struct FstSet {
154
+ inner: fst::Set<Vec<u8>>,
155
+ }
156
+
157
+ impl FstSet {
158
+ fn new(bytes: RString) -> Result<Self, Error> {
159
+ let data = unsafe { bytes.as_slice() }.to_vec();
160
+ let inner = fst::Set::new(data).map_err(err)?;
161
+ Ok(Self { inner })
162
+ }
163
+
164
+ fn from_path(path: String) -> Result<Self, Error> {
165
+ let data = fs::read(&path).map_err(err)?;
166
+ let inner = fst::Set::new(data).map_err(err)?;
167
+ Ok(Self { inner })
168
+ }
169
+
170
+ fn contains(&self, key: RString) -> bool {
171
+ let key = unsafe { key.as_slice() };
172
+ self.inner.contains(key)
173
+ }
174
+
175
+ fn len(&self) -> usize {
176
+ self.inner.len()
177
+ }
178
+
179
+ fn is_empty(&self) -> bool {
180
+ self.inner.is_empty()
181
+ }
182
+
183
+ fn to_bytes(&self) -> RString {
184
+ ruby().str_from_slice(self.inner.as_fst().as_bytes())
185
+ }
186
+
187
+ fn save(&self, path: String) -> Result<(), Error> {
188
+ fs::write(&path, self.inner.as_fst().as_bytes()).map_err(err)
189
+ }
190
+
191
+ fn each(&self) -> Result<(), Error> {
192
+ let r = ruby();
193
+ let mut stream = (&self.inner).into_stream();
194
+ while let Some(key) = stream.next() {
195
+ let rb_key = r.str_from_slice(key);
196
+ let _: Value = block::yield_value(rb_key)?;
197
+ }
198
+ Ok(())
199
+ }
200
+
201
+ fn search_levenshtein(&self, query: String, distance: u32) -> Result<(), Error> {
202
+ let r = ruby();
203
+ let lev = Levenshtein::new(&query, distance).map_err(err)?;
204
+ let mut stream = self.inner.search(lev).into_stream();
205
+ while let Some(key) = stream.next() {
206
+ let rb_key = r.str_from_slice(key);
207
+ let _: Value = block::yield_value(rb_key)?;
208
+ }
209
+ Ok(())
210
+ }
211
+ }
212
+
213
+ // ---------------------------------------------------------------------------
214
+ // SetBuilder
215
+ // ---------------------------------------------------------------------------
216
+
217
+ #[magnus::wrap(class = "RubyFst::SetBuilder", free_immediately, size)]
218
+ struct FstSetBuilder {
219
+ inner: RefCell<Option<fst::SetBuilder<Vec<u8>>>>,
220
+ }
221
+
222
+ impl FstSetBuilder {
223
+ fn new() -> Self {
224
+ Self {
225
+ inner: RefCell::new(Some(fst::SetBuilder::memory())),
226
+ }
227
+ }
228
+
229
+ fn insert(&self, key: RString) -> Result<(), Error> {
230
+ let key = unsafe { key.as_slice() }.to_vec();
231
+ let mut guard = self.inner.borrow_mut();
232
+ let b = guard.as_mut().ok_or_else(|| err("builder already finished"))?;
233
+ b.insert(&key).map_err(err)
234
+ }
235
+
236
+ fn finish(&self) -> Result<RString, Error> {
237
+ let mut guard = self.inner.borrow_mut();
238
+ let b = guard.take().ok_or_else(|| err("builder already finished"))?;
239
+ let bytes = b.into_inner().map_err(err)?;
240
+ Ok(ruby().str_from_slice(&bytes))
241
+ }
242
+ }
243
+
244
+ // ---------------------------------------------------------------------------
245
+ // Floor lookup (get_le): greatest key <= query
246
+ // ---------------------------------------------------------------------------
247
+
248
/// Snapshot of one node visited while `floor_lookup` walks the query key,
/// holding what is needed to back up to that node and branch to a smaller
/// key.
struct Frame {
    /// Address of the visited node, used to re-read it with `Fst::node`.
    node_addr: CompiledAddr,
    /// Output accumulated on the path from the root to this node.
    output: Output,
    /// Number of query bytes consumed before reaching this node.
    prefix_len: usize,
    /// Result of `find_max_lesser` for this node and the query byte at this
    /// position: index of a transition with a smaller input byte, if any.
    max_lesser_idx: Option<usize>,
    /// Whether this node is final, i.e. the consumed prefix is itself a key.
    is_final: bool,
    /// `output` combined with the node's final output; `floor_lookup` reads
    /// it only when `is_final` is true.
    final_value: u64,
}
256
+
257
+ fn find_max_lesser(node: &Node, byte: u8) -> Option<usize> {
258
+ let n = node.len();
259
+ if n == 0 {
260
+ return None;
261
+ }
262
+ let mut lo: usize = 0;
263
+ let mut hi: usize = n;
264
+ while lo < hi {
265
+ let mid = lo + (hi - lo) / 2;
266
+ if node.transition(mid).inp < byte {
267
+ lo = mid + 1;
268
+ } else {
269
+ hi = mid;
270
+ }
271
+ }
272
+ lo.checked_sub(1)
273
+ }
274
+
275
+ fn rightmost_to_leaf<D: AsRef<[u8]>>(
276
+ fst: &Fst<D>,
277
+ addr: CompiledAddr,
278
+ output: Output,
279
+ ) -> (Vec<u8>, u64) {
280
+ let mut node = fst.node(addr);
281
+ let mut out = output;
282
+ let mut suffix = Vec::new();
283
+
284
+ while node.len() > 0 {
285
+ let last = node.len() - 1;
286
+ let t = node.transition(last);
287
+ suffix.push(t.inp);
288
+ out = out.cat(t.out);
289
+ node = fst.node(t.addr);
290
+ }
291
+
292
+ (suffix, out.cat(node.final_output()).value())
293
+ }
294
+
295
/// Finds the greatest key in `fst` that is less than or equal to `key`.
///
/// Returns the matching key bytes and its value, or `None` when no such key
/// is found.
///
/// The search walks `key` from the root, recording a `Frame` for every node
/// it visits, and then backs up through those frames, deepest first, to
/// find the closest smaller key.
fn floor_lookup<D: AsRef<[u8]>>(fst: &Fst<D>, key: &[u8]) -> Option<(Vec<u8>, u64)> {
    let root = fst.root();

    // An empty query can only match the empty key.
    if key.is_empty() {
        return if root.is_final() {
            Some((Vec::new(), root.final_output().value()))
        } else {
            None
        };
    }

    let mut node = root;
    let mut output = Output::zero();
    let mut stack: Vec<Frame> = Vec::with_capacity(key.len());
    let mut matched: usize = 0;

    for &byte in key.iter() {
        let lesser = find_max_lesser(&node, byte);

        // Record the node *before* consuming `byte`, including the node at
        // which the walk may fail, so the fallback below can branch from it.
        stack.push(Frame {
            node_addr: node.addr(),
            output,
            prefix_len: matched,
            max_lesser_idx: lesser,
            is_final: node.is_final(),
            final_value: output.cat(node.final_output()).value(),
        });

        match node.find_input(byte) {
            Some(idx) => {
                let t = node.transition(idx);
                output = output.cat(t.out);
                node = fst.node(t.addr);
                matched += 1;
            }
            None => break,
        }
    }

    // The whole query was consumed and ends on a final node: exact match.
    if matched == key.len() && node.is_final() {
        return Some((key.to_vec(), output.cat(node.final_output()).value()));
    }

    // No exact match: back up from the deepest visited node.
    while let Some(frame) = stack.pop() {
        // Prefer the branch recorded by `find_max_lesser`: take that
        // transition, then keep following last transitions to the end.
        if let Some(j) = frame.max_lesser_idx {
            let frame_node = fst.node(frame.node_addr);
            let t = frame_node.transition(j);
            let mut result = key[..frame.prefix_len].to_vec();
            result.push(t.inp);
            let branch_output = frame.output.cat(t.out);
            let (suffix, val) = rightmost_to_leaf(fst, t.addr, branch_output);
            result.extend(suffix);
            return Some((result, val));
        }

        // Otherwise the consumed prefix itself, if it is a key, is the
        // answer; it is shorter than the query at every recorded frame.
        if frame.is_final {
            return Some((key[..frame.prefix_len].to_vec(), frame.final_value));
        }
    }

    None
}
357
+
358
+ // ---------------------------------------------------------------------------
359
+ // Init
360
+ // ---------------------------------------------------------------------------
361
+
362
+ #[magnus::init]
363
+ fn init(ruby: &Ruby) -> Result<(), Error> {
364
+ let module = ruby.define_module("RubyFst")?;
365
+
366
+ let map_class = module.define_class("Map", ruby.class_object())?;
367
+ map_class.define_singleton_method("new", function!(FstMap::new, 1))?;
368
+ map_class.define_singleton_method("from_path", function!(FstMap::from_path, 1))?;
369
+ map_class.define_method("get", method!(FstMap::get, 1))?;
370
+ map_class.define_method("[]", method!(FstMap::get, 1))?;
371
+ map_class.define_method("contains?", method!(FstMap::contains, 1))?;
372
+ map_class.define_method("length", method!(FstMap::len, 0))?;
373
+ map_class.define_method("size", method!(FstMap::len, 0))?;
374
+ map_class.define_method("empty?", method!(FstMap::is_empty, 0))?;
375
+ map_class.define_method("to_bytes", method!(FstMap::to_bytes, 0))?;
376
+ map_class.define_method("save", method!(FstMap::save, 1))?;
377
+ map_class.define_method("get_le", method!(FstMap::get_le, 1))?;
378
+ map_class.define_method("get_ge", method!(FstMap::get_ge, 1))?;
379
+ map_class.define_method("each", method!(FstMap::each, 0))?;
380
+ map_class.define_method("search_levenshtein", method!(FstMap::search_levenshtein, 2))?;
381
+
382
+ let map_builder = module.define_class("MapBuilder", ruby.class_object())?;
383
+ map_builder.define_singleton_method("new", function!(FstMapBuilder::new, 0))?;
384
+ map_builder.define_method("insert", method!(FstMapBuilder::insert, 2))?;
385
+ map_builder.define_method("finish", method!(FstMapBuilder::finish, 0))?;
386
+
387
+ let set_class = module.define_class("Set", ruby.class_object())?;
388
+ set_class.define_singleton_method("new", function!(FstSet::new, 1))?;
389
+ set_class.define_singleton_method("from_path", function!(FstSet::from_path, 1))?;
390
+ set_class.define_method("contains?", method!(FstSet::contains, 1))?;
391
+ set_class.define_method("length", method!(FstSet::len, 0))?;
392
+ set_class.define_method("size", method!(FstSet::len, 0))?;
393
+ set_class.define_method("empty?", method!(FstSet::is_empty, 0))?;
394
+ set_class.define_method("to_bytes", method!(FstSet::to_bytes, 0))?;
395
+ set_class.define_method("save", method!(FstSet::save, 1))?;
396
+ set_class.define_method("each", method!(FstSet::each, 0))?;
397
+ set_class.define_method("search_levenshtein", method!(FstSet::search_levenshtein, 2))?;
398
+
399
+ let set_builder = module.define_class("SetBuilder", ruby.class_object())?;
400
+ set_builder.define_singleton_method("new", function!(FstSetBuilder::new, 0))?;
401
+ set_builder.define_method("insert", method!(FstSetBuilder::insert, 1))?;
402
+ set_builder.define_method("finish", method!(FstSetBuilder::finish, 0))?;
403
+
404
+ Ok(())
405
+ }
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyFst
  # Gem version, read by ruby_fst.gemspec. Keep the single-quoted
  # `VERSION = '...'` form: the `bump` task in the Rakefile finds and
  # rewrites this line with the pattern /VERSION = '.+'/.
  VERSION = '0.1.0'
end
data/lib/ruby_fst.rb ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ruby_fst/version'
4
+ require_relative 'ruby_fst/ruby_fst'
5
+
6
# Ruby-side additions to the classes provided by the native extension
# (loaded above via `require_relative 'ruby_fst/ruby_fst'`).
module RubyFst
  # Mixes Enumerable into Map, building on the `each` method that the
  # extension defines for it.
  class Map
    include Enumerable
  end

  # Mixes Enumerable into Set, building on the `each` method that the
  # extension defines for it.
  class Set
    include Enumerable
  end
end
data/ruby_fst.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/ruby_fst/version'
4
+
5
# Gem specification for ruby-fst. Note the naming split: the gem is
# published as 'ruby-fst', while the extension and library paths below use
# 'ruby_fst'.
Gem::Specification.new do |spec|
  spec.name = 'ruby-fst'
  # Single source of truth for the version: lib/ruby_fst/version.rb.
  spec.version = RubyFst::VERSION
  spec.authors = ['Denis Sablic']
  spec.email = ['denis.sablic@gmail.com']
  spec.summary = 'Ruby bindings for the Rust fst crate'
  spec.description = 'Finite state transducer backed ordered sets and maps via the Rust fst crate by BurntSushi'
  spec.homepage = 'https://github.com/dsablic/ruby-fst'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 3.0'

  spec.metadata['rubygems_mfa_required'] = 'true'
  spec.metadata['source_code_uri'] = 'https://github.com/dsablic/ruby-fst'
  spec.metadata['changelog_uri'] = 'https://github.com/dsablic/ruby-fst/releases'

  # Package every git-tracked file except paths starting with 'test/' or
  # '.git' (the latter also matches names such as .gitignore or .github/).
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject { |f| f.start_with?('test/', '.git') }
  end
  # Native extension entry point.
  spec.extensions = ['ext/ruby_fst/extconf.rb']
  spec.require_paths = ['lib']

  # Runtime dependency: ext/ruby_fst/extconf.rb requires 'rb_sys/mkmf'.
  spec.add_dependency('rb_sys', '~> 0.9')
end
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-fst
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Denis Sablic
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rb_sys
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '0.9'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '0.9'
26
+ description: Finite state transducer backed ordered sets and maps via the Rust fst
27
+ crate by BurntSushi
28
+ email:
29
+ - denis.sablic@gmail.com
30
+ executables: []
31
+ extensions:
32
+ - ext/ruby_fst/extconf.rb
33
+ extra_rdoc_files: []
34
+ files:
35
+ - Cargo.lock
36
+ - Cargo.toml
37
+ - Gemfile
38
+ - LICENSE.txt
39
+ - README.md
40
+ - Rakefile
41
+ - ext/ruby_fst/Cargo.toml
42
+ - ext/ruby_fst/extconf.rb
43
+ - ext/ruby_fst/src/lib.rs
44
+ - lib/ruby_fst.rb
45
+ - lib/ruby_fst/version.rb
46
+ - ruby_fst.gemspec
47
+ homepage: https://github.com/dsablic/ruby-fst
48
+ licenses:
49
+ - MIT
50
+ metadata:
51
+ rubygems_mfa_required: 'true'
52
+ source_code_uri: https://github.com/dsablic/ruby-fst
53
+ changelog_uri: https://github.com/dsablic/ruby-fst/releases
54
+ rdoc_options: []
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubygems_version: 3.6.9
69
+ specification_version: 4
70
+ summary: Ruby bindings for the Rust fst crate
71
+ test_files: []