ruby-fst 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +82 -0
- data/CHANGELOG.md +34 -0
- data/Cargo.lock +17 -7
- data/Gemfile +3 -0
- data/README.md +78 -30
- data/Rakefile +44 -9
- data/ext/ruby_fst/Cargo.toml +3 -2
- data/ext/ruby_fst/src/lib.rs +258 -21
- data/lib/ruby_fst/version.rb +1 -1
- data/lib/ruby_fst.rb +26 -0
- data/ruby_fst.gemspec +2 -2
- metadata +5 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f20d41a6b4f23c4d08861ebfa42cf22221428852c1a50c38541c81ba64b425b2
|
|
4
|
+
data.tar.gz: b72b1e9cef426abef16a5296000383b01b15f8d14b9848102b6a7084e0a83df7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c6a8d55e3569a7cbd2d6e890bc6859f8251e63b53b99f25f70793afcaea435930f1ada3cf8f95bb45f0f9119b95de0c4605e75b7430d2abe09733372bbb84e48
|
|
7
|
+
data.tar.gz: bc65a0d886637ff6ab1a897a3303c9e6f55de379d0a98ce87228bd574634f6a199095921936935ecc110d5b834de152d83487656b903c028c1debb42af35ebc6
|
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
plugins:
|
|
2
|
+
- rubocop-minitest
|
|
3
|
+
- rubocop-rake
|
|
4
|
+
|
|
5
|
+
AllCops:
|
|
6
|
+
NewCops: enable
|
|
7
|
+
TargetRubyVersion: 3.2
|
|
8
|
+
SuggestExtensions: false
|
|
9
|
+
Exclude:
|
|
10
|
+
- 'target/**/*'
|
|
11
|
+
- 'tmp/**/*'
|
|
12
|
+
- 'pkg/**/*'
|
|
13
|
+
- 'vendor/**/*'
|
|
14
|
+
|
|
15
|
+
Style/StringLiterals:
|
|
16
|
+
EnforcedStyle: single_quotes
|
|
17
|
+
|
|
18
|
+
Style/StringLiteralsInInterpolation:
|
|
19
|
+
EnforcedStyle: single_quotes
|
|
20
|
+
|
|
21
|
+
Style/SymbolArray:
|
|
22
|
+
EnforcedStyle: percent
|
|
23
|
+
MinSize: 3
|
|
24
|
+
|
|
25
|
+
Style/WordArray:
|
|
26
|
+
EnforcedStyle: percent
|
|
27
|
+
MinSize: 3
|
|
28
|
+
|
|
29
|
+
Style/PercentLiteralDelimiters:
|
|
30
|
+
PreferredDelimiters:
|
|
31
|
+
default: '()'
|
|
32
|
+
'%i': '()'
|
|
33
|
+
'%w': '()'
|
|
34
|
+
|
|
35
|
+
Style/Documentation:
|
|
36
|
+
Enabled: false
|
|
37
|
+
|
|
38
|
+
Style/HashEachMethods:
|
|
39
|
+
Exclude:
|
|
40
|
+
- 'test/**/*'
|
|
41
|
+
|
|
42
|
+
Style/MapIntoArray:
|
|
43
|
+
Exclude:
|
|
44
|
+
- 'test/**/*'
|
|
45
|
+
|
|
46
|
+
Naming/MethodParameterName:
|
|
47
|
+
AllowedNames:
|
|
48
|
+
- ge
|
|
49
|
+
- le
|
|
50
|
+
- id
|
|
51
|
+
- to
|
|
52
|
+
- by
|
|
53
|
+
- on
|
|
54
|
+
- in
|
|
55
|
+
- at
|
|
56
|
+
- of
|
|
57
|
+
- or
|
|
58
|
+
- if
|
|
59
|
+
- is
|
|
60
|
+
- as
|
|
61
|
+
- it
|
|
62
|
+
|
|
63
|
+
Naming/VariableNumber:
|
|
64
|
+
CheckMethodNames: false
|
|
65
|
+
|
|
66
|
+
Layout/LineLength:
|
|
67
|
+
Max: 120
|
|
68
|
+
|
|
69
|
+
Metrics/BlockLength:
|
|
70
|
+
Exclude:
|
|
71
|
+
- 'test/**/*'
|
|
72
|
+
- '*.gemspec'
|
|
73
|
+
- 'Rakefile'
|
|
74
|
+
|
|
75
|
+
Metrics/MethodLength:
|
|
76
|
+
Max: 25
|
|
77
|
+
|
|
78
|
+
Metrics/AbcSize:
|
|
79
|
+
Max: 25
|
|
80
|
+
|
|
81
|
+
Minitest/MultipleAssertions:
|
|
82
|
+
Max: 8
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here. The format is based on
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and this project
|
|
5
|
+
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
## [0.2.0] — 2026-05-13
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- `Map.from_path_mmap` and `Set.from_path_mmap` for memory-mapped loading of large FSTs.
|
|
13
|
+
- `Map#range(ge:, le:)` and `Set#range(ge:, le:)` streaming range iteration.
|
|
14
|
+
- `Map#starts_with(prefix)` and `Set#starts_with(prefix)` prefix scans.
|
|
15
|
+
- `Map#get_le_value` and `Map#get_ge_value` — return only the value (no key allocation) for floor/ceiling lookups.
|
|
16
|
+
- Precompiled native gems for Linux (x86_64, aarch64), macOS (x86_64, arm64), and Windows (x64) — installs without a Rust toolchain on those platforms.
|
|
17
|
+
- Tests covering binary string encoding, key lifetime after stream drop, builder GC, and 0xFF prefix-scan edge case.
|
|
18
|
+
- RuboCop lint configuration with rubocop-minitest and rubocop-rake plugins; runs as part of `rake` and as a CI gate.
|
|
19
|
+
|
|
20
|
+
### Changed
|
|
21
|
+
- Minimum Ruby version raised to 3.2.
|
|
22
|
+
- `Map` and `Set` now operate on a generic `Storage` backing (either a heap buffer or a memory map).
|
|
23
|
+
- Upgraded to magnus 0.8 (drops support for Ruby 2.7 and 3.0 in the underlying bindings; we already require 3.2+).
|
|
24
|
+
|
|
25
|
+
### Documentation
|
|
26
|
+
- README clarifies key encoding, insertion order, mmap vs in-memory loading, and Levenshtein UTF-8 requirement.
|
|
27
|
+
|
|
28
|
+
## [0.1.0] — 2026-05-13
|
|
29
|
+
|
|
30
|
+
### Added
|
|
31
|
+
- Initial release: `RubyFst::Map`, `RubyFst::Set`, `MapBuilder`, `SetBuilder`.
|
|
32
|
+
- Floor/ceiling lookups (`get_le`, `get_ge`).
|
|
33
|
+
- Levenshtein automaton search.
|
|
34
|
+
- Bytes/file serialization via `to_bytes` / `save` / `from_path`.
|
data/Cargo.lock
CHANGED
|
@@ -115,9 +115,9 @@ dependencies = [
|
|
|
115
115
|
|
|
116
116
|
[[package]]
|
|
117
117
|
name = "magnus"
|
|
118
|
-
version = "0.
|
|
118
|
+
version = "0.8.2"
|
|
119
119
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
120
|
-
checksum = "
|
|
120
|
+
checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
|
|
121
121
|
dependencies = [
|
|
122
122
|
"magnus-macros",
|
|
123
123
|
"rb-sys",
|
|
@@ -127,9 +127,9 @@ dependencies = [
|
|
|
127
127
|
|
|
128
128
|
[[package]]
|
|
129
129
|
name = "magnus-macros"
|
|
130
|
-
version = "0.
|
|
130
|
+
version = "0.8.0"
|
|
131
131
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
132
|
-
checksum = "
|
|
132
|
+
checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
|
|
133
133
|
dependencies = [
|
|
134
134
|
"proc-macro2",
|
|
135
135
|
"quote",
|
|
@@ -142,6 +142,15 @@ version = "2.8.0"
|
|
|
142
142
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
143
143
|
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
|
144
144
|
|
|
145
|
+
[[package]]
|
|
146
|
+
name = "memmap2"
|
|
147
|
+
version = "0.9.10"
|
|
148
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
149
|
+
checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
|
|
150
|
+
dependencies = [
|
|
151
|
+
"libc",
|
|
152
|
+
]
|
|
153
|
+
|
|
145
154
|
[[package]]
|
|
146
155
|
name = "minimal-lexical"
|
|
147
156
|
version = "0.2.1"
|
|
@@ -202,9 +211,9 @@ dependencies = [
|
|
|
202
211
|
|
|
203
212
|
[[package]]
|
|
204
213
|
name = "rb-sys-env"
|
|
205
|
-
version = "0.
|
|
214
|
+
version = "0.2.3"
|
|
206
215
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
207
|
-
checksum = "
|
|
216
|
+
checksum = "cca7ad6a7e21e72151d56fe2495a259b5670e204c3adac41ee7ef676ea08117a"
|
|
208
217
|
|
|
209
218
|
[[package]]
|
|
210
219
|
name = "regex"
|
|
@@ -237,10 +246,11 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
|
|
|
237
246
|
|
|
238
247
|
[[package]]
|
|
239
248
|
name = "ruby_fst"
|
|
240
|
-
version = "0.
|
|
249
|
+
version = "0.2.0"
|
|
241
250
|
dependencies = [
|
|
242
251
|
"fst",
|
|
243
252
|
"magnus",
|
|
253
|
+
"memmap2",
|
|
244
254
|
"rb-sys",
|
|
245
255
|
]
|
|
246
256
|
|
data/Gemfile
CHANGED
data/README.md
CHANGED
|
@@ -1,23 +1,35 @@
|
|
|
1
1
|
# ruby-fst
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[CI](https://github.com/dsablic/ruby-fst/actions/workflows/ci.yml) [Gem Version](https://badge.fury.io/rb/ruby-fst) [License](/LICENSE.txt) [Ruby](https://www.ruby-lang.org/)
|
|
4
|
+
|
|
5
|
+
Ruby bindings for the [fst](https://github.com/BurntSushi/fst) crate by Andrew Gallant. Provides finite state transducer backed ordered maps and sets with fast lookup, prefix and range scans, floor/ceiling lookups, and Levenshtein fuzzy search.
|
|
4
6
|
|
|
5
7
|
## Requirements
|
|
6
8
|
|
|
7
|
-
- Ruby >= 3.
|
|
8
|
-
- Rust toolchain
|
|
9
|
+
- Ruby >= 3.2
|
|
10
|
+
- For source installs: a Rust toolchain. Precompiled gems are published for Linux (x86_64, aarch64, glibc and musl), macOS (x86_64, arm64), and Windows (x64) — these install with no Rust toolchain required.
|
|
9
11
|
|
|
10
12
|
## Installation
|
|
11
13
|
|
|
14
|
+
```
|
|
15
|
+
gem install ruby-fst
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Or add to your Gemfile:
|
|
19
|
+
|
|
12
20
|
```ruby
|
|
13
21
|
gem 'ruby-fst'
|
|
14
22
|
```
|
|
15
23
|
|
|
16
|
-
##
|
|
24
|
+
## Keys are byte strings
|
|
25
|
+
|
|
26
|
+
All FST keys are arbitrary byte strings (`Encoding::BINARY`). When a key is returned from the gem (e.g. via `each`, `get_le`, `range`) it always carries the `Encoding::BINARY` encoding — be careful when interpolating into UTF-8 strings or `puts`-ing keys that contain non-ASCII bytes.
|
|
27
|
+
|
|
28
|
+
Keys must be inserted into a builder in **strictly ascending lexicographic byte order**, with no duplicates. Out-of-order or duplicate inserts raise.
|
|
17
29
|
|
|
18
|
-
|
|
30
|
+
## Map
|
|
19
31
|
|
|
20
|
-
Ordered map from byte string keys to unsigned 64-bit integer values.
|
|
32
|
+
Ordered map from byte string keys to unsigned 64-bit integer values.
|
|
21
33
|
|
|
22
34
|
```ruby
|
|
23
35
|
require 'ruby_fst'
|
|
@@ -28,15 +40,15 @@ builder.insert('baz', 3)
|
|
|
28
40
|
builder.insert('foo', 1)
|
|
29
41
|
map = RubyFst::Map.new(builder.finish)
|
|
30
42
|
|
|
31
|
-
map['foo']
|
|
32
|
-
map.get('missing')
|
|
33
|
-
map.contains?('bar')
|
|
34
|
-
map.length
|
|
43
|
+
map['foo'] # => 1
|
|
44
|
+
map.get('missing') # => nil
|
|
45
|
+
map.contains?('bar') # => true
|
|
46
|
+
map.length # => 3
|
|
35
47
|
|
|
36
|
-
map.each { |key, value| puts
|
|
48
|
+
map.each { |key, value| puts("#{key}: #{value}") }
|
|
37
49
|
```
|
|
38
50
|
|
|
39
|
-
|
|
51
|
+
## Set
|
|
40
52
|
|
|
41
53
|
Ordered set of byte string keys.
|
|
42
54
|
|
|
@@ -47,13 +59,30 @@ builder.insert('baz')
|
|
|
47
59
|
builder.insert('foo')
|
|
48
60
|
set = RubyFst::Set.new(builder.finish)
|
|
49
61
|
|
|
50
|
-
set.contains?('foo')
|
|
51
|
-
set.length
|
|
62
|
+
set.contains?('foo') # => true
|
|
63
|
+
set.length # => 3
|
|
52
64
|
|
|
53
|
-
set.each { |key| puts
|
|
65
|
+
set.each { |key| puts(key) }
|
|
54
66
|
```
|
|
55
67
|
|
|
56
|
-
|
|
68
|
+
## Range queries
|
|
69
|
+
|
|
70
|
+
`Map#range` and `Set#range` stream every entry whose key falls in `[ge, le]`. Either bound may be omitted.
|
|
71
|
+
|
|
72
|
+
```ruby
|
|
73
|
+
map.range(ge: 'b', le: 'f') { |key, value| puts("#{key} -> #{value}") }
|
|
74
|
+
map.range(ge: 'b').to_a # Enumerator without a block
|
|
75
|
+
set.range(le: 'baz') { |key| puts(key) }
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
`Map#starts_with` and `Set#starts_with` stream every entry whose key begins with the given prefix.
|
|
79
|
+
|
|
80
|
+
```ruby
|
|
81
|
+
map.starts_with('foo') { |key, value| puts("#{key} -> #{value}") }
|
|
82
|
+
set.starts_with('app').to_a
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Floor and ceiling lookups
|
|
57
86
|
|
|
58
87
|
`get_le` returns the greatest key less than or equal to the query. `get_ge` returns the smallest key greater than or equal to the query. Both return `[key, value]` or `nil`.
|
|
59
88
|
|
|
@@ -64,61 +93,80 @@ builder.insert('foo', 2)
|
|
|
64
93
|
builder.insert('qux', 3)
|
|
65
94
|
map = RubyFst::Map.new(builder.finish)
|
|
66
95
|
|
|
67
|
-
map.get_le('dog')
|
|
68
|
-
map.get_ge('dog')
|
|
69
|
-
map.get_le('aaa')
|
|
96
|
+
map.get_le('dog') # => ["bar", 1]
|
|
97
|
+
map.get_ge('dog') # => ["foo", 2]
|
|
98
|
+
map.get_le('aaa') # => nil
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
For floor/ceiling lookups where you only need the value, `get_le_value` and `get_ge_value` skip key reconstruction and return only the `Integer` (or `nil`):
|
|
102
|
+
|
|
103
|
+
```ruby
|
|
104
|
+
map.get_le_value('dog') # => 1
|
|
105
|
+
map.get_ge_value('dog') # => 2
|
|
106
|
+
map.get_le_value('aaa') # => nil
|
|
70
107
|
```
|
|
71
108
|
|
|
72
|
-
This is useful for IP range lookups. Encode range starts as 4-byte big-endian keys and use `
|
|
109
|
+
This is useful for IP range lookups. Encode range starts as 4-byte big-endian keys and use `get_le_value` to find which range an IP falls into:
|
|
73
110
|
|
|
74
111
|
```ruby
|
|
112
|
+
require 'ipaddr'
|
|
113
|
+
|
|
75
114
|
builder = RubyFst::MapBuilder.new
|
|
76
|
-
builder.insert([167_772_160].pack('N'), 1)
|
|
115
|
+
builder.insert([167_772_160].pack('N'), 1) # 10.0.0.0
|
|
77
116
|
builder.insert([3_232_235_520].pack('N'), 2) # 192.168.0.0
|
|
78
117
|
map = RubyFst::Map.new(builder.finish)
|
|
79
118
|
|
|
80
119
|
ip = IPAddr.new('10.0.0.100').to_i
|
|
81
|
-
|
|
120
|
+
label_id = map.get_le_value([ip].pack('N'))
|
|
82
121
|
```
|
|
83
122
|
|
|
84
|
-
|
|
123
|
+
## Levenshtein search
|
|
85
124
|
|
|
86
125
|
Find all keys within a given edit distance. The search runs as an automaton intersection with the FST, visiting only reachable states.
|
|
87
126
|
|
|
127
|
+
The query must be valid UTF-8 (the Levenshtein automaton is defined over Unicode codepoints), but the keys themselves can be any bytes — the automaton matches against the UTF-8 interpretation of the keys.
|
|
128
|
+
|
|
88
129
|
```ruby
|
|
89
130
|
builder = RubyFst::MapBuilder.new
|
|
90
131
|
%w(bar baz cat foo fun).each_with_index { |w, i| builder.insert(w, i) }
|
|
91
132
|
map = RubyFst::Map.new(builder.finish)
|
|
92
133
|
|
|
93
|
-
map.search_levenshtein('far', 1) { |key, value| puts
|
|
134
|
+
map.search_levenshtein('far', 1) { |key, value| puts(key) }
|
|
94
135
|
# => bar
|
|
95
136
|
```
|
|
96
137
|
|
|
97
138
|
Works on sets too:
|
|
98
139
|
|
|
99
140
|
```ruby
|
|
100
|
-
set.search_levenshtein('university', 2) { |key| puts
|
|
141
|
+
set.search_levenshtein('university', 2) { |key| puts(key) }
|
|
101
142
|
```
|
|
102
143
|
|
|
103
|
-
|
|
144
|
+
## Serialization
|
|
104
145
|
|
|
105
146
|
```ruby
|
|
106
|
-
#
|
|
107
|
-
bytes = map.to_bytes
|
|
147
|
+
# Bytes
|
|
148
|
+
bytes = map.to_bytes # binary string
|
|
108
149
|
map = RubyFst::Map.new(bytes)
|
|
109
150
|
|
|
110
|
-
#
|
|
151
|
+
# File: save to disk, then read the entire file back into memory (good for small/medium FSTs)
|
|
111
152
|
map.save('/path/to/file.fst')
|
|
112
153
|
map = RubyFst::Map.from_path('/path/to/file.fst')
|
|
154
|
+
|
|
155
|
+
# File: memory-map the file (good for large FSTs; lookups page in only what they touch)
|
|
156
|
+
map = RubyFst::Map.from_path_mmap('/path/to/file.fst')
|
|
113
157
|
```
|
|
114
158
|
|
|
159
|
+
When using `from_path_mmap`, the file must remain unchanged on disk for the lifetime of the resulting `Map` or `Set` — modifying or truncating it causes undefined behavior.
|
|
160
|
+
|
|
115
161
|
## Development
|
|
116
162
|
|
|
117
163
|
```
|
|
118
164
|
bundle install
|
|
119
|
-
bundle exec rake compile test
|
|
165
|
+
bundle exec rake # compile + test + rubocop
|
|
120
166
|
```
|
|
121
167
|
|
|
168
|
+
Individual tasks: `rake compile`, `rake test`, `rake rubocop`. Bump the version (and CHANGELOG) with `rake bump[patch]` / `rake bump[minor]` / `rake bump[major]`. Tagging `vX.Y.Z` on GitHub triggers cross-compile and publish to RubyGems.
|
|
169
|
+
|
|
122
170
|
## License
|
|
123
171
|
|
|
124
172
|
MIT. See [LICENSE.txt](LICENSE.txt) for details.
|
data/Rakefile
CHANGED
|
@@ -2,11 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
require 'rake/testtask'
|
|
4
4
|
require 'rb_sys/extensiontask'
|
|
5
|
+
require 'rubocop/rake_task'
|
|
5
6
|
|
|
6
7
|
GEMSPEC = Gem::Specification.load('ruby_fst.gemspec')
|
|
7
8
|
|
|
8
9
|
RbSys::ExtensionTask.new('ruby_fst', GEMSPEC) do |ext|
|
|
9
10
|
ext.lib_dir = 'lib/ruby_fst'
|
|
11
|
+
ext.cross_compile = true
|
|
12
|
+
ext.cross_platform = %w(
|
|
13
|
+
aarch64-linux
|
|
14
|
+
aarch64-linux-musl
|
|
15
|
+
arm64-darwin
|
|
16
|
+
x64-mingw-ucrt
|
|
17
|
+
x86_64-darwin
|
|
18
|
+
x86_64-linux
|
|
19
|
+
x86_64-linux-musl
|
|
20
|
+
)
|
|
10
21
|
end
|
|
11
22
|
|
|
12
23
|
Rake::TestTask.new do |t|
|
|
@@ -14,31 +25,55 @@ Rake::TestTask.new do |t|
|
|
|
14
25
|
t.test_files = FileList['test/**/*_test.rb']
|
|
15
26
|
end
|
|
16
27
|
|
|
17
|
-
|
|
28
|
+
RuboCop::RakeTask.new
|
|
29
|
+
|
|
30
|
+
task default: %i(compile test rubocop)
|
|
18
31
|
|
|
19
32
|
desc 'Bump version (rake bump[patch], rake bump[minor], rake bump[major])'
|
|
20
33
|
task :bump, [:level] do |_, args|
|
|
21
34
|
level = args[:level] || 'patch'
|
|
22
35
|
version_file = File.join(__dir__, 'lib', 'ruby_fst', 'version.rb')
|
|
23
36
|
cargo_file = File.join(__dir__, 'ext', 'ruby_fst', 'Cargo.toml')
|
|
37
|
+
lock_file = File.join(__dir__, 'Cargo.lock')
|
|
24
38
|
|
|
25
39
|
content = File.read(version_file)
|
|
26
40
|
current = content[/VERSION = '(.+)'/, 1]
|
|
27
41
|
major, minor, patch = current.split('.').map(&:to_i)
|
|
28
42
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
new_version = "#{major}.#{minor}.#{patch}"
|
|
43
|
+
new_version =
|
|
44
|
+
case level
|
|
45
|
+
when 'major' then "#{major + 1}.0.0"
|
|
46
|
+
when 'minor' then "#{major}.#{minor + 1}.0"
|
|
47
|
+
when 'patch' then "#{major}.#{minor}.#{patch + 1}"
|
|
48
|
+
else abort("Unknown level: #{level}. Use major, minor, or patch.")
|
|
49
|
+
end
|
|
37
50
|
|
|
38
51
|
File.write(version_file, content.sub(/VERSION = '.+'/, "VERSION = '#{new_version}'"))
|
|
39
52
|
|
|
40
53
|
cargo = File.read(cargo_file)
|
|
41
54
|
File.write(cargo_file, cargo.sub(/^version = ".+"/, "version = \"#{new_version}\""))
|
|
42
55
|
|
|
56
|
+
if File.exist?(lock_file)
|
|
57
|
+
lock = File.read(lock_file)
|
|
58
|
+
File.write(
|
|
59
|
+
lock_file,
|
|
60
|
+
lock.sub(
|
|
61
|
+
/(\[\[package\]\]\nname = "ruby_fst"\nversion = ").+(")/,
|
|
62
|
+
"\\1#{new_version}\\2"
|
|
63
|
+
)
|
|
64
|
+
)
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
changelog_file = File.join(__dir__, 'CHANGELOG.md')
|
|
68
|
+
if File.exist?(changelog_file)
|
|
69
|
+
changelog = File.read(changelog_file)
|
|
70
|
+
today = Time.now.utc.strftime('%Y-%m-%d')
|
|
71
|
+
promoted = changelog.sub(
|
|
72
|
+
'## [Unreleased]',
|
|
73
|
+
"## [Unreleased]\n\n## [#{new_version}] — #{today}"
|
|
74
|
+
)
|
|
75
|
+
File.write(changelog_file, promoted) if promoted != changelog
|
|
76
|
+
end
|
|
77
|
+
|
|
43
78
|
puts("#{current} -> #{new_version}")
|
|
44
79
|
end
|
data/ext/ruby_fst/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "ruby_fst"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
publish = false
|
|
6
6
|
|
|
@@ -8,6 +8,7 @@ publish = false
|
|
|
8
8
|
crate-type = ["cdylib"]
|
|
9
9
|
|
|
10
10
|
[dependencies]
|
|
11
|
-
magnus = { version = "0.
|
|
11
|
+
magnus = { version = "0.8", features = ["rb-sys"] }
|
|
12
12
|
rb-sys = { version = "0.9", features = ["stable-api-compiled-fallback"] }
|
|
13
13
|
fst = { version = "0.4", features = ["levenshtein"] }
|
|
14
|
+
memmap2 = "0.9"
|
data/ext/ruby_fst/src/lib.rs
CHANGED
|
@@ -1,39 +1,105 @@
|
|
|
1
1
|
use std::cell::RefCell;
|
|
2
|
-
use std::fs;
|
|
2
|
+
use std::fs::{self, File};
|
|
3
3
|
|
|
4
4
|
use fst::automaton::Levenshtein;
|
|
5
5
|
use fst::raw::{CompiledAddr, Fst, Node, Output};
|
|
6
6
|
use fst::{IntoStreamer, Streamer};
|
|
7
7
|
use magnus::prelude::*;
|
|
8
|
-
use magnus::{
|
|
8
|
+
use magnus::{function, method, Error, RArray, RString, Ruby, Value};
|
|
9
|
+
use memmap2::Mmap;
|
|
9
10
|
|
|
10
11
|
fn err(msg: impl std::fmt::Display) -> Error {
|
|
11
|
-
Error::new(
|
|
12
|
+
Error::new(ruby().exception_runtime_error(), msg.to_string())
|
|
12
13
|
}
|
|
13
14
|
|
|
15
|
+
// SAFETY: every method exposed to Ruby is invoked by the VM on the GVL-owning
|
|
16
|
+
// thread, so `Ruby::get_unchecked()` is sound — calling code must not invoke
|
|
17
|
+
// this from a non-Ruby thread.
|
|
14
18
|
fn ruby() -> Ruby {
|
|
15
19
|
unsafe { Ruby::get_unchecked() }
|
|
16
20
|
}
|
|
17
21
|
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Storage: backing buffer for an FST. Either an owned heap allocation or a
|
|
24
|
+
// memory map. Both implement AsRef<[u8]> so fst::Map / fst::Set can accept
|
|
25
|
+
// either without monomorphising the wrapper structs.
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
enum Storage {
|
|
29
|
+
Mem(Vec<u8>),
|
|
30
|
+
Mmap(Mmap),
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
impl AsRef<[u8]> for Storage {
|
|
34
|
+
fn as_ref(&self) -> &[u8] {
|
|
35
|
+
match self {
|
|
36
|
+
Storage::Mem(v) => v.as_ref(),
|
|
37
|
+
Storage::Mmap(m) => m.as_ref(),
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
fn read_storage(path: &str) -> Result<Storage, Error> {
|
|
43
|
+
let data = fs::read(path).map_err(err)?;
|
|
44
|
+
Ok(Storage::Mem(data))
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
fn mmap_storage(path: &str) -> Result<Storage, Error> {
|
|
48
|
+
let file = File::open(path).map_err(err)?;
|
|
49
|
+
// SAFETY: callers opt in to mmap and accept the contract that the file
|
|
50
|
+
// must not be modified or truncated while the FST is alive. We document
|
|
51
|
+
// this in the Ruby-level docs.
|
|
52
|
+
let mmap = unsafe { Mmap::map(&file) }.map_err(err)?;
|
|
53
|
+
Ok(Storage::Mmap(mmap))
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// SAFETY for `RString::as_slice()` callers below: between obtaining the slice
|
|
57
|
+
// and dropping it, no Ruby allocation, GC trigger, or string mutation occurs.
|
|
58
|
+
// We either copy into a Vec immediately or pass the slice into pure-Rust fst
|
|
59
|
+
// operations that never re-enter Ruby.
|
|
60
|
+
|
|
61
|
+
fn rstring_to_vec(s: RString) -> Vec<u8> {
|
|
62
|
+
unsafe { s.as_slice() }.to_vec()
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Returns the exclusive upper bound for a prefix scan: the smallest byte
|
|
66
|
+
// string strictly greater than `prefix` that does NOT have `prefix` as a
|
|
67
|
+
// prefix. None when no such bound exists (empty prefix, or all-0xFF prefix);
|
|
68
|
+
// the scan is then unbounded above.
|
|
69
|
+
fn prefix_upper_bound(prefix: &[u8]) -> Option<Vec<u8>> {
|
|
70
|
+
let mut upper = prefix.to_vec();
|
|
71
|
+
while let Some(byte) = upper.last_mut() {
|
|
72
|
+
if *byte < 0xFF {
|
|
73
|
+
*byte += 1;
|
|
74
|
+
return Some(upper);
|
|
75
|
+
}
|
|
76
|
+
upper.pop();
|
|
77
|
+
}
|
|
78
|
+
None
|
|
79
|
+
}
|
|
80
|
+
|
|
18
81
|
// ---------------------------------------------------------------------------
|
|
19
82
|
// Map
|
|
20
83
|
// ---------------------------------------------------------------------------
|
|
21
84
|
|
|
22
85
|
#[magnus::wrap(class = "RubyFst::Map", free_immediately, size)]
|
|
23
86
|
struct FstMap {
|
|
24
|
-
inner: fst::Map<
|
|
87
|
+
inner: fst::Map<Storage>,
|
|
25
88
|
}
|
|
26
89
|
|
|
27
90
|
impl FstMap {
|
|
28
91
|
fn new(bytes: RString) -> Result<Self, Error> {
|
|
29
|
-
let
|
|
30
|
-
let inner = fst::Map::new(data).map_err(err)?;
|
|
92
|
+
let inner = fst::Map::new(Storage::Mem(rstring_to_vec(bytes))).map_err(err)?;
|
|
31
93
|
Ok(Self { inner })
|
|
32
94
|
}
|
|
33
95
|
|
|
34
96
|
fn from_path(path: String) -> Result<Self, Error> {
|
|
35
|
-
let
|
|
36
|
-
|
|
97
|
+
let inner = fst::Map::new(read_storage(&path)?).map_err(err)?;
|
|
98
|
+
Ok(Self { inner })
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
fn from_path_mmap(path: String) -> Result<Self, Error> {
|
|
102
|
+
let inner = fst::Map::new(mmap_storage(&path)?).map_err(err)?;
|
|
37
103
|
Ok(Self { inner })
|
|
38
104
|
}
|
|
39
105
|
|
|
@@ -77,6 +143,11 @@ impl FstMap {
|
|
|
77
143
|
}
|
|
78
144
|
}
|
|
79
145
|
|
|
146
|
+
fn get_le_value(&self, key: RString) -> Option<u64> {
|
|
147
|
+
let key = unsafe { key.as_slice() };
|
|
148
|
+
floor_value(self.inner.as_fst(), key)
|
|
149
|
+
}
|
|
150
|
+
|
|
80
151
|
fn get_ge(&self, key: RString) -> Result<Option<RArray>, Error> {
|
|
81
152
|
let r = ruby();
|
|
82
153
|
let key = unsafe { key.as_slice() };
|
|
@@ -92,12 +163,53 @@ impl FstMap {
|
|
|
92
163
|
}
|
|
93
164
|
}
|
|
94
165
|
|
|
166
|
+
fn get_ge_value(&self, key: RString) -> Option<u64> {
|
|
167
|
+
let key = unsafe { key.as_slice() };
|
|
168
|
+
let mut stream = self.inner.range().ge(key).into_stream();
|
|
169
|
+
stream.next().map(|(_, v)| v)
|
|
170
|
+
}
|
|
171
|
+
|
|
95
172
|
fn each(&self) -> Result<(), Error> {
|
|
96
173
|
let r = ruby();
|
|
97
174
|
let mut stream = (&self.inner).into_stream();
|
|
98
175
|
while let Some((key, value)) = stream.next() {
|
|
99
176
|
let rb_key = r.str_from_slice(key);
|
|
100
|
-
let _: Value =
|
|
177
|
+
let _: Value = r.yield_values((rb_key, value))?;
|
|
178
|
+
}
|
|
179
|
+
Ok(())
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
fn range(&self, ge: Option<RString>, le: Option<RString>) -> Result<(), Error> {
|
|
183
|
+
let r = ruby();
|
|
184
|
+
let ge_bytes = ge.map(rstring_to_vec);
|
|
185
|
+
let le_bytes = le.map(rstring_to_vec);
|
|
186
|
+
let mut builder = self.inner.range();
|
|
187
|
+
if let Some(ref b) = ge_bytes {
|
|
188
|
+
builder = builder.ge(b);
|
|
189
|
+
}
|
|
190
|
+
if let Some(ref b) = le_bytes {
|
|
191
|
+
builder = builder.le(b);
|
|
192
|
+
}
|
|
193
|
+
let mut stream = builder.into_stream();
|
|
194
|
+
while let Some((key, value)) = stream.next() {
|
|
195
|
+
let rb_key = r.str_from_slice(key);
|
|
196
|
+
let _: Value = r.yield_values((rb_key, value))?;
|
|
197
|
+
}
|
|
198
|
+
Ok(())
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
fn starts_with(&self, prefix: RString) -> Result<(), Error> {
|
|
202
|
+
let r = ruby();
|
|
203
|
+
let prefix_bytes = rstring_to_vec(prefix);
|
|
204
|
+
let upper = prefix_upper_bound(&prefix_bytes);
|
|
205
|
+
let mut builder = self.inner.range().ge(&prefix_bytes);
|
|
206
|
+
if let Some(ref u) = upper {
|
|
207
|
+
builder = builder.lt(u);
|
|
208
|
+
}
|
|
209
|
+
let mut stream = builder.into_stream();
|
|
210
|
+
while let Some((key, value)) = stream.next() {
|
|
211
|
+
let rb_key = r.str_from_slice(key);
|
|
212
|
+
let _: Value = r.yield_values((rb_key, value))?;
|
|
101
213
|
}
|
|
102
214
|
Ok(())
|
|
103
215
|
}
|
|
@@ -108,7 +220,7 @@ impl FstMap {
|
|
|
108
220
|
let mut stream = self.inner.search(lev).into_stream();
|
|
109
221
|
while let Some((key, value)) = stream.next() {
|
|
110
222
|
let rb_key = r.str_from_slice(key);
|
|
111
|
-
let _: Value =
|
|
223
|
+
let _: Value = r.yield_values((rb_key, value))?;
|
|
112
224
|
}
|
|
113
225
|
Ok(())
|
|
114
226
|
}
|
|
@@ -131,7 +243,7 @@ impl FstMapBuilder {
|
|
|
131
243
|
}
|
|
132
244
|
|
|
133
245
|
fn insert(&self, key: RString, value: u64) -> Result<(), Error> {
|
|
134
|
-
let key =
|
|
246
|
+
let key = rstring_to_vec(key);
|
|
135
247
|
let mut guard = self.inner.borrow_mut();
|
|
136
248
|
let b = guard.as_mut().ok_or_else(|| err("builder already finished"))?;
|
|
137
249
|
b.insert(&key, value).map_err(err)
|
|
@@ -151,19 +263,22 @@ impl FstMapBuilder {
|
|
|
151
263
|
|
|
152
264
|
#[magnus::wrap(class = "RubyFst::Set", free_immediately, size)]
|
|
153
265
|
struct FstSet {
|
|
154
|
-
inner: fst::Set<
|
|
266
|
+
inner: fst::Set<Storage>,
|
|
155
267
|
}
|
|
156
268
|
|
|
157
269
|
impl FstSet {
|
|
158
270
|
fn new(bytes: RString) -> Result<Self, Error> {
|
|
159
|
-
let
|
|
160
|
-
let inner = fst::Set::new(data).map_err(err)?;
|
|
271
|
+
let inner = fst::Set::new(Storage::Mem(rstring_to_vec(bytes))).map_err(err)?;
|
|
161
272
|
Ok(Self { inner })
|
|
162
273
|
}
|
|
163
274
|
|
|
164
275
|
fn from_path(path: String) -> Result<Self, Error> {
|
|
165
|
-
let
|
|
166
|
-
|
|
276
|
+
let inner = fst::Set::new(read_storage(&path)?).map_err(err)?;
|
|
277
|
+
Ok(Self { inner })
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
fn from_path_mmap(path: String) -> Result<Self, Error> {
|
|
281
|
+
let inner = fst::Set::new(mmap_storage(&path)?).map_err(err)?;
|
|
167
282
|
Ok(Self { inner })
|
|
168
283
|
}
|
|
169
284
|
|
|
@@ -193,7 +308,42 @@ impl FstSet {
|
|
|
193
308
|
let mut stream = (&self.inner).into_stream();
|
|
194
309
|
while let Some(key) = stream.next() {
|
|
195
310
|
let rb_key = r.str_from_slice(key);
|
|
196
|
-
let _: Value =
|
|
311
|
+
let _: Value = r.yield_value(rb_key)?;
|
|
312
|
+
}
|
|
313
|
+
Ok(())
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
fn range(&self, ge: Option<RString>, le: Option<RString>) -> Result<(), Error> {
|
|
317
|
+
let r = ruby();
|
|
318
|
+
let ge_bytes = ge.map(rstring_to_vec);
|
|
319
|
+
let le_bytes = le.map(rstring_to_vec);
|
|
320
|
+
let mut builder = self.inner.range();
|
|
321
|
+
if let Some(ref b) = ge_bytes {
|
|
322
|
+
builder = builder.ge(b);
|
|
323
|
+
}
|
|
324
|
+
if let Some(ref b) = le_bytes {
|
|
325
|
+
builder = builder.le(b);
|
|
326
|
+
}
|
|
327
|
+
let mut stream = builder.into_stream();
|
|
328
|
+
while let Some(key) = stream.next() {
|
|
329
|
+
let rb_key = r.str_from_slice(key);
|
|
330
|
+
let _: Value = r.yield_value(rb_key)?;
|
|
331
|
+
}
|
|
332
|
+
Ok(())
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
fn starts_with(&self, prefix: RString) -> Result<(), Error> {
|
|
336
|
+
let r = ruby();
|
|
337
|
+
let prefix_bytes = rstring_to_vec(prefix);
|
|
338
|
+
let upper = prefix_upper_bound(&prefix_bytes);
|
|
339
|
+
let mut builder = self.inner.range().ge(&prefix_bytes);
|
|
340
|
+
if let Some(ref u) = upper {
|
|
341
|
+
builder = builder.lt(u);
|
|
342
|
+
}
|
|
343
|
+
let mut stream = builder.into_stream();
|
|
344
|
+
while let Some(key) = stream.next() {
|
|
345
|
+
let rb_key = r.str_from_slice(key);
|
|
346
|
+
let _: Value = r.yield_value(rb_key)?;
|
|
197
347
|
}
|
|
198
348
|
Ok(())
|
|
199
349
|
}
|
|
@@ -204,7 +354,7 @@ impl FstSet {
|
|
|
204
354
|
let mut stream = self.inner.search(lev).into_stream();
|
|
205
355
|
while let Some(key) = stream.next() {
|
|
206
356
|
let rb_key = r.str_from_slice(key);
|
|
207
|
-
let _: Value =
|
|
357
|
+
let _: Value = r.yield_value(rb_key)?;
|
|
208
358
|
}
|
|
209
359
|
Ok(())
|
|
210
360
|
}
|
|
@@ -227,7 +377,7 @@ impl FstSetBuilder {
|
|
|
227
377
|
}
|
|
228
378
|
|
|
229
379
|
fn insert(&self, key: RString) -> Result<(), Error> {
|
|
230
|
-
let key =
|
|
380
|
+
let key = rstring_to_vec(key);
|
|
231
381
|
let mut guard = self.inner.borrow_mut();
|
|
232
382
|
let b = guard.as_mut().ok_or_else(|| err("builder already finished"))?;
|
|
233
383
|
b.insert(&key).map_err(err)
|
|
@@ -242,7 +392,13 @@ impl FstSetBuilder {
|
|
|
242
392
|
}
|
|
243
393
|
|
|
244
394
|
// ---------------------------------------------------------------------------
|
|
245
|
-
// Floor lookup (get_le): greatest key <= query
|
|
395
|
+
// Floor lookup (get_le): greatest key <= query.
|
|
396
|
+
//
|
|
397
|
+
// The upstream fst crate exposes `range().ge(...)` natively but not a floor
|
|
398
|
+
// operation, so this walks the FST manually: descend matching the query while
|
|
399
|
+
// recording, at each step, the rightmost transition strictly less than the
|
|
400
|
+
// current query byte. If exact match fails we backtrack to the most recent
|
|
401
|
+
// such transition and follow the rightmost path to a leaf.
|
|
246
402
|
// ---------------------------------------------------------------------------
|
|
247
403
|
|
|
248
404
|
struct Frame {
|
|
@@ -281,7 +437,7 @@ fn rightmost_to_leaf<D: AsRef<[u8]>>(
|
|
|
281
437
|
let mut out = output;
|
|
282
438
|
let mut suffix = Vec::new();
|
|
283
439
|
|
|
284
|
-
while node.
|
|
440
|
+
while !node.is_empty() {
|
|
285
441
|
let last = node.len() - 1;
|
|
286
442
|
let t = node.transition(last);
|
|
287
443
|
suffix.push(t.inp);
|
|
@@ -292,6 +448,20 @@ fn rightmost_to_leaf<D: AsRef<[u8]>>(
|
|
|
292
448
|
(suffix, out.cat(node.final_output()).value())
|
|
293
449
|
}
|
|
294
450
|
|
|
451
|
+
/// Value-only descent used by the floor lookup: starting at the node at
/// `addr` with `output` accumulated so far, follows the last transition of
/// each node until `is_empty()` holds for the current node, then returns the
/// accumulated output combined with that node's final output as a `u64`.
/// No key bytes are collected along the way.
fn rightmost_value<D: AsRef<[u8]>>(fst: &Fst<D>, addr: CompiledAddr, output: Output) -> u64 {
|
|
452
|
+
let mut node = fst.node(addr);
|
|
453
|
+
let mut out = output;
|
|
454
|
+
|
|
455
|
+
while !node.is_empty() {
|
|
456
|
+
// The loop guard keeps `len() - 1` from underflowing.
// NOTE(review): presumably the last transition carries the greatest
// input byte (the section comment calls this the rightmost path) — confirm.
let last = node.len() - 1;
|
|
457
|
+
let t = node.transition(last);
|
|
458
|
+
out = out.cat(t.out);
|
|
459
|
+
node = fst.node(t.addr);
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
out.cat(node.final_output()).value()
|
|
463
|
+
}
|
|
464
|
+
|
|
295
465
|
fn floor_lookup<D: AsRef<[u8]>>(fst: &Fst<D>, key: &[u8]) -> Option<(Vec<u8>, u64)> {
|
|
296
466
|
let root = fst.root();
|
|
297
467
|
|
|
@@ -355,6 +525,65 @@ fn floor_lookup<D: AsRef<[u8]>>(fst: &Fst<D>, key: &[u8]) -> Option<(Vec<u8>, u6
|
|
|
355
525
|
None
|
|
356
526
|
}
|
|
357
527
|
|
|
528
|
+
/// Value-only floor lookup: walks the FST along the bytes of `key` and
/// returns the value for the exact key when it is present; otherwise it
/// backtracks through the visited nodes and returns the value reached via
/// the nearest recorded lesser transition or the nearest final ancestor.
/// Returns `None` when neither exists. Unlike `floor_lookup`, whose return
/// type also carries the key bytes, only the `u64` value is produced.
fn floor_value<D: AsRef<[u8]>>(fst: &Fst<D>, key: &[u8]) -> Option<u64> {
|
|
529
|
+
let root = fst.root();
|
|
530
|
+
|
|
531
|
+
// Empty query: only the root's own final output can match.
if key.is_empty() {
|
|
532
|
+
return if root.is_final() {
|
|
533
|
+
Some(root.final_output().value())
|
|
534
|
+
} else {
|
|
535
|
+
None
|
|
536
|
+
};
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
let mut node = root;
|
|
540
|
+
let mut output = Output::zero();
|
|
541
|
+
let mut stack: Vec<Frame> = Vec::with_capacity(key.len());
|
|
542
|
+
// Number of query bytes successfully consumed so far.
let mut matched: usize = 0;
|
|
543
|
+
|
|
544
|
+
for &byte in key.iter() {
|
|
545
|
+
// NOTE(review): find_max_lesser is defined outside this view; presumably
// it returns the index of the last transition whose input is strictly
// less than `byte`, if any — confirm.
let lesser = find_max_lesser(&node, byte);
|
|
546
|
+
|
|
547
|
+
// Snapshot this node before consuming `byte` so the backtracking loop
// below can resume from it.
stack.push(Frame {
|
|
548
|
+
node_addr: node.addr(),
|
|
549
|
+
output,
|
|
550
|
+
prefix_len: matched,
|
|
551
|
+
max_lesser_idx: lesser,
|
|
552
|
+
is_final: node.is_final(),
|
|
553
|
+
final_value: output.cat(node.final_output()).value(),
|
|
554
|
+
});
|
|
555
|
+
|
|
556
|
+
match node.find_input(byte) {
|
|
557
|
+
Some(idx) => {
|
|
558
|
+
let t = node.transition(idx);
|
|
559
|
+
output = output.cat(t.out);
|
|
560
|
+
node = fst.node(t.addr);
|
|
561
|
+
matched += 1;
|
|
562
|
+
}
|
|
563
|
+
// No transition for this byte: stop descending.
None => break,
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
// Every byte was consumed and the walk ended on a final node: exact match.
if matched == key.len() && node.is_final() {
|
|
568
|
+
return Some(output.cat(node.final_output()).value());
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
// Otherwise unwind from the deepest visited node back toward the root.
while let Some(frame) = stack.pop() {
|
|
572
|
+
// A lesser transition was recorded here: take it, then keep following
// the last transition of every node below it.
if let Some(j) = frame.max_lesser_idx {
|
|
573
|
+
let frame_node = fst.node(frame.node_addr);
|
|
574
|
+
let t = frame_node.transition(j);
|
|
575
|
+
let branch_output = frame.output.cat(t.out);
|
|
576
|
+
return Some(rightmost_value(fst, t.addr, branch_output));
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
// No lesser transition, but the node itself is final: return the value
// captured for it when the frame was pushed.
if frame.is_final {
|
|
580
|
+
return Some(frame.final_value);
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
None
|
|
585
|
+
}
|
|
586
|
+
|
|
358
587
|
// ---------------------------------------------------------------------------
|
|
359
588
|
// Init
|
|
360
589
|
// ---------------------------------------------------------------------------
|
|
@@ -366,6 +595,7 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
366
595
|
let map_class = module.define_class("Map", ruby.class_object())?;
|
|
367
596
|
map_class.define_singleton_method("new", function!(FstMap::new, 1))?;
|
|
368
597
|
map_class.define_singleton_method("from_path", function!(FstMap::from_path, 1))?;
|
|
598
|
+
map_class.define_singleton_method("from_path_mmap", function!(FstMap::from_path_mmap, 1))?;
|
|
369
599
|
map_class.define_method("get", method!(FstMap::get, 1))?;
|
|
370
600
|
map_class.define_method("[]", method!(FstMap::get, 1))?;
|
|
371
601
|
map_class.define_method("contains?", method!(FstMap::contains, 1))?;
|
|
@@ -375,8 +605,12 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
375
605
|
map_class.define_method("to_bytes", method!(FstMap::to_bytes, 0))?;
|
|
376
606
|
map_class.define_method("save", method!(FstMap::save, 1))?;
|
|
377
607
|
map_class.define_method("get_le", method!(FstMap::get_le, 1))?;
|
|
608
|
+
map_class.define_method("get_le_value", method!(FstMap::get_le_value, 1))?;
|
|
378
609
|
map_class.define_method("get_ge", method!(FstMap::get_ge, 1))?;
|
|
610
|
+
map_class.define_method("get_ge_value", method!(FstMap::get_ge_value, 1))?;
|
|
379
611
|
map_class.define_method("each", method!(FstMap::each, 0))?;
|
|
612
|
+
map_class.define_method("range", method!(FstMap::range, 2))?;
|
|
613
|
+
map_class.define_method("starts_with", method!(FstMap::starts_with, 1))?;
|
|
380
614
|
map_class.define_method("search_levenshtein", method!(FstMap::search_levenshtein, 2))?;
|
|
381
615
|
|
|
382
616
|
let map_builder = module.define_class("MapBuilder", ruby.class_object())?;
|
|
@@ -387,6 +621,7 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
387
621
|
let set_class = module.define_class("Set", ruby.class_object())?;
|
|
388
622
|
set_class.define_singleton_method("new", function!(FstSet::new, 1))?;
|
|
389
623
|
set_class.define_singleton_method("from_path", function!(FstSet::from_path, 1))?;
|
|
624
|
+
set_class.define_singleton_method("from_path_mmap", function!(FstSet::from_path_mmap, 1))?;
|
|
390
625
|
set_class.define_method("contains?", method!(FstSet::contains, 1))?;
|
|
391
626
|
set_class.define_method("length", method!(FstSet::len, 0))?;
|
|
392
627
|
set_class.define_method("size", method!(FstSet::len, 0))?;
|
|
@@ -394,6 +629,8 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
394
629
|
set_class.define_method("to_bytes", method!(FstSet::to_bytes, 0))?;
|
|
395
630
|
set_class.define_method("save", method!(FstSet::save, 1))?;
|
|
396
631
|
set_class.define_method("each", method!(FstSet::each, 0))?;
|
|
632
|
+
set_class.define_method("range", method!(FstSet::range, 2))?;
|
|
633
|
+
set_class.define_method("starts_with", method!(FstSet::starts_with, 1))?;
|
|
397
634
|
set_class.define_method("search_levenshtein", method!(FstSet::search_levenshtein, 2))?;
|
|
398
635
|
|
|
399
636
|
let set_builder = module.define_class("SetBuilder", ruby.class_object())?;
|
data/lib/ruby_fst/version.rb
CHANGED
data/lib/ruby_fst.rb
CHANGED
|
@@ -4,11 +4,37 @@ require_relative 'ruby_fst/version'
|
|
|
4
4
|
require_relative 'ruby_fst/ruby_fst'
|
|
5
5
|
|
|
6
6
|
# Ruby-side additions to the classes defined by the native extension.
module RubyFst
|
|
7
|
+
# Keyword-argument and Enumerator wrappers around the native `range` and
# `starts_with` methods. It is prepended to Map and Set below, after each
# class has aliased the native methods to `_range` / `_starts_with`.
module RangeQuery
|
|
8
|
+
# Yields the entries selected by the optional `ge:` / `le:` bounds by
# delegating to `_range`. Without a block, returns an Enumerator.
def range(ge: nil, le: nil, &block)
|
|
9
|
+
return enum_for(:range, ge:, le:) unless block
|
|
10
|
+
|
|
11
|
+
# The native method takes the two bounds positionally.
_range(ge, le, &block)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# Delegates to `_starts_with` with `prefix` and the given block.
# Without a block, returns an Enumerator.
def starts_with(prefix, &block)
|
|
15
|
+
return enum_for(:starts_with, prefix) unless block
|
|
16
|
+
|
|
17
|
+
_starts_with(prefix, &block)
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
7
21
|
class Map
|
|
8
22
|
include Enumerable
|
|
23
|
+
|
|
24
|
+
# Keep the native implementations reachable under private names before
# RangeQuery takes over the public ones.
alias _range range
|
|
25
|
+
alias _starts_with starts_with
|
|
26
|
+
private :_range, :_starts_with
|
|
27
|
+
|
|
28
|
+
# Prepending places RangeQuery's methods ahead of the class's own.
prepend RangeQuery
|
|
9
29
|
end
|
|
10
30
|
|
|
11
31
|
class Set
|
|
12
32
|
include Enumerable
|
|
33
|
+
|
|
34
|
+
# Same alias-then-prepend arrangement as Map.
alias _range range
|
|
35
|
+
alias _starts_with starts_with
|
|
36
|
+
private :_range, :_starts_with
|
|
37
|
+
|
|
38
|
+
prepend RangeQuery
|
|
13
39
|
end
|
|
14
40
|
end
|
data/ruby_fst.gemspec
CHANGED
|
@@ -11,11 +11,11 @@ Gem::Specification.new do |spec|
|
|
|
11
11
|
spec.description = 'Finite state transducer backed ordered sets and maps via the Rust fst crate by BurntSushi'
|
|
12
12
|
spec.homepage = 'https://github.com/dsablic/ruby-fst'
|
|
13
13
|
spec.license = 'MIT'
|
|
14
|
-
spec.required_ruby_version = '>= 3.
|
|
14
|
+
spec.required_ruby_version = '>= 3.2'
|
|
15
15
|
|
|
16
16
|
spec.metadata['rubygems_mfa_required'] = 'true'
|
|
17
17
|
spec.metadata['source_code_uri'] = 'https://github.com/dsablic/ruby-fst'
|
|
18
|
-
spec.metadata['changelog_uri'] = 'https://github.com/dsablic/ruby-fst/
|
|
18
|
+
spec.metadata['changelog_uri'] = 'https://github.com/dsablic/ruby-fst/blob/main/CHANGELOG.md'
|
|
19
19
|
|
|
20
20
|
spec.files = Dir.chdir(__dir__) do
|
|
21
21
|
`git ls-files -z`.split("\x0").reject { |f| f.start_with?('test/', '.git') }
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby-fst
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Denis Sablic
|
|
@@ -32,6 +32,8 @@ extensions:
|
|
|
32
32
|
- ext/ruby_fst/extconf.rb
|
|
33
33
|
extra_rdoc_files: []
|
|
34
34
|
files:
|
|
35
|
+
- ".rubocop.yml"
|
|
36
|
+
- CHANGELOG.md
|
|
35
37
|
- Cargo.lock
|
|
36
38
|
- Cargo.toml
|
|
37
39
|
- Gemfile
|
|
@@ -50,7 +52,7 @@ licenses:
|
|
|
50
52
|
metadata:
|
|
51
53
|
rubygems_mfa_required: 'true'
|
|
52
54
|
source_code_uri: https://github.com/dsablic/ruby-fst
|
|
53
|
-
changelog_uri: https://github.com/dsablic/ruby-fst/
|
|
55
|
+
changelog_uri: https://github.com/dsablic/ruby-fst/blob/main/CHANGELOG.md
|
|
54
56
|
rdoc_options: []
|
|
55
57
|
require_paths:
|
|
56
58
|
- lib
|
|
@@ -58,7 +60,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
58
60
|
requirements:
|
|
59
61
|
- - ">="
|
|
60
62
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: '3.
|
|
63
|
+
version: '3.2'
|
|
62
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
65
|
requirements:
|
|
64
66
|
- - ">="
|