fast_fuzzy_matcher 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +66 -0
- data/Rakefile +8 -0
- data/ext/.idea/.gitignore +8 -0
- data/ext/.idea/ext.iml +9 -0
- data/ext/.idea/modules.xml +8 -0
- data/ext/.idea/vcs.xml +6 -0
- data/ext/fuzzy.go +416 -0
- data/ext/fuzzy.h +87 -0
- data/ext/fuzzy.so +0 -0
- data/ext/go.mod +5 -0
- data/ext/go.sum +2 -0
- data/fast_fuzzy_matcher.gemspec +41 -0
- data/lib/fuzzy_matcher/version.rb +5 -0
- data/lib/fuzzy_matcher.rb +58 -0
- data/spec/fuzzy_matcher_spec.rb +27 -0
- data/spec/spec_helper.rb +15 -0
- metadata +84 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e10b6a2b5f47c25d479a1a3623db229ecbc24f79176c3d6ae879ce6767e931b0
|
4
|
+
data.tar.gz: c4c910d1e4a462fab8819f2d406d2affe1c872d58f2943c7be196d25c439a7b6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3cef0e3e2a233385a0daf2c882002be8af28e38df4c58c500bbee29fd3b319d904b9ee918f6fbeb44c3557602562b7b0f3d1f53c3b03c1c197f2824f6caf81fd
|
7
|
+
data.tar.gz: 78c6671ffaad4807137977cc90100be0214e241b0895ab71955baf96500db286b235b62e121c9c13f00134a86f3138b92e652af597a37ed9bb0dfca559215069
|
data/.rspec
ADDED
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2023 wowinter13
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# FuzzyMatcher
|
2
|
+
|
3
|
+
This library is a work in progress.
|
4
|
+
|
5
|
+
The fastest Fuzzy Matcher in the wild west. FFI-based.
|
6
|
+
|
7
|
+
Find a needle in a haystack based on string similarity and regular expression rules.
|
8
|
+
|
9
|
+
|
10
|
+
### Basic usage
|
11
|
+
|
12
|
+
Just pass an array of strings to the matcher and it will return the best match(es) for the given needle.
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
require 'fast_fuzzy_matcher'
|
16
|
+
|
17
|
+
FuzzyMatcher.find("whl", ["cartwheel", "foobar", "wheel", "baz"])
|
18
|
+
=> ["cartwheel", "wheel"]
|
19
|
+
|
20
|
+
```
|
21
|
+
|
22
|
+
### Advanced usage
|
23
|
+
|
24
|
+
Better documentation is coming soon. For now, please refer to the specs.
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
# Benchmarks
|
29
|
+
|
30
|
+
To be done.
|
31
|
+
|
32
|
+
Approximately 10-60x faster than the fastest Ruby implementation. The difference is more pronounced for longer strings and larger dictionaries.
|
33
|
+
|
34
|
+
|
35
|
+
## Documentation
|
36
|
+
|
37
|
+
Detailed documentation is available at [rubydoc](https://rubydoc.info/gems/fast_fuzzy_matcher).
|
38
|
+
|
39
|
+
## Installation
|
40
|
+
|
41
|
+
fast_fuzzy_matcher is available as a gem. To install it, run:
|
42
|
+
|
43
|
+
gem install fast_fuzzy_matcher
|
44
|
+
|
45
|
+
If you're using Bundler, add the gem to your Gemfile:
|
46
|
+
|
47
|
+
gem 'fast_fuzzy_matcher'
|
48
|
+
|
49
|
+
Run `bundle install`.
|
50
|
+
|
51
|
+
## Running tests
|
52
|
+
|
53
|
+
bundle exec rspec spec/
|
54
|
+
|
55
|
+
|
56
|
+
## Contributing
|
57
|
+
|
58
|
+
1. Fork it ( https://github.com/wowinter13/fast_fuzzy_matcher/fork )
|
59
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
60
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
61
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
62
|
+
5. Create a new Pull Request
|
63
|
+
|
64
|
+
## License
|
65
|
+
|
66
|
+
MIT License. See LICENSE.txt for details.
|
data/Rakefile
ADDED
data/ext/.idea/ext.iml
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<module type="WEB_MODULE" version="4">
|
3
|
+
<component name="Go" enabled="true" />
|
4
|
+
<component name="NewModuleRootManager">
|
5
|
+
<content url="file://$MODULE_DIR$" />
|
6
|
+
<orderEntry type="inheritedJdk" />
|
7
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
8
|
+
</component>
|
9
|
+
</module>
|
data/ext/.idea/vcs.xml
ADDED
data/ext/fuzzy.go
ADDED
@@ -0,0 +1,416 @@
|
|
1
|
+
// Fuzzy searching allows for flexibly matching a string with partial input,
|
2
|
+
// useful for filtering data very quickly based on lightweight user input.
|
3
|
+
package main
|
4
|
+
|
5
|
+
import (
|
6
|
+
"unicode"
|
7
|
+
"unicode/utf8"
|
8
|
+
"unsafe"
|
9
|
+
|
10
|
+
"golang.org/x/text/runes"
|
11
|
+
"golang.org/x/text/transform"
|
12
|
+
"golang.org/x/text/unicode/norm"
|
13
|
+
)
|
14
|
+
|
15
|
+
/*
|
16
|
+
#include <stdlib.h> // for C.free
|
17
|
+
*/
|
18
|
+
import "C"
|
19
|
+
|
20
|
+
func noopTransformer() transform.Transformer {
|
21
|
+
return nopTransformer{}
|
22
|
+
}
|
23
|
+
|
24
|
+
func foldTransformer() transform.Transformer {
|
25
|
+
return unicodeFoldTransformer{}
|
26
|
+
}
|
27
|
+
|
28
|
+
func normalizeTransformer() transform.Transformer {
|
29
|
+
return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
|
30
|
+
}
|
31
|
+
|
32
|
+
func normalizedFoldTransformer() transform.Transformer {
|
33
|
+
return transform.Chain(normalizeTransformer(), foldTransformer())
|
34
|
+
}
|
35
|
+
|
36
|
+
// Match returns true if source matches target using a fuzzy-searching
|
37
|
+
// algorithm. Note that it doesn't implement Levenshtein distance (see
|
38
|
+
// RankMatch instead), but rather a simplified version where there's no
|
39
|
+
// approximation. The method will return true only if each character in the
|
40
|
+
// source can be found in the target and occurs after the preceding matches.
|
41
|
+
|
42
|
+
// TODO: export Match
|
43
|
+
func Match(source, target string) bool {
|
44
|
+
return match(source, target, noopTransformer())
|
45
|
+
}
|
46
|
+
|
47
|
+
// MatchFold is a case-insensitive version of Match.
|
48
|
+
|
49
|
+
// TODO: export MatchFold
|
50
|
+
func MatchFold(source, target string) bool {
|
51
|
+
return match(source, target, foldTransformer())
|
52
|
+
}
|
53
|
+
|
54
|
+
// MatchNormalized is a unicode-normalized version of Match.
|
55
|
+
|
56
|
+
// TODO: export MatchNormalized
|
57
|
+
func MatchNormalized(source, target string) bool {
|
58
|
+
return match(source, target, normalizeTransformer())
|
59
|
+
}
|
60
|
+
|
61
|
+
// MatchNormalizedFold is a unicode-normalized and case-insensitive version of Match.
|
62
|
+
|
63
|
+
// TODO: export MatchNormalizedFold
|
64
|
+
func MatchNormalizedFold(source, target string) bool {
|
65
|
+
return match(source, target, normalizedFoldTransformer())
|
66
|
+
}
|
67
|
+
|
68
|
+
func match(source, target string, transformer transform.Transformer) bool {
|
69
|
+
sourceT := stringTransform(source, transformer)
|
70
|
+
targetT := stringTransform(target, transformer)
|
71
|
+
return matchTransformed(sourceT, targetT)
|
72
|
+
}
|
73
|
+
|
74
|
+
// matchTransformed reports whether the runes of source occur in target in the
// same relative order. Both arguments are compared as given; callers apply any
// case folding or normalization beforehand.
//
// An empty source matches every target. A source that is longer than target
// (in bytes) never matches.
func matchTransformed(source, target string) bool {
	lenDiff := len(target) - len(source)

	// Cheap rejection before scanning.
	if lenDiff < 0 {
		return false
	}

	if lenDiff == 0 && source == target {
		return true
	}

Outer:
	for _, r1 := range source {
		for i, r2 := range target {
			if r1 == r2 {
				// Skip exactly the bytes this rune occupies in target. An
				// invalid byte decodes to U+FFFD but is only one byte wide, so
				// utf8.RuneLen(r2) would overshoot and either drop input or
				// slice past the end of target.
				_, size := utf8.DecodeRuneInString(target[i:])
				target = target[i+size:]
				continue Outer
			}
		}
		// r1 does not occur in what is left of target.
		return false
	}

	return true
}
|
98
|
+
|
99
|
+
// Find will return a list of strings in targets that fuzzy matches source.
|
100
|
+
//
|
101
|
+
// The returned list will be ordered by best matches first, or nil if there are
|
102
|
+
// no matches. The algorithm is optimized for matches where the target string
|
103
|
+
// has a prefix that matches the source.
|
104
|
+
//
|
105
|
+
// FFI note: the returned list is allocated using malloc and must be freed using
|
106
|
+
// free_cstrings.
|
107
|
+
//
|
108
|
+
//export Find
|
109
|
+
func Find(source *C.char, targets **C.char, targetsLen C.int) **C.char {
|
110
|
+
goSource := C.GoString(source)
|
111
|
+
sliceHeaders := (*[1 << 30]*C.char)(unsafe.Pointer(targets))[:targetsLen:targetsLen]
|
112
|
+
|
113
|
+
goTargets := make([]string, int(targetsLen))
|
114
|
+
for i := 0; i < int(targetsLen); i++ {
|
115
|
+
goTargets[i] = C.GoString(sliceHeaders[i])
|
116
|
+
}
|
117
|
+
|
118
|
+
results := find(goSource, goTargets, noopTransformer())
|
119
|
+
|
120
|
+
cResults := C.malloc(C.size_t(targetsLen) * C.size_t(unsafe.Sizeof(uintptr(0))))
|
121
|
+
cArray := (*[1 << 30]*C.char)(cResults)
|
122
|
+
|
123
|
+
for i := 0; i < int(targetsLen); i++ {
|
124
|
+
cArray[i] = C.CString("")
|
125
|
+
}
|
126
|
+
|
127
|
+
for i, s := range results {
|
128
|
+
C.free(unsafe.Pointer(cArray[i]))
|
129
|
+
cArray[i] = C.CString(s)
|
130
|
+
}
|
131
|
+
|
132
|
+
return (**C.char)(cResults)
|
133
|
+
}
|
134
|
+
|
135
|
+
//export free_cstrings
|
136
|
+
func free_cstrings(strs **C.char, len C.int) {
|
137
|
+
slice := (*[1 << 30]*C.char)(unsafe.Pointer(strs))[:len:len]
|
138
|
+
for i := 0; i < int(len); i++ {
|
139
|
+
C.free(unsafe.Pointer(slice[i]))
|
140
|
+
}
|
141
|
+
C.free(unsafe.Pointer(strs))
|
142
|
+
}
|
143
|
+
|
144
|
+
// FindFold is a case-insensitive version of Find.
|
145
|
+
|
146
|
+
// TODO: export FindFold
|
147
|
+
func FindFold(source string, targets []string) []string {
|
148
|
+
return find(source, targets, foldTransformer())
|
149
|
+
}
|
150
|
+
|
151
|
+
// FindNormalized is a unicode-normalized version of Find.
|
152
|
+
|
153
|
+
// TODO: export FindNormalized
|
154
|
+
func FindNormalized(source string, targets []string) []string {
|
155
|
+
return find(source, targets, normalizeTransformer())
|
156
|
+
}
|
157
|
+
|
158
|
+
// FindNormalizedFold is a unicode-normalized and case-insensitive version of Find.
|
159
|
+
|
160
|
+
// TODO: export FindNormalizedFold
|
161
|
+
func FindNormalizedFold(source string, targets []string) []string {
|
162
|
+
return find(source, targets, normalizedFoldTransformer())
|
163
|
+
}
|
164
|
+
|
165
|
+
func find(source string, targets []string, transformer transform.Transformer) []string {
|
166
|
+
sourceT := stringTransform(source, transformer)
|
167
|
+
|
168
|
+
var matches []string
|
169
|
+
|
170
|
+
for _, target := range targets {
|
171
|
+
targetT := stringTransform(target, transformer)
|
172
|
+
if matchTransformed(sourceT, targetT) {
|
173
|
+
matches = append(matches, target)
|
174
|
+
}
|
175
|
+
}
|
176
|
+
|
177
|
+
return matches
|
178
|
+
}
|
179
|
+
|
180
|
+
// RankMatch is similar to Match except it will measure the Levenshtein
|
181
|
+
// distance between the source and the target and return its result. If there
|
182
|
+
// was no match, it will return -1.
|
183
|
+
// Given the requirements of match, RankMatch only needs to perform a subset of
|
184
|
+
// the Levenshtein calculation, only deletions need be considered, required
|
185
|
+
// additions and substitutions would fail the match test.
|
186
|
+
|
187
|
+
// TODO: export RankMatch
|
188
|
+
func RankMatch(source, target string) int {
|
189
|
+
return rank(source, target, noopTransformer())
|
190
|
+
}
|
191
|
+
|
192
|
+
// RankMatchFold is a case-insensitive version of RankMatch.
|
193
|
+
|
194
|
+
// TODO: export RankMatchFold
|
195
|
+
func RankMatchFold(source, target string) int {
|
196
|
+
return rank(source, target, foldTransformer())
|
197
|
+
}
|
198
|
+
|
199
|
+
// RankMatchNormalized is a unicode-normalized version of RankMatch.
|
200
|
+
|
201
|
+
// TODO: export RankMatchNormalized
|
202
|
+
func RankMatchNormalized(source, target string) int {
|
203
|
+
return rank(source, target, normalizeTransformer())
|
204
|
+
}
|
205
|
+
|
206
|
+
// RankMatchNormalizedFold is a unicode-normalized and case-insensitive version of RankMatch.
|
207
|
+
|
208
|
+
// TODO: export RankMatchNormalizedFold
|
209
|
+
func RankMatchNormalizedFold(source, target string) int {
|
210
|
+
return rank(source, target, normalizedFoldTransformer())
|
211
|
+
}
|
212
|
+
|
213
|
+
func rank(source, target string, transformer transform.Transformer) int {
|
214
|
+
lenDiff := len(target) - len(source)
|
215
|
+
|
216
|
+
if lenDiff < 0 {
|
217
|
+
return -1
|
218
|
+
}
|
219
|
+
|
220
|
+
source = stringTransform(source, transformer)
|
221
|
+
target = stringTransform(target, transformer)
|
222
|
+
|
223
|
+
if lenDiff == 0 && source == target {
|
224
|
+
return 0
|
225
|
+
}
|
226
|
+
|
227
|
+
runeDiff := 0
|
228
|
+
|
229
|
+
Outer:
|
230
|
+
for _, r1 := range source {
|
231
|
+
for i, r2 := range target {
|
232
|
+
if r1 == r2 {
|
233
|
+
target = target[i+utf8.RuneLen(r2):]
|
234
|
+
continue Outer
|
235
|
+
} else {
|
236
|
+
runeDiff++
|
237
|
+
}
|
238
|
+
}
|
239
|
+
return -1
|
240
|
+
}
|
241
|
+
|
242
|
+
// Count up remaining char
|
243
|
+
runeDiff += utf8.RuneCountInString(target)
|
244
|
+
|
245
|
+
return runeDiff
|
246
|
+
}
|
247
|
+
|
248
|
+
// RankFind is similar to Find, except it will also rank all matches using
|
249
|
+
// Levenshtein distance.
|
250
|
+
|
251
|
+
// TODO: export RankFind
|
252
|
+
func RankFind(source string, targets []string) Ranks {
|
253
|
+
return rankFind(source, targets, noopTransformer())
|
254
|
+
}
|
255
|
+
|
256
|
+
// RankFindFold is a case-insensitive version of RankFind.
|
257
|
+
|
258
|
+
// TODO: export RankFindFold
|
259
|
+
func RankFindFold(source string, targets []string) Ranks {
|
260
|
+
return rankFind(source, targets, foldTransformer())
|
261
|
+
}
|
262
|
+
|
263
|
+
// RankFindNormalized is a unicode-normalized version of RankFind.
|
264
|
+
|
265
|
+
// TODO: export RankFindNormalized
|
266
|
+
func RankFindNormalized(source string, targets []string) Ranks {
|
267
|
+
return rankFind(source, targets, normalizeTransformer())
|
268
|
+
}
|
269
|
+
|
270
|
+
// RankFindNormalizedFold is a unicode-normalized and case-insensitive version of RankFind.
|
271
|
+
|
272
|
+
// TODO: export RankFindNormalizedFold
|
273
|
+
func RankFindNormalizedFold(source string, targets []string) Ranks {
|
274
|
+
return rankFind(source, targets, normalizedFoldTransformer())
|
275
|
+
}
|
276
|
+
|
277
|
+
func rankFind(source string, targets []string, transformer transform.Transformer) Ranks {
|
278
|
+
sourceT := stringTransform(source, transformer)
|
279
|
+
|
280
|
+
var r Ranks
|
281
|
+
|
282
|
+
for index, target := range targets {
|
283
|
+
targetT := stringTransform(target, transformer)
|
284
|
+
if matchTransformed(sourceT, targetT) {
|
285
|
+
distance := LevenshteinDistance(source, target)
|
286
|
+
r = append(r, Rank{source, target, distance, index})
|
287
|
+
}
|
288
|
+
}
|
289
|
+
return r
|
290
|
+
}
|
291
|
+
|
292
|
+
// Rank describes a single fuzzy match together with its distance.
type Rank struct {
	// Source is the string that was searched for.
	Source string

	// Target is the candidate that Source was matched against.
	Target string

	// Distance is the Levenshtein distance between Source and Target.
	Distance int

	// OriginalIndex is the position of Target in the list that was searched.
	OriginalIndex int
}

// Ranks is a list of Rank values. Its Len, Swap and Less methods order the
// list by ascending Distance.
type Ranks []Rank

// Len returns the number of ranks in the list.
func (rs Ranks) Len() int { return len(rs) }

// Swap exchanges the ranks at positions a and b.
func (rs Ranks) Swap(a, b int) {
	rs[b], rs[a] = rs[a], rs[b]
}

// Less reports whether the rank at position a has a smaller Distance than the
// rank at position b.
func (rs Ranks) Less(a, b int) bool {
	return rs[b].Distance > rs[a].Distance
}
|
319
|
+
|
320
|
+
func stringTransform(s string, t transform.Transformer) (transformed string) {
|
321
|
+
// Fast path for the nop transformer to prevent unnecessary allocations.
|
322
|
+
if _, ok := t.(nopTransformer); ok {
|
323
|
+
return s
|
324
|
+
}
|
325
|
+
|
326
|
+
var err error
|
327
|
+
transformed, _, err = transform.String(t, s)
|
328
|
+
if err != nil {
|
329
|
+
transformed = s
|
330
|
+
}
|
331
|
+
|
332
|
+
return
|
333
|
+
}
|
334
|
+
|
335
|
+
type unicodeFoldTransformer struct{ transform.NopResetter }
|
336
|
+
|
337
|
+
func (unicodeFoldTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
338
|
+
// Converting src to a string allocates.
|
339
|
+
// In theory, it need not; see https://go.dev/issue/27148.
|
340
|
+
// It is possible to write this loop using utf8.DecodeRune
|
341
|
+
// and thereby avoid allocations, but it is noticeably slower.
|
342
|
+
// So just let's wait for the compiler to get smarter.
|
343
|
+
for _, r := range string(src) {
|
344
|
+
if r == utf8.RuneError {
|
345
|
+
// Go spec for ranging over a string says:
|
346
|
+
// If the iteration encounters an invalid UTF-8 sequence,
|
347
|
+
// the second value will be 0xFFFD, the Unicode replacement character,
|
348
|
+
// and the next iteration will advance a single byte in the string.
|
349
|
+
nSrc++
|
350
|
+
} else {
|
351
|
+
nSrc += utf8.RuneLen(r)
|
352
|
+
}
|
353
|
+
r = unicode.ToLower(r)
|
354
|
+
x := utf8.RuneLen(r)
|
355
|
+
if x > len(dst[nDst:]) {
|
356
|
+
err = transform.ErrShortDst
|
357
|
+
break
|
358
|
+
}
|
359
|
+
nDst += utf8.EncodeRune(dst[nDst:], r)
|
360
|
+
}
|
361
|
+
return nDst, nSrc, err
|
362
|
+
}
|
363
|
+
|
364
|
+
type nopTransformer struct{ transform.NopResetter }
|
365
|
+
|
366
|
+
func (nopTransformer) Transform(dst []byte, src []byte, atEOF bool) (int, int, error) {
|
367
|
+
return 0, len(src), nil
|
368
|
+
}
|
369
|
+
|
370
|
+
// LevenshteinDistance measures the difference between two strings.
// The Levenshtein distance between two words is the minimum number of
// single-character edits (i.e. insertions, deletions or substitutions)
// required to change one word into the other. Characters are compared rune by
// rune.
//
// This implementation uses O(min(m,n)) space and is based on the optimized C
// version found here:
// http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C
//
// TODO: export LevenshteinDistance
func LevenshteinDistance(s, t string) int {
	r1, r2 := []rune(s), []rune(t)

	// The distance is symmetric, so keep the shorter string along the column
	// to honor the O(min(m,n)) space bound.
	if len(r1) > len(r2) {
		r1, r2 = r2, r1
	}

	// column[y] holds the distance between the first y runes of r1 and the
	// prefix of r2 processed so far.
	column := make([]int, len(r1)+1)
	for y := range column {
		column[y] = y
	}

	for x := 1; x <= len(r2); x++ {
		column[0] = x
		lastDiag := x - 1

		for y := 1; y <= len(r1); y++ {
			oldDiag := column[y]

			// Substitution (free when the runes are equal).
			best := lastDiag
			if r1[y-1] != r2[x-1] {
				best++
			}
			// Step from the previous column, same row.
			if v := column[y] + 1; v < best {
				best = v
			}
			// Step from the current column, previous row.
			if v := column[y-1] + 1; v < best {
				best = v
			}

			column[y] = best
			lastDiag = oldDiag
		}
	}

	return column[len(r1)]
}
|
404
|
+
|
405
|
+
// min2 returns the smaller of a and b.
func min2(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
411
|
+
|
412
|
+
// min returns the smallest of a, b and c.
func min(a, b, c int) int {
	smallest := a
	if b < smallest {
		smallest = b
	}
	if c < smallest {
		smallest = c
	}
	return smallest
}
|
415
|
+
|
416
|
+
// main is intentionally empty; package main must declare it.
// NOTE(review): presumably this file is only built as a shared library whose
// entry points are the cgo-exported functions, so main never runs — confirm.
func main() {}
|
data/ext/fuzzy.h
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
/* Code generated by cmd/cgo; DO NOT EDIT. */
|
2
|
+
|
3
|
+
/* package command-line-arguments */
|
4
|
+
|
5
|
+
|
6
|
+
#line 1 "cgo-builtin-export-prolog"
|
7
|
+
|
8
|
+
#include <stddef.h>
|
9
|
+
|
10
|
+
#ifndef GO_CGO_EXPORT_PROLOGUE_H
|
11
|
+
#define GO_CGO_EXPORT_PROLOGUE_H
|
12
|
+
|
13
|
+
#ifndef GO_CGO_GOSTRING_TYPEDEF
|
14
|
+
typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
15
|
+
#endif
|
16
|
+
|
17
|
+
#endif
|
18
|
+
|
19
|
+
/* Start of preamble from import "C" comments. */
|
20
|
+
|
21
|
+
|
22
|
+
#line 15 "fuzzy.go"
|
23
|
+
|
24
|
+
#include <stdlib.h> // for C.free
|
25
|
+
|
26
|
+
#line 1 "cgo-generated-wrapper"
|
27
|
+
|
28
|
+
|
29
|
+
/* End of preamble from import "C" comments. */
|
30
|
+
|
31
|
+
|
32
|
+
/* Start of boilerplate cgo prologue. */
|
33
|
+
#line 1 "cgo-gcc-export-header-prolog"
|
34
|
+
|
35
|
+
#ifndef GO_CGO_PROLOGUE_H
|
36
|
+
#define GO_CGO_PROLOGUE_H
|
37
|
+
|
38
|
+
typedef signed char GoInt8;
|
39
|
+
typedef unsigned char GoUint8;
|
40
|
+
typedef short GoInt16;
|
41
|
+
typedef unsigned short GoUint16;
|
42
|
+
typedef int GoInt32;
|
43
|
+
typedef unsigned int GoUint32;
|
44
|
+
typedef long long GoInt64;
|
45
|
+
typedef unsigned long long GoUint64;
|
46
|
+
typedef GoInt64 GoInt;
|
47
|
+
typedef GoUint64 GoUint;
|
48
|
+
typedef size_t GoUintptr;
|
49
|
+
typedef float GoFloat32;
|
50
|
+
typedef double GoFloat64;
|
51
|
+
#ifdef _MSC_VER
|
52
|
+
#include <complex.h>
|
53
|
+
typedef _Fcomplex GoComplex64;
|
54
|
+
typedef _Dcomplex GoComplex128;
|
55
|
+
#else
|
56
|
+
typedef float _Complex GoComplex64;
|
57
|
+
typedef double _Complex GoComplex128;
|
58
|
+
#endif
|
59
|
+
|
60
|
+
/*
|
61
|
+
static assertion to make sure the file is being used on architecture
|
62
|
+
at least with matching size of GoInt.
|
63
|
+
*/
|
64
|
+
typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1];
|
65
|
+
|
66
|
+
#ifndef GO_CGO_GOSTRING_TYPEDEF
|
67
|
+
typedef _GoString_ GoString;
|
68
|
+
#endif
|
69
|
+
typedef void *GoMap;
|
70
|
+
typedef void *GoChan;
|
71
|
+
typedef struct { void *t; void *v; } GoInterface;
|
72
|
+
typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;
|
73
|
+
|
74
|
+
#endif
|
75
|
+
|
76
|
+
/* End of boilerplate cgo prologue. */
|
77
|
+
|
78
|
+
#ifdef __cplusplus
|
79
|
+
extern "C" {
|
80
|
+
#endif
|
81
|
+
|
82
|
+
extern char** Find(char* source, char** targets, int targetsLen);
|
83
|
+
extern void free_cstrings(char** strs, int len);
|
84
|
+
|
85
|
+
#ifdef __cplusplus
|
86
|
+
}
|
87
|
+
#endif
|
data/ext/fuzzy.so
ADDED
Binary file
|
data/ext/go.mod
ADDED
data/ext/go.sum
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/fuzzy_matcher/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
  spec.name = "fast_fuzzy_matcher"
  spec.version = FuzzyMatcher::VERSION
  spec.authors = ["Vlad Dyachenko"]
  spec.email = ["vla-dy@yandex.ru"]

  spec.summary = "fast_fuzzy_matcher is the fastest fuzzy search library for Ruby."
  # Adjacent string literals are joined verbatim, so every fragment except the
  # last must end with a space; otherwise the sentences run together.
  spec.description = "A tiny and blazing-fast fuzzy search in pure Ruby with FFI bindings to Go. " \
                     "Fuzzy searching allows for flexibly matching a string with partial input, " \
                     "useful for filtering data very quickly based on lightweight user input."
  spec.homepage = "https://github.com/wowinter13/fast_fuzzy_matcher"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 2.6.0"

  spec.metadata = {
    'bug_tracker_uri' => 'https://github.com/wowinter13/fast_fuzzy_matcher/issues',
    'changelog_uri' => "https://github.com/wowinter13/fast_fuzzy_matcher/blob/master/CHANGELOG.md",
    'documentation_uri' => "https://www.rubydoc.info/github/wowinter13/fast_fuzzy_matcher",
    'source_code_uri' => "https://github.com/wowinter13/fast_fuzzy_matcher"
  }

  # Specify which files should be added to the gem when it is released.
  # `git ls-files -z` lists the files tracked by git; this gemspec itself and
  # development-only paths are excluded.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (File.expand_path(f) == __FILE__) ||
        f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor Gemfile])
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.test_files = Dir['spec/**/*']

  spec.add_dependency "ffi"
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "fuzzy_matcher/version"
|
4
|
+
|
5
|
+
require 'ffi'
|
6
|
+
|
7
|
+
module FuzzyMatcher
  class Error < StandardError; end

  # Returns the strings in targets that fuzzy match source, in the order in
  # which they appear in targets.
  #
  # @param [String] source The string to match against.
  # @param [Array<String>] targets The strings to match.
  #
  # @return [Array<String>] The strings in targets that fuzzy match source.
  #   Empty strings are never returned, because the native function pads its
  #   result array with empty strings.
  #
  # @example
  #   require 'fast_fuzzy_matcher'
  #   FuzzyMatcher.find("whl", ["cartwheel", "foobar", "wheel", "baz"])
  #   => ["cartwheel", "wheel"]
  #
  # @note This method possibly is not thread safe.
  # @note This method is case sensitive. For case insensitive matching, downcase
  #   source and targets before calling it.
  #
  # @see ext/fuzzy.go#Find for the implementation of this method.
  def self.find(source, targets)
    # `pointers` keeps the per-string buffers referenced for the duration of
    # the native call.
    pointers = targets.map { |t| FFI::MemoryPointer.from_string(t) }
    targets_ptr = FFI::MemoryPointer.new(:pointer, targets.size)
    targets_ptr.write_array_of_pointer(pointers)

    result_ptr = FuzzyBinding.Find(source, targets_ptr, targets.size)

    return [] if result_ptr.null?

    begin
      result_ptr.read_array_of_pointer(targets.size).each_with_object([]) do |ptr, arr|
        next if ptr.nil? || ptr.null?

        value = ptr.read_string_to_null
        arr << value unless value.nil? || value == ""
      end
    ensure
      # The array and its strings are owned by the native library; hand them
      # back exactly once, even if reading the results raised.
      FuzzyBinding.free_cstrings(result_ptr, targets.size)
    end
  end

  # FFI bindings to the functions exported by ext/fuzzy.so.
  module FuzzyBinding
    extend FFI::Library
    ffi_lib File.expand_path("../ext/fuzzy.so", File.dirname(__FILE__))

    attach_function :Find, [:string, :pointer, :int], :pointer
    attach_function :free_cstrings, [:pointer, :int], :void
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe FuzzyMatcher do
|
6
|
+
it "has a version number" do
|
7
|
+
expect(FuzzyMatcher::VERSION).not_to be nil
|
8
|
+
end
|
9
|
+
|
10
|
+
describe "#find" do
|
11
|
+
it "responds with an empty array when no matches are found" do
|
12
|
+
expect(FuzzyMatcher.find("foo", ["bar", "baz"])).to eq([])
|
13
|
+
end
|
14
|
+
|
15
|
+
it "responds with an empty array when no targets are given" do
|
16
|
+
expect(FuzzyMatcher.find("foo", [])).to eq([])
|
17
|
+
end
|
18
|
+
|
19
|
+
it "responds with matches when the source is a substring of a target" do
|
20
|
+
expect(FuzzyMatcher.find("whl", ["cartwheel", "foobar", "wheel", "baz"])).to eq(["cartwheel", "wheel"])
|
21
|
+
end
|
22
|
+
|
23
|
+
it "does not respond with matches when the source is a substring of a target and the source is uppercase" do
|
24
|
+
expect(FuzzyMatcher.find("WHL", ["cartwheel", "foobar", "wheel", "baz"])).to eq([])
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "fuzzy_matcher"
|
4
|
+
|
5
|
+
RSpec.configure do |config|
|
6
|
+
# Enable flags like --only-failures and --next-failure
|
7
|
+
config.example_status_persistence_file_path = ".rspec_status"
|
8
|
+
|
9
|
+
# Disable RSpec exposing methods globally on `Module` and `main`
|
10
|
+
config.disable_monkey_patching!
|
11
|
+
|
12
|
+
config.expect_with :rspec do |c|
|
13
|
+
c.syntax = :expect
|
14
|
+
end
|
15
|
+
end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fast_fuzzy_matcher
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Vlad Dyachenko
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-01-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description: A tiny and blazing-fast fuzzy search in pure Ruby with FFI bindings to
|
28
|
+
Go.Fuzzy searching allows for flexibly matching a string with partial input, useful
|
29
|
+
for filtering data very quickly based on lightweight user input.
|
30
|
+
email:
|
31
|
+
- vla-dy@yandex.ru
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- ".rspec"
|
37
|
+
- CHANGELOG.md
|
38
|
+
- LICENSE.txt
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- ext/.idea/.gitignore
|
42
|
+
- ext/.idea/ext.iml
|
43
|
+
- ext/.idea/modules.xml
|
44
|
+
- ext/.idea/vcs.xml
|
45
|
+
- ext/fuzzy.go
|
46
|
+
- ext/fuzzy.h
|
47
|
+
- ext/fuzzy.so
|
48
|
+
- ext/go.mod
|
49
|
+
- ext/go.sum
|
50
|
+
- fast_fuzzy_matcher.gemspec
|
51
|
+
- lib/fuzzy_matcher.rb
|
52
|
+
- lib/fuzzy_matcher/version.rb
|
53
|
+
- spec/fuzzy_matcher_spec.rb
|
54
|
+
- spec/spec_helper.rb
|
55
|
+
homepage: https://github.com/wowinter13/fast_fuzzy_matcher
|
56
|
+
licenses:
|
57
|
+
- MIT
|
58
|
+
metadata:
|
59
|
+
bug_tracker_uri: https://github.com/wowinter13/fast_fuzzy_matcher/issues
|
60
|
+
changelog_uri: https://github.com/wowinter13/fast_fuzzy_matcher/blob/master/CHANGELOG.md
|
61
|
+
documentation_uri: https://www.rubydoc.info/github/wowinter13/fast_fuzzy_matcher
|
62
|
+
source_code_uri: https://github.com/wowinter13/fast_fuzzy_matcher
|
63
|
+
post_install_message:
|
64
|
+
rdoc_options: []
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: 2.6.0
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
requirements: []
|
78
|
+
rubygems_version: 3.4.10
|
79
|
+
signing_key:
|
80
|
+
specification_version: 4
|
81
|
+
summary: fast_fuzzy_matcher is the fastest fuzzy search library for Ruby.
|
82
|
+
test_files:
|
83
|
+
- spec/fuzzy_matcher_spec.rb
|
84
|
+
- spec/spec_helper.rb
|