fast_fuzzy_matcher 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e10b6a2b5f47c25d479a1a3623db229ecbc24f79176c3d6ae879ce6767e931b0
4
+ data.tar.gz: c4c910d1e4a462fab8819f2d406d2affe1c872d58f2943c7be196d25c439a7b6
5
+ SHA512:
6
+ metadata.gz: 3cef0e3e2a233385a0daf2c882002be8af28e38df4c58c500bbee29fd3b319d904b9ee918f6fbeb44c3557602562b7b0f3d1f53c3b03c1c197f2824f6caf81fd
7
+ data.tar.gz: 78c6671ffaad4807137977cc90100be0214e241b0895ab71955baf96500db286b235b62e121c9c13f00134a86f3138b92e652af597a37ed9bb0dfca559215069
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2023-10-16
4
+
5
+ - Initial release
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2023 wowinter13
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,66 @@
1
+ # FuzzyMatcher
2
+
3
+ This library is a work in progress.
4
+
5
+ The fastest Fuzzy Matcher in the wild west. FFI-based.
6
+
7
+ Find a needle in a haystack based on string similarity and regular expression rules.
8
+
9
+
10
+ ### Basic usage
11
+
12
+ Just pass an array of strings to the matcher and it will return the best match(es) for the given needle.
13
+
14
+ ```ruby
15
+ require 'fuzzy_matcher'
16
+
17
+ FuzzyMatcher.find("whl", ["cartwheel", "foobar", "wheel", "baz"])
18
+ => ["cartwheel", "wheel"]
19
+
20
+ ```
21
+
22
+ ### Advanced usage
23
+
24
+ Better documentation is coming soon. For now, please refer to the specs.
25
+
26
+
27
+
28
+ # Benchmarks
29
+
30
+ To be done.
31
+
32
+ Approximately 10-60x faster than the fastest Ruby implementation. The difference is more pronounced for longer strings and larger dictionaries.
33
+
34
+
35
+ ## Documentation
36
+
37
+ Detailed documentation is available at [rubydoc](https://rubydoc.info/gems/fast_fuzzy_matcher).
38
+
39
+ ## Installation
40
+
41
+ fast_fuzzy_matcher is available as a gem. To install it, run:
42
+
43
+ gem install fast_fuzzy_matcher
44
+
45
+ If you're using Bundler, add the gem to Gemfile.
46
+
47
+ gem 'fast_fuzzy_matcher'
48
+
49
+ Run `bundle install`.
50
+
51
+ ## Running tests
52
+
53
+ bundle exec rspec spec/
54
+
55
+
56
+ ## Contributing
57
+
58
+ 1. Fork it ( https://github.com/wowinter13/fast_fuzzy_matcher/fork )
59
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
60
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
61
+ 4. Push to the branch (`git push origin my-new-feature`)
62
+ 5. Create a new Pull Request
63
+
64
+ ## License
65
+
66
+ MIT License. See LICENSE.txt for details.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
@@ -0,0 +1,8 @@
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
data/ext/.idea/ext.iml ADDED
@@ -0,0 +1,9 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="WEB_MODULE" version="4">
3
+ <component name="Go" enabled="true" />
4
+ <component name="NewModuleRootManager">
5
+ <content url="file://$MODULE_DIR$" />
6
+ <orderEntry type="inheritedJdk" />
7
+ <orderEntry type="sourceFolder" forTests="false" />
8
+ </component>
9
+ </module>
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/ext.iml" filepath="$PROJECT_DIR$/.idea/ext.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
data/ext/.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
5
+ </component>
6
+ </project>
data/ext/fuzzy.go ADDED
@@ -0,0 +1,416 @@
1
+ // Fuzzy searching allows for flexibly matching a string with partial input,
2
+ // useful for filtering data very quickly based on lightweight user input.
3
+ package main
4
+
5
+ import (
6
+ "unicode"
7
+ "unicode/utf8"
8
+ "unsafe"
9
+
10
+ "golang.org/x/text/runes"
11
+ "golang.org/x/text/transform"
12
+ "golang.org/x/text/unicode/norm"
13
+ )
14
+
15
+ /*
16
+ #include <stdlib.h> // for C.free
17
+ */
18
+ import "C"
19
+
20
+ func noopTransformer() transform.Transformer {
21
+ return nopTransformer{}
22
+ }
23
+
24
+ func foldTransformer() transform.Transformer {
25
+ return unicodeFoldTransformer{}
26
+ }
27
+
28
+ func normalizeTransformer() transform.Transformer {
29
+ return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
30
+ }
31
+
32
+ func normalizedFoldTransformer() transform.Transformer {
33
+ return transform.Chain(normalizeTransformer(), foldTransformer())
34
+ }
35
+
36
+ // Match returns true if source matches target using a fuzzy-searching
37
+ // algorithm. Note that it doesn't implement Levenshtein distance (see
38
+ // RankMatch instead), but rather a simplified version where there's no
39
+ // approximation. The method will return true only if each character in the
40
+ // source can be found in the target and occurs after the preceding matches.
41
+
42
+ // TODO: export Match
43
+ func Match(source, target string) bool {
44
+ return match(source, target, noopTransformer())
45
+ }
46
+
47
+ // MatchFold is a case-insensitive version of Match.
48
+
49
+ // TODO: export MatchFold
50
+ func MatchFold(source, target string) bool {
51
+ return match(source, target, foldTransformer())
52
+ }
53
+
54
+ // MatchNormalized is a unicode-normalized version of Match.
55
+
56
+ // TODO: export MatchNormalized
57
+ func MatchNormalized(source, target string) bool {
58
+ return match(source, target, normalizeTransformer())
59
+ }
60
+
61
+ // MatchNormalizedFold is a unicode-normalized and case-insensitive version of Match.
62
+
63
+ // TODO: export MatchNormalizedFold
64
+ func MatchNormalizedFold(source, target string) bool {
65
+ return match(source, target, normalizedFoldTransformer())
66
+ }
67
+
68
+ func match(source, target string, transformer transform.Transformer) bool {
69
+ sourceT := stringTransform(source, transformer)
70
+ targetT := stringTransform(target, transformer)
71
+ return matchTransformed(sourceT, targetT)
72
+ }
73
+
74
// matchTransformed reports whether every rune of source occurs in target in
// the same order (i.e. source is a subsequence of target). Both strings are
// expected to have been transformed already.
//
// A target that is shorter (in bytes) than source never matches, and an empty
// source matches every target.
func matchTransformed(source, target string) bool {
	if len(target) < len(source) {
		return false
	}

	if source == target {
		return true
	}

Outer:
	for _, r1 := range source {
		for i, r2 := range target {
			if r1 != r2 {
				continue
			}
			// Advance by the encoded width actually present in target. An
			// invalid byte is reported by range as utf8.RuneError but is only
			// one byte wide, whereas utf8.RuneLen(utf8.RuneError) is 3; using
			// RuneLen here would skip valid bytes or slice out of range.
			_, size := utf8.DecodeRuneInString(target[i:])
			target = target[i+size:]
			continue Outer
		}
		return false
	}

	return true
}
98
+
99
+ // Find will return a list of strings in targets that fuzzy matches source.
100
+ //
101
+ // The returned list will be ordered by best matches first, or nil if there are
102
+ // no matches. The algorithm is optimized for matches where the target string
103
+ // has a prefix that matches the source.
104
+ //
105
+ // FFI note: the returned list is allocated using malloc and must be freed using
106
+ // free_cstrings.
107
+ //
108
+ //export Find
109
+ func Find(source *C.char, targets **C.char, targetsLen C.int) **C.char {
110
+ goSource := C.GoString(source)
111
+ sliceHeaders := (*[1 << 30]*C.char)(unsafe.Pointer(targets))[:targetsLen:targetsLen]
112
+
113
+ goTargets := make([]string, int(targetsLen))
114
+ for i := 0; i < int(targetsLen); i++ {
115
+ goTargets[i] = C.GoString(sliceHeaders[i])
116
+ }
117
+
118
+ results := find(goSource, goTargets, noopTransformer())
119
+
120
+ cResults := C.malloc(C.size_t(targetsLen) * C.size_t(unsafe.Sizeof(uintptr(0))))
121
+ cArray := (*[1 << 30]*C.char)(cResults)
122
+
123
+ for i := 0; i < int(targetsLen); i++ {
124
+ cArray[i] = C.CString("")
125
+ }
126
+
127
+ for i, s := range results {
128
+ C.free(unsafe.Pointer(cArray[i]))
129
+ cArray[i] = C.CString(s)
130
+ }
131
+
132
+ return (**C.char)(cResults)
133
+ }
134
+
135
+ //export free_cstrings
136
+ func free_cstrings(strs **C.char, len C.int) {
137
+ slice := (*[1 << 30]*C.char)(unsafe.Pointer(strs))[:len:len]
138
+ for i := 0; i < int(len); i++ {
139
+ C.free(unsafe.Pointer(slice[i]))
140
+ }
141
+ C.free(unsafe.Pointer(strs))
142
+ }
143
+
144
+ // FindFold is a case-insensitive version of Find.
145
+
146
+ // TODO: export FindFold
147
+ func FindFold(source string, targets []string) []string {
148
+ return find(source, targets, foldTransformer())
149
+ }
150
+
151
+ // FindNormalized is a unicode-normalized version of Find.
152
+
153
+ // TODO: export FindNormalized
154
+ func FindNormalized(source string, targets []string) []string {
155
+ return find(source, targets, normalizeTransformer())
156
+ }
157
+
158
+ // FindNormalizedFold is a unicode-normalized and case-insensitive version of Find.
159
+
160
+ // TODO: export FindNormalizedFold
161
+ func FindNormalizedFold(source string, targets []string) []string {
162
+ return find(source, targets, normalizedFoldTransformer())
163
+ }
164
+
165
+ func find(source string, targets []string, transformer transform.Transformer) []string {
166
+ sourceT := stringTransform(source, transformer)
167
+
168
+ var matches []string
169
+
170
+ for _, target := range targets {
171
+ targetT := stringTransform(target, transformer)
172
+ if matchTransformed(sourceT, targetT) {
173
+ matches = append(matches, target)
174
+ }
175
+ }
176
+
177
+ return matches
178
+ }
179
+
180
+ // RankMatch is similar to Match except it will measure the Levenshtein
181
+ // distance between the source and the target and return its result. If there
182
+ // was no match, it will return -1.
183
+ // Given the requirements of match, RankMatch only needs to perform a subset of
184
+ // the Levenshtein calculation, only deletions need be considered, required
185
+ // additions and substitutions would fail the match test.
186
+
187
+ // TODO: export RankMatch
188
+ func RankMatch(source, target string) int {
189
+ return rank(source, target, noopTransformer())
190
+ }
191
+
192
+ // RankMatchFold is a case-insensitive version of RankMatch.
193
+
194
+ // TODO: export RankMatchFold
195
+ func RankMatchFold(source, target string) int {
196
+ return rank(source, target, foldTransformer())
197
+ }
198
+
199
+ // RankMatchNormalized is a unicode-normalized version of RankMatch.
200
+
201
+ // TODO: export RankMatchNormalized
202
+ func RankMatchNormalized(source, target string) int {
203
+ return rank(source, target, normalizeTransformer())
204
+ }
205
+
206
+ // RankMatchNormalizedFold is a unicode-normalized and case-insensitive version of RankMatch.
207
+
208
+ // TODO: export RankMatchNormalizedFold
209
+ func RankMatchNormalizedFold(source, target string) int {
210
+ return rank(source, target, normalizedFoldTransformer())
211
+ }
212
+
213
+ func rank(source, target string, transformer transform.Transformer) int {
214
+ lenDiff := len(target) - len(source)
215
+
216
+ if lenDiff < 0 {
217
+ return -1
218
+ }
219
+
220
+ source = stringTransform(source, transformer)
221
+ target = stringTransform(target, transformer)
222
+
223
+ if lenDiff == 0 && source == target {
224
+ return 0
225
+ }
226
+
227
+ runeDiff := 0
228
+
229
+ Outer:
230
+ for _, r1 := range source {
231
+ for i, r2 := range target {
232
+ if r1 == r2 {
233
+ target = target[i+utf8.RuneLen(r2):]
234
+ continue Outer
235
+ } else {
236
+ runeDiff++
237
+ }
238
+ }
239
+ return -1
240
+ }
241
+
242
+ // Count up remaining char
243
+ runeDiff += utf8.RuneCountInString(target)
244
+
245
+ return runeDiff
246
+ }
247
+
248
+ // RankFind is similar to Find, except it will also rank all matches using
249
+ // Levenshtein distance.
250
+
251
+ // TODO: export RankFind
252
+ func RankFind(source string, targets []string) Ranks {
253
+ return rankFind(source, targets, noopTransformer())
254
+ }
255
+
256
+ // RankFindFold is a case-insensitive version of RankFind.
257
+
258
+ // TODO: export RankFindFold
259
+ func RankFindFold(source string, targets []string) Ranks {
260
+ return rankFind(source, targets, foldTransformer())
261
+ }
262
+
263
+ // RankFindNormalized is a unicode-normalized version of RankFind.
264
+
265
+ // TODO: export RankFindNormalized
266
+ func RankFindNormalized(source string, targets []string) Ranks {
267
+ return rankFind(source, targets, normalizeTransformer())
268
+ }
269
+
270
+ // RankFindNormalizedFold is a unicode-normalized and case-insensitive version of RankFind.
271
+
272
+ // TODO: export RankFindNormalizedFold
273
+ func RankFindNormalizedFold(source string, targets []string) Ranks {
274
+ return rankFind(source, targets, normalizedFoldTransformer())
275
+ }
276
+
277
+ func rankFind(source string, targets []string, transformer transform.Transformer) Ranks {
278
+ sourceT := stringTransform(source, transformer)
279
+
280
+ var r Ranks
281
+
282
+ for index, target := range targets {
283
+ targetT := stringTransform(target, transformer)
284
+ if matchTransformed(sourceT, targetT) {
285
+ distance := LevenshteinDistance(source, target)
286
+ r = append(r, Rank{source, target, distance, index})
287
+ }
288
+ }
289
+ return r
290
+ }
291
+
292
// Rank describes a single match produced by rankFind.
type Rank struct {
	// Source is the string that was searched for.
	Source string

	// Target is the candidate that Source was matched against.
	Target string

	// Distance is the Levenshtein distance between Source and Target.
	Distance int

	// OriginalIndex is the position of Target in the list that was searched.
	OriginalIndex int
}

// Ranks is a list of Rank values. Its Len, Swap and Less methods order the
// list by ascending Distance.
type Ranks []Rank

// Len returns the number of ranks in r.
func (r Ranks) Len() int { return len(r) }

// Swap exchanges the ranks at positions i and j.
func (r Ranks) Swap(i, j int) {
	tmp := r[i]
	r[i] = r[j]
	r[j] = tmp
}

// Less reports whether the rank at i has a smaller Distance than the one at j.
func (r Ranks) Less(i, j int) bool {
	return r[j].Distance > r[i].Distance
}
319
+
320
+ func stringTransform(s string, t transform.Transformer) (transformed string) {
321
+ // Fast path for the nop transformer to prevent unnecessary allocations.
322
+ if _, ok := t.(nopTransformer); ok {
323
+ return s
324
+ }
325
+
326
+ var err error
327
+ transformed, _, err = transform.String(t, s)
328
+ if err != nil {
329
+ transformed = s
330
+ }
331
+
332
+ return
333
+ }
334
+
335
+ type unicodeFoldTransformer struct{ transform.NopResetter }
336
+
337
+ func (unicodeFoldTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
338
+ // Converting src to a string allocates.
339
+ // In theory, it need not; see https://go.dev/issue/27148.
340
+ // It is possible to write this loop using utf8.DecodeRune
341
+ // and thereby avoid allocations, but it is noticeably slower.
342
+ // So just let's wait for the compiler to get smarter.
343
+ for _, r := range string(src) {
344
+ if r == utf8.RuneError {
345
+ // Go spec for ranging over a string says:
346
+ // If the iteration encounters an invalid UTF-8 sequence,
347
+ // the second value will be 0xFFFD, the Unicode replacement character,
348
+ // and the next iteration will advance a single byte in the string.
349
+ nSrc++
350
+ } else {
351
+ nSrc += utf8.RuneLen(r)
352
+ }
353
+ r = unicode.ToLower(r)
354
+ x := utf8.RuneLen(r)
355
+ if x > len(dst[nDst:]) {
356
+ err = transform.ErrShortDst
357
+ break
358
+ }
359
+ nDst += utf8.EncodeRune(dst[nDst:], r)
360
+ }
361
+ return nDst, nSrc, err
362
+ }
363
+
364
+ type nopTransformer struct{ transform.NopResetter }
365
+
366
+ func (nopTransformer) Transform(dst []byte, src []byte, atEOF bool) (int, int, error) {
367
+ return 0, len(src), nil
368
+ }
369
+
370
// LevenshteinDistance measures the difference between two strings.
// The Levenshtein distance between two words is the minimum number of
// single-character edits (i.e. insertions, deletions or substitutions)
// required to change one word into the other. Characters are compared rune by
// rune.
//
// This implementation uses O(min(m,n)) space and is based on the optimized C
// version found here:
// http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C
//
// Not exported through cgo (note the space in "// export").
// export LevenshteinDistance
func LevenshteinDistance(s, t string) int {
	r1, r2 := []rune(s), []rune(t)

	// The distance is symmetric, so lay the shorter string along the working
	// column; that keeps the allocation at min(m,n)+1 ints.
	if len(r1) > len(r2) {
		r1, r2 = r2, r1
	}

	column := make([]int, len(r1)+1)
	for y := range column {
		column[y] = y
	}

	for x := 1; x <= len(r2); x++ {
		column[0] = x

		// lastDiag holds the value diagonally up-left of column[y] from the
		// previous iteration of x.
		lastDiag := x - 1
		for y := 1; y <= len(r1); y++ {
			oldDiag := column[y]
			cost := 0
			if r1[y-1] != r2[x-1] {
				cost = 1
			}
			column[y] = min(column[y]+1, column[y-1]+1, lastDiag+cost)
			lastDiag = oldDiag
		}
	}

	return column[len(r1)]
}

// min2 returns the smaller of a and b.
func min2(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// min returns the smallest of a, b and c.
func min(a, b, c int) int {
	return min2(min2(a, b), c)
}
415
+
416
// main is intentionally empty: this package exists for the functions marked
// with //export. NOTE(review): presumably built with -buildmode=c-shared to
// produce ext/fuzzy.so — confirm against the build command.
func main() {}
data/ext/fuzzy.h ADDED
@@ -0,0 +1,87 @@
1
+ /* Code generated by cmd/cgo; DO NOT EDIT. */
2
+
3
+ /* package command-line-arguments */
4
+
5
+
6
+ #line 1 "cgo-builtin-export-prolog"
7
+
8
+ #include <stddef.h>
9
+
10
+ #ifndef GO_CGO_EXPORT_PROLOGUE_H
11
+ #define GO_CGO_EXPORT_PROLOGUE_H
12
+
13
+ #ifndef GO_CGO_GOSTRING_TYPEDEF
14
+ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
15
+ #endif
16
+
17
+ #endif
18
+
19
+ /* Start of preamble from import "C" comments. */
20
+
21
+
22
+ #line 15 "fuzzy.go"
23
+
24
+ #include <stdlib.h> // for C.free
25
+
26
+ #line 1 "cgo-generated-wrapper"
27
+
28
+
29
+ /* End of preamble from import "C" comments. */
30
+
31
+
32
+ /* Start of boilerplate cgo prologue. */
33
+ #line 1 "cgo-gcc-export-header-prolog"
34
+
35
+ #ifndef GO_CGO_PROLOGUE_H
36
+ #define GO_CGO_PROLOGUE_H
37
+
38
+ typedef signed char GoInt8;
39
+ typedef unsigned char GoUint8;
40
+ typedef short GoInt16;
41
+ typedef unsigned short GoUint16;
42
+ typedef int GoInt32;
43
+ typedef unsigned int GoUint32;
44
+ typedef long long GoInt64;
45
+ typedef unsigned long long GoUint64;
46
+ typedef GoInt64 GoInt;
47
+ typedef GoUint64 GoUint;
48
+ typedef size_t GoUintptr;
49
+ typedef float GoFloat32;
50
+ typedef double GoFloat64;
51
+ #ifdef _MSC_VER
52
+ #include <complex.h>
53
+ typedef _Fcomplex GoComplex64;
54
+ typedef _Dcomplex GoComplex128;
55
+ #else
56
+ typedef float _Complex GoComplex64;
57
+ typedef double _Complex GoComplex128;
58
+ #endif
59
+
60
+ /*
61
+ static assertion to make sure the file is being used on architecture
62
+ at least with matching size of GoInt.
63
+ */
64
+ typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1];
65
+
66
+ #ifndef GO_CGO_GOSTRING_TYPEDEF
67
+ typedef _GoString_ GoString;
68
+ #endif
69
+ typedef void *GoMap;
70
+ typedef void *GoChan;
71
+ typedef struct { void *t; void *v; } GoInterface;
72
+ typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;
73
+
74
+ #endif
75
+
76
+ /* End of boilerplate cgo prologue. */
77
+
78
+ #ifdef __cplusplus
79
+ extern "C" {
80
+ #endif
81
+
82
+ extern char** Find(char* source, char** targets, int targetsLen);
83
+ extern void free_cstrings(char** strs, int len);
84
+
85
+ #ifdef __cplusplus
86
+ }
87
+ #endif
data/ext/fuzzy.so ADDED
Binary file
data/ext/go.mod ADDED
@@ -0,0 +1,5 @@
1
+ module github.com/wowinter13/fast_fuzzy_matcher
2
+
3
+ go 1.19
4
+
5
+ require golang.org/x/text v0.13.0
data/ext/go.sum ADDED
@@ -0,0 +1,2 @@
1
+ golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
2
+ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/fuzzy_matcher/version"
4
+
5
# Gem specification for fast_fuzzy_matcher.
Gem::Specification.new do |spec|
  spec.name = "fast_fuzzy_matcher"
  spec.version = FuzzyMatcher::VERSION
  spec.authors = ["Vlad Dyachenko"]
  spec.email = ["vla-dy@yandex.ru"]

  spec.summary = "fast_fuzzy_matcher is the fastest fuzzy search library for Ruby."
  # Each fragment except the last ends with a space so the concatenated
  # description reads as separate sentences.
  spec.description = "A tiny and blazing-fast fuzzy search in pure Ruby with FFI bindings to Go. " \
                     "Fuzzy searching allows for flexibly matching a string with partial input, " \
                     "useful for filtering data very quickly based on lightweight user input."
  spec.homepage = "https://github.com/wowinter13/fast_fuzzy_matcher"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 2.6.0"

  spec.metadata = {
    'bug_tracker_uri' => 'https://github.com/wowinter13/fast_fuzzy_matcher/issues',
    'changelog_uri' => "https://github.com/wowinter13/fast_fuzzy_matcher/blob/master/CHANGELOG.md",
    'documentation_uri' => "https://www.rubydoc.info/github/wowinter13/fast_fuzzy_matcher",
    'source_code_uri' => "https://github.com/wowinter13/fast_fuzzy_matcher"
  }

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (File.expand_path(f) == __FILE__) ||
        f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor Gemfile])
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.test_files = Dir['spec/**/*']

  spec.add_dependency "ffi"
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Namespace of the fast_fuzzy_matcher gem.
module FuzzyMatcher
  # Version of the gem; read by the gemspec as FuzzyMatcher::VERSION.
  VERSION = "0.2.0"
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "fuzzy_matcher/version"
4
+
5
+ require 'ffi'
6
+
7
module FuzzyMatcher
  class Error < StandardError; end

  # Returns the strings in +targets+ that fuzzy match +source+, i.e. the
  # targets that contain every character of +source+ in the same order.
  #
  # @param [String] source The string to match against.
  # @param [Array<String>] targets The strings to match.
  #
  # @return [Array<String>] The strings in targets that fuzzy match source,
  #   in the order they appear in targets.
  #
  # @example
  #   require 'fuzzy_matcher'
  #   FuzzyMatcher.find("whl", ["cartwheel", "foobar", "wheel", "baz"])
  #   # => ["cartwheel", "wheel"]
  #
  # @note This method possibly is not thread safe.
  # @note This method is case sensitive. For case insensitive matching,
  #   downcase source and targets before calling it.
  # @note Empty strings are never returned: the native function pads its
  #   result with empty strings, so they are filtered out here.
  #
  # @see ext/fuzzy.go#Find for the implementation of this method.
  def self.find(source, targets)
    return [] if targets.empty?

    # Keep the per-string pointers referenced for the duration of the call so
    # the memory behind targets_ptr stays alive.
    pointers = targets.map { |t| FFI::MemoryPointer.from_string(t) }
    targets_ptr = FFI::MemoryPointer.new(:pointer, targets.size)
    targets_ptr.write_array_of_pointer(pointers)

    result_ptr = FuzzyBinding.Find(source, targets_ptr, targets.size)

    return [] if result_ptr.null?

    begin
      result_ptr.read_array_of_pointer(targets.size).each_with_object([]) do |ptr, matches|
        next if ptr.nil? || ptr.null?

        value = ptr.read_string_to_null
        matches << value unless value.nil? || value == ""
      end
    ensure
      # The result array and its strings are malloc-allocated by the native
      # side; release them even if reading the strings raised.
      FuzzyBinding.free_cstrings(result_ptr, targets.size)
    end
  end

  # FFI bindings to the shared library shipped in ext/.
  module FuzzyBinding
    extend FFI::Library
    ffi_lib File.expand_path("../ext/fuzzy.so", File.dirname(__FILE__))

    attach_function :Find, [:string, :pointer, :int], :pointer
    attach_function :free_cstrings, [:pointer, :int], :void
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
# Behavioural examples for FuzzyMatcher.find and the version constant.
RSpec.describe FuzzyMatcher do
  let(:haystack) { ["cartwheel", "foobar", "wheel", "baz"] }

  it "has a version number" do
    expect(described_class::VERSION).not_to be_nil
  end

  describe "#find" do
    it "responds with an empty array when no matches are found" do
      result = described_class.find("foo", ["bar", "baz"])
      expect(result).to eq([])
    end

    it "responds with an empty array when no targets are given" do
      result = described_class.find("foo", [])
      expect(result).to eq([])
    end

    it "responds with matches when the source is a substring of a target" do
      result = described_class.find("whl", haystack)
      expect(result).to eq(["cartwheel", "wheel"])
    end

    it "does not respond with matches when the source is a substring of a target and the source is uppercase" do
      result = described_class.find("WHL", haystack)
      expect(result).to eq([])
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fuzzy_matcher"
4
+
5
# Shared RSpec settings for the gem's specs.
RSpec.configure do |config|
  # Persist example status so --only-failures and --next-failure work.
  config.example_status_persistence_file_path = ".rspec_status"

  # Keep RSpec's DSL off `Module` and `main`.
  config.disable_monkey_patching!

  # Allow only the `expect` syntax.
  config.expect_with(:rspec) { |expectations| expectations.syntax = :expect }
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fast_fuzzy_matcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Vlad Dyachenko
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-01-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: A tiny and blazing-fast fuzzy search in pure Ruby with FFI bindings to
28
+ Go.Fuzzy searching allows for flexibly matching a string with partial input, useful
29
+ for filtering data very quickly based on lightweight user input.
30
+ email:
31
+ - vla-dy@yandex.ru
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - ".rspec"
37
+ - CHANGELOG.md
38
+ - LICENSE.txt
39
+ - README.md
40
+ - Rakefile
41
+ - ext/.idea/.gitignore
42
+ - ext/.idea/ext.iml
43
+ - ext/.idea/modules.xml
44
+ - ext/.idea/vcs.xml
45
+ - ext/fuzzy.go
46
+ - ext/fuzzy.h
47
+ - ext/fuzzy.so
48
+ - ext/go.mod
49
+ - ext/go.sum
50
+ - fast_fuzzy_matcher.gemspec
51
+ - lib/fuzzy_matcher.rb
52
+ - lib/fuzzy_matcher/version.rb
53
+ - spec/fuzzy_matcher_spec.rb
54
+ - spec/spec_helper.rb
55
+ homepage: https://github.com/wowinter13/fast_fuzzy_matcher
56
+ licenses:
57
+ - MIT
58
+ metadata:
59
+ bug_tracker_uri: https://github.com/wowinter13/fast_fuzzy_matcher/issues
60
+ changelog_uri: https://github.com/wowinter13/fast_fuzzy_matcher/blob/master/CHANGELOG.md
61
+ documentation_uri: https://www.rubydoc.info/github/wowinter13/fast_fuzzy_matcher
62
+ source_code_uri: https://github.com/wowinter13/fast_fuzzy_matcher
63
+ post_install_message:
64
+ rdoc_options: []
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: 2.6.0
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubygems_version: 3.4.10
79
+ signing_key:
80
+ specification_version: 4
81
+ summary: fast_fuzzy_matcher is the fastest fuzzy search library for Ruby.
82
+ test_files:
83
+ - spec/fuzzy_matcher_spec.rb
84
+ - spec/spec_helper.rb