fast_fuzzy_matcher 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e10b6a2b5f47c25d479a1a3623db229ecbc24f79176c3d6ae879ce6767e931b0
4
+ data.tar.gz: c4c910d1e4a462fab8819f2d406d2affe1c872d58f2943c7be196d25c439a7b6
5
+ SHA512:
6
+ metadata.gz: 3cef0e3e2a233385a0daf2c882002be8af28e38df4c58c500bbee29fd3b319d904b9ee918f6fbeb44c3557602562b7b0f3d1f53c3b03c1c197f2824f6caf81fd
7
+ data.tar.gz: 78c6671ffaad4807137977cc90100be0214e241b0895ab71955baf96500db286b235b62e121c9c13f00134a86f3138b92e652af597a37ed9bb0dfca559215069
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2023-10-16
4
+
5
+ - Initial release
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2023 wowinter13
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,66 @@
1
+ # FuzzyMatcher
2
+
3
+ This library is a work in progress.
4
+
5
+ The fastest Fuzzy Matcher in the wild west. FFI-based.
6
+
7
+ Find a needle in a haystack based on string similarity and regular expression rules.
8
+
9
+
10
+ ### Basic usage
11
+
12
+ Just pass an array of strings to the matcher and it will return the best match(es) for the given needle.
13
+
14
+ ```ruby
15
+ require 'fast_fuzzy_matcher'
16
+
17
+ FuzzyMatcher.find("whl", ["cartwheel", "foobar", "wheel", "baz"])
18
+ => ["cartwheel", "wheel"]
19
+
20
+ ```
21
+
22
+ ### Advanced usage
23
+
24
+ Better documentation is coming soon. For now, please refer to the specs.
25
+
26
+
27
+
28
+ # Benchmarks
29
+
30
+ To be done.
31
+
32
+ Approximately 10-60x faster than the fastest Ruby implementation. The difference is more pronounced for longer strings and larger dictionaries.
33
+
34
+
35
+ ## Documentation
36
+
37
+ Detailed documentation is available at [rubydoc](https://rubydoc.info/gems/fast_fuzzy_matcher).
38
+
39
+ ## Installation
40
+
41
+ fast_fuzzy_matcher is available as a gem. To install it, run:
42
+
43
+ gem install fast_fuzzy_matcher
44
+
45
+ If you're using Bundler, add the gem to Gemfile.
46
+
47
+ gem 'fast_fuzzy_matcher'
48
+
49
+ Run `bundle install`.
50
+
51
+ ## Running tests
52
+
53
+ bundle exec rspec spec/
54
+
55
+
56
+ ## Contributing
57
+
58
+ 1. Fork it ( https://github.com/wowinter13/fast_fuzzy_matcher/fork )
59
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
60
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
61
+ 4. Push to the branch (`git push origin my-new-feature`)
62
+ 5. Create a new Pull Request
63
+
64
+ ## License
65
+
66
+ MIT License. See LICENSE.txt for details.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
@@ -0,0 +1,8 @@
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
data/ext/.idea/ext.iml ADDED
@@ -0,0 +1,9 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="WEB_MODULE" version="4">
3
+ <component name="Go" enabled="true" />
4
+ <component name="NewModuleRootManager">
5
+ <content url="file://$MODULE_DIR$" />
6
+ <orderEntry type="inheritedJdk" />
7
+ <orderEntry type="sourceFolder" forTests="false" />
8
+ </component>
9
+ </module>
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/ext.iml" filepath="$PROJECT_DIR$/.idea/ext.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
data/ext/.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
5
+ </component>
6
+ </project>
data/ext/fuzzy.go ADDED
@@ -0,0 +1,416 @@
1
+ // Fuzzy searching allows for flexibly matching a string with partial input,
2
+ // useful for filtering data very quickly based on lightweight user input.
3
+ package main
4
+
5
+ import (
6
+ "unicode"
7
+ "unicode/utf8"
8
+ "unsafe"
9
+
10
+ "golang.org/x/text/runes"
11
+ "golang.org/x/text/transform"
12
+ "golang.org/x/text/unicode/norm"
13
+ )
14
+
15
+ /*
16
+ #include <stdlib.h> // for C.free
17
+ */
18
+ import "C"
19
+
20
+ func noopTransformer() transform.Transformer {
21
+ return nopTransformer{}
22
+ }
23
+
24
+ func foldTransformer() transform.Transformer {
25
+ return unicodeFoldTransformer{}
26
+ }
27
+
28
+ func normalizeTransformer() transform.Transformer {
29
+ return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
30
+ }
31
+
32
+ func normalizedFoldTransformer() transform.Transformer {
33
+ return transform.Chain(normalizeTransformer(), foldTransformer())
34
+ }
35
+
36
+ // Match returns true if source matches target using a fuzzy-searching
37
+ // algorithm. Note that it doesn't implement Levenshtein distance (see
38
+ // RankMatch instead), but rather a simplified version where there's no
39
+ // approximation. The method will return true only if each character in the
40
+ // source can be found in the target and occurs after the preceding matches.
41
+
42
+ // TODO: export Match
43
+ func Match(source, target string) bool {
44
+ return match(source, target, noopTransformer())
45
+ }
46
+
47
+ // MatchFold is a case-insensitive version of Match.
48
+
49
+ // TODO: export MatchFold
50
+ func MatchFold(source, target string) bool {
51
+ return match(source, target, foldTransformer())
52
+ }
53
+
54
+ // MatchNormalized is a unicode-normalized version of Match.
55
+
56
+ // TODO: export MatchNormalized
57
+ func MatchNormalized(source, target string) bool {
58
+ return match(source, target, normalizeTransformer())
59
+ }
60
+
61
+ // MatchNormalizedFold is a unicode-normalized and case-insensitive version of Match.
62
+
63
+ // TODO: export MatchNormalizedFold
64
+ func MatchNormalizedFold(source, target string) bool {
65
+ return match(source, target, normalizedFoldTransformer())
66
+ }
67
+
68
+ func match(source, target string, transformer transform.Transformer) bool {
69
+ sourceT := stringTransform(source, transformer)
70
+ targetT := stringTransform(target, transformer)
71
+ return matchTransformed(sourceT, targetT)
72
+ }
73
+
74
// matchTransformed reports whether every rune of source occurs in target, in
// the same order (not necessarily contiguously). Both arguments are expected
// to have been passed through the same transformer already.
//
// An empty source matches any target. A source that is longer (in bytes) than
// target can never match and is rejected up front.
func matchTransformed(source, target string) bool {
	lenDiff := len(target) - len(source)

	if lenDiff < 0 {
		return false
	}

	if lenDiff == 0 && source == target {
		return true
	}

Outer:
	for _, r1 := range source {
		// Decode by hand so we know how many bytes each rune really occupies.
		// An invalid byte decodes to utf8.RuneError with a width of 1, whereas
		// utf8.RuneLen(utf8.RuneError) is 3; using the latter to reslice the
		// target skips bytes or slices past the end of the string.
		for i := 0; i < len(target); {
			r2, size := utf8.DecodeRuneInString(target[i:])
			if r1 == r2 {
				// Continue searching strictly after this match.
				target = target[i+size:]
				continue Outer
			}
			i += size
		}
		return false
	}

	return true
}
98
+
99
+ // Find will return a list of strings in targets that fuzzy matches source.
100
+ //
101
+ // The returned list will be ordered by best matches first, or nil if there are
102
+ // no matches. The algorithm is optimized for matches where the target string
103
+ // has a prefix that matches the source.
104
+ //
105
+ // FFI note: the returned list is allocated using malloc and must be freed using
106
+ // free_cstrings.
107
+ //
108
+ //export Find
109
+ func Find(source *C.char, targets **C.char, targetsLen C.int) **C.char {
110
+ goSource := C.GoString(source)
111
+ sliceHeaders := (*[1 << 30]*C.char)(unsafe.Pointer(targets))[:targetsLen:targetsLen]
112
+
113
+ goTargets := make([]string, int(targetsLen))
114
+ for i := 0; i < int(targetsLen); i++ {
115
+ goTargets[i] = C.GoString(sliceHeaders[i])
116
+ }
117
+
118
+ results := find(goSource, goTargets, noopTransformer())
119
+
120
+ cResults := C.malloc(C.size_t(targetsLen) * C.size_t(unsafe.Sizeof(uintptr(0))))
121
+ cArray := (*[1 << 30]*C.char)(cResults)
122
+
123
+ for i := 0; i < int(targetsLen); i++ {
124
+ cArray[i] = C.CString("")
125
+ }
126
+
127
+ for i, s := range results {
128
+ C.free(unsafe.Pointer(cArray[i]))
129
+ cArray[i] = C.CString(s)
130
+ }
131
+
132
+ return (**C.char)(cResults)
133
+ }
134
+
135
+ //export free_cstrings
136
+ func free_cstrings(strs **C.char, len C.int) {
137
+ slice := (*[1 << 30]*C.char)(unsafe.Pointer(strs))[:len:len]
138
+ for i := 0; i < int(len); i++ {
139
+ C.free(unsafe.Pointer(slice[i]))
140
+ }
141
+ C.free(unsafe.Pointer(strs))
142
+ }
143
+
144
+ // FindFold is a case-insensitive version of Find.
145
+
146
+ // TODO: export FindFold
147
+ func FindFold(source string, targets []string) []string {
148
+ return find(source, targets, foldTransformer())
149
+ }
150
+
151
+ // FindNormalized is a unicode-normalized version of Find.
152
+
153
+ // TODO: export FindNormalized
154
+ func FindNormalized(source string, targets []string) []string {
155
+ return find(source, targets, normalizeTransformer())
156
+ }
157
+
158
+ // FindNormalizedFold is a unicode-normalized and case-insensitive version of Find.
159
+
160
+ // TODO: export FindNormalizedFold
161
+ func FindNormalizedFold(source string, targets []string) []string {
162
+ return find(source, targets, normalizedFoldTransformer())
163
+ }
164
+
165
+ func find(source string, targets []string, transformer transform.Transformer) []string {
166
+ sourceT := stringTransform(source, transformer)
167
+
168
+ var matches []string
169
+
170
+ for _, target := range targets {
171
+ targetT := stringTransform(target, transformer)
172
+ if matchTransformed(sourceT, targetT) {
173
+ matches = append(matches, target)
174
+ }
175
+ }
176
+
177
+ return matches
178
+ }
179
+
180
+ // RankMatch is similar to Match except it will measure the Levenshtein
181
+ // distance between the source and the target and return its result. If there
182
+ // was no match, it will return -1.
183
+ // Given the requirements of match, RankMatch only needs to perform a subset of
184
+ // the Levenshtein calculation, only deletions need be considered, required
185
+ // additions and substitutions would fail the match test.
186
+
187
+ // TODO: export RankMatch
188
+ func RankMatch(source, target string) int {
189
+ return rank(source, target, noopTransformer())
190
+ }
191
+
192
+ // RankMatchFold is a case-insensitive version of RankMatch.
193
+
194
+ // TODO: export RankMatchFold
195
+ func RankMatchFold(source, target string) int {
196
+ return rank(source, target, foldTransformer())
197
+ }
198
+
199
+ // RankMatchNormalized is a unicode-normalized version of RankMatch.
200
+
201
+ // TODO: export RankMatchNormalized
202
+ func RankMatchNormalized(source, target string) int {
203
+ return rank(source, target, normalizeTransformer())
204
+ }
205
+
206
+ // RankMatchNormalizedFold is a unicode-normalized and case-insensitive version of RankMatch.
207
+
208
+ // TODO: export RankMatchNormalizedFold
209
+ func RankMatchNormalizedFold(source, target string) int {
210
+ return rank(source, target, normalizedFoldTransformer())
211
+ }
212
+
213
+ func rank(source, target string, transformer transform.Transformer) int {
214
+ lenDiff := len(target) - len(source)
215
+
216
+ if lenDiff < 0 {
217
+ return -1
218
+ }
219
+
220
+ source = stringTransform(source, transformer)
221
+ target = stringTransform(target, transformer)
222
+
223
+ if lenDiff == 0 && source == target {
224
+ return 0
225
+ }
226
+
227
+ runeDiff := 0
228
+
229
+ Outer:
230
+ for _, r1 := range source {
231
+ for i, r2 := range target {
232
+ if r1 == r2 {
233
+ target = target[i+utf8.RuneLen(r2):]
234
+ continue Outer
235
+ } else {
236
+ runeDiff++
237
+ }
238
+ }
239
+ return -1
240
+ }
241
+
242
+ // Count up remaining char
243
+ runeDiff += utf8.RuneCountInString(target)
244
+
245
+ return runeDiff
246
+ }
247
+
248
+ // RankFind is similar to Find, except it will also rank all matches using
249
+ // Levenshtein distance.
250
+
251
+ // TODO: export RankFind
252
+ func RankFind(source string, targets []string) Ranks {
253
+ return rankFind(source, targets, noopTransformer())
254
+ }
255
+
256
+ // RankFindFold is a case-insensitive version of RankFind.
257
+
258
+ // TODO: export RankFindFold
259
+ func RankFindFold(source string, targets []string) Ranks {
260
+ return rankFind(source, targets, foldTransformer())
261
+ }
262
+
263
+ // RankFindNormalized is a unicode-normalized version of RankFind.
264
+
265
+ // TODO: export RankFindNormalized
266
+ func RankFindNormalized(source string, targets []string) Ranks {
267
+ return rankFind(source, targets, normalizeTransformer())
268
+ }
269
+
270
+ // RankFindNormalizedFold is a unicode-normalized and case-insensitive version of RankFind.
271
+
272
+ // TODO: export RankFindNormalizedFold
273
+ func RankFindNormalizedFold(source string, targets []string) Ranks {
274
+ return rankFind(source, targets, normalizedFoldTransformer())
275
+ }
276
+
277
+ func rankFind(source string, targets []string, transformer transform.Transformer) Ranks {
278
+ sourceT := stringTransform(source, transformer)
279
+
280
+ var r Ranks
281
+
282
+ for index, target := range targets {
283
+ targetT := stringTransform(target, transformer)
284
+ if matchTransformed(sourceT, targetT) {
285
+ distance := LevenshteinDistance(source, target)
286
+ r = append(r, Rank{source, target, distance, index})
287
+ }
288
+ }
289
+ return r
290
+ }
291
+
292
// Rank is a single fuzzy-match result.
type Rank struct {
	Source        string // Source is used as the source for matching.
	Target        string // Target is the word matched against.
	Distance      int    // Distance is the Levenshtein distance between Source and Target.
	OriginalIndex int    // OriginalIndex is the location of Target in the original list.
}

// Ranks is a list of Rank values that can be ordered by ascending Distance.
type Ranks []Rank

// Len returns the number of ranks.
func (rs Ranks) Len() int { return len(rs) }

// Swap exchanges the ranks at positions a and b.
func (rs Ranks) Swap(a, b int) { rs[a], rs[b] = rs[b], rs[a] }

// Less reports whether the rank at a has a smaller Distance than the one at b.
func (rs Ranks) Less(a, b int) bool { return rs[b].Distance > rs[a].Distance }
319
+
320
+ func stringTransform(s string, t transform.Transformer) (transformed string) {
321
+ // Fast path for the nop transformer to prevent unnecessary allocations.
322
+ if _, ok := t.(nopTransformer); ok {
323
+ return s
324
+ }
325
+
326
+ var err error
327
+ transformed, _, err = transform.String(t, s)
328
+ if err != nil {
329
+ transformed = s
330
+ }
331
+
332
+ return
333
+ }
334
+
335
+ type unicodeFoldTransformer struct{ transform.NopResetter }
336
+
337
+ func (unicodeFoldTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
338
+ // Converting src to a string allocates.
339
+ // In theory, it need not; see https://go.dev/issue/27148.
340
+ // It is possible to write this loop using utf8.DecodeRune
341
+ // and thereby avoid allocations, but it is noticeably slower.
342
+ // So just let's wait for the compiler to get smarter.
343
+ for _, r := range string(src) {
344
+ if r == utf8.RuneError {
345
+ // Go spec for ranging over a string says:
346
+ // If the iteration encounters an invalid UTF-8 sequence,
347
+ // the second value will be 0xFFFD, the Unicode replacement character,
348
+ // and the next iteration will advance a single byte in the string.
349
+ nSrc++
350
+ } else {
351
+ nSrc += utf8.RuneLen(r)
352
+ }
353
+ r = unicode.ToLower(r)
354
+ x := utf8.RuneLen(r)
355
+ if x > len(dst[nDst:]) {
356
+ err = transform.ErrShortDst
357
+ break
358
+ }
359
+ nDst += utf8.EncodeRune(dst[nDst:], r)
360
+ }
361
+ return nDst, nSrc, err
362
+ }
363
+
364
+ type nopTransformer struct{ transform.NopResetter }
365
+
366
+ func (nopTransformer) Transform(dst []byte, src []byte, atEOF bool) (int, int, error) {
367
+ return 0, len(src), nil
368
+ }
369
+
370
// LevenshteinDistance measures the difference between two strings.
// The Levenshtein distance between two words is the minimum number of
// single-character edits (i.e. insertions, deletions or substitutions)
// required to change one word into the other.
//
// The comparison is done rune by rune. This implementation is optimized to use
// O(min(m,n)) space and is based on the optimized C version found here:
// http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C
//
// TODO: export LevenshteinDistance
func LevenshteinDistance(s, t string) int {
	r1, r2 := []rune(s), []rune(t)

	// The distance is symmetric, so keep the shorter string along the column
	// to bound the working memory by min(len(s), len(t)).
	if len(r1) > len(r2) {
		r1, r2 = r2, r1
	}

	// column[y] holds the distance between r1[:y] and the prefix of r2
	// processed so far.
	column := make([]int, len(r1)+1)
	for y := range column {
		column[y] = y
	}

	for x := 1; x <= len(r2); x++ {
		column[0] = x
		lastDiag := x - 1

		for y := 1; y <= len(r1); y++ {
			oldDiag := column[y]

			// Substitution (free when the runes are equal).
			best := lastDiag
			if r1[y-1] != r2[x-1] {
				best++
			}
			// The two single-rune edits that do not align r1[y-1] with r2[x-1].
			if alt := column[y] + 1; alt < best {
				best = alt
			}
			if alt := column[y-1] + 1; alt < best {
				best = alt
			}

			column[y] = best
			lastDiag = oldDiag
		}
	}

	return column[len(r1)]
}
404
+
405
// min2 returns the smaller of a and b.
func min2(a, b int) int {
	if b <= a {
		return b
	}
	return a
}

// min returns the smallest of a, b and c.
func min(a, b, c int) int {
	smallest := min2(a, b)
	return min2(smallest, c)
}
415
+
416
// main is intentionally empty: this package exposes its functionality through
// the cgo-exported functions rather than by running as a program.
func main() {
}
data/ext/fuzzy.h ADDED
@@ -0,0 +1,87 @@
1
+ /* Code generated by cmd/cgo; DO NOT EDIT. */
2
+
3
+ /* package command-line-arguments */
4
+
5
+
6
+ #line 1 "cgo-builtin-export-prolog"
7
+
8
+ #include <stddef.h>
9
+
10
+ #ifndef GO_CGO_EXPORT_PROLOGUE_H
11
+ #define GO_CGO_EXPORT_PROLOGUE_H
12
+
13
+ #ifndef GO_CGO_GOSTRING_TYPEDEF
14
+ typedef struct { const char *p; ptrdiff_t n; } _GoString_;
15
+ #endif
16
+
17
+ #endif
18
+
19
+ /* Start of preamble from import "C" comments. */
20
+
21
+
22
+ #line 15 "fuzzy.go"
23
+
24
+ #include <stdlib.h> // for C.free
25
+
26
+ #line 1 "cgo-generated-wrapper"
27
+
28
+
29
+ /* End of preamble from import "C" comments. */
30
+
31
+
32
+ /* Start of boilerplate cgo prologue. */
33
+ #line 1 "cgo-gcc-export-header-prolog"
34
+
35
+ #ifndef GO_CGO_PROLOGUE_H
36
+ #define GO_CGO_PROLOGUE_H
37
+
38
+ typedef signed char GoInt8;
39
+ typedef unsigned char GoUint8;
40
+ typedef short GoInt16;
41
+ typedef unsigned short GoUint16;
42
+ typedef int GoInt32;
43
+ typedef unsigned int GoUint32;
44
+ typedef long long GoInt64;
45
+ typedef unsigned long long GoUint64;
46
+ typedef GoInt64 GoInt;
47
+ typedef GoUint64 GoUint;
48
+ typedef size_t GoUintptr;
49
+ typedef float GoFloat32;
50
+ typedef double GoFloat64;
51
+ #ifdef _MSC_VER
52
+ #include <complex.h>
53
+ typedef _Fcomplex GoComplex64;
54
+ typedef _Dcomplex GoComplex128;
55
+ #else
56
+ typedef float _Complex GoComplex64;
57
+ typedef double _Complex GoComplex128;
58
+ #endif
59
+
60
+ /*
61
+ static assertion to make sure the file is being used on architecture
62
+ at least with matching size of GoInt.
63
+ */
64
+ typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1];
65
+
66
+ #ifndef GO_CGO_GOSTRING_TYPEDEF
67
+ typedef _GoString_ GoString;
68
+ #endif
69
+ typedef void *GoMap;
70
+ typedef void *GoChan;
71
+ typedef struct { void *t; void *v; } GoInterface;
72
+ typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;
73
+
74
+ #endif
75
+
76
+ /* End of boilerplate cgo prologue. */
77
+
78
+ #ifdef __cplusplus
79
+ extern "C" {
80
+ #endif
81
+
82
+ extern char** Find(char* source, char** targets, int targetsLen);
83
+ extern void free_cstrings(char** strs, int len);
84
+
85
+ #ifdef __cplusplus
86
+ }
87
+ #endif
data/ext/fuzzy.so ADDED
Binary file
data/ext/go.mod ADDED
@@ -0,0 +1,5 @@
1
+ module github.com/wowinter13/fast_fuzzy_matcher
2
+
3
+ go 1.19
4
+
5
+ require golang.org/x/text v0.13.0
data/ext/go.sum ADDED
@@ -0,0 +1,2 @@
1
+ golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
2
+ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/fuzzy_matcher/version"
4
+
5
Gem::Specification.new do |spec|
  spec.name = "fast_fuzzy_matcher"
  spec.version = FuzzyMatcher::VERSION
  spec.authors = ["Vlad Dyachenko"]
  spec.email = ["vla-dy@yandex.ru"]

  spec.summary = "fast_fuzzy_matcher is the fastest fuzzy search library for Ruby."
  # Adjacent string literals are concatenated verbatim, so each fragment must
  # carry its own trailing space.
  spec.description = "A tiny and blazing-fast fuzzy search in pure Ruby with FFI bindings to Go. " \
                     "Fuzzy searching allows for flexibly matching a string with partial input, " \
                     "useful for filtering data very quickly based on lightweight user input."
  spec.homepage = "https://github.com/wowinter13/fast_fuzzy_matcher"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 2.6.0"

  spec.metadata = {
    'bug_tracker_uri' => 'https://github.com/wowinter13/fast_fuzzy_matcher/issues',
    'changelog_uri' => "https://github.com/wowinter13/fast_fuzzy_matcher/blob/master/CHANGELOG.md",
    'documentation_uri' => "https://www.rubydoc.info/github/wowinter13/fast_fuzzy_matcher",
    'source_code_uri' => "https://github.com/wowinter13/fast_fuzzy_matcher"
  }

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (File.expand_path(f) == __FILE__) ||
        f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor Gemfile])
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.test_files = Dir['spec/**/*']

  spec.add_dependency "ffi"
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FuzzyMatcher
4
+ VERSION = "0.2.0"
5
+ end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "fuzzy_matcher/version"
4
+
5
+ require 'ffi'
6
+
7
+ module FuzzyMatcher
8
+ class Error < StandardError; end
9
+
10
+ # find() will return a list of strings in targets that fuzzy matches source.
11
+ #
12
+ # @param [String] source The string to match against.
13
+ # @param [Array<String>] targets The strings to match.
14
+ #
15
+ # @return [Array<String>] The strings in targets that fuzzy match source.
16
+ #
17
+ # @example
18
+ # require 'fast_fuzzy_matcher'
19
+ # FuzzyMatch.find("whl", ["cartwheel", "foobar", "wheel", "baz"])
20
+ # => ["cartwheel", "wheel"]
21
+ #
22
+ # @note This method possibly is not thread safe.
23
+ # @note This method is case sensitive. For case insensitive matching, downcase targets/source or use a case insensitive matcher (#find_fold)
24
+ #
25
+ # @see ext/fuzzy.go#Find for the implementation of this method.
26
+ def self.find(source, targets)
27
+ pointers = targets.map { |t| FFI::MemoryPointer.from_string(t) }
28
+ targets_ptr = FFI::MemoryPointer.new(:pointer, targets.size)
29
+ targets_ptr.write_array_of_pointer(pointers)
30
+
31
+ result_ptr = FuzzyBinding.Find(source, targets_ptr, targets.size)
32
+
33
+ return [] if result_ptr.null?
34
+
35
+ pointers_array = result_ptr.read_array_of_pointer(targets.size)
36
+
37
+ result_array = pointers_array.each_with_object([]) do |ptr, arr|
38
+ if ptr && !ptr.null?
39
+ value = ptr.read_string_to_null
40
+ arr << value unless value.nil? || value == ""
41
+ end
42
+ end
43
+
44
+ FuzzyBinding.free_cstrings(result_ptr, targets.size)
45
+
46
+ FFI::MemoryPointer.new(:pointer).write_pointer(result_ptr).free
47
+
48
+ result_array
49
+ end
50
+
51
+ module FuzzyBinding
52
+ extend FFI::Library
53
+ ffi_lib File.expand_path("../ext/fuzzy.so", File.dirname(__FILE__))
54
+
55
+ attach_function :Find, [:string, :pointer, :int], :pointer
56
+ attach_function :free_cstrings, [:pointer, :int], :void
57
+ end
58
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe FuzzyMatcher do
6
+ it "has a version number" do
7
+ expect(FuzzyMatcher::VERSION).not_to be nil
8
+ end
9
+
10
+ describe "#find" do
11
+ it "responds with an empty array when no matches are found" do
12
+ expect(FuzzyMatcher.find("foo", ["bar", "baz"])).to eq([])
13
+ end
14
+
15
+ it "responds with an empty array when no targets are given" do
16
+ expect(FuzzyMatcher.find("foo", [])).to eq([])
17
+ end
18
+
19
+ it "responds with matches when the source is a substring of a target" do
20
+ expect(FuzzyMatcher.find("whl", ["cartwheel", "foobar", "wheel", "baz"])).to eq(["cartwheel", "wheel"])
21
+ end
22
+
23
+ it "does not respond with matches when the source is a substring of a target and the source is uppercase" do
24
+ expect(FuzzyMatcher.find("WHL", ["cartwheel", "foobar", "wheel", "baz"])).to eq([])
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fuzzy_matcher"
4
+
5
+ RSpec.configure do |config|
6
+ # Enable flags like --only-failures and --next-failure
7
+ config.example_status_persistence_file_path = ".rspec_status"
8
+
9
+ # Disable RSpec exposing methods globally on `Module` and `main`
10
+ config.disable_monkey_patching!
11
+
12
+ config.expect_with :rspec do |c|
13
+ c.syntax = :expect
14
+ end
15
+ end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fast_fuzzy_matcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Vlad Dyachenko
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-01-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: A tiny and blazing-fast fuzzy search in pure Ruby with FFI bindings to
28
+ Go.Fuzzy searching allows for flexibly matching a string with partial input, useful
29
+ for filtering data very quickly based on lightweight user input.
30
+ email:
31
+ - vla-dy@yandex.ru
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - ".rspec"
37
+ - CHANGELOG.md
38
+ - LICENSE.txt
39
+ - README.md
40
+ - Rakefile
41
+ - ext/.idea/.gitignore
42
+ - ext/.idea/ext.iml
43
+ - ext/.idea/modules.xml
44
+ - ext/.idea/vcs.xml
45
+ - ext/fuzzy.go
46
+ - ext/fuzzy.h
47
+ - ext/fuzzy.so
48
+ - ext/go.mod
49
+ - ext/go.sum
50
+ - fast_fuzzy_matcher.gemspec
51
+ - lib/fuzzy_matcher.rb
52
+ - lib/fuzzy_matcher/version.rb
53
+ - spec/fuzzy_matcher_spec.rb
54
+ - spec/spec_helper.rb
55
+ homepage: https://github.com/wowinter13/fast_fuzzy_matcher
56
+ licenses:
57
+ - MIT
58
+ metadata:
59
+ bug_tracker_uri: https://github.com/wowinter13/fast_fuzzy_matcher/issues
60
+ changelog_uri: https://github.com/wowinter13/fast_fuzzy_matcher/blob/master/CHANGELOG.md
61
+ documentation_uri: https://www.rubydoc.info/github/wowinter13/fast_fuzzy_matcher
62
+ source_code_uri: https://github.com/wowinter13/fast_fuzzy_matcher
63
+ post_install_message:
64
+ rdoc_options: []
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: 2.6.0
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubygems_version: 3.4.10
79
+ signing_key:
80
+ specification_version: 4
81
+ summary: fast_fuzzy_matcher is the fastest fuzzy search library for Ruby.
82
+ test_files:
83
+ - spec/fuzzy_matcher_spec.rb
84
+ - spec/spec_helper.rb