fts_fuzzy_match 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +1 -17
- data/.github/workflows/{fuzzy_match.yml → fts_fuzzy_match.yml} +5 -5
- data/.gitignore +1 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/README.md +23 -0
- data/ext/fts_fuzzy_match/fts_fuzzy_match.c +47 -15
- data/ext/fts_fuzzy_match/fts_fuzzy_match.h +204 -4
- data/lib/fts_fuzzy_match/version.rb +2 -2
- data/lib/fts_fuzzy_match.rb +28 -7
- metadata +2 -7
- data/.github/workflows/packaged_source.yml +0 -37
- data/.github/workflows/packaged_tarball.yml +0 -41
- data/.github/workflows/precompiled.yml +0 -232
- data/.github/workflows/system.yml +0 -40
- data/ext/fts_fuzzy_match/fts_fuzzy_match_impl.h +0 -203
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ffcb83f6d3c87cff136f4a58e5c0cf4765b9f3841f88dc655bd0eaa7dae0d2bf
|
4
|
+
data.tar.gz: 1794738cfbec5e7071703f12d06ab616ef7f1fc891563077fddd72bb16649443
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a05b083ca4acc80980bcb74a9de129a0f77961e825efc417d78eb5024a3064b68be2f4185cc2a672f4e2a5aeaed1294fd9df1d4512d206162883be2d1329e875
|
7
|
+
data.tar.gz: 4a975a00538b977feeb0b446cda6f4a28368713da525f834d7bf9cfe915f48755fc8528b2baf75acf97ca6ecbbbfb9fe7dff7cee89467bb64bb195412a55b062
|
data/.github/dependabot.yml
CHANGED
@@ -5,22 +5,6 @@ updates:
|
|
5
5
|
schedule:
|
6
6
|
interval: "weekly"
|
7
7
|
- package-ecosystem: "bundler"
|
8
|
-
directory: "/
|
9
|
-
schedule:
|
10
|
-
interval: "weekly"
|
11
|
-
- package-ecosystem: "bundler"
|
12
|
-
directory: "/system"
|
13
|
-
schedule:
|
14
|
-
interval: "weekly"
|
15
|
-
- package-ecosystem: "bundler"
|
16
|
-
directory: "/packaged_source"
|
17
|
-
schedule:
|
18
|
-
interval: "weekly"
|
19
|
-
- package-ecosystem: "bundler"
|
20
|
-
directory: "/packaged_tarball"
|
21
|
-
schedule:
|
22
|
-
interval: "weekly"
|
23
|
-
- package-ecosystem: "bundler"
|
24
|
-
directory: "/precompiled"
|
8
|
+
directory: "/"
|
25
9
|
schedule:
|
26
10
|
interval: "weekly"
|
@@ -1,4 +1,4 @@
|
|
1
|
-
name:
|
1
|
+
name: fts_fuzzy_match
|
2
2
|
concurrency:
|
3
3
|
group: "${{github.workflow}}-${{github.ref}}"
|
4
4
|
cancel-in-progress: true
|
@@ -16,22 +16,22 @@ on:
|
|
16
16
|
types: [opened, synchronize]
|
17
17
|
branches:
|
18
18
|
- '*'
|
19
|
-
paths: ["
|
19
|
+
paths: ["**/*"]
|
20
20
|
|
21
21
|
jobs:
|
22
22
|
fuzzy_match:
|
23
23
|
strategy:
|
24
24
|
fail-fast: false
|
25
25
|
matrix:
|
26
|
-
ruby: ["3.
|
26
|
+
ruby: ["3.3", "3.4", "head"]
|
27
27
|
runs-on: ["ubuntu-latest", "macos-latest", "windows-latest"]
|
28
28
|
runs-on: ${{matrix.runs-on}}
|
29
29
|
steps:
|
30
30
|
- uses: actions/checkout@v4
|
31
31
|
- uses: ruby/setup-ruby@v1
|
32
32
|
with:
|
33
|
-
working-directory:
|
33
|
+
working-directory: .
|
34
34
|
ruby-version: ${{matrix.ruby}}
|
35
35
|
bundler-cache: true
|
36
36
|
- run: bundle exec rake compile test
|
37
|
-
working-directory:
|
37
|
+
working-directory: .
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -7,6 +7,7 @@ GEM
|
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
ast (2.4.3)
|
10
|
+
benchmark (0.4.1)
|
10
11
|
json (2.12.2)
|
11
12
|
language_server-protocol (3.17.0.5)
|
12
13
|
lint_roller (1.1.0)
|
@@ -57,6 +58,7 @@ PLATFORMS
|
|
57
58
|
ruby
|
58
59
|
|
59
60
|
DEPENDENCIES
|
61
|
+
benchmark
|
60
62
|
fts_fuzzy_match!
|
61
63
|
json
|
62
64
|
minitest (~> 5.0)
|
data/README.md
CHANGED
@@ -7,6 +7,29 @@ useful for sorting against each other.
|
|
7
7
|
|
8
8
|
Add this line to your application's Gemfile:
|
9
9
|
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
To find the score for a pattern and string, use the `fuzzy_match` method:
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
subject = ::FtsFuzzyMatch.new
|
16
|
+
result = subject.fuzzy_match("got", "game of thrones")
|
17
|
+
```
|
18
|
+
|
19
|
+
To sort strings based on their match scores, use the `sort` method:
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
subject = ::FtsFuzzyMatch.new(sequential_bonus: 20, camel_bonus: 0, string_length_penalty: -1)
|
23
|
+
sorted_strings = subject.sort("got", ["game of thrones", "got", "winter is coming"])
|
24
|
+
```
|
25
|
+
|
26
|
+
To sort strings based on their match scores, returning only the top 2 results, use the `sort_n` method:
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
subject = ::FtsFuzzyMatch.new
|
30
|
+
sorted_strings = subject.sort_n("got", ["game of thrones", "got", "winter is coming"], 2)
|
31
|
+
```
|
32
|
+
|
10
33
|
## Credits
|
11
34
|
|
12
35
|
- This gem was started by using the Ruby C Extensions Explained project at
|
@@ -1,10 +1,33 @@
|
|
1
|
-
#include "
|
1
|
+
#include "ruby.h"
|
2
2
|
#define FTS_FUZZY_MATCH_IMPLEMENTATION
|
3
|
-
#include "
|
3
|
+
#include "fts_fuzzy_match.h"
|
4
4
|
|
5
|
-
VALUE
|
5
|
+
VALUE rb_cFtsFuzzyMatch;
|
6
6
|
VALUE rb_cFtsFuzzyMatchExtension;
|
7
7
|
|
8
|
+
/*
 * Reads the instance variable `name` (e.g. "@camel_bonus") from `self` and
 * converts it to a C int.
 *
 *   - Array:   the first element is converted with NUM2INT; an empty array
 *              yields `default_value`.
 *   - Fixnum:  the value itself is converted with NUM2INT.
 *   - Other:   `default_value` is returned. This covers nil, which is what
 *              Extension#initialize stores for options that were not given.
 *
 * NUM2INT may raise a Ruby exception if the value cannot be represented as
 * an int (or, for the array case, if the first element is not numeric).
 */
int get_num_from_self(VALUE self, char *name, int default_value) {
  VALUE val = rb_iv_get(self, name);

  switch (TYPE(val)) {
    case T_ARRAY:
      /* An empty array has no element 0 to read; fall back to the default. */
      if (RARRAY_LEN(val) == 0) {
        return default_value;
      }
      return NUM2INT(RARRAY_AREF(val, 0));
    case T_FIXNUM:
      return NUM2INT(val);
    default:
      return default_value;
  }
}
|
16
|
+
|
17
|
+
/*
 * Builds the scoring configuration for one call by reading each tuning
 * option from the instance variables of `self`. Every option falls back to
 * the built-in default passed as the last argument of get_num_from_self()
 * when the instance variable does not hold a usable number.
 */
struct FtsConfig fts_config(VALUE self) {
  struct FtsConfig cfg;

  /* Bonuses */
  cfg.sequential_bonus = get_num_from_self(self, (char*)"@sequential_bonus", 15);
  cfg.separator_bonus = get_num_from_self(self, (char*)"@separator_bonus", 30);
  cfg.camel_bonus = get_num_from_self(self, (char*)"@camel_bonus", 30);
  cfg.first_letter_bonus = get_num_from_self(self, (char*)"@first_letter_bonus", 15);

  /* Penalties */
  cfg.leading_letter_penalty = get_num_from_self(self, (char*)"@leading_letter_penalty", -5);
  cfg.max_leading_letter_penalty = get_num_from_self(self, (char*)"@max_leading_letter_penalty", -15);
  cfg.unmatched_letter_penalty = get_num_from_self(self, (char*)"@unmatched_letter_penalty", -1);
  cfg.string_length_penalty = get_num_from_self(self, (char*)"@string_length_penalty", 0);

  return cfg;
}
|
30
|
+
|
8
31
|
static VALUE
|
9
32
|
rb_fts_fuzzy_match_extension_class_fuzzy_match(VALUE self, VALUE pattern, VALUE str)
|
10
33
|
{
|
@@ -13,8 +36,9 @@ rb_fts_fuzzy_match_extension_class_fuzzy_match(VALUE self, VALUE pattern, VALUE
|
|
13
36
|
char* strPtr;
|
14
37
|
strPtr = StringValueCStr(str);
|
15
38
|
|
39
|
+
struct FtsConfig config = fts_config(self);
|
16
40
|
int outScore;
|
17
|
-
int matched = fts_fuzzy_match_simple(patternPtr, strPtr, &outScore);
|
41
|
+
int matched = fts_fuzzy_match_simple(patternPtr, strPtr, &config, &outScore);
|
18
42
|
// return rb_sprintf("Matched: %d\nScore: %d\n", matched, outScore);
|
19
43
|
if (matched) {
|
20
44
|
return INT2FIX(outScore);
|
@@ -24,7 +48,8 @@ rb_fts_fuzzy_match_extension_class_fuzzy_match(VALUE self, VALUE pattern, VALUE
|
|
24
48
|
}
|
25
49
|
|
26
50
|
struct StringScore {
|
27
|
-
VALUE
|
51
|
+
VALUE rbStr;
|
52
|
+
char *cStr;
|
28
53
|
bool matched;
|
29
54
|
int score;
|
30
55
|
};
|
@@ -39,7 +64,12 @@ int comp(const void *a, const void *b) {
|
|
39
64
|
} else if (!aa->matched && bb->matched) {
|
40
65
|
return 1;
|
41
66
|
}
|
42
|
-
|
67
|
+
if (aa->score != bb->score) {
|
68
|
+
return bb->score - aa->score;
|
69
|
+
} else {
|
70
|
+
// Given the score is the same, sort alphabetically to keep the order consistent
|
71
|
+
return strcasecmp(aa->cStr, bb->cStr);
|
72
|
+
}
|
43
73
|
}
|
44
74
|
|
45
75
|
static VALUE
|
@@ -49,12 +79,14 @@ rb_fts_fuzzy_match_extension_class_sort_n(VALUE self, VALUE pattern, VALUE strin
|
|
49
79
|
patternPtr = StringValueCStr(pattern);
|
50
80
|
long stringsLen = RARRAY_LEN(strings);
|
51
81
|
|
82
|
+
struct FtsConfig config = fts_config(self);
|
83
|
+
|
52
84
|
struct StringScore *scores = (struct StringScore *)malloc(stringsLen * sizeof(struct StringScore));
|
53
85
|
for (long i=0; i<stringsLen; i++) {
|
54
|
-
|
55
|
-
|
56
|
-
scores[i].
|
57
|
-
scores[i].matched = fts_fuzzy_match_simple(patternPtr,
|
86
|
+
volatile VALUE str = RARRAY_AREF(strings, i);
|
87
|
+
scores[i].rbStr = str;
|
88
|
+
scores[i].cStr = StringValueCStr(str);
|
89
|
+
scores[i].matched = fts_fuzzy_match_simple(patternPtr, scores[i].cStr, &config, &scores[i].score);
|
58
90
|
}
|
59
91
|
|
60
92
|
qsort(scores, stringsLen, sizeof(struct StringScore), comp);
|
@@ -64,7 +96,7 @@ rb_fts_fuzzy_match_extension_class_sort_n(VALUE self, VALUE pattern, VALUE strin
|
|
64
96
|
|
65
97
|
VALUE result = rb_ary_new_capa(n2);
|
66
98
|
for (long i=0; i<n2; i++) {
|
67
|
-
rb_ary_push(result, scores[i].
|
99
|
+
rb_ary_push(result, scores[i].rbStr);
|
68
100
|
}
|
69
101
|
|
70
102
|
return result;
|
@@ -73,10 +105,10 @@ rb_fts_fuzzy_match_extension_class_sort_n(VALUE self, VALUE pattern, VALUE strin
|
|
73
105
|
void
|
74
106
|
Init_fts_fuzzy_match(void)
|
75
107
|
{
|
76
|
-
|
77
|
-
rb_cFtsFuzzyMatchExtension = rb_define_class_under(
|
78
|
-
|
108
|
+
rb_cFtsFuzzyMatch = rb_define_class("FtsFuzzyMatch", rb_cObject);
|
109
|
+
rb_cFtsFuzzyMatchExtension = rb_define_class_under(rb_cFtsFuzzyMatch, "Extension", rb_cObject);
|
110
|
+
rb_define_method(rb_cFtsFuzzyMatchExtension, "fuzzy_match",
|
79
111
|
rb_fts_fuzzy_match_extension_class_fuzzy_match, 2);
|
80
|
-
|
112
|
+
rb_define_method(rb_cFtsFuzzyMatchExtension, "sort_n",
|
81
113
|
rb_fts_fuzzy_match_extension_class_sort_n, 3);
|
82
114
|
}
|
@@ -1,6 +1,206 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
// LICENSE
|
2
|
+
//
|
3
|
+
// This software is dual-licensed to the public domain and under the following
|
4
|
+
// license: you are granted a perpetual, irrevocable license to copy, modify,
|
5
|
+
// publish, and distribute this file as you see fit.
|
6
|
+
//
|
7
|
+
// VERSION
|
8
|
+
// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
|
9
|
+
// 0.1.0 (2016-03-28) Initial release
|
10
|
+
//
|
11
|
+
// AUTHOR
|
12
|
+
// Forrest Smith
|
13
|
+
//
|
14
|
+
// NOTES
|
15
|
+
// Compiling
|
16
|
+
// You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
|
17
|
+
//
|
18
|
+
// fts_fuzzy_match_simple(...)
|
19
|
+
// Simplified version of fts_fuzzy_match
|
20
|
+
//
|
21
|
+
// fts_fuzzy_match(...)
|
22
|
+
// Returns true if pattern is found AND calculates a score.
|
23
|
+
// Performs exhaustive search via recursion to find all possible matches and match with highest score.
|
24
|
+
// Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
|
25
|
+
// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
|
26
|
+
// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
|
27
|
+
// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
|
3
28
|
|
4
|
-
#include "ruby.h"
|
5
29
|
|
6
|
-
#
|
30
|
+
#ifndef FTS_FUZZY_MATCH_H
|
31
|
+
#define FTS_FUZZY_MATCH_H
|
32
|
+
|
33
|
+
|
34
|
+
#include <ctype.h> // tolower, toupper
|
35
|
+
|
36
|
+
// Public interface
|
37
|
+
// Scoring weights used by the matcher. Every field is added to the running
// score, so bonuses are expected to be positive and penalties negative.
struct FtsConfig {
    int sequential_bonus;            // bonus for adjacent matches (DEFAULT: 15)
    int separator_bonus;             // bonus if match occurs after a separator ('_' or ' ')
    int camel_bonus;                 // bonus if match is uppercase and prev is lower (DEFAULT: 30)
    int first_letter_bonus;          // bonus if the first letter is matched

    int leading_letter_penalty;      // penalty applied for every letter in str before the first match
    int max_leading_letter_penalty;  // maximum penalty for leading letters (the total is clamped to this value)
    int unmatched_letter_penalty;    // penalty for every letter that doesn't match
    int string_length_penalty;       // penalty multiplied by the length of the scanned string (DEFAULT: 0)
};
|
48
|
+
|
49
|
+
|
50
|
+
static bool fts_fuzzy_match_simple(char const * pattern, char const * str, struct FtsConfig const * config, int * outScore);
|
51
|
+
static bool fts_fuzzy_match(char const * pattern, char const * str, struct FtsConfig const * config, int * outScore, uint8_t * matches, int maxMatches);
|
52
|
+
|
53
|
+
#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
|
54
|
+
|
55
|
+
// Private interface
|
56
|
+
static bool fts_fuzzy_match_recursive(const char * pattern, const char * str, struct FtsConfig const * config, int * outScore,
|
57
|
+
const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches,
|
58
|
+
int nextMatch, int * recursionCount, int recursionLimit);
|
59
|
+
|
60
|
+
// Convenience wrapper around fts_fuzzy_match() for callers that only need the
// score: the match indices are written to a 256-entry scratch buffer that is
// discarded on return.
//
// Returns true and stores the score in *outScore when the pattern matches.
static bool fts_fuzzy_match_simple(char const * pattern, char const * str, struct FtsConfig const * config, int * outScore) {
    uint8_t matchIndices[256];
    int const capacity = (int)sizeof(matchIndices);

    return fts_fuzzy_match(pattern, str, config, outScore, matchIndices, capacity);
}
|
64
|
+
|
65
|
+
// Public entry point: scores `str` against `pattern` using `config`.
//
// On a match, returns true, stores the score in *outScore and writes the
// matched positions into `matches` (capacity `maxMatches`).
//
// The search shares one invocation counter across the whole recursion and
// gives up once that counter reaches the limit of 10.
static bool fts_fuzzy_match(char const * pattern, char const * str, struct FtsConfig const * config, int * outScore, uint8_t * matches, int maxMatches) {
    int const recursionLimit = 10;
    int recursionCount = 0;

    // The top-level call has no previously recorded matches to inherit and
    // starts filling `matches` at index 0; `str` doubles as strBegin.
    uint8_t const * const noSourceMatches = NULL;

    return fts_fuzzy_match_recursive(pattern, str, config, outScore, str, noSourceMatches, matches, maxMatches, 0, &recursionCount, recursionLimit);
}
|
71
|
+
|
72
|
+
// Private implementation
|
73
|
+
static bool fts_fuzzy_match_recursive(const char * pattern, const char * str, struct FtsConfig const * config, int * outScore,
|
74
|
+
const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches,
|
75
|
+
int nextMatch, int * recursionCount, int recursionLimit)
|
76
|
+
{
|
77
|
+
// Count recursions
|
78
|
+
++*recursionCount;
|
79
|
+
if (*recursionCount >= recursionLimit)
|
80
|
+
return false;
|
81
|
+
|
82
|
+
// Detect end of strings
|
83
|
+
if (*pattern == '\0' || *str == '\0')
|
84
|
+
return false;
|
85
|
+
|
86
|
+
unsigned long stringLength = strlen(str);
|
87
|
+
|
88
|
+
// Recursion params
|
89
|
+
bool recursiveMatch = false;
|
90
|
+
uint8_t bestRecursiveMatches[256];
|
91
|
+
int bestRecursiveScore = 0;
|
92
|
+
|
93
|
+
// Loop through pattern and str looking for a match
|
94
|
+
bool first_match = true;
|
95
|
+
while (*pattern != '\0' && *str != '\0') {
|
96
|
+
|
97
|
+
// Found match
|
98
|
+
if (tolower(*pattern) == tolower(*str)) {
|
99
|
+
|
100
|
+
// Supplied matches buffer was too short
|
101
|
+
if (nextMatch >= maxMatches)
|
102
|
+
return false;
|
103
|
+
|
104
|
+
// "Copy-on-Write" srcMatches into matches
|
105
|
+
if (first_match && srcMatches) {
|
106
|
+
memcpy(matches, srcMatches, nextMatch);
|
107
|
+
first_match = false;
|
108
|
+
}
|
109
|
+
|
110
|
+
// Recursive call that "skips" this match
|
111
|
+
uint8_t recursiveMatches[256];
|
112
|
+
int recursiveScore;
|
113
|
+
if (fts_fuzzy_match_recursive(pattern, str + 1, config, &recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) {
|
114
|
+
|
115
|
+
// Pick best recursive score
|
116
|
+
if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
|
117
|
+
memcpy(bestRecursiveMatches, recursiveMatches, 256);
|
118
|
+
bestRecursiveScore = recursiveScore;
|
119
|
+
}
|
120
|
+
recursiveMatch = true;
|
121
|
+
}
|
122
|
+
|
123
|
+
// Advance
|
124
|
+
matches[nextMatch++] = (uint8_t)(str - strBegin);
|
125
|
+
++pattern;
|
126
|
+
}
|
127
|
+
++str;
|
128
|
+
}
|
129
|
+
|
130
|
+
// Determine if full pattern was matched
|
131
|
+
bool matched = *pattern == '\0' ? true : false;
|
132
|
+
|
133
|
+
// Calculate score
|
134
|
+
if (matched) {
|
135
|
+
// Iterate str to end
|
136
|
+
while (*str != '\0')
|
137
|
+
++str;
|
138
|
+
|
139
|
+
// Initialize score
|
140
|
+
*outScore = 100;
|
141
|
+
|
142
|
+
// Apply length penalty
|
143
|
+
*outScore += stringLength * config->string_length_penalty;
|
144
|
+
|
145
|
+
// Apply leading letter penalty
|
146
|
+
int penalty = config->leading_letter_penalty * matches[0];
|
147
|
+
if (penalty < config->max_leading_letter_penalty)
|
148
|
+
penalty = config->max_leading_letter_penalty;
|
149
|
+
*outScore += penalty;
|
150
|
+
|
151
|
+
// Apply unmatched penalty
|
152
|
+
int unmatched = (int)(str - strBegin) - nextMatch;
|
153
|
+
*outScore += config->unmatched_letter_penalty * unmatched;
|
154
|
+
|
155
|
+
// Apply ordering bonuses
|
156
|
+
for (int i = 0; i < nextMatch; ++i) {
|
157
|
+
uint8_t currIdx = matches[i];
|
158
|
+
|
159
|
+
if (i > 0) {
|
160
|
+
uint8_t prevIdx = matches[i - 1];
|
161
|
+
|
162
|
+
// Sequential
|
163
|
+
if (currIdx == (prevIdx + 1))
|
164
|
+
*outScore += config->sequential_bonus;
|
165
|
+
}
|
166
|
+
|
167
|
+
// Check for bonuses based on neighbor character value
|
168
|
+
if (currIdx > 0) {
|
169
|
+
// Camel case
|
170
|
+
char neighbor = strBegin[currIdx - 1];
|
171
|
+
char curr = strBegin[currIdx];
|
172
|
+
if (islower(neighbor) && isupper(curr))
|
173
|
+
*outScore += config->camel_bonus;
|
174
|
+
|
175
|
+
// Separator
|
176
|
+
bool neighborSeparator = neighbor == '_' || neighbor == ' ';
|
177
|
+
if (neighborSeparator)
|
178
|
+
*outScore += config->separator_bonus;
|
179
|
+
}
|
180
|
+
else {
|
181
|
+
// First letter
|
182
|
+
*outScore += config->first_letter_bonus;
|
183
|
+
}
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
// Return best result
|
188
|
+
if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) {
|
189
|
+
// Recursive score is better than "this"
|
190
|
+
memcpy(matches, bestRecursiveMatches, maxMatches);
|
191
|
+
*outScore = bestRecursiveScore;
|
192
|
+
return true;
|
193
|
+
}
|
194
|
+
else if (matched) {
|
195
|
+
// "this" score is better than recursive
|
196
|
+
return true;
|
197
|
+
}
|
198
|
+
else {
|
199
|
+
// no match
|
200
|
+
return false;
|
201
|
+
}
|
202
|
+
}
|
203
|
+
|
204
|
+
#endif // FTS_FUZZY_MATCH_IMPLEMENTATION
|
205
|
+
|
206
|
+
#endif // FTS_FUZZY_MATCH_H
|
data/lib/fts_fuzzy_match.rb
CHANGED
@@ -4,19 +4,40 @@ require_relative "fts_fuzzy_match/version"
|
|
4
4
|
require_relative "fts_fuzzy_match/fts_fuzzy_match"
|
5
5
|
|
6
6
|
# FTS Fuzzy Match class. Can score or sort
|
7
|
-
|
7
|
+
class FtsFuzzyMatch
|
8
8
|
class Error < StandardError; end
|
9
9
|
|
10
|
-
def
|
10
|
+
def initialize(...)
|
11
|
+
@extension = ::FtsFuzzyMatch::Extension.new(...)
|
12
|
+
end
|
13
|
+
|
14
|
+
def fuzzy_match(pattern, string)
|
15
|
+
@extension.fuzzy_match(pattern, string)
|
16
|
+
end
|
17
|
+
|
18
|
+
def sort_in_ruby(pattern, strings)
|
11
19
|
# A falsy fuzzy_match result (no match) is replaced with the sentinel -200 so it sorts after higher-scoring matches
|
12
|
-
strings.sort_by { |string| -1 * (
|
20
|
+
strings.sort_by { |string| -1 * (fuzzy_match(pattern, string) || -200) }
|
21
|
+
end
|
22
|
+
|
23
|
+
def sort_n(pattern, strings, n)
|
24
|
+
@extension.sort_n(pattern, strings, n)
|
13
25
|
end
|
14
26
|
|
15
|
-
def
|
16
|
-
|
27
|
+
def sort(pattern, strings)
|
28
|
+
@extension.sort_n(pattern, strings, strings.length)
|
17
29
|
end
|
18
30
|
|
19
|
-
|
20
|
-
|
31
|
+
class Extension
|
32
|
+
def initialize(**args)
|
33
|
+
@sequential_bonus = args[:sequential_bonus]
|
34
|
+
@separator_bonus = args[:separator_bonus]
|
35
|
+
@camel_bonus = args[:camel_bonus]
|
36
|
+
@first_letter_bonus = args[:first_letter_bonus]
|
37
|
+
@leading_letter_penalty = args[:leading_letter_penalty]
|
38
|
+
@max_leading_letter_penalty = args[:max_leading_letter_penalty]
|
39
|
+
@unmatched_letter_penalty = args[:unmatched_letter_penalty]
|
40
|
+
@string_length_penalty = args[:string_length_penalty]
|
41
|
+
end
|
21
42
|
end
|
22
43
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fts_fuzzy_match
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dave Goddard
|
@@ -18,11 +18,7 @@ extensions:
|
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- ".github/dependabot.yml"
|
21
|
-
- ".github/workflows/
|
22
|
-
- ".github/workflows/packaged_source.yml"
|
23
|
-
- ".github/workflows/packaged_tarball.yml"
|
24
|
-
- ".github/workflows/precompiled.yml"
|
25
|
-
- ".github/workflows/system.yml"
|
21
|
+
- ".github/workflows/fts_fuzzy_match.yml"
|
26
22
|
- ".gitignore"
|
27
23
|
- ".rubocop.yml"
|
28
24
|
- Gemfile
|
@@ -32,7 +28,6 @@ files:
|
|
32
28
|
- ext/fts_fuzzy_match/extconf.rb
|
33
29
|
- ext/fts_fuzzy_match/fts_fuzzy_match.c
|
34
30
|
- ext/fts_fuzzy_match/fts_fuzzy_match.h
|
35
|
-
- ext/fts_fuzzy_match/fts_fuzzy_match_impl.h
|
36
31
|
- fts_fuzzy_match.gemspec
|
37
32
|
- lib/fts_fuzzy_match.rb
|
38
33
|
- lib/fts_fuzzy_match/version.rb
|
@@ -1,37 +0,0 @@
|
|
1
|
-
name: packaged_source
|
2
|
-
concurrency:
|
3
|
-
group: "${{github.workflow}}-${{github.ref}}"
|
4
|
-
cancel-in-progress: true
|
5
|
-
on:
|
6
|
-
workflow_dispatch:
|
7
|
-
schedule:
|
8
|
-
- cron: "0 8 * * 3" # At 08:00 on Wednesday # https://crontab.guru/#0_8_*_*_3
|
9
|
-
push:
|
10
|
-
branches:
|
11
|
-
- main
|
12
|
-
- v*.*.x
|
13
|
-
tags:
|
14
|
-
- v*.*.*
|
15
|
-
pull_request:
|
16
|
-
types: [opened, synchronize]
|
17
|
-
branches:
|
18
|
-
- '*'
|
19
|
-
paths: ["packaged_source/**/*", ".github/workflows/packaged_source.yml"]
|
20
|
-
|
21
|
-
jobs:
|
22
|
-
packaged_source:
|
23
|
-
strategy:
|
24
|
-
fail-fast: false
|
25
|
-
matrix:
|
26
|
-
ruby: ["3.0", "3.1", "3.2", "3.3", "3.4", "head"]
|
27
|
-
runs-on: ["ubuntu-latest", "macos-latest", "windows-latest"]
|
28
|
-
runs-on: ${{matrix.runs-on}}
|
29
|
-
steps:
|
30
|
-
- uses: actions/checkout@v4
|
31
|
-
- uses: ruby/setup-ruby@v1
|
32
|
-
with:
|
33
|
-
working-directory: packaged_source
|
34
|
-
ruby-version: ${{matrix.ruby}}
|
35
|
-
bundler-cache: true
|
36
|
-
- run: bundle exec rake compile test
|
37
|
-
working-directory: packaged_source
|
@@ -1,41 +0,0 @@
|
|
1
|
-
name: packaged_tarball
|
2
|
-
concurrency:
|
3
|
-
group: "${{github.workflow}}-${{github.ref}}"
|
4
|
-
cancel-in-progress: true
|
5
|
-
on:
|
6
|
-
workflow_dispatch:
|
7
|
-
schedule:
|
8
|
-
- cron: "0 8 * * 3" # At 08:00 on Wednesday # https://crontab.guru/#0_8_*_*_3
|
9
|
-
push:
|
10
|
-
branches:
|
11
|
-
- main
|
12
|
-
- v*.*.x
|
13
|
-
tags:
|
14
|
-
- v*.*.*
|
15
|
-
pull_request:
|
16
|
-
types: [opened, synchronize]
|
17
|
-
branches:
|
18
|
-
- '*'
|
19
|
-
paths: ["packaged_tarball/**/*", ".github/workflows/packaged_tarball.yml"]
|
20
|
-
|
21
|
-
jobs:
|
22
|
-
packaged_tarball:
|
23
|
-
strategy:
|
24
|
-
fail-fast: false
|
25
|
-
matrix:
|
26
|
-
ruby: ["3.0", "3.1", "3.2", "3.3", "3.4", "head"]
|
27
|
-
runs-on: ["ubuntu-latest", "macos-13", "windows-latest"]
|
28
|
-
runs-on: ${{matrix.runs-on}}
|
29
|
-
steps:
|
30
|
-
- uses: actions/checkout@v4
|
31
|
-
- uses: ruby/setup-ruby@v1
|
32
|
-
with:
|
33
|
-
working-directory: packaged_tarball
|
34
|
-
ruby-version: ${{matrix.ruby}}
|
35
|
-
bundler-cache: true
|
36
|
-
- uses: actions/cache@v4
|
37
|
-
with:
|
38
|
-
path: packaged_tarball/ports
|
39
|
-
key: packaged_tarball-ports-${{matrix.runs-on}}-${{hashFiles('packaged_tarball/ext/packaged_tarball/extconf.rb')}}
|
40
|
-
- run: bundle exec rake compile test
|
41
|
-
working-directory: packaged_tarball
|
@@ -1,232 +0,0 @@
|
|
1
|
-
name: precompiled
|
2
|
-
concurrency:
|
3
|
-
group: "${{github.workflow}}-${{github.ref}}"
|
4
|
-
cancel-in-progress: true
|
5
|
-
on:
|
6
|
-
workflow_dispatch:
|
7
|
-
schedule:
|
8
|
-
- cron: "0 8 * * 3" # At 08:00 on Wednesday # https://crontab.guru/#0_8_*_*_3
|
9
|
-
push:
|
10
|
-
branches:
|
11
|
-
- main
|
12
|
-
- v*.*.x
|
13
|
-
tags:
|
14
|
-
- v*.*.*
|
15
|
-
pull_request:
|
16
|
-
types: [opened, synchronize]
|
17
|
-
branches:
|
18
|
-
- '*'
|
19
|
-
paths: ["precompiled/**/*", ".github/workflows/precompiled.yml"]
|
20
|
-
|
21
|
-
jobs:
|
22
|
-
ruby_versions:
|
23
|
-
outputs:
|
24
|
-
setup_ruby: "['3.1', '3.2', '3.3', '3.4']"
|
25
|
-
image_tag: "['3.1', '3.2', '3.3', '3.4']"
|
26
|
-
runs-on: ubuntu-latest
|
27
|
-
steps:
|
28
|
-
- run: echo "generating rubies ..."
|
29
|
-
|
30
|
-
rcd_image_version:
|
31
|
-
runs-on: ubuntu-latest
|
32
|
-
outputs:
|
33
|
-
rcd_image_version: ${{steps.rcd_image_version.outputs.rcd_image_version}}
|
34
|
-
steps:
|
35
|
-
- uses: actions/checkout@v4
|
36
|
-
- uses: ruby/setup-ruby@v1
|
37
|
-
with:
|
38
|
-
working-directory: precompiled
|
39
|
-
ruby-version: "3.3"
|
40
|
-
bundler-cache: true
|
41
|
-
bundler: latest
|
42
|
-
- id: rcd_image_version
|
43
|
-
run: bundle exec ruby -e 'require "rake_compiler_dock"; puts "rcd_image_version=#{RakeCompilerDock::IMAGE_VERSION}"' >> $GITHUB_OUTPUT
|
44
|
-
working-directory: precompiled
|
45
|
-
|
46
|
-
test:
|
47
|
-
needs: ["ruby_versions"]
|
48
|
-
strategy:
|
49
|
-
fail-fast: false
|
50
|
-
matrix:
|
51
|
-
runs-on: ["ubuntu-latest", "macos-13", "windows-latest"]
|
52
|
-
ruby: ${{ fromJSON(needs.ruby_versions.outputs.setup_ruby) }}
|
53
|
-
runs-on: ${{matrix.runs-on}}
|
54
|
-
steps:
|
55
|
-
- uses: actions/checkout@v4
|
56
|
-
- uses: ruby/setup-ruby@v1
|
57
|
-
with:
|
58
|
-
working-directory: precompiled
|
59
|
-
ruby-version: ${{matrix.ruby}}
|
60
|
-
bundler-cache: true
|
61
|
-
- uses: actions/cache@v4
|
62
|
-
with:
|
63
|
-
path: precompiled/ports
|
64
|
-
key: precompiled-ports-${{matrix.runs-on}}-${{hashFiles('precompiled/ext/precompiled/extconf.rb')}}
|
65
|
-
- run: bundle exec rake compile test
|
66
|
-
working-directory: precompiled
|
67
|
-
|
68
|
-
generic-package:
|
69
|
-
runs-on: "ubuntu-latest"
|
70
|
-
steps:
|
71
|
-
- uses: actions/checkout@v4
|
72
|
-
- uses: actions/cache@v4
|
73
|
-
with:
|
74
|
-
path: precompiled/ports/archives
|
75
|
-
key: archives-ubuntu-${{hashFiles('precompiled/ext/precompiled/extconf.rb')}}
|
76
|
-
- uses: ruby/setup-ruby@v1
|
77
|
-
with:
|
78
|
-
working-directory: precompiled
|
79
|
-
ruby-version: "3.3"
|
80
|
-
bundler-cache: true
|
81
|
-
- run: ./bin/test-gem-build gems ruby
|
82
|
-
working-directory: precompiled
|
83
|
-
- uses: actions/upload-artifact@v4
|
84
|
-
with:
|
85
|
-
name: cruby-gem
|
86
|
-
path: precompiled/gems
|
87
|
-
retention-days: 1
|
88
|
-
|
89
|
-
generic-install:
|
90
|
-
needs: ["generic-package", "ruby_versions"]
|
91
|
-
strategy:
|
92
|
-
fail-fast: false
|
93
|
-
matrix:
|
94
|
-
os: ["ubuntu-latest", "macos-13", "windows-latest"]
|
95
|
-
ruby: ${{ fromJSON(needs.ruby_versions.outputs.setup_ruby) }}
|
96
|
-
runs-on: ${{ matrix.os }}
|
97
|
-
steps:
|
98
|
-
- uses: actions/checkout@v4
|
99
|
-
- uses: ruby/setup-ruby@v1
|
100
|
-
with:
|
101
|
-
working-directory: precompiled
|
102
|
-
ruby-version: "${{ matrix.ruby }}"
|
103
|
-
- uses: actions/download-artifact@v4
|
104
|
-
with:
|
105
|
-
name: cruby-gem
|
106
|
-
path: precompiled/gems
|
107
|
-
- run: ./bin/test-gem-install gems
|
108
|
-
working-directory: precompiled
|
109
|
-
shell: bash
|
110
|
-
|
111
|
-
native-package:
|
112
|
-
needs: ["rcd_image_version"]
|
113
|
-
strategy:
|
114
|
-
fail-fast: false
|
115
|
-
matrix:
|
116
|
-
platform:
|
117
|
-
- "aarch64-linux-gnu"
|
118
|
-
- "aarch64-linux-musl"
|
119
|
-
- "arm-linux-gnu"
|
120
|
-
- "arm-linux-musl"
|
121
|
-
- "x86-linux-gnu"
|
122
|
-
- "x86-linux-musl"
|
123
|
-
- "x86_64-linux-gnu"
|
124
|
-
- "x86_64-linux-musl"
|
125
|
-
- "arm64-darwin"
|
126
|
-
- "x86_64-darwin"
|
127
|
-
- "x64-mingw-ucrt"
|
128
|
-
runs-on: ubuntu-latest
|
129
|
-
steps:
|
130
|
-
- uses: actions/checkout@v4
|
131
|
-
- uses: actions/cache@v4
|
132
|
-
with:
|
133
|
-
path: precompiled/ports/archives
|
134
|
-
key: archives-ubuntu-${{hashFiles('precompiled/ext/precompiled/extconf.rb')}}
|
135
|
-
- run: |
|
136
|
-
docker run --rm -v $PWD/precompiled:/precompiled -w /precompiled \
|
137
|
-
ghcr.io/rake-compiler/rake-compiler-dock-image:${{ needs.rcd_image_version.outputs.rcd_image_version }}-mri-${{ matrix.platform }} \
|
138
|
-
./bin/test-gem-build gems ${{ matrix.platform }}
|
139
|
-
- uses: actions/upload-artifact@v4
|
140
|
-
with:
|
141
|
-
name: "cruby-${{ matrix.platform }}-gem"
|
142
|
-
path: precompiled/gems
|
143
|
-
retention-days: 1
|
144
|
-
|
145
|
-
linux-install:
|
146
|
-
needs: ["native-package", "ruby_versions"]
|
147
|
-
strategy:
|
148
|
-
fail-fast: false
|
149
|
-
matrix:
|
150
|
-
platform:
|
151
|
-
- "aarch64-linux-gnu"
|
152
|
-
- "aarch64-linux-musl"
|
153
|
-
- "arm-linux-gnu"
|
154
|
-
- "arm-linux-musl"
|
155
|
-
- "x86-linux-gnu"
|
156
|
-
- "x86-linux-musl"
|
157
|
-
- "x86_64-linux-gnu"
|
158
|
-
- "x86_64-linux-musl"
|
159
|
-
ruby: ${{ fromJSON(needs.ruby_versions.outputs.image_tag) }}
|
160
|
-
include:
|
161
|
-
# declare docker image for each platform
|
162
|
-
- { platform: aarch64-linux-musl, docker_tag: "-alpine", bootstrap: "apk add bash &&" }
|
163
|
-
- { platform: arm-linux-musl, docker_tag: "-alpine", bootstrap: "apk add bash &&" }
|
164
|
-
- { platform: x86-linux-musl, docker_tag: "-alpine", bootstrap: "apk add bash &&" }
|
165
|
-
- { platform: x86_64-linux-musl, docker_tag: "-alpine", bootstrap: "apk add bash &&" }
|
166
|
-
# declare docker platform for each platform
|
167
|
-
- { platform: aarch64-linux-gnu, docker_platform: "--platform=linux/arm64" }
|
168
|
-
- { platform: aarch64-linux-musl, docker_platform: "--platform=linux/arm64" }
|
169
|
-
- { platform: arm-linux-gnu, docker_platform: "--platform=linux/arm/v7" }
|
170
|
-
- { platform: arm-linux-musl, docker_platform: "--platform=linux/arm/v7" }
|
171
|
-
- { platform: x86-linux-gnu, docker_platform: "--platform=linux/386" }
|
172
|
-
- { platform: x86-linux-musl, docker_platform: "--platform=linux/386" }
|
173
|
-
runs-on: ubuntu-latest
|
174
|
-
steps:
|
175
|
-
- uses: actions/checkout@v4
|
176
|
-
- uses: actions/download-artifact@v4
|
177
|
-
with:
|
178
|
-
name: cruby-${{ matrix.platform }}-gem
|
179
|
-
path: precompiled/gems
|
180
|
-
- run: |
|
181
|
-
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
182
|
-
docker run --rm -v $PWD/precompiled:/precompiled -w /precompiled \
|
183
|
-
${{ matrix.docker_platform }} ruby:${{ matrix.ruby }}${{ matrix.docker_tag }} \
|
184
|
-
sh -c "
|
185
|
-
gem update --system &&
|
186
|
-
${{ matrix.bootstrap }}
|
187
|
-
./bin/test-gem-install gems
|
188
|
-
"
|
189
|
-
|
190
|
-
darwin-install:
|
191
|
-
needs: ["native-package", "ruby_versions"]
|
192
|
-
strategy:
|
193
|
-
fail-fast: false
|
194
|
-
matrix:
|
195
|
-
platform:
|
196
|
-
- arm64-darwin
|
197
|
-
- x86_64-darwin
|
198
|
-
ruby: ${{ fromJSON(needs.ruby_versions.outputs.setup_ruby) }}
|
199
|
-
include:
|
200
|
-
- { platform: arm64-darwin, os: macos-14 }
|
201
|
-
- { platform: x86_64-darwin, os: macos-13 }
|
202
|
-
runs-on: ${{matrix.os}}
|
203
|
-
steps:
|
204
|
-
- uses: actions/checkout@v4
|
205
|
-
- uses: ruby/setup-ruby@v1
|
206
|
-
with:
|
207
|
-
ruby-version: "${{matrix.ruby}}"
|
208
|
-
- uses: actions/download-artifact@v4
|
209
|
-
with:
|
210
|
-
name: cruby-${{matrix.platform}}-gem
|
211
|
-
path: precompiled/gems
|
212
|
-
- run: ./bin/test-gem-install gems
|
213
|
-
working-directory: precompiled
|
214
|
-
|
215
|
-
windows-install:
|
216
|
-
needs: ["native-package", "ruby_versions"]
|
217
|
-
strategy:
|
218
|
-
fail-fast: false
|
219
|
-
matrix:
|
220
|
-
ruby: ${{ fromJSON(needs.ruby_versions.outputs.setup_ruby) }}
|
221
|
-
runs-on: windows-2022
|
222
|
-
steps:
|
223
|
-
- uses: actions/checkout@v4
|
224
|
-
- uses: ruby/setup-ruby@v1
|
225
|
-
with:
|
226
|
-
ruby-version: "${{matrix.ruby}}"
|
227
|
-
- uses: actions/download-artifact@v4
|
228
|
-
with:
|
229
|
-
name: cruby-x64-mingw-ucrt-gem
|
230
|
-
path: precompiled/gems
|
231
|
-
- run: ./bin/test-gem-install gems
|
232
|
-
working-directory: precompiled
|
@@ -1,40 +0,0 @@
|
|
1
|
-
name: system
|
2
|
-
concurrency:
|
3
|
-
group: "${{github.workflow}}-${{github.ref}}"
|
4
|
-
cancel-in-progress: true
|
5
|
-
on:
|
6
|
-
workflow_dispatch:
|
7
|
-
schedule:
|
8
|
-
- cron: "0 8 * * 3" # At 08:00 on Wednesday # https://crontab.guru/#0_8_*_*_3
|
9
|
-
push:
|
10
|
-
branches:
|
11
|
-
- main
|
12
|
-
- v*.*.x
|
13
|
-
tags:
|
14
|
-
- v*.*.*
|
15
|
-
pull_request:
|
16
|
-
types: [opened, synchronize]
|
17
|
-
branches:
|
18
|
-
- '*'
|
19
|
-
paths: ["system/**/*", ".github/workflows/system.yml"]
|
20
|
-
|
21
|
-
jobs:
|
22
|
-
system:
|
23
|
-
strategy:
|
24
|
-
fail-fast: false
|
25
|
-
matrix:
|
26
|
-
ruby: ["3.0", "3.1", "3.2", "3.3", "3.4", "head"]
|
27
|
-
runs-on: ["ubuntu-latest", "macos-13", "windows-latest"]
|
28
|
-
runs-on: ${{matrix.runs-on}}
|
29
|
-
steps:
|
30
|
-
- uses: actions/checkout@v4
|
31
|
-
- uses: MSP-Greg/setup-ruby-pkgs@v1
|
32
|
-
with:
|
33
|
-
working-directory: system
|
34
|
-
ruby-version: ${{matrix.ruby}}
|
35
|
-
bundler-cache: true
|
36
|
-
mingw: "libyaml" # windows
|
37
|
-
apt-get: "libyaml-dev" # linux
|
38
|
-
brew: "libyaml" # macos
|
39
|
-
- run: bundle exec rake compile test
|
40
|
-
working-directory: system
|
@@ -1,203 +0,0 @@
|
|
1
|
-
// LICENSE
|
2
|
-
//
|
3
|
-
// This software is dual-licensed to the public domain and under the following
|
4
|
-
// license: you are granted a perpetual, irrevocable license to copy, modify,
|
5
|
-
// publish, and distribute this file as you see fit.
|
6
|
-
//
|
7
|
-
// VERSION
|
8
|
-
// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
|
9
|
-
// 0.1.0 (2016-03-28) Initial release
|
10
|
-
//
|
11
|
-
// AUTHOR
|
12
|
-
// Forrest Smith
|
13
|
-
//
|
14
|
-
// NOTES
|
15
|
-
// Compiling
|
16
|
-
// You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
|
17
|
-
//
|
18
|
-
// fts_fuzzy_match_simple(...)
|
19
|
-
// Simplified version of fts_fuzzy_match
|
20
|
-
//
|
21
|
-
// fts_fuzzy_match(...)
|
22
|
-
// Returns true if pattern is found AND calculates a score.
|
23
|
-
// Performs exhaustive search via recursion to find all possible matches and match with highest score.
|
24
|
-
// Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
|
25
|
-
// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
|
26
|
-
// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
|
27
|
-
// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
|
28
|
-
|
29
|
-
|
30
|
-
#ifndef FTS_FUZZY_MATCH_H
|
31
|
-
#define FTS_FUZZY_MATCH_H
|
32
|
-
|
33
|
-
|
34
|
-
#include <ctype.h> // tolower, toupper
|
35
|
-
|
36
|
-
// Public interface
|
37
|
-
static bool fts_fuzzy_match_simple(char const * pattern, char const * str, int * outScore);
|
38
|
-
static bool fts_fuzzy_match(char const * pattern, char const * str, int * outScore, uint8_t * matches, int maxMatches);
|
39
|
-
|
40
|
-
#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
|
41
|
-
|
42
|
-
// Private interface
|
43
|
-
static bool fts_fuzzy_match_recursive(const char * pattern, const char * str, int * outScore,
|
44
|
-
const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches,
|
45
|
-
int nextMatch, int * recursionCount, int recursionLimit);
|
46
|
-
|
47
|
-
static bool fts_fuzzy_match_simple(char const * pattern, char const * str, int * outScore) {
|
48
|
-
uint8_t matches[256];
|
49
|
-
return fts_fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
|
50
|
-
}
|
51
|
-
|
52
|
-
static bool fts_fuzzy_match(char const * pattern, char const * str, int * outScore, uint8_t * matches, int maxMatches) {
|
53
|
-
int recursionCount = 0;
|
54
|
-
int recursionLimit = 10;
|
55
|
-
|
56
|
-
return fts_fuzzy_match_recursive(pattern, str, outScore, str, NULL, matches, maxMatches, 0, &recursionCount, recursionLimit);
|
57
|
-
}
|
58
|
-
|
59
|
-
// Private implementation
|
60
|
-
static bool fts_fuzzy_match_recursive(const char * pattern, const char * str, int * outScore,
|
61
|
-
const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches,
|
62
|
-
int nextMatch, int * recursionCount, int recursionLimit)
|
63
|
-
{
|
64
|
-
// Count recursions
|
65
|
-
++*recursionCount;
|
66
|
-
if (*recursionCount >= recursionLimit)
|
67
|
-
return false;
|
68
|
-
|
69
|
-
// Detect end of strings
|
70
|
-
if (*pattern == '\0' || *str == '\0')
|
71
|
-
return false;
|
72
|
-
|
73
|
-
unsigned long stringLength = strlen(str);
|
74
|
-
|
75
|
-
// Recursion params
|
76
|
-
bool recursiveMatch = false;
|
77
|
-
uint8_t bestRecursiveMatches[256];
|
78
|
-
int bestRecursiveScore = 0;
|
79
|
-
|
80
|
-
// Loop through pattern and str looking for a match
|
81
|
-
bool first_match = true;
|
82
|
-
while (*pattern != '\0' && *str != '\0') {
|
83
|
-
|
84
|
-
// Found match
|
85
|
-
if (tolower(*pattern) == tolower(*str)) {
|
86
|
-
|
87
|
-
// Supplied matches buffer was too short
|
88
|
-
if (nextMatch >= maxMatches)
|
89
|
-
return false;
|
90
|
-
|
91
|
-
// "Copy-on-Write" srcMatches into matches
|
92
|
-
if (first_match && srcMatches) {
|
93
|
-
memcpy(matches, srcMatches, nextMatch);
|
94
|
-
first_match = false;
|
95
|
-
}
|
96
|
-
|
97
|
-
// Recursive call that "skips" this match
|
98
|
-
uint8_t recursiveMatches[256];
|
99
|
-
int recursiveScore;
|
100
|
-
if (fts_fuzzy_match_recursive(pattern, str + 1, &recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) {
|
101
|
-
|
102
|
-
// Pick best recursive score
|
103
|
-
if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
|
104
|
-
memcpy(bestRecursiveMatches, recursiveMatches, 256);
|
105
|
-
bestRecursiveScore = recursiveScore;
|
106
|
-
}
|
107
|
-
recursiveMatch = true;
|
108
|
-
}
|
109
|
-
|
110
|
-
// Advance
|
111
|
-
matches[nextMatch++] = (uint8_t)(str - strBegin);
|
112
|
-
++pattern;
|
113
|
-
}
|
114
|
-
++str;
|
115
|
-
}
|
116
|
-
|
117
|
-
// Determine if full pattern was matched
|
118
|
-
bool matched = *pattern == '\0' ? true : false;
|
119
|
-
|
120
|
-
// Calculate score
|
121
|
-
if (matched) {
|
122
|
-
const int sequential_bonus = 20; // bonus for adjacent matches (DEFAULT: 15)
|
123
|
-
const int separator_bonus = 30; // bonus if match occurs after a separator
|
124
|
-
const int camel_bonus = 0; // bonus if match is uppercase and prev is lower (DEFAULT: 30)
|
125
|
-
const int first_letter_bonus = 15; // bonus if the first letter is matched
|
126
|
-
|
127
|
-
const int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match
|
128
|
-
const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
|
129
|
-
const int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
|
130
|
-
const int string_length_penalty = -1; // (DEFAULT: 0)
|
131
|
-
|
132
|
-
// Iterate str to end
|
133
|
-
while (*str != '\0')
|
134
|
-
++str;
|
135
|
-
|
136
|
-
// Initialize score
|
137
|
-
*outScore = 100;
|
138
|
-
|
139
|
-
// Apply length penalty
|
140
|
-
*outScore += stringLength * string_length_penalty;
|
141
|
-
|
142
|
-
// Apply leading letter penalty
|
143
|
-
int penalty = leading_letter_penalty * matches[0];
|
144
|
-
if (penalty < max_leading_letter_penalty)
|
145
|
-
penalty = max_leading_letter_penalty;
|
146
|
-
*outScore += penalty;
|
147
|
-
|
148
|
-
// Apply unmatched penalty
|
149
|
-
int unmatched = (int)(str - strBegin) - nextMatch;
|
150
|
-
*outScore += unmatched_letter_penalty * unmatched;
|
151
|
-
|
152
|
-
// Apply ordering bonuses
|
153
|
-
for (int i = 0; i < nextMatch; ++i) {
|
154
|
-
uint8_t currIdx = matches[i];
|
155
|
-
|
156
|
-
if (i > 0) {
|
157
|
-
uint8_t prevIdx = matches[i - 1];
|
158
|
-
|
159
|
-
// Sequential
|
160
|
-
if (currIdx == (prevIdx + 1))
|
161
|
-
*outScore += sequential_bonus;
|
162
|
-
}
|
163
|
-
|
164
|
-
// Check for bonuses based on neighbor character value
|
165
|
-
if (currIdx > 0) {
|
166
|
-
// Camel case
|
167
|
-
char neighbor = strBegin[currIdx - 1];
|
168
|
-
char curr = strBegin[currIdx];
|
169
|
-
if (islower(neighbor) && isupper(curr))
|
170
|
-
*outScore += camel_bonus;
|
171
|
-
|
172
|
-
// Separator
|
173
|
-
bool neighborSeparator = neighbor == '_' || neighbor == ' ';
|
174
|
-
if (neighborSeparator)
|
175
|
-
*outScore += separator_bonus;
|
176
|
-
}
|
177
|
-
else {
|
178
|
-
// First letter
|
179
|
-
*outScore += first_letter_bonus;
|
180
|
-
}
|
181
|
-
}
|
182
|
-
}
|
183
|
-
|
184
|
-
// Return best result
|
185
|
-
if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) {
|
186
|
-
// Recursive score is better than "this"
|
187
|
-
memcpy(matches, bestRecursiveMatches, maxMatches);
|
188
|
-
*outScore = bestRecursiveScore;
|
189
|
-
return true;
|
190
|
-
}
|
191
|
-
else if (matched) {
|
192
|
-
// "this" score is better than recursive
|
193
|
-
return true;
|
194
|
-
}
|
195
|
-
else {
|
196
|
-
// no match
|
197
|
-
return false;
|
198
|
-
}
|
199
|
-
}
|
200
|
-
|
201
|
-
#endif // FTS_FUZZY_MATCH_IMPLEMENTATION
|
202
|
-
|
203
|
-
#endif // FTS_FUZZY_MATCH_H
|