fts_fuzzy_match 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5ae4ec8437fae32168ace83c701014d8ad3172a9e31abbf68c4501f5b33b59eb
4
- data.tar.gz: e4ca65980fda174f25361290429f8ebadf1a859b91b6c0723342464c7888f6b6
3
+ metadata.gz: ffcb83f6d3c87cff136f4a58e5c0cf4765b9f3841f88dc655bd0eaa7dae0d2bf
4
+ data.tar.gz: 1794738cfbec5e7071703f12d06ab616ef7f1fc891563077fddd72bb16649443
5
5
  SHA512:
6
- metadata.gz: e03afcccdf173c3106442247df9574aba34fe3b36fc9a3418a4b41c39714048f6e2f10314b53d66aaff10f7b101b657c4958efc97ba5f2b6178f2f0cd4d3ebc1
7
- data.tar.gz: 952900929bfa9eb2282e81f47cb4ef0b65511f4d233f6ca99252f9050858b008d51f7486c61aebab2eeb28f3b0feb10c011683ca59e974604ceefda3ee105759
6
+ metadata.gz: a05b083ca4acc80980bcb74a9de129a0f77961e825efc417d78eb5024a3064b68be2f4185cc2a672f4e2a5aeaed1294fd9df1d4512d206162883be2d1329e875
7
+ data.tar.gz: 4a975a00538b977feeb0b446cda6f4a28368713da525f834d7bf9cfe915f48755fc8528b2baf75acf97ca6ecbbbfb9fe7dff7cee89467bb64bb195412a55b062
@@ -5,22 +5,6 @@ updates:
5
5
  schedule:
6
6
  interval: "weekly"
7
7
  - package-ecosystem: "bundler"
8
- directory: "/fuzzy_match"
9
- schedule:
10
- interval: "weekly"
11
- - package-ecosystem: "bundler"
12
- directory: "/system"
13
- schedule:
14
- interval: "weekly"
15
- - package-ecosystem: "bundler"
16
- directory: "/packaged_source"
17
- schedule:
18
- interval: "weekly"
19
- - package-ecosystem: "bundler"
20
- directory: "/packaged_tarball"
21
- schedule:
22
- interval: "weekly"
23
- - package-ecosystem: "bundler"
24
- directory: "/precompiled"
8
+ directory: "/"
25
9
  schedule:
26
10
  interval: "weekly"
@@ -1,4 +1,4 @@
1
- name: fuzzy_match
1
+ name: fts_fuzzy_match
2
2
  concurrency:
3
3
  group: "${{github.workflow}}-${{github.ref}}"
4
4
  cancel-in-progress: true
@@ -16,22 +16,22 @@ on:
16
16
  types: [opened, synchronize]
17
17
  branches:
18
18
  - '*'
19
- paths: ["fuzzy_match/**/*", ".github/workflows/fuzzy_match.yml"]
19
+ paths: ["**/*"]
20
20
 
21
21
  jobs:
22
22
  fuzzy_match:
23
23
  strategy:
24
24
  fail-fast: false
25
25
  matrix:
26
- ruby: ["3.0", "3.1", "3.2", "3.3", "3.4", "head"]
26
+ ruby: ["3.3", "3.4", "head"]
27
27
  runs-on: ["ubuntu-latest", "macos-latest", "windows-latest"]
28
28
  runs-on: ${{matrix.runs-on}}
29
29
  steps:
30
30
  - uses: actions/checkout@v4
31
31
  - uses: ruby/setup-ruby@v1
32
32
  with:
33
- working-directory: fuzzy_match
33
+ working-directory: .
34
34
  ruby-version: ${{matrix.ruby}}
35
35
  bundler-cache: true
36
36
  - run: bundle exec rake compile test
37
- working-directory: fuzzy_match
37
+ working-directory: .
data/.gitignore CHANGED
@@ -12,3 +12,4 @@
12
12
  *.a
13
13
  mkmf.log
14
14
  /fuzzy_match-*.gem
15
+ /fts_fuzzy_match-*.gem
data/Gemfile CHANGED
@@ -9,6 +9,7 @@ gem 'rake', '~> 13.0'
9
9
  gem 'rake-compiler'
10
10
  gem 'minitest', '~> 5.0'
11
11
  gem 'json'
12
+ gem 'benchmark'
12
13
 
13
14
  gem 'rubocop-minitest'
14
15
  gem 'rubocop-performance'
data/Gemfile.lock CHANGED
@@ -7,6 +7,7 @@ GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
9
  ast (2.4.3)
10
+ benchmark (0.4.1)
10
11
  json (2.12.2)
11
12
  language_server-protocol (3.17.0.5)
12
13
  lint_roller (1.1.0)
@@ -57,6 +58,7 @@ PLATFORMS
57
58
  ruby
58
59
 
59
60
  DEPENDENCIES
61
+ benchmark
60
62
  fts_fuzzy_match!
61
63
  json
62
64
  minitest (~> 5.0)
data/README.md CHANGED
@@ -7,6 +7,29 @@ useful for sorting against each other.
7
7
 
8
8
  Add this line to your application's Gemfile:
9
9
 
10
+ ## Usage
11
+
12
+ To find the score for a pattern and string, use the `fuzzy_match` method:
13
+
14
+ ```ruby
15
+ subject = ::FtsFuzzyMatch.new
16
+ result = subject.fuzzy_match("got", "game of thrones")
17
+ ```
18
+
19
+ To sort strings based on their match scores, use the `sort` method:
20
+
21
+ ```ruby
22
+ subject = ::FtsFuzzyMatch.new(sequential_bonus: 20, camel_bonus: 0, string_length_penalty: -1)
23
+ sorted_strings = subject.sort("got", ["game of thrones", "got", "winter is coming"])
24
+ ```
25
+
26
+ To sort strings based on their match scores, returning only the top 2 results, use the `sort_n` method:
27
+
28
+ ```ruby
29
+ subject = ::FtsFuzzyMatch.new
30
+ sorted_strings = subject.sort_n("got", ["game of thrones", "got", "winter is coming"], 2)
31
+ ```
32
+
10
33
  ## Credits
11
34
 
12
35
  - This gem was started by using the Ruby C Extensions Explained project at
@@ -1,10 +1,33 @@
1
- #include "fts_fuzzy_match.h"
1
+ #include "ruby.h"
2
2
  #define FTS_FUZZY_MATCH_IMPLEMENTATION
3
- #include "fts_fuzzy_match_impl.h"
3
+ #include "fts_fuzzy_match.h"
4
4
 
5
- VALUE rb_mFtsFuzzyMatch;
5
+ VALUE rb_cFtsFuzzyMatch;
6
6
  VALUE rb_cFtsFuzzyMatchExtension;
7
7
 
8
+ int get_num_from_self(VALUE self, char *name, int default_value) {
9
+ VALUE val = rb_iv_get(self, name);
10
+ switch TYPE(val) {
11
+ case 7: return NUM2INT(RARRAY_AREF(val, 0));
12
+ case 21: return NUM2INT(val);
13
+ default: return default_value;
14
+ }
15
+ }
16
+
17
+ struct FtsConfig fts_config(VALUE self) {
18
+ struct FtsConfig config = {
19
+ .sequential_bonus = get_num_from_self(self, (char*)"@sequential_bonus", 15),
20
+ .separator_bonus = get_num_from_self(self, (char*)"@separator_bonus", 30),
21
+ .camel_bonus = get_num_from_self(self, (char*)"@camel_bonus", 30),
22
+ .first_letter_bonus = get_num_from_self(self, (char*)"@first_letter_bonus", 15),
23
+ .leading_letter_penalty = get_num_from_self(self, (char*)"@leading_letter_penalty", -5),
24
+ .max_leading_letter_penalty = get_num_from_self(self, (char*)"@max_leading_letter_penalty", -15),
25
+ .unmatched_letter_penalty = get_num_from_self(self, (char*)"@unmatched_letter_penalty", -1),
26
+ .string_length_penalty = get_num_from_self(self, (char*)"@string_length_penalty", 0)
27
+ };
28
+ return config;
29
+ }
30
+
8
31
  static VALUE
9
32
  rb_fts_fuzzy_match_extension_class_fuzzy_match(VALUE self, VALUE pattern, VALUE str)
10
33
  {
@@ -13,8 +36,9 @@ rb_fts_fuzzy_match_extension_class_fuzzy_match(VALUE self, VALUE pattern, VALUE
13
36
  char* strPtr;
14
37
  strPtr = StringValueCStr(str);
15
38
 
39
+ struct FtsConfig config = fts_config(self);
16
40
  int outScore;
17
- int matched = fts_fuzzy_match_simple(patternPtr, strPtr, &outScore);
41
+ int matched = fts_fuzzy_match_simple(patternPtr, strPtr, &config, &outScore);
18
42
  // return rb_sprintf("Matched: %d\nScore: %d\n", matched, outScore);
19
43
  if (matched) {
20
44
  return INT2FIX(outScore);
@@ -24,7 +48,8 @@ rb_fts_fuzzy_match_extension_class_fuzzy_match(VALUE self, VALUE pattern, VALUE
24
48
  }
25
49
 
26
50
  struct StringScore {
27
- VALUE str;
51
+ VALUE rbStr;
52
+ char *cStr;
28
53
  bool matched;
29
54
  int score;
30
55
  };
@@ -39,7 +64,12 @@ int comp(const void *a, const void *b) {
39
64
  } else if (!aa->matched && bb->matched) {
40
65
  return 1;
41
66
  }
42
- return bb->score - aa->score;
67
+ if (aa->score != bb->score) {
68
+ return bb->score - aa->score;
69
+ } else {
70
+ // Given the score is the same, sort alphabetically to keep the order consistent
71
+ return strcasecmp(aa->cStr, bb->cStr);
72
+ }
43
73
  }
44
74
 
45
75
  static VALUE
@@ -49,12 +79,14 @@ rb_fts_fuzzy_match_extension_class_sort_n(VALUE self, VALUE pattern, VALUE strin
49
79
  patternPtr = StringValueCStr(pattern);
50
80
  long stringsLen = RARRAY_LEN(strings);
51
81
 
82
+ struct FtsConfig config = fts_config(self);
83
+
52
84
  struct StringScore *scores = (struct StringScore *)malloc(stringsLen * sizeof(struct StringScore));
53
85
  for (long i=0; i<stringsLen; i++) {
54
- const VALUE str = RARRAY_AREF(strings, i);
55
- const char* strPtr = StringValueCStr(str);
56
- scores[i].str = str;
57
- scores[i].matched = fts_fuzzy_match_simple(patternPtr, strPtr, &scores[i].score);
86
+ volatile VALUE str = RARRAY_AREF(strings, i);
87
+ scores[i].rbStr = str;
88
+ scores[i].cStr = StringValueCStr(str);
89
+ scores[i].matched = fts_fuzzy_match_simple(patternPtr, scores[i].cStr, &config, &scores[i].score);
58
90
  }
59
91
 
60
92
  qsort(scores, stringsLen, sizeof(struct StringScore), comp);
@@ -64,7 +96,7 @@ rb_fts_fuzzy_match_extension_class_sort_n(VALUE self, VALUE pattern, VALUE strin
64
96
 
65
97
  VALUE result = rb_ary_new_capa(n2);
66
98
  for (long i=0; i<n2; i++) {
67
- rb_ary_push(result, scores[i].str);
99
+ rb_ary_push(result, scores[i].rbStr);
68
100
  }
69
101
 
70
102
  return result;
@@ -73,10 +105,10 @@ rb_fts_fuzzy_match_extension_class_sort_n(VALUE self, VALUE pattern, VALUE strin
73
105
  void
74
106
  Init_fts_fuzzy_match(void)
75
107
  {
76
- rb_mFtsFuzzyMatch = rb_define_module("FtsFuzzyMatch");
77
- rb_cFtsFuzzyMatchExtension = rb_define_class_under(rb_mFtsFuzzyMatch, "Extension", rb_cObject);
78
- rb_define_singleton_method(rb_cFtsFuzzyMatchExtension, "fuzzy_match",
108
+ rb_cFtsFuzzyMatch = rb_define_class("FtsFuzzyMatch", rb_cObject);
109
+ rb_cFtsFuzzyMatchExtension = rb_define_class_under(rb_cFtsFuzzyMatch, "Extension", rb_cObject);
110
+ rb_define_method(rb_cFtsFuzzyMatchExtension, "fuzzy_match",
79
111
  rb_fts_fuzzy_match_extension_class_fuzzy_match, 2);
80
- rb_define_singleton_method(rb_cFtsFuzzyMatchExtension, "sort_n",
112
+ rb_define_method(rb_cFtsFuzzyMatchExtension, "sort_n",
81
113
  rb_fts_fuzzy_match_extension_class_sort_n, 3);
82
114
  }
@@ -1,6 +1,206 @@
1
- #ifndef FUZZY_MATCH_H
2
- #define FUZZY_MATCH_H 1
1
+ // LICENSE
2
+ //
3
+ // This software is dual-licensed to the public domain and under the following
4
+ // license: you are granted a perpetual, irrevocable license to copy, modify,
5
+ // publish, and distribute this file as you see fit.
6
+ //
7
+ // VERSION
8
+ // 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
9
+ // 0.1.0 (2016-03-28) Initial release
10
+ //
11
+ // AUTHOR
12
+ // Forrest Smith
13
+ //
14
+ // NOTES
15
+ // Compiling
16
+ // You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
17
+ //
18
+ // fts_fuzzy_match_simple(...)
19
+ // Simplified version of fts_fuzzy_match
20
+ //
21
+ // fts_fuzzy_match(...)
22
+ // Returns true if pattern is found AND calculates a score.
23
+ // Performs exhaustive search via recursion to find all possible matches and match with highest score.
24
+ // Score values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
25
+ // Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
26
+ // Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
27
+ // Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
3
28
 
4
- #include "ruby.h"
5
29
 
6
- #endif /* FUZZY_MATCH_H */
30
+ #ifndef FTS_FUZZY_MATCH_H
31
+ #define FTS_FUZZY_MATCH_H
32
+
33
+
34
+ #include <ctype.h> // tolower, toupper
35
+
36
+ // Public interface
37
+ struct FtsConfig {
38
+ int sequential_bonus; // bonus for adjacent matches (DEFAULT: 15)
39
+ int separator_bonus; // bonus if match occurs after a separator
40
+ int camel_bonus; // bonus if match is uppercase and prev is lower (DEFAULT: 30)
41
+ int first_letter_bonus; // bonus if the first letter is matched
42
+
43
+ int leading_letter_penalty; // penalty applied for every letter in str before the first match
44
+ int max_leading_letter_penalty; // maximum penalty for leading letters
45
+ int unmatched_letter_penalty; // penalty for every letter that doesn't match
46
+ int string_length_penalty; // (DEFAULT: 0)
47
+ };
48
+
49
+
50
+ static bool fts_fuzzy_match_simple(char const * pattern, char const * str, struct FtsConfig const * config, int * outScore);
51
+ static bool fts_fuzzy_match(char const * pattern, char const * str, struct FtsConfig const * config, int * outScore, uint8_t * matches, int maxMatches);
52
+
53
+ #ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
54
+
55
+ // Private interface
56
+ static bool fts_fuzzy_match_recursive(const char * pattern, const char * str, struct FtsConfig const * config, int * outScore,
57
+ const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches,
58
+ int nextMatch, int * recursionCount, int recursionLimit);
59
+
60
+ static bool fts_fuzzy_match_simple(char const * pattern, char const * str, struct FtsConfig const * config, int * outScore) {
61
+ uint8_t matches[256];
62
+ return fts_fuzzy_match(pattern, str, config, outScore, matches, sizeof(matches));
63
+ }
64
+
65
+ static bool fts_fuzzy_match(char const * pattern, char const * str, struct FtsConfig const * config, int * outScore, uint8_t * matches, int maxMatches) {
66
+ int recursionCount = 0;
67
+ int recursionLimit = 10;
68
+
69
+ return fts_fuzzy_match_recursive(pattern, str, config, outScore, str, NULL, matches, maxMatches, 0, &recursionCount, recursionLimit);
70
+ }
71
+
72
+ // Private implementation
73
+ static bool fts_fuzzy_match_recursive(const char * pattern, const char * str, struct FtsConfig const * config, int * outScore,
74
+ const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches,
75
+ int nextMatch, int * recursionCount, int recursionLimit)
76
+ {
77
+ // Count recursions
78
+ ++*recursionCount;
79
+ if (*recursionCount >= recursionLimit)
80
+ return false;
81
+
82
+ // Detect end of strings
83
+ if (*pattern == '\0' || *str == '\0')
84
+ return false;
85
+
86
+ unsigned long stringLength = strlen(str);
87
+
88
+ // Recursion params
89
+ bool recursiveMatch = false;
90
+ uint8_t bestRecursiveMatches[256];
91
+ int bestRecursiveScore = 0;
92
+
93
+ // Loop through pattern and str looking for a match
94
+ bool first_match = true;
95
+ while (*pattern != '\0' && *str != '\0') {
96
+
97
+ // Found match
98
+ if (tolower(*pattern) == tolower(*str)) {
99
+
100
+ // Supplied matches buffer was too short
101
+ if (nextMatch >= maxMatches)
102
+ return false;
103
+
104
+ // "Copy-on-Write" srcMatches into matches
105
+ if (first_match && srcMatches) {
106
+ memcpy(matches, srcMatches, nextMatch);
107
+ first_match = false;
108
+ }
109
+
110
+ // Recursive call that "skips" this match
111
+ uint8_t recursiveMatches[256];
112
+ int recursiveScore;
113
+ if (fts_fuzzy_match_recursive(pattern, str + 1, config, &recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) {
114
+
115
+ // Pick best recursive score
116
+ if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
117
+ memcpy(bestRecursiveMatches, recursiveMatches, 256);
118
+ bestRecursiveScore = recursiveScore;
119
+ }
120
+ recursiveMatch = true;
121
+ }
122
+
123
+ // Advance
124
+ matches[nextMatch++] = (uint8_t)(str - strBegin);
125
+ ++pattern;
126
+ }
127
+ ++str;
128
+ }
129
+
130
+ // Determine if full pattern was matched
131
+ bool matched = *pattern == '\0' ? true : false;
132
+
133
+ // Calculate score
134
+ if (matched) {
135
+ // Iterate str to end
136
+ while (*str != '\0')
137
+ ++str;
138
+
139
+ // Initialize score
140
+ *outScore = 100;
141
+
142
+ // Apply length penalty
143
+ *outScore += stringLength * config->string_length_penalty;
144
+
145
+ // Apply leading letter penalty
146
+ int penalty = config->leading_letter_penalty * matches[0];
147
+ if (penalty < config->max_leading_letter_penalty)
148
+ penalty = config->max_leading_letter_penalty;
149
+ *outScore += penalty;
150
+
151
+ // Apply unmatched penalty
152
+ int unmatched = (int)(str - strBegin) - nextMatch;
153
+ *outScore += config->unmatched_letter_penalty * unmatched;
154
+
155
+ // Apply ordering bonuses
156
+ for (int i = 0; i < nextMatch; ++i) {
157
+ uint8_t currIdx = matches[i];
158
+
159
+ if (i > 0) {
160
+ uint8_t prevIdx = matches[i - 1];
161
+
162
+ // Sequential
163
+ if (currIdx == (prevIdx + 1))
164
+ *outScore += config->sequential_bonus;
165
+ }
166
+
167
+ // Check for bonuses based on neighbor character value
168
+ if (currIdx > 0) {
169
+ // Camel case
170
+ char neighbor = strBegin[currIdx - 1];
171
+ char curr = strBegin[currIdx];
172
+ if (islower(neighbor) && isupper(curr))
173
+ *outScore += config->camel_bonus;
174
+
175
+ // Separator
176
+ bool neighborSeparator = neighbor == '_' || neighbor == ' ';
177
+ if (neighborSeparator)
178
+ *outScore += config->separator_bonus;
179
+ }
180
+ else {
181
+ // First letter
182
+ *outScore += config->first_letter_bonus;
183
+ }
184
+ }
185
+ }
186
+
187
+ // Return best result
188
+ if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) {
189
+ // Recursive score is better than "this"
190
+ memcpy(matches, bestRecursiveMatches, maxMatches);
191
+ *outScore = bestRecursiveScore;
192
+ return true;
193
+ }
194
+ else if (matched) {
195
+ // "this" score is better than recursive
196
+ return true;
197
+ }
198
+ else {
199
+ // no match
200
+ return false;
201
+ }
202
+ }
203
+
204
+ #endif // FTS_FUZZY_MATCH_IMPLEMENTATION
205
+
206
+ #endif // FTS_FUZZY_MATCH_H
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module FtsFuzzyMatch
4
- VERSION = "0.1.0"
3
+ class FtsFuzzyMatch
4
+ VERSION = "0.2.0"
5
5
  end
@@ -4,19 +4,40 @@ require_relative "fts_fuzzy_match/version"
4
4
  require_relative "fts_fuzzy_match/fts_fuzzy_match"
5
5
 
6
6
  # FTS Fuzzy Match class. Scores a string against a pattern, or sorts strings by their match score.
7
- module FtsFuzzyMatch
7
+ class FtsFuzzyMatch
8
8
  class Error < StandardError; end
9
9
 
10
- def self.sort_in_ruby(pattern, strings)
10
+ def initialize(...)
11
+ @extension = ::FtsFuzzyMatch::Extension.new(...)
12
+ end
13
+
14
+ def fuzzy_match(pattern, string)
15
+ @extension.fuzzy_match(pattern, string)
16
+ end
17
+
18
+ def sort_in_ruby(pattern, strings)
11
19
  # fuzzy_match is -50..50 so -200 is the lowest possible score
12
- strings.sort_by { |string| -1 * (::FtsFuzzyMatch::Extension.fuzzy_match(pattern, string) || -200) }
20
+ strings.sort_by { |string| -1 * (fuzzy_match(pattern, string) || -200) }
21
+ end
22
+
23
+ def sort_n(pattern, strings, n)
24
+ @extension.sort_n(pattern, strings, n)
13
25
  end
14
26
 
15
- def self.sort_n(pattern, strings, n)
16
- ::FtsFuzzyMatch::Extension.sort_n(pattern, strings, n)
27
+ def sort(pattern, strings)
28
+ @extension.sort_n(pattern, strings, strings.length)
17
29
  end
18
30
 
19
- def self.sort(pattern, strings)
20
- ::FtsFuzzyMatch::Extension.sort_n(pattern, strings, strings.length)
31
+ class Extension
32
+ def initialize(**args)
33
+ @sequential_bonus = args[:sequential_bonus]
34
+ @separator_bonus = args[:separator_bonus]
35
+ @camel_bonus = args[:camel_bonus]
36
+ @first_letter_bonus = args[:first_letter_bonus]
37
+ @leading_letter_penalty = args[:leading_letter_penalty]
38
+ @max_leading_letter_penalty = args[:max_leading_letter_penalty]
39
+ @unmatched_letter_penalty = args[:unmatched_letter_penalty]
40
+ @string_length_penalty = args[:string_length_penalty]
41
+ end
21
42
  end
22
43
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fts_fuzzy_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dave Goddard
@@ -18,11 +18,7 @@ extensions:
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - ".github/dependabot.yml"
21
- - ".github/workflows/fuzzy_match.yml"
22
- - ".github/workflows/packaged_source.yml"
23
- - ".github/workflows/packaged_tarball.yml"
24
- - ".github/workflows/precompiled.yml"
25
- - ".github/workflows/system.yml"
21
+ - ".github/workflows/fts_fuzzy_match.yml"
26
22
  - ".gitignore"
27
23
  - ".rubocop.yml"
28
24
  - Gemfile
@@ -32,7 +28,6 @@ files:
32
28
  - ext/fts_fuzzy_match/extconf.rb
33
29
  - ext/fts_fuzzy_match/fts_fuzzy_match.c
34
30
  - ext/fts_fuzzy_match/fts_fuzzy_match.h
35
- - ext/fts_fuzzy_match/fts_fuzzy_match_impl.h
36
31
  - fts_fuzzy_match.gemspec
37
32
  - lib/fts_fuzzy_match.rb
38
33
  - lib/fts_fuzzy_match/version.rb
@@ -1,37 +0,0 @@
1
- name: packaged_source
2
- concurrency:
3
- group: "${{github.workflow}}-${{github.ref}}"
4
- cancel-in-progress: true
5
- on:
6
- workflow_dispatch:
7
- schedule:
8
- - cron: "0 8 * * 3" # At 08:00 on Wednesday # https://crontab.guru/#0_8_*_*_3
9
- push:
10
- branches:
11
- - main
12
- - v*.*.x
13
- tags:
14
- - v*.*.*
15
- pull_request:
16
- types: [opened, synchronize]
17
- branches:
18
- - '*'
19
- paths: ["packaged_source/**/*", ".github/workflows/packaged_source.yml"]
20
-
21
- jobs:
22
- packaged_source:
23
- strategy:
24
- fail-fast: false
25
- matrix:
26
- ruby: ["3.0", "3.1", "3.2", "3.3", "3.4", "head"]
27
- runs-on: ["ubuntu-latest", "macos-latest", "windows-latest"]
28
- runs-on: ${{matrix.runs-on}}
29
- steps:
30
- - uses: actions/checkout@v4
31
- - uses: ruby/setup-ruby@v1
32
- with:
33
- working-directory: packaged_source
34
- ruby-version: ${{matrix.ruby}}
35
- bundler-cache: true
36
- - run: bundle exec rake compile test
37
- working-directory: packaged_source
@@ -1,41 +0,0 @@
1
- name: packaged_tarball
2
- concurrency:
3
- group: "${{github.workflow}}-${{github.ref}}"
4
- cancel-in-progress: true
5
- on:
6
- workflow_dispatch:
7
- schedule:
8
- - cron: "0 8 * * 3" # At 08:00 on Wednesday # https://crontab.guru/#0_8_*_*_3
9
- push:
10
- branches:
11
- - main
12
- - v*.*.x
13
- tags:
14
- - v*.*.*
15
- pull_request:
16
- types: [opened, synchronize]
17
- branches:
18
- - '*'
19
- paths: ["packaged_tarball/**/*", ".github/workflows/packaged_tarball.yml"]
20
-
21
- jobs:
22
- packaged_tarball:
23
- strategy:
24
- fail-fast: false
25
- matrix:
26
- ruby: ["3.0", "3.1", "3.2", "3.3", "3.4", "head"]
27
- runs-on: ["ubuntu-latest", "macos-13", "windows-latest"]
28
- runs-on: ${{matrix.runs-on}}
29
- steps:
30
- - uses: actions/checkout@v4
31
- - uses: ruby/setup-ruby@v1
32
- with:
33
- working-directory: packaged_tarball
34
- ruby-version: ${{matrix.ruby}}
35
- bundler-cache: true
36
- - uses: actions/cache@v4
37
- with:
38
- path: packaged_tarball/ports
39
- key: packaged_tarball-ports-${{matrix.runs-on}}-${{hashFiles('packaged_tarball/ext/packaged_tarball/extconf.rb')}}
40
- - run: bundle exec rake compile test
41
- working-directory: packaged_tarball
@@ -1,232 +0,0 @@
1
- name: precompiled
2
- concurrency:
3
- group: "${{github.workflow}}-${{github.ref}}"
4
- cancel-in-progress: true
5
- on:
6
- workflow_dispatch:
7
- schedule:
8
- - cron: "0 8 * * 3" # At 08:00 on Wednesday # https://crontab.guru/#0_8_*_*_3
9
- push:
10
- branches:
11
- - main
12
- - v*.*.x
13
- tags:
14
- - v*.*.*
15
- pull_request:
16
- types: [opened, synchronize]
17
- branches:
18
- - '*'
19
- paths: ["precompiled/**/*", ".github/workflows/precompiled.yml"]
20
-
21
- jobs:
22
- ruby_versions:
23
- outputs:
24
- setup_ruby: "['3.1', '3.2', '3.3', '3.4']"
25
- image_tag: "['3.1', '3.2', '3.3', '3.4']"
26
- runs-on: ubuntu-latest
27
- steps:
28
- - run: echo "generating rubies ..."
29
-
30
- rcd_image_version:
31
- runs-on: ubuntu-latest
32
- outputs:
33
- rcd_image_version: ${{steps.rcd_image_version.outputs.rcd_image_version}}
34
- steps:
35
- - uses: actions/checkout@v4
36
- - uses: ruby/setup-ruby@v1
37
- with:
38
- working-directory: precompiled
39
- ruby-version: "3.3"
40
- bundler-cache: true
41
- bundler: latest
42
- - id: rcd_image_version
43
- run: bundle exec ruby -e 'require "rake_compiler_dock"; puts "rcd_image_version=#{RakeCompilerDock::IMAGE_VERSION}"' >> $GITHUB_OUTPUT
44
- working-directory: precompiled
45
-
46
- test:
47
- needs: ["ruby_versions"]
48
- strategy:
49
- fail-fast: false
50
- matrix:
51
- runs-on: ["ubuntu-latest", "macos-13", "windows-latest"]
52
- ruby: ${{ fromJSON(needs.ruby_versions.outputs.setup_ruby) }}
53
- runs-on: ${{matrix.runs-on}}
54
- steps:
55
- - uses: actions/checkout@v4
56
- - uses: ruby/setup-ruby@v1
57
- with:
58
- working-directory: precompiled
59
- ruby-version: ${{matrix.ruby}}
60
- bundler-cache: true
61
- - uses: actions/cache@v4
62
- with:
63
- path: precompiled/ports
64
- key: precompiled-ports-${{matrix.runs-on}}-${{hashFiles('precompiled/ext/precompiled/extconf.rb')}}
65
- - run: bundle exec rake compile test
66
- working-directory: precompiled
67
-
68
- generic-package:
69
- runs-on: "ubuntu-latest"
70
- steps:
71
- - uses: actions/checkout@v4
72
- - uses: actions/cache@v4
73
- with:
74
- path: precompiled/ports/archives
75
- key: archives-ubuntu-${{hashFiles('precompiled/ext/precompiled/extconf.rb')}}
76
- - uses: ruby/setup-ruby@v1
77
- with:
78
- working-directory: precompiled
79
- ruby-version: "3.3"
80
- bundler-cache: true
81
- - run: ./bin/test-gem-build gems ruby
82
- working-directory: precompiled
83
- - uses: actions/upload-artifact@v4
84
- with:
85
- name: cruby-gem
86
- path: precompiled/gems
87
- retention-days: 1
88
-
89
- generic-install:
90
- needs: ["generic-package", "ruby_versions"]
91
- strategy:
92
- fail-fast: false
93
- matrix:
94
- os: ["ubuntu-latest", "macos-13", "windows-latest"]
95
- ruby: ${{ fromJSON(needs.ruby_versions.outputs.setup_ruby) }}
96
- runs-on: ${{ matrix.os }}
97
- steps:
98
- - uses: actions/checkout@v4
99
- - uses: ruby/setup-ruby@v1
100
- with:
101
- working-directory: precompiled
102
- ruby-version: "${{ matrix.ruby }}"
103
- - uses: actions/download-artifact@v4
104
- with:
105
- name: cruby-gem
106
- path: precompiled/gems
107
- - run: ./bin/test-gem-install gems
108
- working-directory: precompiled
109
- shell: bash
110
-
111
- native-package:
112
- needs: ["rcd_image_version"]
113
- strategy:
114
- fail-fast: false
115
- matrix:
116
- platform:
117
- - "aarch64-linux-gnu"
118
- - "aarch64-linux-musl"
119
- - "arm-linux-gnu"
120
- - "arm-linux-musl"
121
- - "x86-linux-gnu"
122
- - "x86-linux-musl"
123
- - "x86_64-linux-gnu"
124
- - "x86_64-linux-musl"
125
- - "arm64-darwin"
126
- - "x86_64-darwin"
127
- - "x64-mingw-ucrt"
128
- runs-on: ubuntu-latest
129
- steps:
130
- - uses: actions/checkout@v4
131
- - uses: actions/cache@v4
132
- with:
133
- path: precompiled/ports/archives
134
- key: archives-ubuntu-${{hashFiles('precompiled/ext/precompiled/extconf.rb')}}
135
- - run: |
136
- docker run --rm -v $PWD/precompiled:/precompiled -w /precompiled \
137
- ghcr.io/rake-compiler/rake-compiler-dock-image:${{ needs.rcd_image_version.outputs.rcd_image_version }}-mri-${{ matrix.platform }} \
138
- ./bin/test-gem-build gems ${{ matrix.platform }}
139
- - uses: actions/upload-artifact@v4
140
- with:
141
- name: "cruby-${{ matrix.platform }}-gem"
142
- path: precompiled/gems
143
- retention-days: 1
144
-
145
- linux-install:
146
- needs: ["native-package", "ruby_versions"]
147
- strategy:
148
- fail-fast: false
149
- matrix:
150
- platform:
151
- - "aarch64-linux-gnu"
152
- - "aarch64-linux-musl"
153
- - "arm-linux-gnu"
154
- - "arm-linux-musl"
155
- - "x86-linux-gnu"
156
- - "x86-linux-musl"
157
- - "x86_64-linux-gnu"
158
- - "x86_64-linux-musl"
159
- ruby: ${{ fromJSON(needs.ruby_versions.outputs.image_tag) }}
160
- include:
161
- # declare docker image for each platform
162
- - { platform: aarch64-linux-musl, docker_tag: "-alpine", bootstrap: "apk add bash &&" }
163
- - { platform: arm-linux-musl, docker_tag: "-alpine", bootstrap: "apk add bash &&" }
164
- - { platform: x86-linux-musl, docker_tag: "-alpine", bootstrap: "apk add bash &&" }
165
- - { platform: x86_64-linux-musl, docker_tag: "-alpine", bootstrap: "apk add bash &&" }
166
- # declare docker platform for each platform
167
- - { platform: aarch64-linux-gnu, docker_platform: "--platform=linux/arm64" }
168
- - { platform: aarch64-linux-musl, docker_platform: "--platform=linux/arm64" }
169
- - { platform: arm-linux-gnu, docker_platform: "--platform=linux/arm/v7" }
170
- - { platform: arm-linux-musl, docker_platform: "--platform=linux/arm/v7" }
171
- - { platform: x86-linux-gnu, docker_platform: "--platform=linux/386" }
172
- - { platform: x86-linux-musl, docker_platform: "--platform=linux/386" }
173
- runs-on: ubuntu-latest
174
- steps:
175
- - uses: actions/checkout@v4
176
- - uses: actions/download-artifact@v4
177
- with:
178
- name: cruby-${{ matrix.platform }}-gem
179
- path: precompiled/gems
180
- - run: |
181
- docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
182
- docker run --rm -v $PWD/precompiled:/precompiled -w /precompiled \
183
- ${{ matrix.docker_platform }} ruby:${{ matrix.ruby }}${{ matrix.docker_tag }} \
184
- sh -c "
185
- gem update --system &&
186
- ${{ matrix.bootstrap }}
187
- ./bin/test-gem-install gems
188
- "
189
-
190
- darwin-install:
191
- needs: ["native-package", "ruby_versions"]
192
- strategy:
193
- fail-fast: false
194
- matrix:
195
- platform:
196
- - arm64-darwin
197
- - x86_64-darwin
198
- ruby: ${{ fromJSON(needs.ruby_versions.outputs.setup_ruby) }}
199
- include:
200
- - { platform: arm64-darwin, os: macos-14 }
201
- - { platform: x86_64-darwin, os: macos-13 }
202
- runs-on: ${{matrix.os}}
203
- steps:
204
- - uses: actions/checkout@v4
205
- - uses: ruby/setup-ruby@v1
206
- with:
207
- ruby-version: "${{matrix.ruby}}"
208
- - uses: actions/download-artifact@v4
209
- with:
210
- name: cruby-${{matrix.platform}}-gem
211
- path: precompiled/gems
212
- - run: ./bin/test-gem-install gems
213
- working-directory: precompiled
214
-
215
- windows-install:
216
- needs: ["native-package", "ruby_versions"]
217
- strategy:
218
- fail-fast: false
219
- matrix:
220
- ruby: ${{ fromJSON(needs.ruby_versions.outputs.setup_ruby) }}
221
- runs-on: windows-2022
222
- steps:
223
- - uses: actions/checkout@v4
224
- - uses: ruby/setup-ruby@v1
225
- with:
226
- ruby-version: "${{matrix.ruby}}"
227
- - uses: actions/download-artifact@v4
228
- with:
229
- name: cruby-x64-mingw-ucrt-gem
230
- path: precompiled/gems
231
- - run: ./bin/test-gem-install gems
232
- working-directory: precompiled
@@ -1,40 +0,0 @@
1
- name: system
2
- concurrency:
3
- group: "${{github.workflow}}-${{github.ref}}"
4
- cancel-in-progress: true
5
- on:
6
- workflow_dispatch:
7
- schedule:
8
- - cron: "0 8 * * 3" # At 08:00 on Wednesday # https://crontab.guru/#0_8_*_*_3
9
- push:
10
- branches:
11
- - main
12
- - v*.*.x
13
- tags:
14
- - v*.*.*
15
- pull_request:
16
- types: [opened, synchronize]
17
- branches:
18
- - '*'
19
- paths: ["system/**/*", ".github/workflows/system.yml"]
20
-
21
- jobs:
22
- system:
23
- strategy:
24
- fail-fast: false
25
- matrix:
26
- ruby: ["3.0", "3.1", "3.2", "3.3", "3.4", "head"]
27
- runs-on: ["ubuntu-latest", "macos-13", "windows-latest"]
28
- runs-on: ${{matrix.runs-on}}
29
- steps:
30
- - uses: actions/checkout@v4
31
- - uses: MSP-Greg/setup-ruby-pkgs@v1
32
- with:
33
- working-directory: system
34
- ruby-version: ${{matrix.ruby}}
35
- bundler-cache: true
36
- mingw: "libyaml" # windows
37
- apt-get: "libyaml-dev" # linux
38
- brew: "libyaml" # macos
39
- - run: bundle exec rake compile test
40
- working-directory: system
@@ -1,203 +0,0 @@
1
- // LICENSE
2
- //
3
- // This software is dual-licensed to the public domain and under the following
4
- // license: you are granted a perpetual, irrevocable license to copy, modify,
5
- // publish, and distribute this file as you see fit.
6
- //
7
- // VERSION
8
- // 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
9
- // 0.1.0 (2016-03-28) Initial release
10
- //
11
- // AUTHOR
12
- // Forrest Smith
13
- //
14
- // NOTES
15
- // Compiling
16
- // You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
17
- //
18
- // fts_fuzzy_match_simple(...)
19
- // Simplified version of fts_fuzzy_match
20
- //
21
- // fts_fuzzy_match(...)
22
- // Returns true if pattern is found AND calculates a score.
23
- // Performs exhaustive search via recursion to find all possible matches and match with highest score.
24
- // Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
25
- // Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
26
- // Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
27
- // Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
28
-
29
-
30
- #ifndef FTS_FUZZY_MATCH_H
31
- #define FTS_FUZZY_MATCH_H
32
-
33
-
34
- #include <ctype.h> // tolower, toupper
35
-
36
- // Public interface
37
- static bool fts_fuzzy_match_simple(char const * pattern, char const * str, int * outScore);
38
- static bool fts_fuzzy_match(char const * pattern, char const * str, int * outScore, uint8_t * matches, int maxMatches);
39
-
40
- #ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
41
-
42
- // Private interface
43
- static bool fts_fuzzy_match_recursive(const char * pattern, const char * str, int * outScore,
44
- const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches,
45
- int nextMatch, int * recursionCount, int recursionLimit);
46
-
47
- static bool fts_fuzzy_match_simple(char const * pattern, char const * str, int * outScore) {
48
- uint8_t matches[256];
49
- return fts_fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
50
- }
51
-
52
- static bool fts_fuzzy_match(char const * pattern, char const * str, int * outScore, uint8_t * matches, int maxMatches) {
53
- int recursionCount = 0;
54
- int recursionLimit = 10;
55
-
56
- return fts_fuzzy_match_recursive(pattern, str, outScore, str, NULL, matches, maxMatches, 0, &recursionCount, recursionLimit);
57
- }
58
-
59
- // Private implementation
60
- static bool fts_fuzzy_match_recursive(const char * pattern, const char * str, int * outScore,
61
- const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches,
62
- int nextMatch, int * recursionCount, int recursionLimit)
63
- {
64
- // Count recursions
65
- ++*recursionCount;
66
- if (*recursionCount >= recursionLimit)
67
- return false;
68
-
69
- // Detect end of strings
70
- if (*pattern == '\0' || *str == '\0')
71
- return false;
72
-
73
- unsigned long stringLength = strlen(str);
74
-
75
- // Recursion params
76
- bool recursiveMatch = false;
77
- uint8_t bestRecursiveMatches[256];
78
- int bestRecursiveScore = 0;
79
-
80
- // Loop through pattern and str looking for a match
81
- bool first_match = true;
82
- while (*pattern != '\0' && *str != '\0') {
83
-
84
- // Found match
85
- if (tolower(*pattern) == tolower(*str)) {
86
-
87
- // Supplied matches buffer was too short
88
- if (nextMatch >= maxMatches)
89
- return false;
90
-
91
- // "Copy-on-Write" srcMatches into matches
92
- if (first_match && srcMatches) {
93
- memcpy(matches, srcMatches, nextMatch);
94
- first_match = false;
95
- }
96
-
97
- // Recursive call that "skips" this match
98
- uint8_t recursiveMatches[256];
99
- int recursiveScore;
100
- if (fts_fuzzy_match_recursive(pattern, str + 1, &recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) {
101
-
102
- // Pick best recursive score
103
- if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
104
- memcpy(bestRecursiveMatches, recursiveMatches, 256);
105
- bestRecursiveScore = recursiveScore;
106
- }
107
- recursiveMatch = true;
108
- }
109
-
110
- // Advance
111
- matches[nextMatch++] = (uint8_t)(str - strBegin);
112
- ++pattern;
113
- }
114
- ++str;
115
- }
116
-
117
- // Determine if full pattern was matched
118
- bool matched = *pattern == '\0' ? true : false;
119
-
120
- // Calculate score
121
- if (matched) {
122
- const int sequential_bonus = 20; // bonus for adjacent matches (DEFAULT: 15)
123
- const int separator_bonus = 30; // bonus if match occurs after a separator
124
- const int camel_bonus = 0; // bonus if match is uppercase and prev is lower (DEFAULT: 30)
125
- const int first_letter_bonus = 15; // bonus if the first letter is matched
126
-
127
- const int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match
128
- const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
129
- const int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
130
- const int string_length_penalty = -1; // (DEFAULT: 0)
131
-
132
- // Iterate str to end
133
- while (*str != '\0')
134
- ++str;
135
-
136
- // Initialize score
137
- *outScore = 100;
138
-
139
- // Apply length penalty
140
- *outScore += stringLength * string_length_penalty;
141
-
142
- // Apply leading letter penalty
143
- int penalty = leading_letter_penalty * matches[0];
144
- if (penalty < max_leading_letter_penalty)
145
- penalty = max_leading_letter_penalty;
146
- *outScore += penalty;
147
-
148
- // Apply unmatched penalty
149
- int unmatched = (int)(str - strBegin) - nextMatch;
150
- *outScore += unmatched_letter_penalty * unmatched;
151
-
152
- // Apply ordering bonuses
153
- for (int i = 0; i < nextMatch; ++i) {
154
- uint8_t currIdx = matches[i];
155
-
156
- if (i > 0) {
157
- uint8_t prevIdx = matches[i - 1];
158
-
159
- // Sequential
160
- if (currIdx == (prevIdx + 1))
161
- *outScore += sequential_bonus;
162
- }
163
-
164
- // Check for bonuses based on neighbor character value
165
- if (currIdx > 0) {
166
- // Camel case
167
- char neighbor = strBegin[currIdx - 1];
168
- char curr = strBegin[currIdx];
169
- if (islower(neighbor) && isupper(curr))
170
- *outScore += camel_bonus;
171
-
172
- // Separator
173
- bool neighborSeparator = neighbor == '_' || neighbor == ' ';
174
- if (neighborSeparator)
175
- *outScore += separator_bonus;
176
- }
177
- else {
178
- // First letter
179
- *outScore += first_letter_bonus;
180
- }
181
- }
182
- }
183
-
184
- // Return best result
185
- if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) {
186
- // Recursive score is better than "this"
187
- memcpy(matches, bestRecursiveMatches, maxMatches);
188
- *outScore = bestRecursiveScore;
189
- return true;
190
- }
191
- else if (matched) {
192
- // "this" score is better than recursive
193
- return true;
194
- }
195
- else {
196
- // no match
197
- return false;
198
- }
199
- }
200
-
201
- #endif // FTS_FUZZY_MATCH_IMPLEMENTATION
202
-
203
- #endif // FTS_FUZZY_MATCH_H