quickmatch-js 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/config.rs +19 -2
- package/src/index.js +303 -212
- package/src/lib.rs +72 -38
- package/LICENSE +0 -21
- package/README.md +0 -26
package/package.json
CHANGED
package/src/config.rs
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
const DEFAULT_SEPARATORS: &[char] = &['_', '-', ' '];
|
|
1
|
+
const DEFAULT_SEPARATORS: &[char] = &['_', '-', ' ', ':', '/'];
|
|
2
2
|
const DEFAULT_TRIGRAM_BUDGET: usize = 6;
|
|
3
3
|
const DEFAULT_LIMIT: usize = 100;
|
|
4
|
+
const DEFAULT_MIN_SCORE: usize = 2;
|
|
4
5
|
|
|
5
6
|
pub struct QuickMatchConfig {
|
|
6
7
|
/// Separators used to split words.
|
|
7
8
|
///
|
|
8
|
-
/// Default: ['_', '-', ' ']
|
|
9
|
+
/// Default: ['_', '-', ' ', ':', '/']
|
|
9
10
|
separators: &'static [char],
|
|
10
11
|
/// Maximum number of results to return.
|
|
11
12
|
///
|
|
@@ -22,6 +23,12 @@ pub struct QuickMatchConfig {
|
|
|
22
23
|
/// - High (9-15): Slower, more accurate fuzzy matching
|
|
23
24
|
/// - Max: 20
|
|
24
25
|
trigram_budget: usize,
|
|
26
|
+
/// Minimum trigram score required for fuzzy matches.
|
|
27
|
+
/// Higher values require more trigram overlap, reducing noise.
|
|
28
|
+
///
|
|
29
|
+
/// Default: 2
|
|
30
|
+
/// - Min: 1
|
|
31
|
+
min_score: usize,
|
|
25
32
|
}
|
|
26
33
|
|
|
27
34
|
impl Default for QuickMatchConfig {
|
|
@@ -30,6 +37,7 @@ impl Default for QuickMatchConfig {
|
|
|
30
37
|
separators: DEFAULT_SEPARATORS,
|
|
31
38
|
limit: DEFAULT_LIMIT,
|
|
32
39
|
trigram_budget: DEFAULT_TRIGRAM_BUDGET,
|
|
40
|
+
min_score: DEFAULT_MIN_SCORE,
|
|
33
41
|
}
|
|
34
42
|
}
|
|
35
43
|
}
|
|
@@ -54,6 +62,11 @@ impl QuickMatchConfig {
|
|
|
54
62
|
self
|
|
55
63
|
}
|
|
56
64
|
|
|
65
|
+
pub fn with_min_score(mut self, min_score: usize) -> Self {
|
|
66
|
+
self.min_score = min_score.max(1);
|
|
67
|
+
self
|
|
68
|
+
}
|
|
69
|
+
|
|
57
70
|
pub fn limit(&self) -> usize {
|
|
58
71
|
self.limit
|
|
59
72
|
}
|
|
@@ -65,4 +78,8 @@ impl QuickMatchConfig {
|
|
|
65
78
|
pub fn separators(&self) -> &[char] {
|
|
66
79
|
self.separators
|
|
67
80
|
}
|
|
81
|
+
|
|
82
|
+
pub fn min_score(&self) -> usize {
|
|
83
|
+
self.min_score
|
|
84
|
+
}
|
|
68
85
|
}
|
package/src/index.js
CHANGED
|
@@ -1,335 +1,426 @@
|
|
|
1
|
-
const DEFAULT_SEPARATORS =
|
|
1
|
+
const DEFAULT_SEPARATORS = "_- :/";
const DEFAULT_TRIGRAM_BUDGET = 6;
const DEFAULT_LIMIT = 100;
const DEFAULT_MIN_SCORE = 2;

/**
 * Coerce a caller-supplied value to a whole number.
 * @param {number} n
 * @param {number} fallback - Returned when `n` is not a number (NaN, undefined, ...)
 * @returns {number}
 */
function wholeNumberOr(n, fallback) {
  const whole = Math.floor(Number(n));
  return Number.isNaN(whole) ? fallback : whole;
}

/**
 * Configuration for QuickMatch.
 *
 * Every `with*` setter mutates this instance and returns it, so calls can be
 * chained: `new QuickMatchConfig().withLimit(10).withMinScore(3)`.
 */
export class QuickMatchConfig {
  /**
   * Separators used to split words.
   * @type {string}
   * @default "_- :/"
   */
  separators = DEFAULT_SEPARATORS;

  /**
   * Maximum number of results to return.
   * @type {number}
   * @default 100
   */
  limit = DEFAULT_LIMIT;

  /**
   * Budget of trigrams to process from unknown words.
   * This budget is distributed fairly across all unknown words.
   *
   * - 0: Disable trigram matching (only exact word matches)
   * - Low (3-6): Faster, less accurate fuzzy matching
   * - High (9-15): Slower, more accurate fuzzy matching
   * - Max: 20
   * @type {number}
   * @default 6
   */
  trigramBudget = DEFAULT_TRIGRAM_BUDGET;

  /**
   * Minimum trigram score required for fuzzy matches.
   * Higher values require more trigram overlap, reducing noise.
   * @type {number}
   * @default 2
   */
  minScore = DEFAULT_MIN_SCORE;

  /**
   * @param {number} n - Max results (default: 100, min: 1). Fractions are
   *   rounded down; a non-numeric value restores the default. `Infinity` is
   *   kept and means "no truncation".
   * @returns {this}
   */
  withLimit(n) {
    // The limit is later assigned to an array's `length`, which throws a
    // RangeError for non-integers, so only whole numbers are stored.
    this.limit = Math.max(1, wholeNumberOr(n, DEFAULT_LIMIT));
    return this;
  }

  /**
   * @param {number} n - Trigram budget (0-20, default: 6). Fractions are
   *   rounded down; a non-numeric value restores the default.
   * @returns {this}
   */
  withTrigramBudget(n) {
    const whole = wholeNumberOr(n, DEFAULT_TRIGRAM_BUDGET);
    this.trigramBudget = Math.max(0, Math.min(20, whole));
    return this;
  }

  /**
   * @param {string} s - Separator characters (default: '_- :/')
   * @returns {this}
   */
  withSeparators(s) {
    this.separators = s;
    return this;
  }

  /**
   * @param {number} n - Min trigram score (default: 2, min: 1). Fractions are
   *   rounded up (scores are whole numbers, so `>= 1.5` already meant `>= 2`);
   *   a non-numeric value restores the default.
   * @returns {this}
   */
  withMinScore(n) {
    const whole = Math.ceil(Number(n));
    // A NaN threshold would make every `score < minScore` comparison false
    // and silently disable filtering.
    this.minScore = Number.isNaN(whole) ? DEFAULT_MIN_SCORE : Math.max(1, whole);
    return this;
  }
}
|
|
25
69
|
|
|
70
|
+
/**
 * Fast fuzzy string matcher using word and trigram indexing.
 *
 * Items are split into words on the configured separators. Each word, and
 * each pair of adjacent words joined together, is stored in `wordIndex`;
 * every 3-character window of each word is stored in `trigramIndex`. Query
 * words found in `wordIndex` narrow the candidates by intersection; query
 * words not found there are matched through their trigrams.
 */
export class QuickMatch {
  /**
   * @param {string[]} items - Items to index (should be lowercase)
   * @param {QuickMatchConfig} [config]
   */
  constructor(items, config = new QuickMatchConfig()) {
    this.config = config;
    this.items = items;
    /** @type {Map<string, number[]>} */
    this.wordIndex = new Map();
    /** @type {Map<string, number[]>} */
    this.trigramIndex = new Map();
    this._sepLookup = sepLookup(config.separators);
    // Scratch state reused across queries: per-item score plus the list of
    // item indices whose score is currently non-zero.
    this._scores = new Uint32Array(items.length);
    /** @type {number[]} */
    this._dirty = [];

    let maxWordLen = 0;
    let maxQueryLen = 0;
    let maxWords = 0;
    const sep = this._sepLookup;

    for (let idx = 0; idx < items.length; idx++) {
      const item = items[idx];
      if (item.length > maxQueryLen) maxQueryLen = item.length;

      const words = [];
      let start = 0;

      // i === item.length acts as a virtual trailing separator so the last
      // word of the item is flushed too.
      for (let i = 0; i <= item.length; i++) {
        if (i < item.length && !sep[item.charCodeAt(i)]) continue;
        if (i > start) {
          const word = item.slice(start, i);
          words.push(word);
          if (word.length > maxWordLen) maxWordLen = word.length;
          addToIndex(this.wordIndex, word, idx);
          indexTrigrams(this.trigramIndex, word, idx);
        }
        start = i + 1;
      }

      // Index adjacent word pairs as compounds ("hash" + "rate" -> "hashrate").
      for (let i = 0; i < words.length - 1; i++) {
        addToIndex(this.wordIndex, words[i] + words[i + 1], idx);
      }

      if (words.length > maxWords) maxWords = words.length;
    }

    // Queries exceeding these bounds are rejected up front in matchesWith.
    this.maxWordLen = maxWordLen + 4;
    this.maxQueryLen = maxQueryLen + 6;
    this.maxWords = maxWords + 2;
  }

  /**
   * Match `query` using the configuration given to the constructor.
   * @param {string} query
   * @returns {string[]}
   */
  matches(query) {
    return this.matchesWith(query, this.config);
  }

  /**
   * Match `query` using an explicit configuration.
   * @param {string} query
   * @param {QuickMatchConfig} config
   * @returns {string[]} Matching items, best first, at most `config.limit` long.
   */
  matchesWith(query, config) {
    const { limit, trigramBudget } = config;
    // Reuse the lookup table built in the constructor when separators match.
    const sep =
      config.separators === this.config.separators
        ? this._sepLookup
        : sepLookup(config.separators);

    const q = normalize(query);
    if (!q || q.length > this.maxQueryLen) return [];

    const qwords = splitWords(q, sep, this.maxWordLen);
    if (!qwords.length || qwords.length > this.maxWords) return [];

    const known = [];
    const unknown = [];

    for (const w of qwords) {
      const hits = this.wordIndex.get(w);
      if (hits) known.push(hits);
      else if (w.length >= 3 && unknown.length < trigramBudget) unknown.push(w);
    }

    const pool = intersect(known);
    const hasPool = pool.length > 0;

    // Exact-match path: nothing to fuzzy-match, or fuzzy matching disabled.
    if (!unknown.length || !trigramBudget) {
      if (!hasPool) return [];
      return this._rank(pool, null, qwords, sep, limit);
    }

    const { _scores: scores, _dirty: dirty } = this;
    try {
      // Seed scores from exact-match pool
      if (hasPool) {
        for (const i of pool) {
          scores[i] = 1;
          dirty.push(i);
        }
      }

      const hitCount = this._scoreTrigrams(
        unknown,
        trigramBudget,
        hasPool,
        Math.max(0, q.length - 3),
      );
      // A config object without `minScore` would otherwise yield NaN here,
      // and every `score < NaN` test is false (no filtering at all).
      const floor = config.minScore ?? DEFAULT_MIN_SCORE;
      const minScore = Math.max(floor, Math.ceil(hitCount / 2));
      return this._rank(dirty, minScore, qwords, sep, limit);
    } finally {
      // Always restore the shared scratch state, even when ranking throws;
      // leftover scores would leak into every later query.
      for (const i of dirty) scores[i] = 0;
      dirty.length = 0;
    }
  }

  /**
   * Add one point to an item's score for every distinct query trigram it
   * contains. Trigrams are taken round-robin from the unknown words (one
   * position per word per round, chosen by `trigramPosition`) until `budget`
   * distinct trigrams have been looked up.
   *
   * @private
   * @param {string[]} unknown - Query words absent from the word index
   * @param {number} budget - Max number of distinct trigrams to look up
   * @param {boolean} poolOnly - When true, only items that already have a
   *   non-zero score are incremented
   * @param {number} minLen - When `poolOnly` is false, items shorter than
   *   this are ignored
   * @returns {number} How many of the looked-up trigrams exist in the index
   */
  _scoreTrigrams(unknown, budget, poolOnly, minLen) {
    const visited = new Set();
    const { _scores: scores, _dirty: dirty, items } = this;
    let remaining = budget;
    let hits = 0;

    outer: for (let round = 0; round < budget; round++) {
      for (const word of unknown) {
        if (remaining <= 0) break outer;

        const pos = trigramPosition(word.length, round);
        if (pos < 0) continue;

        const tri = word[pos] + word[pos + 1] + word[pos + 2];
        if (visited.has(tri)) continue;
        visited.add(tri);
        remaining--;

        const matched = this.trigramIndex.get(tri);
        if (!matched) continue;
        hits++;

        if (poolOnly) {
          for (let j = 0; j < matched.length; j++) {
            const i = matched[j];
            if (scores[i] > 0) scores[i]++;
          }
        } else {
          for (let j = 0; j < matched.length; j++) {
            const i = matched[j];
            if (items[i].length >= minLen) {
              // First touch of this item: remember it for ranking and reset.
              if (scores[i] === 0) dirty.push(i);
              scores[i]++;
            }
          }
        }
      }
    }

    return hits;
  }

  /**
   * Rank candidates by prefix match, then score, then length.
   * @private
   * @param {number[]} indices
   * @param {number|null} minScore - null = no score filtering (exact-match path)
   * @param {string[]} qwords
   * @param {Uint8Array} sep
   * @param {number} limit
   * @returns {string[]}
   */
  _rank(indices, minScore, qwords, sep, limit) {
    const { items, _scores: scores } = this;
    // One small record per surviving candidate, so the work and memory scale
    // with the candidate count rather than with items.length.
    const ranked = [];

    for (let i = 0; i < indices.length; i++) {
      const idx = indices[i];
      const score = scores[idx];
      if (minScore !== null && score < minScore) continue;
      ranked.push({
        idx,
        score,
        prefix: prefixScore(items[idx], qwords, sep),
      });
    }

    ranked.sort(
      (a, b) =>
        b.prefix - a.prefix ||
        b.score - a.score ||
        items[a.idx].length - items[b.idx].length,
    );

    // Array length must be a non-negative integer; a fractional limit coming
    // from a hand-built config would otherwise throw a RangeError here.
    if (ranked.length > limit) ranked.length = Math.max(0, Math.floor(limit));
    return ranked.map((entry) => items[entry.idx]);
  }
}
|
|
211
272
|
|
|
212
|
-
|
|
213
|
-
|
|
273
|
+
// --- Helpers ---
|
|
274
|
+
|
|
275
|
+
/**
 * Prepare a raw query for matching: strip leading/trailing characters with a
 * code of 32 or below (space and control characters), drop every non-ASCII
 * code unit, and lowercase ASCII letters.
 * @param {string} query
 * @returns {string}
 */
function normalize(query) {
  let lo = 0;
  let hi = query.length;
  while (lo < hi && query.charCodeAt(lo) <= 32) lo += 1;
  while (hi > lo && query.charCodeAt(hi - 1) <= 32) hi -= 1;

  const kept = [];
  for (let k = lo; k < hi; k += 1) {
    const code = query.charCodeAt(k);
    if (code > 127) continue;
    const isUpper = code >= 65 && code <= 90;
    kept.push(isUpper ? String.fromCharCode(code + 32) : query[k]);
  }
  return kept.join("");
}
|
|
225
289
|
|
|
226
|
-
|
|
290
|
+
/**
 * Build a 128-entry table indexed by ASCII code: 1 marks a separator.
 * Separator characters with a code of 128 or above are ignored.
 * @param {string} separators
 * @returns {Uint8Array}
 */
function sepLookup(separators) {
  const table = new Uint8Array(128);
  let k = separators.length;
  while (k-- > 0) {
    const code = separators.charCodeAt(k);
    if (code <= 127) table[code] = 1;
  }
  return table;
}
|
|
228
299
|
|
|
229
|
-
|
|
300
|
+
/**
 * Split `text` on separator characters and return the distinct words in
 * first-seen order. Empty words and words longer than `maxLen` are dropped.
 * @param {string} text
 * @param {Uint8Array} sep - Lookup table: non-zero at a char code marks a separator
 * @param {number} maxLen
 * @returns {string[]}
 */
function splitWords(text, sep, maxLen) {
  /** @type {string[]} */
  const unique = [];
  const total = text.length;
  let cursor = 0;

  while (cursor < total) {
    if (sep[text.charCodeAt(cursor)]) {
      cursor += 1;
      continue;
    }

    // cursor sits on the first character of a word; find where it stops.
    let stop = cursor + 1;
    while (stop < total && !sep[text.charCodeAt(stop)]) stop += 1;

    const token = text.slice(cursor, stop);
    if (token.length <= maxLen && unique.indexOf(token) === -1) {
      unique.push(token);
    }
    cursor = stop + 1;
  }

  return unique;
}
|
|
249
319
|
|
|
320
|
+
/**
 * Record that item `value` is reachable under `key`.
 *
 * The constructor indexes items in ascending order and makes all calls for
 * one item back to back, so a repeat of `value` can only be the last entry
 * of the list. Skipping it keeps each posting list free of duplicates; an
 * item such as "foo_bar_foo" would otherwise be listed twice under "foo" and
 * be returned twice from a query.
 *
 * @param {Map<string, number[]>} index
 * @param {string} key
 * @param {number} value
 */
function addToIndex(index, key, value) {
  const arr = index.get(key);
  if (!arr) {
    index.set(key, [value]);
  } else if (arr[arr.length - 1] !== value) {
    arr.push(value);
  }
}
|
|
258
330
|
|
|
259
|
-
|
|
331
|
+
/**
 * Register item `idx` under every 3-character window of `word`.
 * Words shorter than three characters contribute nothing. An item is not
 * appended again when it is already the last entry for a trigram.
 * @param {Map<string, number[]>} index
 * @param {string} word
 * @param {number} idx
 */
function indexTrigrams(index, word, idx) {
  const lastStart = word.length - 3;
  if (lastStart < 0) return;

  for (let at = 0; at <= lastStart; at += 1) {
    const gram = word[at] + word[at + 1] + word[at + 2];
    const postings = index.get(gram);
    if (postings === undefined) {
      index.set(gram, [idx]);
      continue;
    }
    if (postings[postings.length - 1] !== idx) postings.push(idx);
  }
}
|
|
273
345
|
|
|
274
|
-
|
|
346
|
+
/**
 * Intersect several ascending-sorted index lists.
 * Starts from a copy of the shortest list and keeps only the values that a
 * binary search finds in every other list. The inputs are not modified.
 * @param {number[][]} arrays
 * @returns {number[]} A new array; empty when `arrays` is empty.
 */
function intersect(arrays) {
  const total = arrays.length;
  if (total === 0) return [];

  // Pick the first list of minimal length as the starting candidate set.
  let pivot = 0;
  arrays.forEach((list, k) => {
    if (list.length < arrays[pivot].length) pivot = k;
  });

  // Binary search over a sorted list.
  const contains = (sorted, target) => {
    let left = 0;
    let right = sorted.length - 1;
    while (left <= right) {
      const probe = (left + right) >> 1;
      if (sorted[probe] === target) return true;
      if (sorted[probe] < target) left = probe + 1;
      else right = probe - 1;
    }
    return false;
  };

  let kept = arrays[pivot].slice();
  for (let k = 0; k < total && kept.length > 0; k += 1) {
    if (k === pivot) continue;
    const other = arrays[k];
    kept = kept.filter((candidate) => contains(other, candidate));
  }
  return kept;
}
|
|
300
366
|
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
if (
|
|
311
|
-
|
|
367
|
+
/**
 * Binary search for `val` in an ascending-sorted array.
 * @param {number[]} arr
 * @param {number} val
 * @returns {boolean} true when `val` is present
 */
function bsearch(arr, val) {
  let left = 0;
  let right = arr.length - 1;

  while (left <= right) {
    const probe = (left + right) >> 1;
    const seen = arr[probe];
    if (seen === val) return true;
    if (seen < val) {
      left = probe + 1;
    } else {
      right = probe - 1;
    }
  }

  return false;
}
|
|
316
382
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
const
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
383
|
+
/**
 * Compare the leading words of `item` with the query words, in order.
 *
 * 2 = exact match (item has no words beyond the query words),
 * 1 = prefix match (item starts with the query words but continues),
 * 0 = no match.
 *
 * @param {string} item
 * @param {string[]} qwords
 * @param {Uint8Array} sep - Lookup table: non-zero at a char code marks a separator
 * @returns {number}
 */
function prefixScore(item, qwords, sep) {
  const end = item.length;
  let cursor = 0;

  const skipSeparators = () => {
    while (cursor < end && sep[item.charCodeAt(cursor)]) cursor += 1;
  };

  for (const expected of qwords) {
    skipSeparators();
    // Item ran out of words before the query did.
    if (cursor >= end) return 0;

    const wordStart = cursor;
    while (cursor < end && !sep[item.charCodeAt(cursor)]) cursor += 1;

    // The item word must equal the query word exactly: same length, same text.
    if (cursor - wordStart !== expected.length) return 0;
    if (!item.startsWith(expected, wordStart)) return 0;
  }

  // Trailing separators do not count as extra content.
  skipSeparators();
  return cursor >= end ? 2 : 1;
}
|
|
412
|
+
|
|
413
|
+
/**
 * Choose which trigram of a word to sample in a given round.
 *
 * Round 0 samples the first trigram, round 1 the last, round 2 the middle
 * one; later rounds alternate around the middle with a growing offset.
 * Returns -1 when the round has no usable position for a word of this
 * length (word too short, or the position falls on the first trigram, at or
 * past the last one, or on the middle one).
 *
 * @param {number} len - Word length
 * @param {number} round - Zero-based sampling round
 * @returns {number} Start offset of the trigram, or -1
 */
function trigramPosition(len, round) {
  const last = len - 3;
  if (last < 0) return -1;

  switch (round) {
    case 0:
      return 0;
    case 1:
      if (last > 0) return last;
      break;
    case 2:
      if (last > 1) return last >> 1;
      break;
    default:
      break;
  }

  if (last <= 2) return -1;

  const center = last >> 1;
  const spread = (round - 2) >> 1;
  // Odd rounds step left of the center, even rounds step right.
  const candidate =
    (round & 1) === 1 ? Math.max(0, center - spread) : center + spread;

  const rejected =
    candidate === 0 || candidate >= last || candidate === center;
  return rejected ? -1 : candidate;
}
|
package/src/lib.rs
CHANGED
|
@@ -36,16 +36,16 @@ impl<'a> QuickMatch<'a> {
|
|
|
36
36
|
|
|
37
37
|
for &item in items {
|
|
38
38
|
max_query_len = max_query_len.max(item.len());
|
|
39
|
-
let
|
|
40
|
-
|
|
41
|
-
word_count += 1;
|
|
42
|
-
if word.is_empty() {
|
|
43
|
-
continue;
|
|
44
|
-
}
|
|
39
|
+
let item_words: Vec<&str> = item.split(separators).filter(|w| !w.is_empty()).collect();
|
|
40
|
+
max_words = max_words.max(item_words.len());
|
|
45
41
|
|
|
46
|
-
|
|
42
|
+
for word in &item_words {
|
|
43
|
+
max_word_len = max_word_len.max(word.len());
|
|
47
44
|
|
|
48
|
-
word_index
|
|
45
|
+
word_index
|
|
46
|
+
.entry(word.to_string())
|
|
47
|
+
.or_default()
|
|
48
|
+
.insert(item);
|
|
49
49
|
|
|
50
50
|
if word.len() >= 3 {
|
|
51
51
|
let chars = word.chars().collect::<Vec<_>>();
|
|
@@ -57,13 +57,18 @@ impl<'a> QuickMatch<'a> {
|
|
|
57
57
|
}
|
|
58
58
|
}
|
|
59
59
|
}
|
|
60
|
-
|
|
60
|
+
|
|
61
|
+
// Index adjacent word pairs as compounds (e.g. "hash"+"rate" → "hashrate")
|
|
62
|
+
for pair in item_words.windows(2) {
|
|
63
|
+
let compound = format!("{}{}", pair[0], pair[1]);
|
|
64
|
+
word_index.entry(compound).or_default().insert(item);
|
|
65
|
+
}
|
|
61
66
|
}
|
|
62
67
|
|
|
63
68
|
Self {
|
|
64
69
|
max_query_len: max_query_len + 6,
|
|
65
70
|
max_word_len: max_word_len + 4,
|
|
66
|
-
max_word_count:
|
|
71
|
+
max_word_count: max_words + 2,
|
|
67
72
|
word_index,
|
|
68
73
|
trigram_index,
|
|
69
74
|
config,
|
|
@@ -71,26 +76,15 @@ impl<'a> QuickMatch<'a> {
|
|
|
71
76
|
}
|
|
72
77
|
}
|
|
73
78
|
|
|
74
|
-
///
|
|
75
|
-
/// `limit`: max number of returned matches
|
|
76
|
-
///
|
|
77
|
-
/// `max_trigrams`: max number of processed trigrams in unknown words (0-10 recommended)
|
|
78
|
-
///
|
|
79
79
|
pub fn matches(&self, query: &str) -> Vec<&'a str> {
|
|
80
80
|
self.matches_with(query, &self.config)
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
-
///
|
|
84
|
-
/// `limit`: max number of returned matches
|
|
85
|
-
///
|
|
86
|
-
/// `max_trigrams`: max number of processed trigrams in unknown words (0-10 recommended)
|
|
87
|
-
///
|
|
88
83
|
pub fn matches_with(&self, query: &str, config: &QuickMatchConfig) -> Vec<&'a str> {
|
|
89
84
|
let limit = config.limit();
|
|
90
85
|
let trigram_budget = config.trigram_budget();
|
|
91
|
-
let query_len = query.len();
|
|
92
86
|
|
|
93
|
-
if query.is_empty()
|
|
87
|
+
if query.is_empty() {
|
|
94
88
|
return vec![];
|
|
95
89
|
}
|
|
96
90
|
|
|
@@ -101,22 +95,30 @@ impl<'a> QuickMatch<'a> {
|
|
|
101
95
|
.collect::<String>()
|
|
102
96
|
.to_ascii_lowercase();
|
|
103
97
|
|
|
104
|
-
|
|
105
|
-
|
|
98
|
+
if query.is_empty() || query.len() > self.max_query_len {
|
|
99
|
+
return vec![];
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
let separators = config.separators();
|
|
103
|
+
|
|
104
|
+
let query_words: Vec<&str> = query
|
|
105
|
+
.split(separators)
|
|
106
106
|
.filter(|w| !w.is_empty() && w.len() <= self.max_word_len)
|
|
107
|
-
.collect
|
|
107
|
+
.collect();
|
|
108
|
+
|
|
109
|
+
let words: FxHashSet<&str> = query_words.iter().copied().collect();
|
|
108
110
|
|
|
109
111
|
if words.is_empty() || words.len() > self.max_word_count {
|
|
110
112
|
return vec![];
|
|
111
113
|
}
|
|
112
114
|
|
|
113
|
-
let min_len =
|
|
115
|
+
let min_len = query.len().saturating_sub(3);
|
|
114
116
|
|
|
115
117
|
let mut pool: Option<FxHashSet<*const str>> = None;
|
|
116
118
|
let mut unknown_words = Vec::new();
|
|
117
119
|
|
|
118
120
|
let mut words_to_intersect = vec![];
|
|
119
|
-
for word in words {
|
|
121
|
+
for &word in &words {
|
|
120
122
|
if let Some(items) = self.word_index.get(word) {
|
|
121
123
|
words_to_intersect.push(items)
|
|
122
124
|
} else if word.len() >= 3 && unknown_words.len() < trigram_budget {
|
|
@@ -144,17 +146,23 @@ impl<'a> QuickMatch<'a> {
|
|
|
144
146
|
let mut results: Vec<_> = pool
|
|
145
147
|
.unwrap_or_default()
|
|
146
148
|
.into_iter()
|
|
147
|
-
.map(|item|
|
|
149
|
+
.map(|item| {
|
|
150
|
+
let s = unsafe { &*item as &str };
|
|
151
|
+
(s, prefix_score(s, &query_words, separators))
|
|
152
|
+
})
|
|
148
153
|
.collect();
|
|
149
154
|
|
|
155
|
+
let cmp =
|
|
156
|
+
|a: &(&str, u8), b: &(&str, u8)| b.1.cmp(&a.1).then_with(|| a.0.len().cmp(&b.0.len()));
|
|
157
|
+
|
|
150
158
|
if results.len() > limit {
|
|
151
|
-
results.
|
|
159
|
+
results.select_nth_unstable_by(limit, cmp);
|
|
152
160
|
results.truncate(limit);
|
|
153
161
|
}
|
|
154
162
|
|
|
155
|
-
results.
|
|
163
|
+
results.sort_unstable_by(cmp);
|
|
156
164
|
|
|
157
|
-
return results;
|
|
165
|
+
return results.into_iter().map(|(item, _)| item).collect();
|
|
158
166
|
}
|
|
159
167
|
|
|
160
168
|
let mut scores: FxHashMap<*const str, usize> = FxHashMap::default();
|
|
@@ -232,26 +240,52 @@ impl<'a> QuickMatch<'a> {
|
|
|
232
240
|
}
|
|
233
241
|
}
|
|
234
242
|
|
|
235
|
-
let min_score = hit_count.div_ceil(2).max(
|
|
243
|
+
let min_score = hit_count.div_ceil(2).max(config.min_score());
|
|
236
244
|
let mut results: Vec<_> = scores
|
|
237
245
|
.into_iter()
|
|
238
246
|
.filter(|(_, s)| *s >= min_score)
|
|
239
|
-
.map(|(item, score)|
|
|
247
|
+
.map(|(item, score)| {
|
|
248
|
+
let s = unsafe { &*item as &str };
|
|
249
|
+
(s, score, prefix_score(s, &query_words, separators))
|
|
250
|
+
})
|
|
240
251
|
.collect();
|
|
241
252
|
|
|
253
|
+
let cmp = |a: &(&str, usize, u8), b: &(&str, usize, u8)| {
|
|
254
|
+
b.2.cmp(&a.2)
|
|
255
|
+
.then_with(|| b.1.cmp(&a.1))
|
|
256
|
+
.then_with(|| a.0.len().cmp(&b.0.len()))
|
|
257
|
+
};
|
|
258
|
+
|
|
242
259
|
if results.len() > limit {
|
|
243
|
-
results.select_nth_unstable_by(limit,
|
|
244
|
-
b.1.cmp(&a.1).then_with(|| a.0.len().cmp(&b.0.len()))
|
|
245
|
-
});
|
|
260
|
+
results.select_nth_unstable_by(limit, cmp);
|
|
246
261
|
results.truncate(limit);
|
|
247
262
|
}
|
|
248
263
|
|
|
249
|
-
results.sort_unstable_by(
|
|
264
|
+
results.sort_unstable_by(cmp);
|
|
250
265
|
|
|
251
266
|
results
|
|
252
267
|
.into_iter()
|
|
253
268
|
.take(limit)
|
|
254
|
-
.map(|(item, _)| item)
|
|
269
|
+
.map(|(item, _, _)| item)
|
|
255
270
|
.collect()
|
|
256
271
|
}
|
|
257
272
|
}
|
|
273
|
+
|
|
274
|
+
/// Score how well the leading words of `item` line up with `query_words`.
///
/// `item` is split on `separators` (empty pieces are ignored) and its words
/// are compared with the query words one by one, in order.
///
/// - 2: exact match (all words match, no extra words in item)
/// - 1: prefix match (item starts with query words but has more)
/// - 0: no prefix match
fn prefix_score(item: &str, query_words: &[&str], separators: &[char]) -> u8 {
    let mut remaining = item.split(separators).filter(|part| !part.is_empty());

    // `all` stops at the first query word that has no equal counterpart,
    // which also covers the item running out of words early.
    let every_word_matched = query_words
        .iter()
        .all(|&expected| remaining.next() == Some(expected));
    if !every_word_matched {
        return 0;
    }

    match remaining.next() {
        None => 2,
        Some(_) => 1,
    }
}
|
package/LICENSE
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 quickmatch
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
package/README.md
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
# quickmatch
|
|
2
|
-
|
|
3
|
-
**Lightning-fast fuzzy string matching for Rust.**
|
|
4
|
-
|
|
5
|
-
A high-performance string matching library optimized for interactive search experiences like autocomplete, command palettes, and search-as-you-type interfaces.
|
|
6
|
-
|
|
7
|
-
[crates.io](https://crates.io/crates/quickmatch)
|
|
8
|
-
[docs.rs](https://docs.rs/quickmatch)
|
|
9
|
-
|
|
10
|
-
## Features
|
|
11
|
-
|
|
12
|
-
- **Blazing fast** - Optimized for sub-millisecond search times
|
|
13
|
-
- **Hybrid matching** - Word-level matching with trigram-based fuzzy fallback
|
|
14
|
-
- **Memory efficient** - Zero-copy string storage with pointer-based indexing
|
|
15
|
-
- **Ranked results** - Intelligent scoring based on match quality
|
|
16
|
-
- **Zero external dependencies** - Only uses `rustc-hash` for fast hashing
|
|
17
|
-
|
|
18
|
-
## Installation
|
|
19
|
-
|
|
20
|
-
```bash
|
|
21
|
-
# rust
|
|
22
|
-
cargo add quickmatch
|
|
23
|
-
|
|
24
|
-
# js
|
|
25
|
-
npm install quickmatch-js
|
|
26
|
-
```
|