quickmatch-js 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +22 -0
- package/package.json +25 -0
- package/src/config.rs +68 -0
- package/src/index.js +335 -0
- package/src/lib.rs +257 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 quickmatch
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# quickmatch
|
|
2
|
+
|
|
3
|
+
**Lightning-fast fuzzy string matching for Rust.**
|
|
4
|
+
|
|
5
|
+
A high-performance string matching library optimized for interactive search experiences like autocomplete, command palettes, and search-as-you-type interfaces.
|
|
6
|
+
|
|
7
|
+
[crates.io](https://crates.io/crates/quickmatch)
|
|
8
|
+
[docs.rs](https://docs.rs/quickmatch)
|
|
9
|
+
|
|
10
|
+
## Features
|
|
11
|
+
|
|
12
|
+
- **Blazing fast** - Optimized for sub-millisecond search times
|
|
13
|
+
- **Hybrid matching** - Word-level matching with trigram-based fuzzy fallback
|
|
14
|
+
- **Memory efficient** - Zero-copy string storage with pointer-based indexing
|
|
15
|
+
- **Ranked results** - Intelligent scoring based on match quality
|
|
16
|
+
- **Minimal dependencies** - Only uses `rustc-hash` for fast hashing
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
cargo add quickmatch
|
|
22
|
+
```
|
package/package.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "quickmatch-js",
|
|
3
|
+
"version": "0.2.1",
|
|
4
|
+
"description": "Lightning-fast fuzzy string matching",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "src/index.js",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./src/index.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"src"
|
|
12
|
+
],
|
|
13
|
+
"keywords": [
|
|
14
|
+
"fuzzy",
|
|
15
|
+
"search",
|
|
16
|
+
"autocomplete",
|
|
17
|
+
"trigram",
|
|
18
|
+
"matching"
|
|
19
|
+
],
|
|
20
|
+
"license": "MIT",
|
|
21
|
+
"repository": {
|
|
22
|
+
"type": "git",
|
|
23
|
+
"url": "git+https://github.com/nym21/quickmatch.git"
|
|
24
|
+
}
|
|
25
|
+
}
|
package/src/config.rs
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
const DEFAULT_SEPARATORS: &[char] = &['_', '-', ' '];
const DEFAULT_TRIGRAM_BUDGET: usize = 6;
const DEFAULT_LIMIT: usize = 100;
/// Upper bound enforced by [`QuickMatchConfig::with_trigram_budget`].
const MAX_TRIGRAM_BUDGET: usize = 20;

/// Tuning knobs for a matcher: word separators, result limit and fuzzy budget.
///
/// Build one with [`QuickMatchConfig::new`] (or `Default`) and refine it with
/// the chainable `with_*` methods.
#[derive(Debug, Clone)]
pub struct QuickMatchConfig {
    /// Separators used to split words.
    ///
    /// Default: ['_', '-', ' ']
    separators: &'static [char],
    /// Maximum number of results to return.
    ///
    /// Default: 100
    /// - Min: 1
    /// - Max: No hard limit (but large values may impact performance)
    limit: usize,
    /// Budget of trigrams to process from unknown words.
    /// This budget is distributed fairly across all unknown words.
    ///
    /// Default: 6 (recommended: 3-9)
    /// - 0: Disable trigram matching (only exact word matches)
    /// - Low (3-6): Faster, less accurate fuzzy matching
    /// - High (9-15): Slower, more accurate fuzzy matching
    /// - Max: 20
    trigram_budget: usize,
}

impl Default for QuickMatchConfig {
    /// Returns the configuration built from the `DEFAULT_*` constants.
    fn default() -> Self {
        Self {
            separators: DEFAULT_SEPARATORS,
            limit: DEFAULT_LIMIT,
            trigram_budget: DEFAULT_TRIGRAM_BUDGET,
        }
    }
}

impl QuickMatchConfig {
    /// Creates a configuration with the default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the maximum number of results; values below 1 are raised to 1.
    pub fn with_limit(mut self, limit: usize) -> Self {
        self.limit = limit.max(1);
        self
    }

    /// Sets the trigram budget, capped at 20.
    ///
    /// `usize` cannot be negative, so only the upper bound needs enforcing.
    pub fn with_trigram_budget(mut self, trigram_budget: usize) -> Self {
        self.trigram_budget = trigram_budget.min(MAX_TRIGRAM_BUDGET);
        self
    }

    /// Replaces the set of characters that split items and queries into words.
    pub fn with_separators(mut self, separators: &'static [char]) -> Self {
        self.separators = separators;
        self
    }

    /// Maximum number of results to return.
    pub fn limit(&self) -> usize {
        self.limit
    }

    /// Number of trigrams that may be looked up for unknown query words.
    pub fn trigram_budget(&self) -> usize {
        self.trigram_budget
    }

    /// Characters used to split words.
    ///
    /// The slice is stored as `&'static`, so it is returned with that lifetime
    /// instead of being tied to the borrow of `self`.
    pub fn separators(&self) -> &'static [char] {
        self.separators
    }
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
const DEFAULT_SEPARATORS = '_- ';
const DEFAULT_TRIGRAM_BUDGET = 6;
const DEFAULT_LIMIT = 100;
const MAX_TRIGRAM_BUDGET = 20;

/**
 * Chainable configuration for QuickMatch: word separators, result limit and
 * the trigram budget used for fuzzy matching.
 */
export class QuickMatchConfig {
  separators = DEFAULT_SEPARATORS;
  limit = DEFAULT_LIMIT;
  trigramBudget = DEFAULT_TRIGRAM_BUDGET;

  /**
   * Sets the maximum number of results.
   *
   * The value is floored because it is later assigned to an array `length`,
   * which throws a RangeError for fractional numbers. NaN falls back to the
   * default instead of silently disabling the limit. Values below 1 become 1.
   *
   * @param {number} n requested limit
   * @returns {this}
   */
  withLimit(n) {
    const whole = Math.floor(n);
    this.limit = Number.isNaN(whole) ? DEFAULT_LIMIT : Math.max(1, whole);
    return this;
  }

  /**
   * Sets the trigram budget, floored and clamped to 0..20.
   * NaN falls back to the default budget.
   *
   * @param {number} n requested budget
   * @returns {this}
   */
  withTrigramBudget(n) {
    const whole = Math.floor(n);
    this.trigramBudget = Number.isNaN(whole)
      ? DEFAULT_TRIGRAM_BUDGET
      : Math.max(0, Math.min(MAX_TRIGRAM_BUDGET, whole));
    return this;
  }

  /**
   * Replaces the separator set; the value only needs an `includes` method
   * that accepts a single character (a string or an array of characters).
   *
   * @param {string|string[]} s separators
   * @returns {this}
   */
  withSeparators(s) {
    this.separators = s;
    return this;
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * In-memory matcher over a fixed list of strings.
 *
 * The constructor builds a word index and a trigram index; `matches` looks a
 * query up in the word index first and falls back to trigram scoring for
 * query words that are not indexed.
 */
export class QuickMatch {
  /**
   * @param {string[]} items strings to index (used as-is, no normalization)
   * @param {QuickMatchConfig} config separators, limit and trigram budget
   */
  constructor(items, config = new QuickMatchConfig()) {
    this.config = config;
    this.items = items;
    this.wordIndex = new Map();
    this.trigramIndex = new Map();

    const separators = config.separators;
    let longestWord = 0;
    let longestItem = 0;
    let mostWords = 0;

    for (let id = 0; id < items.length; id += 1) {
      const text = items[id];

      if (text.length > longestItem) {
        longestItem = text.length;
      }

      let wordsInItem = 0;
      let from = 0;

      // Walk one position past the end so the final word is flushed too.
      for (let at = 0; at <= text.length; at += 1) {
        const atBoundary = at === text.length || separators.includes(text[at]);
        if (!atBoundary) continue;

        if (at > from) {
          const word = text.slice(from, at);
          wordsInItem += 1;

          if (word.length > longestWord) {
            longestWord = word.length;
          }

          addToIndex(this.wordIndex, word, id);
          addTrigramsToIndex(this.trigramIndex, word, id);
        }

        from = at + 1;
      }

      if (wordsInItem > mostWords) {
        mostWords = wordsInItem;
      }
    }

    // Query limits are the observed maxima plus a fixed slack.
    this.maxWordLength = longestWord + 4;
    this.maxQueryLength = longestItem + 6;
    this.maxWordCount = mostWords + 2;
  }

  /**
   * Matches `query` using the configuration given to the constructor.
   *
   * @param {string} query
   * @returns {string[]}
   */
  matches(query) {
    return this.matchesWith(query, this.config);
  }

  /**
   * Matches `query` using an explicit configuration.
   *
   * Returns an empty array when the normalized query is empty, longer than
   * `maxQueryLength`, or splits into zero or more than `maxWordCount` words.
   *
   * @param {string} query
   * @param {QuickMatchConfig} config
   * @returns {string[]} matching items, best first
   */
  matchesWith(query, config) {
    const { limit, trigramBudget, separators } = config;

    const normalized = normalizeQuery(query);
    if (normalized.length === 0 || normalized.length > this.maxQueryLength) {
      return [];
    }

    const words = parseWords(normalized, separators, this.maxWordLength);
    if (words.length === 0 || words.length > this.maxWordCount) {
      return [];
    }

    // Split the query words into indexed ones and candidates for fuzzy lookup.
    const postingLists = [];
    const fuzzyWords = [];
    for (const word of words) {
      const postings = this.wordIndex.get(word);
      if (postings) {
        postingLists.push(postings);
      } else if (word.length >= 3 && fuzzyWords.length < trigramBudget) {
        fuzzyWords.push(word);
      }
    }

    const exact = intersectAll(postingLists);
    const hasExact = exact.length > 0;
    const wantsFuzzy = fuzzyWords.length > 0 && trigramBudget > 0;

    if (!wantsFuzzy) {
      return hasExact ? this.sortedByLength(exact, limit) : [];
    }

    // Exact matches start at score 1; `exact` is empty when there are none.
    const scores = new Map(exact.map((itemIndex) => [itemIndex, 1]));

    const hits = this.scoreByTrigrams({
      unknownWords: fuzzyWords,
      budget: trigramBudget,
      scores,
      hasExactMatches: hasExact,
      minItemLength: Math.max(0, normalized.length - 3),
    });

    const threshold = Math.max(1, Math.ceil(hits / 2));
    return this.rankedResults(scores, threshold, limit);
  }

  /**
   * Adds one point to `scores` per indexed trigram an item shares with the
   * unknown words, visiting the words round-robin until `budget` distinct
   * trigrams have been looked up.
   *
   * With exact matches present only items already in `scores` are bumped;
   * otherwise any item at least `minItemLength` long can enter.
   *
   * @returns {number} how many looked-up trigrams exist in the index
   */
  scoreByTrigrams({ unknownWords, budget, scores, hasExactMatches, minItemLength }) {
    const seen = new Set();
    let remaining = budget;
    let hits = 0;

    for (let round = 0; round < budget; round += 1) {
      for (const word of unknownWords) {
        if (remaining <= 0) return hits;

        const at = pickTrigramPosition(word.length, round);
        if (at < 0) continue;

        const trigram = word.slice(at, at + 3);
        if (seen.has(trigram)) continue;
        seen.add(trigram);

        // The budget is spent on every new trigram, indexed or not.
        remaining -= 1;

        const postings = this.trigramIndex.get(trigram);
        if (!postings) continue;

        hits += 1;

        for (const itemIndex of postings) {
          const previous = scores.get(itemIndex);
          if (hasExactMatches) {
            if (previous !== undefined) {
              scores.set(itemIndex, previous + 1);
            }
          } else if (this.items[itemIndex].length >= minItemLength) {
            scores.set(itemIndex, (previous || 0) + 1);
          }
        }
      }
    }

    return hits;
  }

  /**
   * Sorts `indices` in place by item length (shortest first), truncates the
   * array to `limit` and returns the corresponding items.
   *
   * @param {number[]} indices item indices; mutated by this call
   * @param {number} limit
   * @returns {string[]}
   */
  sortedByLength(indices, limit) {
    const catalogue = this.items;
    indices.sort((left, right) => catalogue[left].length - catalogue[right].length);
    if (indices.length > limit) {
      indices.length = limit;
    }
    return indices.map((itemIndex) => catalogue[itemIndex]);
  }

  /**
   * Returns the items scoring at least `minScore`, highest score first and
   * shorter items first among equal scores, capped at `limit`.
   *
   * @param {Map<number, number>} scores item index -> score
   * @param {number} minScore
   * @param {number} limit
   * @returns {string[]}
   */
  rankedResults(scores, minScore, limit) {
    const catalogue = this.items;
    const ranked = [];

    scores.forEach((score, index) => {
      if (score >= minScore) {
        ranked.push({ index, score });
      }
    });

    ranked.sort((left, right) =>
      right.score !== left.score
        ? right.score - left.score
        : catalogue[left.index].length - catalogue[right.index].length
    );

    if (ranked.length > limit) {
      ranked.length = limit;
    }

    return ranked.map((entry) => catalogue[entry.index]);
  }
}
|
|
211
|
+
|
|
212
|
+
/**
 * Normalizes a raw query: strips leading and trailing characters with a char
 * code of 32 or less, drops every UTF-16 code unit of 128 or above, and
 * lowercases ASCII `A`-`Z`.
 *
 * @param {string} query
 * @returns {string}
 */
function normalizeQuery(query) {
  let head = 0;
  let tail = query.length;

  while (head < tail && query.charCodeAt(head) <= 32) head += 1;
  while (tail > head && query.charCodeAt(tail - 1) <= 32) tail -= 1;

  const kept = [];
  for (let at = head; at < tail; at += 1) {
    const code = query.charCodeAt(at);
    if (code < 128) {
      const isUpper = code >= 65 && code <= 90;
      kept.push(isUpper ? String.fromCharCode(code + 32) : query[at]);
    }
  }

  return kept.join('');
}
|
|
228
|
+
|
|
229
|
+
/**
 * Splits `text` on any separator character and returns the distinct,
 * non-empty words no longer than `maxLength`, in order of first appearance.
 *
 * @param {string} text
 * @param {string|string[]} separators anything with `includes(char)`
 * @param {number} maxLength longest word to keep
 * @returns {string[]}
 */
function parseWords(text, separators, maxLength) {
  const unique = [];
  let wordStart = 0;

  // Closes the word ending just before `end` and starts the next one after it.
  const flush = (end) => {
    if (end > wordStart) {
      const candidate = text.slice(wordStart, end);
      if (candidate.length <= maxLength && unique.indexOf(candidate) === -1) {
        unique.push(candidate);
      }
    }
    wordStart = end + 1;
  };

  for (let at = 0; at < text.length; at += 1) {
    if (separators.includes(text[at])) {
      flush(at);
    }
  }
  flush(text.length);

  return unique;
}
|
|
249
|
+
|
|
250
|
+
/**
 * Appends `value` to the posting list stored under `key`, creating the list
 * on first use.
 *
 * A value equal to the list's last entry is skipped. Callers index items in
 * ascending order, so an item that contains the same word twice (for example
 * `foo_foo`) would otherwise be listed twice and come back as a duplicate
 * result. This mirrors the guard in `addTrigramsToIndex`.
 *
 * @param {Map<string, number[]>} index
 * @param {string} key
 * @param {number} value item index
 */
function addToIndex(index, key, value) {
  const existing = index.get(key);

  if (!existing) {
    index.set(key, [value]);
  } else if (existing[existing.length - 1] !== value) {
    existing.push(value);
  }
}
|
|
258
|
+
|
|
259
|
+
/**
 * Records `itemIndex` under every 3-character window of `word`.
 * Words shorter than three characters contribute nothing, and an item is not
 * appended again when it is already the last entry of a trigram's list.
 *
 * @param {Map<string, number[]>} index
 * @param {string} word
 * @param {number} itemIndex
 */
function addTrigramsToIndex(index, word, itemIndex) {
  const lastStart = word.length - 3;

  for (let start = 0; start <= lastStart; start += 1) {
    const trigram = word.slice(start, start + 3);
    const postings = index.get(trigram);

    if (!postings) {
      index.set(trigram, [itemIndex]);
      continue;
    }

    const alreadyListed = postings[postings.length - 1] === itemIndex;
    if (!alreadyListed) {
      postings.push(itemIndex);
    }
  }
}
|
|
273
|
+
|
|
274
|
+
/**
 * Intersects several ascending-sorted arrays of item indices.
 *
 * Starts from a copy of the shortest array (the first one on ties) and keeps
 * only the values found in every other array, stopping early once nothing is
 * left. The input arrays are never modified.
 *
 * @param {number[][]} arrays
 * @returns {number[]} a new array; empty when `arrays` is empty
 */
function intersectAll(arrays) {
  if (arrays.length === 0) return [];

  let seedAt = 0;
  arrays.forEach((candidate, at) => {
    if (candidate.length < arrays[seedAt].length) {
      seedAt = at;
    }
  });

  let common = arrays[seedAt].slice();

  for (let at = 0; at < arrays.length && common.length > 0; at += 1) {
    if (at !== seedAt) {
      const other = arrays[at];
      common = common.filter((value) => binarySearch(other, value));
    }
  }

  return common;
}

/**
 * Reports whether `value` occurs in `sortedArray` (ascending order expected).
 *
 * @param {number[]} sortedArray
 * @param {number} value
 * @returns {boolean}
 */
function binarySearch(sortedArray, value) {
  let first = 0;
  let last = sortedArray.length - 1;

  while (first <= last) {
    const middle = (first + last) >> 1;
    const probe = sortedArray[middle];

    if (probe === value) {
      return true;
    }

    if (probe < value) {
      first = middle + 1;
    } else {
      last = middle - 1;
    }
  }

  return false;
}
|
|
316
|
+
|
|
317
|
+
/**
 * Chooses the start offset of the trigram to sample from a word in a given
 * round, or -1 when the round has nothing new to offer for that length.
 *
 * Round 0 samples the start, round 1 the end, round 2 the middle; later
 * rounds alternate around the middle (odd rounds move left, even rounds move
 * right), skipping the start, the end and the middle itself.
 *
 * @param {number} wordLength
 * @param {number} round zero-based sampling round
 * @returns {number} start offset, or -1
 */
function pickTrigramPosition(wordLength, round) {
  const last = wordLength - 3;
  if (last < 0) return -1;

  switch (round) {
    case 0:
      return 0;
    case 1:
      if (last > 0) return last;
      break;
    case 2:
      if (last > 1) return last >> 1;
      break;
    default:
      break;
  }

  // Words with at most three possible offsets are exhausted by rounds 0-2.
  if (last <= 2) return -1;

  const center = last >> 1;
  const spread = (round - 2) >> 1;
  const movesLeft = (round & 1) !== 0;
  const candidate = movesLeft ? Math.max(0, center - spread) : center + spread;

  const rejected = candidate === 0 || candidate >= last || candidate === center;
  return rejected ? -1 : candidate;
}
|
package/src/lib.rs
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
use std::{marker::PhantomData, ptr};
|
|
2
|
+
|
|
3
|
+
use rustc_hash::{FxHashMap, FxHashSet};
|
|
4
|
+
|
|
5
|
+
mod config;
|
|
6
|
+
|
|
7
|
+
pub use config::*;
|
|
8
|
+
|
|
9
|
+
pub struct QuickMatch<'a> {
|
|
10
|
+
config: QuickMatchConfig,
|
|
11
|
+
max_word_count: usize,
|
|
12
|
+
max_word_len: usize,
|
|
13
|
+
max_query_len: usize,
|
|
14
|
+
word_index: FxHashMap<String, FxHashSet<*const str>>,
|
|
15
|
+
trigram_index: FxHashMap<[char; 3], FxHashSet<*const str>>,
|
|
16
|
+
_phantom: PhantomData<&'a str>,
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
unsafe impl<'a> Send for QuickMatch<'a> {}
|
|
20
|
+
unsafe impl<'a> Sync for QuickMatch<'a> {}
|
|
21
|
+
|
|
22
|
+
impl<'a> QuickMatch<'a> {
|
|
23
|
+
/// Expect the items to be pre-formatted (lowercase)
|
|
24
|
+
pub fn new(items: &[&'a str]) -> Self {
|
|
25
|
+
Self::new_with(items, QuickMatchConfig::default())
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/// Expect the items to be pre-formatted (lowercase)
|
|
29
|
+
pub fn new_with(items: &[&'a str], config: QuickMatchConfig) -> Self {
|
|
30
|
+
let mut word_index: FxHashMap<String, FxHashSet<*const str>> = FxHashMap::default();
|
|
31
|
+
let mut trigram_index: FxHashMap<[char; 3], FxHashSet<*const str>> = FxHashMap::default();
|
|
32
|
+
let mut max_word_len = 0;
|
|
33
|
+
let mut max_query_len = 0;
|
|
34
|
+
let mut max_words = 0;
|
|
35
|
+
let separators = config.separators();
|
|
36
|
+
|
|
37
|
+
for &item in items {
|
|
38
|
+
max_query_len = max_query_len.max(item.len());
|
|
39
|
+
let mut word_count = 0;
|
|
40
|
+
for word in item.split(separators) {
|
|
41
|
+
word_count += 1;
|
|
42
|
+
if word.is_empty() {
|
|
43
|
+
continue;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
max_word_len = max_word_len.max(item.len());
|
|
47
|
+
|
|
48
|
+
word_index.entry(word.to_string()).or_default().insert(item);
|
|
49
|
+
|
|
50
|
+
if word.len() >= 3 {
|
|
51
|
+
let chars = word.chars().collect::<Vec<_>>();
|
|
52
|
+
for window in chars.windows(3) {
|
|
53
|
+
trigram_index
|
|
54
|
+
.entry(unsafe { ptr::read(window.as_ptr() as *const [char; 3]) })
|
|
55
|
+
.or_default()
|
|
56
|
+
.insert(item);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
max_words = max_words.max(word_count);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
Self {
|
|
64
|
+
max_query_len: max_query_len + 6,
|
|
65
|
+
max_word_len: max_word_len + 4,
|
|
66
|
+
max_word_count: max_word_len + 2,
|
|
67
|
+
word_index,
|
|
68
|
+
trigram_index,
|
|
69
|
+
config,
|
|
70
|
+
_phantom: PhantomData,
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
///
|
|
75
|
+
/// `limit`: max number of returned matches
|
|
76
|
+
///
|
|
77
|
+
/// `max_trigrams`: max number of processed trigrams in unknown words (0-10 recommended)
|
|
78
|
+
///
|
|
79
|
+
pub fn matches(&self, query: &str) -> Vec<&'a str> {
|
|
80
|
+
self.matches_with(query, &self.config)
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
///
|
|
84
|
+
/// `limit`: max number of returned matches
|
|
85
|
+
///
|
|
86
|
+
/// `max_trigrams`: max number of processed trigrams in unknown words (0-10 recommended)
|
|
87
|
+
///
|
|
88
|
+
pub fn matches_with(&self, query: &str, config: &QuickMatchConfig) -> Vec<&'a str> {
|
|
89
|
+
let limit = config.limit();
|
|
90
|
+
let trigram_budget = config.trigram_budget();
|
|
91
|
+
let query_len = query.len();
|
|
92
|
+
|
|
93
|
+
if query.is_empty() || query_len > self.max_query_len {
|
|
94
|
+
return vec![];
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
let query = query
|
|
98
|
+
.trim()
|
|
99
|
+
.chars()
|
|
100
|
+
.filter(|c| c.is_ascii())
|
|
101
|
+
.collect::<String>()
|
|
102
|
+
.to_ascii_lowercase();
|
|
103
|
+
|
|
104
|
+
let words = query
|
|
105
|
+
.split(config.separators())
|
|
106
|
+
.filter(|w| !w.is_empty() && w.len() <= self.max_word_len)
|
|
107
|
+
.collect::<FxHashSet<_>>();
|
|
108
|
+
|
|
109
|
+
if words.is_empty() || words.len() > self.max_word_count {
|
|
110
|
+
return vec![];
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
let min_len = query_len.saturating_sub(3);
|
|
114
|
+
|
|
115
|
+
let mut pool: Option<FxHashSet<*const str>> = None;
|
|
116
|
+
let mut unknown_words = Vec::new();
|
|
117
|
+
|
|
118
|
+
let mut words_to_intersect = vec![];
|
|
119
|
+
for word in words {
|
|
120
|
+
if let Some(items) = self.word_index.get(word) {
|
|
121
|
+
words_to_intersect.push(items)
|
|
122
|
+
} else if word.len() >= 3 && unknown_words.len() < trigram_budget {
|
|
123
|
+
unknown_words.push(word.chars().collect::<Vec<_>>())
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
if !words_to_intersect.is_empty() {
|
|
128
|
+
words_to_intersect.sort_unstable_by_key(|set| -(set.len() as i64));
|
|
129
|
+
|
|
130
|
+
let mut intersect = words_to_intersect.pop().cloned().unwrap();
|
|
131
|
+
|
|
132
|
+
for other_set in words_to_intersect.iter().rev() {
|
|
133
|
+
intersect.retain(|ptr| other_set.contains(ptr));
|
|
134
|
+
if intersect.is_empty() {
|
|
135
|
+
break;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
pool = Some(intersect);
|
|
140
|
+
}
|
|
141
|
+
let some_pool = pool.is_some();
|
|
142
|
+
|
|
143
|
+
if unknown_words.is_empty() || trigram_budget == 0 {
|
|
144
|
+
let mut results: Vec<_> = pool
|
|
145
|
+
.unwrap_or_default()
|
|
146
|
+
.into_iter()
|
|
147
|
+
.map(|item| unsafe { &*item as &str })
|
|
148
|
+
.collect();
|
|
149
|
+
|
|
150
|
+
if results.len() > limit {
|
|
151
|
+
results.select_nth_unstable_by_key(limit, |item| item.len());
|
|
152
|
+
results.truncate(limit);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
results.sort_unstable_by_key(|item| item.len());
|
|
156
|
+
|
|
157
|
+
return results;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
let mut scores: FxHashMap<*const str, usize> = FxHashMap::default();
|
|
161
|
+
scores.reserve(256);
|
|
162
|
+
if let Some(pool) = &pool {
|
|
163
|
+
for &item in pool {
|
|
164
|
+
scores.insert(item, 1);
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
let mut budget = trigram_budget;
|
|
169
|
+
let mut hit_count: usize = 0;
|
|
170
|
+
let mut visited: FxHashSet<[char; 3]> = FxHashSet::default();
|
|
171
|
+
|
|
172
|
+
'outer: for round in 0..trigram_budget {
|
|
173
|
+
for chars in &unknown_words {
|
|
174
|
+
if budget == 0 {
|
|
175
|
+
break 'outer;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
let len = chars.len();
|
|
179
|
+
let max_pos = len - 3;
|
|
180
|
+
|
|
181
|
+
let pos = if round == 0 {
|
|
182
|
+
0
|
|
183
|
+
} else if round == 1 && max_pos > 0 {
|
|
184
|
+
max_pos
|
|
185
|
+
} else if round == 2 && max_pos > 1 {
|
|
186
|
+
max_pos / 2
|
|
187
|
+
} else if max_pos > 2 {
|
|
188
|
+
let mid = max_pos / 2;
|
|
189
|
+
let offset = (round - 2) >> 1;
|
|
190
|
+
let p = if (round & 1) == 1 {
|
|
191
|
+
mid.saturating_sub(offset)
|
|
192
|
+
} else {
|
|
193
|
+
mid + offset
|
|
194
|
+
};
|
|
195
|
+
|
|
196
|
+
if p == 0 || p >= max_pos || p == mid {
|
|
197
|
+
continue;
|
|
198
|
+
}
|
|
199
|
+
p
|
|
200
|
+
} else {
|
|
201
|
+
continue;
|
|
202
|
+
};
|
|
203
|
+
|
|
204
|
+
let trigram = [chars[pos], chars[pos + 1], chars[pos + 2]];
|
|
205
|
+
|
|
206
|
+
if !visited.insert(trigram) {
|
|
207
|
+
continue;
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
budget -= 1;
|
|
211
|
+
|
|
212
|
+
let Some(items) = self.trigram_index.get(&trigram) else {
|
|
213
|
+
continue;
|
|
214
|
+
};
|
|
215
|
+
|
|
216
|
+
hit_count += 1;
|
|
217
|
+
|
|
218
|
+
if some_pool {
|
|
219
|
+
for &item in items {
|
|
220
|
+
if let Some(score) = scores.get_mut(&item) {
|
|
221
|
+
*score += 1;
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
} else {
|
|
225
|
+
for &item in items {
|
|
226
|
+
let len = unsafe { &*item }.len();
|
|
227
|
+
if len >= min_len {
|
|
228
|
+
*scores.entry(item).or_default() += 1;
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
let min_score = hit_count.div_ceil(2).max(1);
|
|
236
|
+
let mut results: Vec<_> = scores
|
|
237
|
+
.into_iter()
|
|
238
|
+
.filter(|(_, s)| *s >= min_score)
|
|
239
|
+
.map(|(item, score)| (unsafe { &*item as &str }, score))
|
|
240
|
+
.collect();
|
|
241
|
+
|
|
242
|
+
if results.len() > limit {
|
|
243
|
+
results.select_nth_unstable_by(limit, |a, b| {
|
|
244
|
+
b.1.cmp(&a.1).then_with(|| a.0.len().cmp(&b.0.len()))
|
|
245
|
+
});
|
|
246
|
+
results.truncate(limit);
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
results.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.len().cmp(&b.0.len())));
|
|
250
|
+
|
|
251
|
+
results
|
|
252
|
+
.into_iter()
|
|
253
|
+
.take(limit)
|
|
254
|
+
.map(|(item, _)| item)
|
|
255
|
+
.collect()
|
|
256
|
+
}
|
|
257
|
+
}
|