quickmatch-js 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/package.json +1 -1
- package/src/index.js +105 -13
package/README.md
CHANGED
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -1,33 +1,64 @@
|
|
|
1
|
-
const DEFAULT_SEPARATORS =
|
|
1
|
+
const DEFAULT_SEPARATORS = "_- ";
|
|
2
2
|
const DEFAULT_TRIGRAM_BUDGET = 6;
|
|
3
3
|
const DEFAULT_LIMIT = 100;
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Configuration for QuickMatch.
|
|
7
|
+
*/
|
|
5
8
|
export class QuickMatchConfig {
|
|
9
|
+
/** @type {string} Characters used to split items into words */
|
|
6
10
|
separators = DEFAULT_SEPARATORS;
|
|
11
|
+
|
|
12
|
+
/** @type {number} Maximum number of results to return */
|
|
7
13
|
limit = DEFAULT_LIMIT;
|
|
14
|
+
|
|
15
|
+
/** @type {number} Number of trigram lookups for fuzzy matching (0-20) */
|
|
8
16
|
trigramBudget = DEFAULT_TRIGRAM_BUDGET;
|
|
9
17
|
|
|
18
|
+
/**
|
|
19
|
+
* Set maximum number of results.
|
|
20
|
+
* @param {number} n
|
|
21
|
+
*/
|
|
10
22
|
withLimit(n) {
|
|
11
23
|
this.limit = Math.max(1, n);
|
|
12
24
|
return this;
|
|
13
25
|
}
|
|
14
26
|
|
|
27
|
+
/**
|
|
28
|
+
* Set trigram budget for fuzzy matching.
|
|
29
|
+
* Higher values find more typos but cost more.
|
|
30
|
+
* @param {number} n - Budget (0-20, default: 6)
|
|
31
|
+
*/
|
|
15
32
|
withTrigramBudget(n) {
|
|
16
33
|
this.trigramBudget = Math.max(0, Math.min(20, n));
|
|
17
34
|
return this;
|
|
18
35
|
}
|
|
19
36
|
|
|
37
|
+
/**
|
|
38
|
+
* Set word separator characters.
|
|
39
|
+
* @param {string} s - Separator characters (default: '_- ')
|
|
40
|
+
*/
|
|
20
41
|
withSeparators(s) {
|
|
21
42
|
this.separators = s;
|
|
22
43
|
return this;
|
|
23
44
|
}
|
|
24
45
|
}
|
|
25
46
|
|
|
47
|
+
/**
|
|
48
|
+
* Fast fuzzy string matcher using word and trigram indexing.
|
|
49
|
+
*/
|
|
26
50
|
export class QuickMatch {
|
|
51
|
+
/**
|
|
52
|
+
* Create a new matcher.
|
|
53
|
+
* @param {string[]} items - Items to index (should be lowercase)
|
|
54
|
+
* @param {QuickMatchConfig} [config] - Optional configuration
|
|
55
|
+
*/
|
|
27
56
|
constructor(items, config = new QuickMatchConfig()) {
|
|
28
57
|
this.config = config;
|
|
29
58
|
this.items = items;
|
|
59
|
+
/** @type {Map<string, number[]>} */
|
|
30
60
|
this.wordIndex = new Map();
|
|
61
|
+
/** @type {Map<string, number[]>} */
|
|
31
62
|
this.trigramIndex = new Map();
|
|
32
63
|
|
|
33
64
|
let maxWordLength = 0;
|
|
@@ -76,10 +107,19 @@ export class QuickMatch {
|
|
|
76
107
|
this.maxWordCount = maxWordCount + 2;
|
|
77
108
|
}
|
|
78
109
|
|
|
110
|
+
/**
|
|
111
|
+
* Find matching items. Returns items sorted by relevance.
|
|
112
|
+
* @param {string} query - Search query
|
|
113
|
+
*/
|
|
79
114
|
matches(query) {
|
|
80
115
|
return this.matchesWith(query, this.config);
|
|
81
116
|
}
|
|
82
117
|
|
|
118
|
+
/**
|
|
119
|
+
* Find matching items with custom config. Returns items sorted by relevance.
|
|
120
|
+
* @param {string} query - Search query
|
|
121
|
+
* @param {QuickMatchConfig} config - Configuration to use
|
|
122
|
+
*/
|
|
83
123
|
matchesWith(query, config) {
|
|
84
124
|
const { limit, trigramBudget, separators } = config;
|
|
85
125
|
|
|
@@ -89,7 +129,11 @@ export class QuickMatch {
|
|
|
89
129
|
return [];
|
|
90
130
|
}
|
|
91
131
|
|
|
92
|
-
const queryWords = parseWords(
|
|
132
|
+
const queryWords = parseWords(
|
|
133
|
+
normalizedQuery,
|
|
134
|
+
separators,
|
|
135
|
+
this.maxWordLength,
|
|
136
|
+
);
|
|
93
137
|
|
|
94
138
|
if (!queryWords.length || queryWords.length > this.maxWordCount) {
|
|
95
139
|
return [];
|
|
@@ -127,7 +171,7 @@ export class QuickMatch {
|
|
|
127
171
|
|
|
128
172
|
const minItemLength = Math.max(0, normalizedQuery.length - 3);
|
|
129
173
|
|
|
130
|
-
const
|
|
174
|
+
const hitCount = this.scoreByTrigrams({
|
|
131
175
|
unknownWords,
|
|
132
176
|
budget: trigramBudget,
|
|
133
177
|
scores,
|
|
@@ -135,25 +179,35 @@ export class QuickMatch {
|
|
|
135
179
|
minItemLength,
|
|
136
180
|
});
|
|
137
181
|
|
|
138
|
-
const minScoreToInclude = Math.max(1, Math.ceil(
|
|
182
|
+
const minScoreToInclude = Math.max(1, Math.ceil(hitCount / 2));
|
|
139
183
|
|
|
140
184
|
return this.rankedResults(scores, minScoreToInclude, limit);
|
|
141
185
|
}
|
|
142
186
|
|
|
143
|
-
|
|
187
|
+
/**
|
|
188
|
+
* @private
|
|
189
|
+
* @param {{unknownWords: string[], budget: number, scores: Map<number, number>, hasExactMatches: boolean, minItemLength: number}} args
|
|
190
|
+
*/
|
|
191
|
+
scoreByTrigrams({
|
|
192
|
+
unknownWords,
|
|
193
|
+
budget,
|
|
194
|
+
scores,
|
|
195
|
+
hasExactMatches,
|
|
196
|
+
minItemLength,
|
|
197
|
+
}) {
|
|
144
198
|
const visitedTrigrams = new Set();
|
|
145
199
|
let budgetRemaining = budget;
|
|
146
200
|
let hitCount = 0;
|
|
147
201
|
|
|
148
|
-
outer:
|
|
149
|
-
for (let round = 0; round < budget; round++) {
|
|
202
|
+
outer: for (let round = 0; round < budget; round++) {
|
|
150
203
|
for (const word of unknownWords) {
|
|
151
204
|
if (budgetRemaining <= 0) break outer;
|
|
152
205
|
|
|
153
206
|
const position = pickTrigramPosition(word.length, round);
|
|
154
207
|
if (position < 0) continue;
|
|
155
208
|
|
|
156
|
-
const trigram =
|
|
209
|
+
const trigram =
|
|
210
|
+
word[position] + word[position + 1] + word[position + 2];
|
|
157
211
|
|
|
158
212
|
if (visitedTrigrams.has(trigram)) continue;
|
|
159
213
|
visitedTrigrams.add(trigram);
|
|
@@ -181,13 +235,24 @@ export class QuickMatch {
|
|
|
181
235
|
return hitCount;
|
|
182
236
|
}
|
|
183
237
|
|
|
238
|
+
/**
|
|
239
|
+
* @private
|
|
240
|
+
* @param {number[]} indices
|
|
241
|
+
* @param {number} limit
|
|
242
|
+
*/
|
|
184
243
|
sortedByLength(indices, limit) {
|
|
185
244
|
const { items } = this;
|
|
186
245
|
indices.sort((a, b) => items[a].length - items[b].length);
|
|
187
246
|
if (indices.length > limit) indices.length = limit;
|
|
188
|
-
return indices.map(i => items[i]);
|
|
247
|
+
return indices.map((i) => items[i]);
|
|
189
248
|
}
|
|
190
249
|
|
|
250
|
+
/**
|
|
251
|
+
* @private
|
|
252
|
+
* @param {Map<number, number>} scores
|
|
253
|
+
* @param {number} minScore
|
|
254
|
+
* @param {number} limit
|
|
255
|
+
*/
|
|
191
256
|
rankedResults(scores, minScore, limit) {
|
|
192
257
|
const { items } = this;
|
|
193
258
|
const results = [];
|
|
@@ -205,12 +270,13 @@ export class QuickMatch {
|
|
|
205
270
|
|
|
206
271
|
if (results.length > limit) results.length = limit;
|
|
207
272
|
|
|
208
|
-
return results.map(r => items[r.index]);
|
|
273
|
+
return results.map((r) => items[r.index]);
|
|
209
274
|
}
|
|
210
275
|
}
|
|
211
276
|
|
|
277
|
+
/** @param {string} query */
|
|
212
278
|
function normalizeQuery(query) {
|
|
213
|
-
let result =
|
|
279
|
+
let result = "";
|
|
214
280
|
let start = 0;
|
|
215
281
|
let end = query.length;
|
|
216
282
|
|
|
@@ -220,13 +286,20 @@ function normalizeQuery(query) {
|
|
|
220
286
|
for (let i = start; i < end; i++) {
|
|
221
287
|
const code = query.charCodeAt(i);
|
|
222
288
|
if (code >= 128) continue;
|
|
223
|
-
result +=
|
|
289
|
+
result +=
|
|
290
|
+
code >= 65 && code <= 90 ? String.fromCharCode(code + 32) : query[i];
|
|
224
291
|
}
|
|
225
292
|
|
|
226
293
|
return result;
|
|
227
294
|
}
|
|
228
295
|
|
|
296
|
+
/**
|
|
297
|
+
* @param {string} text
|
|
298
|
+
* @param {string} separators
|
|
299
|
+
* @param {number} maxLength
|
|
300
|
+
*/
|
|
229
301
|
function parseWords(text, separators, maxLength) {
|
|
302
|
+
/** @type {string[]} */
|
|
230
303
|
const words = [];
|
|
231
304
|
let start = 0;
|
|
232
305
|
|
|
@@ -247,6 +320,11 @@ function parseWords(text, separators, maxLength) {
|
|
|
247
320
|
return words;
|
|
248
321
|
}
|
|
249
322
|
|
|
323
|
+
/**
|
|
324
|
+
* @param {Map<string, number[]>} index
|
|
325
|
+
* @param {string} key
|
|
326
|
+
* @param {number} value
|
|
327
|
+
*/
|
|
250
328
|
function addToIndex(index, key, value) {
|
|
251
329
|
const existing = index.get(key);
|
|
252
330
|
if (existing) {
|
|
@@ -256,6 +334,11 @@ function addToIndex(index, key, value) {
|
|
|
256
334
|
}
|
|
257
335
|
}
|
|
258
336
|
|
|
337
|
+
/**
|
|
338
|
+
* @param {Map<string, number[]>} index
|
|
339
|
+
* @param {string} word
|
|
340
|
+
* @param {number} itemIndex
|
|
341
|
+
*/
|
|
259
342
|
function addTrigramsToIndex(index, word, itemIndex) {
|
|
260
343
|
if (word.length < 3) return;
|
|
261
344
|
|
|
@@ -271,6 +354,7 @@ function addTrigramsToIndex(index, word, itemIndex) {
|
|
|
271
354
|
}
|
|
272
355
|
}
|
|
273
356
|
|
|
357
|
+
/** @param {number[][]} arrays */
|
|
274
358
|
function intersectAll(arrays) {
|
|
275
359
|
if (!arrays.length) return [];
|
|
276
360
|
|
|
@@ -298,6 +382,10 @@ function intersectAll(arrays) {
|
|
|
298
382
|
return result;
|
|
299
383
|
}
|
|
300
384
|
|
|
385
|
+
/**
|
|
386
|
+
* @param {number[]} sortedArray
|
|
387
|
+
* @param {number} value
|
|
388
|
+
*/
|
|
301
389
|
function binarySearch(sortedArray, value) {
|
|
302
390
|
let low = 0;
|
|
303
391
|
let high = sortedArray.length - 1;
|
|
@@ -314,6 +402,10 @@ function binarySearch(sortedArray, value) {
|
|
|
314
402
|
return false;
|
|
315
403
|
}
|
|
316
404
|
|
|
405
|
+
/**
|
|
406
|
+
* @param {number} wordLength
|
|
407
|
+
* @param {number} round
|
|
408
|
+
*/
|
|
317
409
|
function pickTrigramPosition(wordLength, round) {
|
|
318
410
|
const maxPosition = wordLength - 3;
|
|
319
411
|
if (maxPosition < 0) return -1;
|
|
@@ -325,7 +417,7 @@ function pickTrigramPosition(wordLength, round) {
|
|
|
325
417
|
|
|
326
418
|
const middle = maxPosition >> 1;
|
|
327
419
|
const offset = (round - 2) >> 1;
|
|
328
|
-
const position =
|
|
420
|
+
const position = round & 1 ? Math.max(0, middle - offset) : middle + offset;
|
|
329
421
|
|
|
330
422
|
if (position === 0 || position >= maxPosition || position === middle) {
|
|
331
423
|
return -1;
|