quickmatch-js 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "quickmatch-js",
3
- "version": "0.3.1",
3
+ "version": "0.3.2",
4
4
  "description": "Lightning-fast fuzzy string matching",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/src/config.rs CHANGED
@@ -1,11 +1,12 @@
1
- const DEFAULT_SEPARATORS: &[char] = &['_', '-', ' '];
1
+ const DEFAULT_SEPARATORS: &[char] = &['_', '-', ' ', ':', '/'];
2
2
  const DEFAULT_TRIGRAM_BUDGET: usize = 6;
3
3
  const DEFAULT_LIMIT: usize = 100;
4
+ const DEFAULT_MIN_SCORE: usize = 2;
4
5
 
5
6
  pub struct QuickMatchConfig {
6
7
  /// Separators used to split words.
7
8
  ///
8
- /// Default: ['_', '-', ' ']
9
+ /// Default: ['_', '-', ' ', ':', '/']
9
10
  separators: &'static [char],
10
11
  /// Maximum number of results to return.
11
12
  ///
@@ -22,6 +23,12 @@ pub struct QuickMatchConfig {
22
23
  /// - High (9-15): Slower, more accurate fuzzy matching
23
24
  /// - Max: 20
24
25
  trigram_budget: usize,
26
+ /// Minimum trigram score required for fuzzy matches.
27
+ /// Higher values require more trigram overlap, reducing noise.
28
+ ///
29
+ /// Default: 2
30
+ /// - Min: 1
31
+ min_score: usize,
25
32
  }
26
33
 
27
34
  impl Default for QuickMatchConfig {
@@ -30,6 +37,7 @@ impl Default for QuickMatchConfig {
30
37
  separators: DEFAULT_SEPARATORS,
31
38
  limit: DEFAULT_LIMIT,
32
39
  trigram_budget: DEFAULT_TRIGRAM_BUDGET,
40
+ min_score: DEFAULT_MIN_SCORE,
33
41
  }
34
42
  }
35
43
  }
@@ -54,6 +62,11 @@ impl QuickMatchConfig {
54
62
  self
55
63
  }
56
64
 
65
+ pub fn with_min_score(mut self, min_score: usize) -> Self {
66
+ self.min_score = min_score.max(1);
67
+ self
68
+ }
69
+
57
70
  pub fn limit(&self) -> usize {
58
71
  self.limit
59
72
  }
@@ -65,4 +78,8 @@ impl QuickMatchConfig {
65
78
  pub fn separators(&self) -> &[char] {
66
79
  self.separators
67
80
  }
81
+
82
+ pub fn min_score(&self) -> usize {
83
+ self.min_score
84
+ }
68
85
  }
package/src/index.js CHANGED
@@ -1,47 +1,70 @@
1
- const DEFAULT_SEPARATORS = "_- ";
1
+ const DEFAULT_SEPARATORS = "_- :/";
2
2
  const DEFAULT_TRIGRAM_BUDGET = 6;
3
3
  const DEFAULT_LIMIT = 100;
4
+ const DEFAULT_MIN_SCORE = 2;
4
5
 
5
6
  /**
6
7
  * Configuration for QuickMatch.
7
8
  */
8
9
  export class QuickMatchConfig {
9
- /** @type {string} Characters used to split items into words */
10
+ /**
11
+ * Separators used to split words.
12
+ * @type {string}
13
+ * @default "_- :/"
14
+ */
10
15
  separators = DEFAULT_SEPARATORS;
11
16
 
12
- /** @type {number} Maximum number of results to return */
17
+ /**
18
+ * Maximum number of results to return.
19
+ * @type {number}
20
+ * @default 100
21
+ */
13
22
  limit = DEFAULT_LIMIT;
14
23
 
15
- /** @type {number} Number of trigram lookups for fuzzy matching (0-20) */
24
+ /**
25
+ * Budget of trigrams to process from unknown words.
26
+ * This budget is distributed fairly across all unknown words.
27
+ *
28
+ * - 0: Disable trigram matching (only exact word matches)
29
+ * - Low (3-6): Faster, less accurate fuzzy matching
30
+ * - High (9-15): Slower, more accurate fuzzy matching
31
+ * - Max: 20
32
+ * @type {number}
33
+ * @default 6
34
+ */
16
35
  trigramBudget = DEFAULT_TRIGRAM_BUDGET;
17
36
 
18
37
  /**
19
- * Set maximum number of results.
20
- * @param {number} n
38
+ * Minimum trigram score required for fuzzy matches.
39
+ * Higher values require more trigram overlap, reducing noise.
40
+ * @type {number}
41
+ * @default 2
21
42
  */
43
+ minScore = DEFAULT_MIN_SCORE;
44
+
45
+ /** @param {number} n - Max results (default: 100, min: 1) */
22
46
  withLimit(n) {
23
47
  this.limit = Math.max(1, n);
24
48
  return this;
25
49
  }
26
50
 
27
- /**
28
- * Set trigram budget for fuzzy matching.
29
- * Higher values find more typos but cost more.
30
- * @param {number} n - Budget (0-20, default: 6)
31
- */
51
+ /** @param {number} n - Trigram budget (0-20, default: 6) */
32
52
  withTrigramBudget(n) {
33
53
  this.trigramBudget = Math.max(0, Math.min(20, n));
34
54
  return this;
35
55
  }
36
56
 
37
- /**
38
- * Set word separator characters.
39
- * @param {string} s - Separator characters (default: '_- ')
40
- */
57
+ /** @param {string} s - Separator characters (default: '_- :/') */
41
58
  withSeparators(s) {
42
59
  this.separators = s;
43
60
  return this;
44
61
  }
62
+
63
+ /** @param {number} n - Min trigram score (default: 2, min: 1) */
64
+ withMinScore(n) {
65
+ this.minScore = Math.max(1, n);
66
+ return this;
67
+ }
45
68
  }
46
69
 
47
70
  /**
@@ -49,9 +72,8 @@ export class QuickMatchConfig {
49
72
  */
50
73
  export class QuickMatch {
51
74
  /**
52
- * Create a new matcher.
53
75
  * @param {string[]} items - Items to index (should be lowercase)
54
- * @param {QuickMatchConfig} [config] - Optional configuration
76
+ * @param {QuickMatchConfig} [config]
55
77
  */
56
78
  constructor(items, config = new QuickMatchConfig()) {
57
79
  this.config = config;
@@ -60,263 +82,238 @@ export class QuickMatch {
60
82
  this.wordIndex = new Map();
61
83
  /** @type {Map<string, number[]>} */
62
84
  this.trigramIndex = new Map();
85
+ this._sepLookup = sepLookup(config.separators);
86
+ this._scores = new Uint32Array(items.length);
87
+ /** @type {number[]} */
88
+ this._dirty = [];
63
89
 
64
- let maxWordLength = 0;
65
- let maxQueryLength = 0;
66
- let maxWordCount = 0;
67
-
68
- const { separators } = config;
69
-
70
- for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
71
- const item = items[itemIndex];
90
+ let maxWordLen = 0;
91
+ let maxQueryLen = 0;
92
+ let maxWords = 0;
93
+ const sep = this._sepLookup;
72
94
 
73
- if (item.length > maxQueryLength) {
74
- maxQueryLength = item.length;
75
- }
95
+ for (let idx = 0; idx < items.length; idx++) {
96
+ const item = items[idx];
97
+ if (item.length > maxQueryLen) maxQueryLen = item.length;
76
98
 
77
- let wordCount = 0;
78
- let wordStart = 0;
99
+ const words = [];
100
+ let start = 0;
79
101
 
80
102
  for (let i = 0; i <= item.length; i++) {
81
- const isEndOfWord = i === item.length || separators.includes(item[i]);
82
-
83
- if (isEndOfWord && i > wordStart) {
84
- wordCount++;
85
- const word = item.slice(wordStart, i);
86
-
87
- if (word.length > maxWordLength) {
88
- maxWordLength = word.length;
89
- }
90
-
91
- addToIndex(this.wordIndex, word, itemIndex);
92
- addTrigramsToIndex(this.trigramIndex, word, itemIndex);
93
-
94
- wordStart = i + 1;
95
- } else if (isEndOfWord) {
96
- wordStart = i + 1;
103
+ if (i < item.length && !sep[item.charCodeAt(i)]) continue;
104
+ if (i > start) {
105
+ const word = item.slice(start, i);
106
+ words.push(word);
107
+ if (word.length > maxWordLen) maxWordLen = word.length;
108
+ addToIndex(this.wordIndex, word, idx);
109
+ indexTrigrams(this.trigramIndex, word, idx);
97
110
  }
111
+ start = i + 1;
98
112
  }
99
113
 
100
- if (wordCount > maxWordCount) {
101
- maxWordCount = wordCount;
114
+ for (let i = 0; i < words.length - 1; i++) {
115
+ addToIndex(this.wordIndex, words[i] + words[i + 1], idx);
102
116
  }
117
+
118
+ if (words.length > maxWords) maxWords = words.length;
103
119
  }
104
120
 
105
- this.maxWordLength = maxWordLength + 4;
106
- this.maxQueryLength = maxQueryLength + 6;
107
- this.maxWordCount = maxWordCount + 2;
121
+ this.maxWordLen = maxWordLen + 4;
122
+ this.maxQueryLen = maxQueryLen + 6;
123
+ this.maxWords = maxWords + 2;
108
124
  }
109
125
 
110
- /**
111
- * Find matching items. Returns items sorted by relevance.
112
- * @param {string} query - Search query
113
- */
126
+ /** @param {string} query */
114
127
  matches(query) {
115
128
  return this.matchesWith(query, this.config);
116
129
  }
117
130
 
118
131
  /**
119
- * Find matching items with custom config. Returns items sorted by relevance.
120
- * @param {string} query - Search query
121
- * @param {QuickMatchConfig} config - Configuration to use
132
+ * @param {string} query
133
+ * @param {QuickMatchConfig} config
122
134
  */
123
135
  matchesWith(query, config) {
124
- const { limit, trigramBudget, separators } = config;
125
-
126
- const normalizedQuery = normalizeQuery(query);
136
+ const { limit, trigramBudget } = config;
137
+ const sep =
138
+ config.separators === this.config.separators
139
+ ? this._sepLookup
140
+ : sepLookup(config.separators);
127
141
 
128
- if (!normalizedQuery || normalizedQuery.length > this.maxQueryLength) {
129
- return [];
130
- }
131
-
132
- const queryWords = parseWords(
133
- normalizedQuery,
134
- separators,
135
- this.maxWordLength,
136
- );
137
-
138
- if (!queryWords.length || queryWords.length > this.maxWordCount) {
139
- return [];
140
- }
142
+ const q = normalize(query);
143
+ if (!q || q.length > this.maxQueryLen) return [];
141
144
 
142
- const knownWords = [];
143
- const unknownWords = [];
145
+ const qwords = splitWords(q, sep, this.maxWordLen);
146
+ if (!qwords.length || qwords.length > this.maxWords) return [];
144
147
 
145
- for (const word of queryWords) {
146
- const matchingItems = this.wordIndex.get(word);
148
+ const known = [];
149
+ const unknown = [];
147
150
 
148
- if (matchingItems) {
149
- knownWords.push(matchingItems);
150
- } else if (word.length >= 3 && unknownWords.length < trigramBudget) {
151
- unknownWords.push(word);
152
- }
151
+ for (const w of qwords) {
152
+ const hits = this.wordIndex.get(w);
153
+ if (hits) known.push(hits);
154
+ else if (w.length >= 3 && unknown.length < trigramBudget) unknown.push(w);
153
155
  }
154
156
 
155
- const exactMatches = intersectAll(knownWords);
156
- const hasExactMatches = exactMatches.length > 0;
157
- const needsFuzzyMatching = unknownWords.length > 0 && trigramBudget > 0;
157
+ const pool = intersect(known);
158
+ const hasPool = pool.length > 0;
158
159
 
159
- if (!needsFuzzyMatching) {
160
- if (!hasExactMatches) return [];
161
- return this.sortedByLength(exactMatches, limit);
160
+ if (!unknown.length || !trigramBudget) {
161
+ if (!hasPool) return [];
162
+ return this._rank(pool, null, qwords, sep, limit);
162
163
  }
163
164
 
164
- const scores = new Map();
165
-
166
- if (hasExactMatches) {
167
- for (const index of exactMatches) {
168
- scores.set(index, 1);
165
+ // Seed scores from exact-match pool
166
+ const { _scores: scores, _dirty: dirty } = this;
167
+ if (hasPool) {
168
+ for (const i of pool) {
169
+ scores[i] = 1;
170
+ dirty.push(i);
169
171
  }
170
172
  }
171
173
 
172
- const minItemLength = Math.max(0, normalizedQuery.length - 3);
173
-
174
- const hitCount = this.scoreByTrigrams({
175
- unknownWords,
176
- budget: trigramBudget,
177
- scores,
178
- hasExactMatches,
179
- minItemLength,
180
- });
181
-
182
- const minScoreToInclude = Math.max(1, Math.ceil(hitCount / 2));
174
+ const hitCount = this._scoreTrigrams(
175
+ unknown,
176
+ trigramBudget,
177
+ hasPool,
178
+ Math.max(0, q.length - 3),
179
+ );
180
+ const minScore = Math.max(config.minScore, Math.ceil(hitCount / 2));
181
+ const result = this._rank(dirty, minScore, qwords, sep, limit);
183
182
 
184
- return this.rankedResults(scores, minScoreToInclude, limit);
183
+ for (const i of dirty) scores[i] = 0;
184
+ dirty.length = 0;
185
+ return result;
185
186
  }
186
187
 
187
188
  /**
188
189
  * @private
189
- * @param {{unknownWords: string[], budget: number, scores: Map<number, number>, hasExactMatches: boolean, minItemLength: number}} args
190
+ * @param {string[]} unknown
191
+ * @param {number} budget
192
+ * @param {boolean} poolOnly
193
+ * @param {number} minLen
190
194
  */
191
- scoreByTrigrams({
192
- unknownWords,
193
- budget,
194
- scores,
195
- hasExactMatches,
196
- minItemLength,
197
- }) {
198
- const visitedTrigrams = new Set();
199
- let budgetRemaining = budget;
200
- let hitCount = 0;
195
+ _scoreTrigrams(unknown, budget, poolOnly, minLen) {
196
+ const visited = new Set();
197
+ const { _scores: scores, _dirty: dirty, items } = this;
198
+ let remaining = budget;
199
+ let hits = 0;
201
200
 
202
201
  outer: for (let round = 0; round < budget; round++) {
203
- for (const word of unknownWords) {
204
- if (budgetRemaining <= 0) break outer;
202
+ for (const word of unknown) {
203
+ if (remaining <= 0) break outer;
205
204
 
206
- const position = pickTrigramPosition(word.length, round);
207
- if (position < 0) continue;
205
+ const pos = trigramPosition(word.length, round);
206
+ if (pos < 0) continue;
208
207
 
209
- const trigram =
210
- word[position] + word[position + 1] + word[position + 2];
208
+ const tri = word[pos] + word[pos + 1] + word[pos + 2];
209
+ if (visited.has(tri)) continue;
210
+ visited.add(tri);
211
+ remaining--;
211
212
 
212
- if (visitedTrigrams.has(trigram)) continue;
213
- visitedTrigrams.add(trigram);
213
+ const matched = this.trigramIndex.get(tri);
214
+ if (!matched) continue;
215
+ hits++;
214
216
 
215
- budgetRemaining--;
216
-
217
- const matchingItems = this.trigramIndex.get(trigram);
218
- if (!matchingItems) continue;
219
-
220
- hitCount++;
221
-
222
- for (const itemIndex of matchingItems) {
223
- if (hasExactMatches) {
224
- const currentScore = scores.get(itemIndex);
225
- if (currentScore !== undefined) {
226
- scores.set(itemIndex, currentScore + 1);
217
+ if (poolOnly) {
218
+ for (let j = 0; j < matched.length; j++) {
219
+ const i = matched[j];
220
+ if (scores[i] > 0) scores[i]++;
221
+ }
222
+ } else {
223
+ for (let j = 0; j < matched.length; j++) {
224
+ const i = matched[j];
225
+ if (items[i].length >= minLen) {
226
+ if (scores[i] === 0) dirty.push(i);
227
+ scores[i]++;
227
228
  }
228
- } else if (this.items[itemIndex].length >= minItemLength) {
229
- scores.set(itemIndex, (scores.get(itemIndex) || 0) + 1);
230
229
  }
231
230
  }
232
231
  }
233
232
  }
234
233
 
235
- return hitCount;
234
+ return hits;
236
235
  }
237
236
 
238
237
  /**
238
+ * Rank candidates by prefix match, then score, then length.
239
239
  * @private
240
240
  * @param {number[]} indices
241
+ * @param {number|null} minScore - null = no score filtering (exact-match path)
242
+ * @param {string[]} qwords
243
+ * @param {Uint8Array} sep
241
244
  * @param {number} limit
242
245
  */
243
- sortedByLength(indices, limit) {
244
- const { items } = this;
245
- indices.sort((a, b) => items[a].length - items[b].length);
246
- if (indices.length > limit) indices.length = limit;
247
- return indices.map((i) => items[i]);
248
- }
249
-
250
- /**
251
- * @private
252
- * @param {Map<number, number>} scores
253
- * @param {number} minScore
254
- * @param {number} limit
255
- */
256
- rankedResults(scores, minScore, limit) {
257
- const { items } = this;
246
+ _rank(indices, minScore, qwords, sep, limit) {
247
+ const { items, _scores: scores } = this;
258
248
  const results = [];
259
249
 
260
- for (const [index, score] of scores) {
261
- if (score >= minScore) {
262
- results.push({ index, score });
263
- }
250
+ for (let i = 0; i < indices.length; i++) {
251
+ const idx = indices[i];
252
+ if (minScore !== null && scores[idx] < minScore) continue;
253
+ results.push(idx);
264
254
  }
265
255
 
266
- results.sort((a, b) => {
267
- if (b.score !== a.score) return b.score - a.score;
268
- return items[a.index].length - items[b.index].length;
269
- });
256
+ const pscores = new Uint8Array(items.length);
257
+ for (let i = 0; i < results.length; i++) {
258
+ pscores[results[i]] = prefixScore(items[results[i]], qwords, sep);
259
+ }
270
260
 
271
- if (results.length > limit) results.length = limit;
261
+ results.sort(
262
+ (a, b) =>
263
+ pscores[b] - pscores[a] ||
264
+ scores[b] - scores[a] ||
265
+ items[a].length - items[b].length,
266
+ );
272
267
 
273
- return results.map((r) => items[r.index]);
268
+ if (results.length > limit) results.length = limit;
269
+ return results.map((i) => items[i]);
274
270
  }
275
271
  }
276
272
 
273
+ // --- Helpers ---
274
+
277
275
  /** @param {string} query */
278
- function normalizeQuery(query) {
279
- let result = "";
276
+ function normalize(query) {
277
+ let out = "";
280
278
  let start = 0;
281
279
  let end = query.length;
282
-
283
280
  while (start < end && query.charCodeAt(start) <= 32) start++;
284
281
  while (end > start && query.charCodeAt(end - 1) <= 32) end--;
285
-
286
282
  for (let i = start; i < end; i++) {
287
- const code = query.charCodeAt(i);
288
- if (code >= 128) continue;
289
- result +=
290
- code >= 65 && code <= 90 ? String.fromCharCode(code + 32) : query[i];
283
+ const c = query.charCodeAt(i);
284
+ if (c >= 128) continue;
285
+ out += c >= 65 && c <= 90 ? String.fromCharCode(c + 32) : query[i];
291
286
  }
287
+ return out;
288
+ }
292
289
 
293
- return result;
290
+ /** @param {string} separators */
291
+ function sepLookup(separators) {
292
+ const t = new Uint8Array(128);
293
+ for (let i = 0; i < separators.length; i++) {
294
+ const c = separators.charCodeAt(i);
295
+ if (c < 128) t[c] = 1;
296
+ }
297
+ return t;
294
298
  }
295
299
 
296
300
  /**
297
301
  * @param {string} text
298
- * @param {string} separators
299
- * @param {number} maxLength
302
+ * @param {Uint8Array} sep
303
+ * @param {number} maxLen
300
304
  */
301
- function parseWords(text, separators, maxLength) {
305
+ function splitWords(text, sep, maxLen) {
302
306
  /** @type {string[]} */
303
307
  const words = [];
304
308
  let start = 0;
305
-
306
309
  for (let i = 0; i <= text.length; i++) {
307
- const isEnd = i === text.length || separators.includes(text[i]);
308
-
309
- if (isEnd && i > start) {
310
- const word = text.slice(start, i);
311
- if (word.length <= maxLength && !words.includes(word)) {
312
- words.push(word);
313
- }
314
- start = i + 1;
315
- } else if (isEnd) {
316
- start = i + 1;
310
+ if (i < text.length && !sep[text.charCodeAt(i)]) continue;
311
+ if (i > start) {
312
+ const w = text.slice(start, i);
313
+ if (w.length <= maxLen && !words.includes(w)) words.push(w);
317
314
  }
315
+ start = i + 1;
318
316
  }
319
-
320
317
  return words;
321
318
  }
322
319
 
@@ -326,102 +323,104 @@ function parseWords(text, separators, maxLength) {
326
323
  * @param {number} value
327
324
  */
328
325
  function addToIndex(index, key, value) {
329
- const existing = index.get(key);
330
- if (existing) {
331
- existing.push(value);
332
- } else {
333
- index.set(key, [value]);
334
- }
326
+ const arr = index.get(key);
327
+ if (arr) arr.push(value);
328
+ else index.set(key, [value]);
335
329
  }
336
330
 
337
331
  /**
338
332
  * @param {Map<string, number[]>} index
339
333
  * @param {string} word
340
- * @param {number} itemIndex
334
+ * @param {number} idx
341
335
  */
342
- function addTrigramsToIndex(index, word, itemIndex) {
336
+ function indexTrigrams(index, word, idx) {
343
337
  if (word.length < 3) return;
344
-
345
338
  for (let i = 0; i <= word.length - 3; i++) {
346
- const trigram = word[i] + word[i + 1] + word[i + 2];
347
- const existing = index.get(trigram);
348
-
349
- if (!existing) {
350
- index.set(trigram, [itemIndex]);
351
- } else if (existing[existing.length - 1] !== itemIndex) {
352
- existing.push(itemIndex);
353
- }
339
+ const tri = word[i] + word[i + 1] + word[i + 2];
340
+ const arr = index.get(tri);
341
+ if (!arr) index.set(tri, [idx]);
342
+ else if (arr[arr.length - 1] !== idx) arr.push(idx);
354
343
  }
355
344
  }
356
345
 
357
346
  /** @param {number[][]} arrays */
358
- function intersectAll(arrays) {
347
+ function intersect(arrays) {
359
348
  if (!arrays.length) return [];
360
349
 
361
- let smallestIndex = 0;
350
+ let si = 0;
362
351
  for (let i = 1; i < arrays.length; i++) {
363
- if (arrays[i].length < arrays[smallestIndex].length) {
364
- smallestIndex = i;
365
- }
352
+ if (arrays[i].length < arrays[si].length) si = i;
366
353
  }
367
354
 
368
- const result = arrays[smallestIndex].slice();
369
-
355
+ const result = arrays[si].slice();
370
356
  for (let i = 0; i < arrays.length && result.length > 0; i++) {
371
- if (i === smallestIndex) continue;
372
-
373
- let writeIndex = 0;
357
+ if (i === si) continue;
358
+ let w = 0;
374
359
  for (let j = 0; j < result.length; j++) {
375
- if (binarySearch(arrays[i], result[j])) {
376
- result[writeIndex++] = result[j];
377
- }
360
+ if (bsearch(arrays[i], result[j])) result[w++] = result[j];
378
361
  }
379
- result.length = writeIndex;
362
+ result.length = w;
380
363
  }
381
-
382
364
  return result;
383
365
  }
384
366
 
385
367
  /**
386
- * @param {number[]} sortedArray
387
- * @param {number} value
368
+ * @param {number[]} arr
369
+ * @param {number} val
388
370
  */
389
- function binarySearch(sortedArray, value) {
390
- let low = 0;
391
- let high = sortedArray.length - 1;
392
-
393
- while (low <= high) {
394
- const mid = (low + high) >> 1;
395
- const midValue = sortedArray[mid];
396
-
397
- if (midValue === value) return true;
398
- if (midValue < value) low = mid + 1;
399
- else high = mid - 1;
371
+ function bsearch(arr, val) {
372
+ let lo = 0,
373
+ hi = arr.length - 1;
374
+ while (lo <= hi) {
375
+ const mid = (lo + hi) >> 1;
376
+ if (arr[mid] === val) return true;
377
+ if (arr[mid] < val) lo = mid + 1;
378
+ else hi = mid - 1;
400
379
  }
401
-
402
380
  return false;
403
381
  }
404
382
 
405
383
  /**
406
- * @param {number} wordLength
407
- * @param {number} round
384
+ * 2 = exact match, 1 = prefix match, 0 = no match
385
+ * @param {string} item
386
+ * @param {string[]} qwords
387
+ * @param {Uint8Array} sep
408
388
  */
409
- function pickTrigramPosition(wordLength, round) {
410
- const maxPosition = wordLength - 3;
411
- if (maxPosition < 0) return -1;
412
-
413
- if (round === 0) return 0;
414
- if (round === 1 && maxPosition > 0) return maxPosition;
415
- if (round === 2 && maxPosition > 1) return maxPosition >> 1;
416
- if (maxPosition <= 2) return -1;
417
-
418
- const middle = maxPosition >> 1;
419
- const offset = (round - 2) >> 1;
420
- const position = round & 1 ? Math.max(0, middle - offset) : middle + offset;
421
-
422
- if (position === 0 || position >= maxPosition || position === middle) {
423
- return -1;
389
+ function prefixScore(item, qwords, sep) {
390
+ let qi = 0,
391
+ pos = 0;
392
+ const len = item.length;
393
+
394
+ while (qi < qwords.length) {
395
+ while (pos < len && sep[item.charCodeAt(pos)]) pos++;
396
+ if (pos >= len) return 0;
397
+
398
+ const ws = pos;
399
+ while (pos < len && !sep[item.charCodeAt(pos)]) pos++;
400
+
401
+ const qw = qwords[qi];
402
+ if (pos - ws !== qw.length) return 0;
403
+ for (let j = 0; j < qw.length; j++) {
404
+ if (item.charCodeAt(ws + j) !== qw.charCodeAt(j)) return 0;
405
+ }
406
+ qi++;
424
407
  }
425
408
 
426
- return position;
409
+ while (pos < len && sep[item.charCodeAt(pos)]) pos++;
410
+ return pos >= len ? 2 : 1;
411
+ }
412
+
413
+ /** @param {number} len @param {number} round */
414
+ function trigramPosition(len, round) {
415
+ const max = len - 3;
416
+ if (max < 0) return -1;
417
+ if (round === 0) return 0;
418
+ if (round === 1 && max > 0) return max;
419
+ if (round === 2 && max > 1) return max >> 1;
420
+ if (max <= 2) return -1;
421
+
422
+ const mid = max >> 1;
423
+ const off = (round - 2) >> 1;
424
+ const pos = round & 1 ? Math.max(0, mid - off) : mid + off;
425
+ return pos === 0 || pos >= max || pos === mid ? -1 : pos;
427
426
  }
package/src/lib.rs CHANGED
@@ -36,16 +36,16 @@ impl<'a> QuickMatch<'a> {
36
36
 
37
37
  for &item in items {
38
38
  max_query_len = max_query_len.max(item.len());
39
- let mut word_count = 0;
40
- for word in item.split(separators) {
41
- word_count += 1;
42
- if word.is_empty() {
43
- continue;
44
- }
39
+ let item_words: Vec<&str> = item.split(separators).filter(|w| !w.is_empty()).collect();
40
+ max_words = max_words.max(item_words.len());
45
41
 
46
- max_word_len = max_word_len.max(item.len());
42
+ for word in &item_words {
43
+ max_word_len = max_word_len.max(word.len());
47
44
 
48
- word_index.entry(word.to_string()).or_default().insert(item);
45
+ word_index
46
+ .entry(word.to_string())
47
+ .or_default()
48
+ .insert(item);
49
49
 
50
50
  if word.len() >= 3 {
51
51
  let chars = word.chars().collect::<Vec<_>>();
@@ -57,13 +57,18 @@ impl<'a> QuickMatch<'a> {
57
57
  }
58
58
  }
59
59
  }
60
- max_words = max_words.max(word_count);
60
+
61
+ // Index adjacent word pairs as compounds (e.g. "hash"+"rate" → "hashrate")
62
+ for pair in item_words.windows(2) {
63
+ let compound = format!("{}{}", pair[0], pair[1]);
64
+ word_index.entry(compound).or_default().insert(item);
65
+ }
61
66
  }
62
67
 
63
68
  Self {
64
69
  max_query_len: max_query_len + 6,
65
70
  max_word_len: max_word_len + 4,
66
- max_word_count: max_word_len + 2,
71
+ max_word_count: max_words + 2,
67
72
  word_index,
68
73
  trigram_index,
69
74
  config,
@@ -71,26 +76,15 @@ impl<'a> QuickMatch<'a> {
71
76
  }
72
77
  }
73
78
 
74
- ///
75
- /// `limit`: max number of returned matches
76
- ///
77
- /// `max_trigrams`: max number of processed trigrams in unknown words (0-10 recommended)
78
- ///
79
79
  pub fn matches(&self, query: &str) -> Vec<&'a str> {
80
80
  self.matches_with(query, &self.config)
81
81
  }
82
82
 
83
- ///
84
- /// `limit`: max number of returned matches
85
- ///
86
- /// `max_trigrams`: max number of processed trigrams in unknown words (0-10 recommended)
87
- ///
88
83
  pub fn matches_with(&self, query: &str, config: &QuickMatchConfig) -> Vec<&'a str> {
89
84
  let limit = config.limit();
90
85
  let trigram_budget = config.trigram_budget();
91
- let query_len = query.len();
92
86
 
93
- if query.is_empty() || query_len > self.max_query_len {
87
+ if query.is_empty() {
94
88
  return vec![];
95
89
  }
96
90
 
@@ -101,22 +95,30 @@ impl<'a> QuickMatch<'a> {
101
95
  .collect::<String>()
102
96
  .to_ascii_lowercase();
103
97
 
104
- let words = query
105
- .split(config.separators())
98
+ if query.is_empty() || query.len() > self.max_query_len {
99
+ return vec![];
100
+ }
101
+
102
+ let separators = config.separators();
103
+
104
+ let query_words: Vec<&str> = query
105
+ .split(separators)
106
106
  .filter(|w| !w.is_empty() && w.len() <= self.max_word_len)
107
- .collect::<FxHashSet<_>>();
107
+ .collect();
108
+
109
+ let words: FxHashSet<&str> = query_words.iter().copied().collect();
108
110
 
109
111
  if words.is_empty() || words.len() > self.max_word_count {
110
112
  return vec![];
111
113
  }
112
114
 
113
- let min_len = query_len.saturating_sub(3);
115
+ let min_len = query.len().saturating_sub(3);
114
116
 
115
117
  let mut pool: Option<FxHashSet<*const str>> = None;
116
118
  let mut unknown_words = Vec::new();
117
119
 
118
120
  let mut words_to_intersect = vec![];
119
- for word in words {
121
+ for &word in &words {
120
122
  if let Some(items) = self.word_index.get(word) {
121
123
  words_to_intersect.push(items)
122
124
  } else if word.len() >= 3 && unknown_words.len() < trigram_budget {
@@ -144,17 +146,23 @@ impl<'a> QuickMatch<'a> {
144
146
  let mut results: Vec<_> = pool
145
147
  .unwrap_or_default()
146
148
  .into_iter()
147
- .map(|item| unsafe { &*item as &str })
149
+ .map(|item| {
150
+ let s = unsafe { &*item as &str };
151
+ (s, prefix_score(s, &query_words, separators))
152
+ })
148
153
  .collect();
149
154
 
155
+ let cmp =
156
+ |a: &(&str, u8), b: &(&str, u8)| b.1.cmp(&a.1).then_with(|| a.0.len().cmp(&b.0.len()));
157
+
150
158
  if results.len() > limit {
151
- results.select_nth_unstable_by_key(limit, |item| item.len());
159
+ results.select_nth_unstable_by(limit, cmp);
152
160
  results.truncate(limit);
153
161
  }
154
162
 
155
- results.sort_unstable_by_key(|item| item.len());
163
+ results.sort_unstable_by(cmp);
156
164
 
157
- return results;
165
+ return results.into_iter().map(|(item, _)| item).collect();
158
166
  }
159
167
 
160
168
  let mut scores: FxHashMap<*const str, usize> = FxHashMap::default();
@@ -232,26 +240,52 @@ impl<'a> QuickMatch<'a> {
232
240
  }
233
241
  }
234
242
 
235
- let min_score = hit_count.div_ceil(2).max(1);
243
+ let min_score = hit_count.div_ceil(2).max(config.min_score());
236
244
  let mut results: Vec<_> = scores
237
245
  .into_iter()
238
246
  .filter(|(_, s)| *s >= min_score)
239
- .map(|(item, score)| (unsafe { &*item as &str }, score))
247
+ .map(|(item, score)| {
248
+ let s = unsafe { &*item as &str };
249
+ (s, score, prefix_score(s, &query_words, separators))
250
+ })
240
251
  .collect();
241
252
 
253
+ let cmp = |a: &(&str, usize, u8), b: &(&str, usize, u8)| {
254
+ b.2.cmp(&a.2)
255
+ .then_with(|| b.1.cmp(&a.1))
256
+ .then_with(|| a.0.len().cmp(&b.0.len()))
257
+ };
258
+
242
259
  if results.len() > limit {
243
- results.select_nth_unstable_by(limit, |a, b| {
244
- b.1.cmp(&a.1).then_with(|| a.0.len().cmp(&b.0.len()))
245
- });
260
+ results.select_nth_unstable_by(limit, cmp);
246
261
  results.truncate(limit);
247
262
  }
248
263
 
249
- results.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.len().cmp(&b.0.len())));
264
+ results.sort_unstable_by(cmp);
250
265
 
251
266
  results
252
267
  .into_iter()
253
268
  .take(limit)
254
- .map(|(item, _)| item)
269
+ .map(|(item, _, _)| item)
255
270
  .collect()
256
271
  }
257
272
  }
273
+
274
+ /// Score how well an item's word sequence matches the query as a prefix.
275
+ /// - 2: exact match (all words match, no extra words in item)
276
+ /// - 1: prefix match (item starts with query words but has more)
277
+ /// - 0: no prefix match
278
+ fn prefix_score(item: &str, query_words: &[&str], separators: &[char]) -> u8 {
279
+ let mut item_words = item.split(separators).filter(|w| !w.is_empty());
280
+ for &qw in query_words {
281
+ match item_words.next() {
282
+ Some(iw) if iw == qw => continue,
283
+ _ => return 0,
284
+ }
285
+ }
286
+ if item_words.next().is_none() {
287
+ 2
288
+ } else {
289
+ 1
290
+ }
291
+ }
package/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 quickmatch
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
package/README.md DELETED
@@ -1,26 +0,0 @@
1
- # quickmatch
2
-
3
- **Lightning-fast fuzzy string matching for Rust.**
4
-
5
- A high-performance string matching library optimized for interactive search experiences like autocomplete, command palettes, and search-as-you-type interfaces.
6
-
7
- [![Crates.io](https://img.shields.io/crates/v/quickmatch.svg)](https://crates.io/crates/quickmatch)
8
- [![Documentation](https://docs.rs/quickmatch/badge.svg)](https://docs.rs/quickmatch)
9
-
10
- ## Features
11
-
12
- - **Blazing fast** - Optimized for sub-millisecond search times
13
- - **Hybrid matching** - Word-level matching with trigram-based fuzzy fallback
14
- - **Memory efficient** - Zero-copy string storage with pointer-based indexing
15
- - **Ranked results** - Intelligent scoring based on match quality
16
- - **Zero external dependencies** - Only uses `rustc-hash` for fast hashing
17
-
18
- ## Installation
19
-
20
- ```bash
21
- # rust
22
- cargo add quickmatch
23
-
24
- # js
25
- npm install quickmatch-js
26
- ```