quickmatch-js 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "quickmatch-js",
3
- "version": "0.3.0",
3
+ "version": "0.3.2",
4
4
  "description": "Lightning-fast fuzzy string matching",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/src/config.rs CHANGED
@@ -1,11 +1,12 @@
1
- const DEFAULT_SEPARATORS: &[char] = &['_', '-', ' '];
1
+ const DEFAULT_SEPARATORS: &[char] = &['_', '-', ' ', ':', '/'];
2
2
  const DEFAULT_TRIGRAM_BUDGET: usize = 6;
3
3
  const DEFAULT_LIMIT: usize = 100;
4
+ const DEFAULT_MIN_SCORE: usize = 2;
4
5
 
5
6
  pub struct QuickMatchConfig {
6
7
  /// Separators used to split words.
7
8
  ///
8
- /// Default: ['_', '-', ' ']
9
+ /// Default: ['_', '-', ' ', ':', '/']
9
10
  separators: &'static [char],
10
11
  /// Maximum number of results to return.
11
12
  ///
@@ -22,6 +23,12 @@ pub struct QuickMatchConfig {
22
23
  /// - High (9-15): Slower, more accurate fuzzy matching
23
24
  /// - Max: 20
24
25
  trigram_budget: usize,
26
+ /// Minimum trigram score required for fuzzy matches.
27
+ /// Higher values require more trigram overlap, reducing noise.
28
+ ///
29
+ /// Default: 2
30
+ /// - Min: 1
31
+ min_score: usize,
25
32
  }
26
33
 
27
34
  impl Default for QuickMatchConfig {
@@ -30,6 +37,7 @@ impl Default for QuickMatchConfig {
30
37
  separators: DEFAULT_SEPARATORS,
31
38
  limit: DEFAULT_LIMIT,
32
39
  trigram_budget: DEFAULT_TRIGRAM_BUDGET,
40
+ min_score: DEFAULT_MIN_SCORE,
33
41
  }
34
42
  }
35
43
  }
@@ -54,6 +62,11 @@ impl QuickMatchConfig {
54
62
  self
55
63
  }
56
64
 
65
+ pub fn with_min_score(mut self, min_score: usize) -> Self {
66
+ self.min_score = min_score.max(1);
67
+ self
68
+ }
69
+
57
70
  pub fn limit(&self) -> usize {
58
71
  self.limit
59
72
  }
@@ -65,4 +78,8 @@ impl QuickMatchConfig {
65
78
  pub fn separators(&self) -> &[char] {
66
79
  self.separators
67
80
  }
81
+
82
+ pub fn min_score(&self) -> usize {
83
+ self.min_score
84
+ }
68
85
  }
package/src/index.js CHANGED
@@ -1,335 +1,426 @@
1
- const DEFAULT_SEPARATORS = '_- ';
1
+ const DEFAULT_SEPARATORS = "_- :/";
2
2
  const DEFAULT_TRIGRAM_BUDGET = 6;
3
3
  const DEFAULT_LIMIT = 100;
4
+ const DEFAULT_MIN_SCORE = 2;
4
5
 
6
+ /**
7
+ * Configuration for QuickMatch.
8
+ */
5
9
  export class QuickMatchConfig {
10
+ /**
11
+ * Separators used to split words.
12
+ * @type {string}
13
+ * @default "_- :/"
14
+ */
6
15
  separators = DEFAULT_SEPARATORS;
16
+
17
+ /**
18
+ * Maximum number of results to return.
19
+ * @type {number}
20
+ * @default 100
21
+ */
7
22
  limit = DEFAULT_LIMIT;
23
+
24
+ /**
25
+ * Budget of trigrams to process from unknown words.
26
+ * This budget is distributed fairly across all unknown words.
27
+ *
28
+ * - 0: Disable trigram matching (only exact word matches)
29
+ * - Low (3-6): Faster, less accurate fuzzy matching
30
+ * - High (9-15): Slower, more accurate fuzzy matching
31
+ * - Max: 20
32
+ * @type {number}
33
+ * @default 6
34
+ */
8
35
  trigramBudget = DEFAULT_TRIGRAM_BUDGET;
9
36
 
37
+ /**
38
+ * Minimum trigram score required for fuzzy matches.
39
+ * Higher values require more trigram overlap, reducing noise.
40
+ * @type {number}
41
+ * @default 2
42
+ */
43
+ minScore = DEFAULT_MIN_SCORE;
44
+
45
+ /** @param {number} n - Max results (default: 100, min: 1) */
10
46
  withLimit(n) {
11
47
  this.limit = Math.max(1, n);
12
48
  return this;
13
49
  }
14
50
 
51
+ /** @param {number} n - Trigram budget (0-20, default: 6) */
15
52
  withTrigramBudget(n) {
16
53
  this.trigramBudget = Math.max(0, Math.min(20, n));
17
54
  return this;
18
55
  }
19
56
 
57
+ /** @param {string} s - Separator characters (default: '_- :/') */
20
58
  withSeparators(s) {
21
59
  this.separators = s;
22
60
  return this;
23
61
  }
62
+
63
+ /** @param {number} n - Min trigram score (default: 2, min: 1) */
64
+ withMinScore(n) {
65
+ this.minScore = Math.max(1, n);
66
+ return this;
67
+ }
24
68
  }
25
69
 
70
+ /**
71
+ * Fast fuzzy string matcher using word and trigram indexing.
72
+ */
26
73
  export class QuickMatch {
74
+ /**
75
+ * @param {string[]} items - Items to index (should be lowercase)
76
+ * @param {QuickMatchConfig} [config]
77
+ */
27
78
  constructor(items, config = new QuickMatchConfig()) {
28
79
  this.config = config;
29
80
  this.items = items;
81
+ /** @type {Map<string, number[]>} */
30
82
  this.wordIndex = new Map();
83
+ /** @type {Map<string, number[]>} */
31
84
  this.trigramIndex = new Map();
85
+ this._sepLookup = sepLookup(config.separators);
86
+ this._scores = new Uint32Array(items.length);
87
+ /** @type {number[]} */
88
+ this._dirty = [];
32
89
 
33
- let maxWordLength = 0;
34
- let maxQueryLength = 0;
35
- let maxWordCount = 0;
36
-
37
- const { separators } = config;
90
+ let maxWordLen = 0;
91
+ let maxQueryLen = 0;
92
+ let maxWords = 0;
93
+ const sep = this._sepLookup;
38
94
 
39
- for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
40
- const item = items[itemIndex];
41
-
42
- if (item.length > maxQueryLength) {
43
- maxQueryLength = item.length;
44
- }
95
+ for (let idx = 0; idx < items.length; idx++) {
96
+ const item = items[idx];
97
+ if (item.length > maxQueryLen) maxQueryLen = item.length;
45
98
 
46
- let wordCount = 0;
47
- let wordStart = 0;
99
+ const words = [];
100
+ let start = 0;
48
101
 
49
102
  for (let i = 0; i <= item.length; i++) {
50
- const isEndOfWord = i === item.length || separators.includes(item[i]);
51
-
52
- if (isEndOfWord && i > wordStart) {
53
- wordCount++;
54
- const word = item.slice(wordStart, i);
55
-
56
- if (word.length > maxWordLength) {
57
- maxWordLength = word.length;
58
- }
59
-
60
- addToIndex(this.wordIndex, word, itemIndex);
61
- addTrigramsToIndex(this.trigramIndex, word, itemIndex);
62
-
63
- wordStart = i + 1;
64
- } else if (isEndOfWord) {
65
- wordStart = i + 1;
103
+ if (i < item.length && !sep[item.charCodeAt(i)]) continue;
104
+ if (i > start) {
105
+ const word = item.slice(start, i);
106
+ words.push(word);
107
+ if (word.length > maxWordLen) maxWordLen = word.length;
108
+ addToIndex(this.wordIndex, word, idx);
109
+ indexTrigrams(this.trigramIndex, word, idx);
66
110
  }
111
+ start = i + 1;
67
112
  }
68
113
 
69
- if (wordCount > maxWordCount) {
70
- maxWordCount = wordCount;
114
+ for (let i = 0; i < words.length - 1; i++) {
115
+ addToIndex(this.wordIndex, words[i] + words[i + 1], idx);
71
116
  }
117
+
118
+ if (words.length > maxWords) maxWords = words.length;
72
119
  }
73
120
 
74
- this.maxWordLength = maxWordLength + 4;
75
- this.maxQueryLength = maxQueryLength + 6;
76
- this.maxWordCount = maxWordCount + 2;
121
+ this.maxWordLen = maxWordLen + 4;
122
+ this.maxQueryLen = maxQueryLen + 6;
123
+ this.maxWords = maxWords + 2;
77
124
  }
78
125
 
126
+ /** @param {string} query */
79
127
  matches(query) {
80
128
  return this.matchesWith(query, this.config);
81
129
  }
82
130
 
131
+ /**
132
+ * @param {string} query
133
+ * @param {QuickMatchConfig} config
134
+ */
83
135
  matchesWith(query, config) {
84
- const { limit, trigramBudget, separators } = config;
136
+ const { limit, trigramBudget } = config;
137
+ const sep =
138
+ config.separators === this.config.separators
139
+ ? this._sepLookup
140
+ : sepLookup(config.separators);
85
141
 
86
- const normalizedQuery = normalizeQuery(query);
142
+ const q = normalize(query);
143
+ if (!q || q.length > this.maxQueryLen) return [];
87
144
 
88
- if (!normalizedQuery || normalizedQuery.length > this.maxQueryLength) {
89
- return [];
90
- }
145
+ const qwords = splitWords(q, sep, this.maxWordLen);
146
+ if (!qwords.length || qwords.length > this.maxWords) return [];
91
147
 
92
- const queryWords = parseWords(normalizedQuery, separators, this.maxWordLength);
148
+ const known = [];
149
+ const unknown = [];
93
150
 
94
- if (!queryWords.length || queryWords.length > this.maxWordCount) {
95
- return [];
151
+ for (const w of qwords) {
152
+ const hits = this.wordIndex.get(w);
153
+ if (hits) known.push(hits);
154
+ else if (w.length >= 3 && unknown.length < trigramBudget) unknown.push(w);
96
155
  }
97
156
 
98
- const knownWords = [];
99
- const unknownWords = [];
157
+ const pool = intersect(known);
158
+ const hasPool = pool.length > 0;
100
159
 
101
- for (const word of queryWords) {
102
- const matchingItems = this.wordIndex.get(word);
103
-
104
- if (matchingItems) {
105
- knownWords.push(matchingItems);
106
- } else if (word.length >= 3 && unknownWords.length < trigramBudget) {
107
- unknownWords.push(word);
108
- }
160
+ if (!unknown.length || !trigramBudget) {
161
+ if (!hasPool) return [];
162
+ return this._rank(pool, null, qwords, sep, limit);
109
163
  }
110
164
 
111
- const exactMatches = intersectAll(knownWords);
112
- const hasExactMatches = exactMatches.length > 0;
113
- const needsFuzzyMatching = unknownWords.length > 0 && trigramBudget > 0;
114
-
115
- if (!needsFuzzyMatching) {
116
- if (!hasExactMatches) return [];
117
- return this.sortedByLength(exactMatches, limit);
118
- }
119
-
120
- const scores = new Map();
121
-
122
- if (hasExactMatches) {
123
- for (const index of exactMatches) {
124
- scores.set(index, 1);
165
+ // Seed scores from exact-match pool
166
+ const { _scores: scores, _dirty: dirty } = this;
167
+ if (hasPool) {
168
+ for (const i of pool) {
169
+ scores[i] = 1;
170
+ dirty.push(i);
125
171
  }
126
172
  }
127
173
 
128
- const minItemLength = Math.max(0, normalizedQuery.length - 3);
129
-
130
- const trigramCount = this.scoreByTrigrams({
131
- unknownWords,
132
- budget: trigramBudget,
133
- scores,
134
- hasExactMatches,
135
- minItemLength,
136
- });
137
-
138
- const minScoreToInclude = Math.max(1, Math.ceil(trigramCount / 2));
139
-
140
- return this.rankedResults(scores, minScoreToInclude, limit);
174
+ const hitCount = this._scoreTrigrams(
175
+ unknown,
176
+ trigramBudget,
177
+ hasPool,
178
+ Math.max(0, q.length - 3),
179
+ );
180
+ const minScore = Math.max(config.minScore, Math.ceil(hitCount / 2));
181
+ const result = this._rank(dirty, minScore, qwords, sep, limit);
182
+
183
+ for (const i of dirty) scores[i] = 0;
184
+ dirty.length = 0;
185
+ return result;
141
186
  }
142
187
 
143
- scoreByTrigrams({ unknownWords, budget, scores, hasExactMatches, minItemLength }) {
144
- const visitedTrigrams = new Set();
145
- let budgetRemaining = budget;
146
- let hitCount = 0;
147
-
148
- outer:
149
- for (let round = 0; round < budget; round++) {
150
- for (const word of unknownWords) {
151
- if (budgetRemaining <= 0) break outer;
152
-
153
- const position = pickTrigramPosition(word.length, round);
154
- if (position < 0) continue;
155
-
156
- const trigram = word[position] + word[position + 1] + word[position + 2];
157
-
158
- if (visitedTrigrams.has(trigram)) continue;
159
- visitedTrigrams.add(trigram);
160
-
161
- budgetRemaining--;
162
-
163
- const matchingItems = this.trigramIndex.get(trigram);
164
- if (!matchingItems) continue;
165
-
166
- hitCount++;
167
-
168
- for (const itemIndex of matchingItems) {
169
- if (hasExactMatches) {
170
- const currentScore = scores.get(itemIndex);
171
- if (currentScore !== undefined) {
172
- scores.set(itemIndex, currentScore + 1);
188
+ /**
189
+ * @private
190
+ * @param {string[]} unknown
191
+ * @param {number} budget
192
+ * @param {boolean} poolOnly
193
+ * @param {number} minLen
194
+ */
195
+ _scoreTrigrams(unknown, budget, poolOnly, minLen) {
196
+ const visited = new Set();
197
+ const { _scores: scores, _dirty: dirty, items } = this;
198
+ let remaining = budget;
199
+ let hits = 0;
200
+
201
+ outer: for (let round = 0; round < budget; round++) {
202
+ for (const word of unknown) {
203
+ if (remaining <= 0) break outer;
204
+
205
+ const pos = trigramPosition(word.length, round);
206
+ if (pos < 0) continue;
207
+
208
+ const tri = word[pos] + word[pos + 1] + word[pos + 2];
209
+ if (visited.has(tri)) continue;
210
+ visited.add(tri);
211
+ remaining--;
212
+
213
+ const matched = this.trigramIndex.get(tri);
214
+ if (!matched) continue;
215
+ hits++;
216
+
217
+ if (poolOnly) {
218
+ for (let j = 0; j < matched.length; j++) {
219
+ const i = matched[j];
220
+ if (scores[i] > 0) scores[i]++;
221
+ }
222
+ } else {
223
+ for (let j = 0; j < matched.length; j++) {
224
+ const i = matched[j];
225
+ if (items[i].length >= minLen) {
226
+ if (scores[i] === 0) dirty.push(i);
227
+ scores[i]++;
173
228
  }
174
- } else if (this.items[itemIndex].length >= minItemLength) {
175
- scores.set(itemIndex, (scores.get(itemIndex) || 0) + 1);
176
229
  }
177
230
  }
178
231
  }
179
232
  }
180
233
 
181
- return hitCount;
182
- }
183
-
184
- sortedByLength(indices, limit) {
185
- const { items } = this;
186
- indices.sort((a, b) => items[a].length - items[b].length);
187
- if (indices.length > limit) indices.length = limit;
188
- return indices.map(i => items[i]);
234
+ return hits;
189
235
  }
190
236
 
191
- rankedResults(scores, minScore, limit) {
192
- const { items } = this;
237
+ /**
238
+ * Rank candidates by prefix match, then score, then length.
239
+ * @private
240
+ * @param {number[]} indices
241
+ * @param {number|null} minScore - null = no score filtering (exact-match path)
242
+ * @param {string[]} qwords
243
+ * @param {Uint8Array} sep
244
+ * @param {number} limit
245
+ */
246
+ _rank(indices, minScore, qwords, sep, limit) {
247
+ const { items, _scores: scores } = this;
193
248
  const results = [];
194
249
 
195
- for (const [index, score] of scores) {
196
- if (score >= minScore) {
197
- results.push({ index, score });
198
- }
250
+ for (let i = 0; i < indices.length; i++) {
251
+ const idx = indices[i];
252
+ if (minScore !== null && scores[idx] < minScore) continue;
253
+ results.push(idx);
199
254
  }
200
255
 
201
- results.sort((a, b) => {
202
- if (b.score !== a.score) return b.score - a.score;
203
- return items[a.index].length - items[b.index].length;
204
- });
256
+ const pscores = new Uint8Array(items.length);
257
+ for (let i = 0; i < results.length; i++) {
258
+ pscores[results[i]] = prefixScore(items[results[i]], qwords, sep);
259
+ }
205
260
 
206
- if (results.length > limit) results.length = limit;
261
+ results.sort(
262
+ (a, b) =>
263
+ pscores[b] - pscores[a] ||
264
+ scores[b] - scores[a] ||
265
+ items[a].length - items[b].length,
266
+ );
207
267
 
208
- return results.map(r => items[r.index]);
268
+ if (results.length > limit) results.length = limit;
269
+ return results.map((i) => items[i]);
209
270
  }
210
271
  }
211
272
 
212
- function normalizeQuery(query) {
213
- let result = '';
273
+ // --- Helpers ---
274
+
275
+ /** @param {string} query */
276
+ function normalize(query) {
277
+ let out = "";
214
278
  let start = 0;
215
279
  let end = query.length;
216
-
217
280
  while (start < end && query.charCodeAt(start) <= 32) start++;
218
281
  while (end > start && query.charCodeAt(end - 1) <= 32) end--;
219
-
220
282
  for (let i = start; i < end; i++) {
221
- const code = query.charCodeAt(i);
222
- if (code >= 128) continue;
223
- result += code >= 65 && code <= 90 ? String.fromCharCode(code + 32) : query[i];
283
+ const c = query.charCodeAt(i);
284
+ if (c >= 128) continue;
285
+ out += c >= 65 && c <= 90 ? String.fromCharCode(c + 32) : query[i];
224
286
  }
287
+ return out;
288
+ }
225
289
 
226
- return result;
290
+ /** @param {string} separators */
291
+ function sepLookup(separators) {
292
+ const t = new Uint8Array(128);
293
+ for (let i = 0; i < separators.length; i++) {
294
+ const c = separators.charCodeAt(i);
295
+ if (c < 128) t[c] = 1;
296
+ }
297
+ return t;
227
298
  }
228
299
 
229
- function parseWords(text, separators, maxLength) {
300
+ /**
301
+ * @param {string} text
302
+ * @param {Uint8Array} sep
303
+ * @param {number} maxLen
304
+ */
305
+ function splitWords(text, sep, maxLen) {
306
+ /** @type {string[]} */
230
307
  const words = [];
231
308
  let start = 0;
232
-
233
309
  for (let i = 0; i <= text.length; i++) {
234
- const isEnd = i === text.length || separators.includes(text[i]);
235
-
236
- if (isEnd && i > start) {
237
- const word = text.slice(start, i);
238
- if (word.length <= maxLength && !words.includes(word)) {
239
- words.push(word);
240
- }
241
- start = i + 1;
242
- } else if (isEnd) {
243
- start = i + 1;
310
+ if (i < text.length && !sep[text.charCodeAt(i)]) continue;
311
+ if (i > start) {
312
+ const w = text.slice(start, i);
313
+ if (w.length <= maxLen && !words.includes(w)) words.push(w);
244
314
  }
315
+ start = i + 1;
245
316
  }
246
-
247
317
  return words;
248
318
  }
249
319
 
320
+ /**
321
+ * @param {Map<string, number[]>} index
322
+ * @param {string} key
323
+ * @param {number} value
324
+ */
250
325
  function addToIndex(index, key, value) {
251
- const existing = index.get(key);
252
- if (existing) {
253
- existing.push(value);
254
- } else {
255
- index.set(key, [value]);
256
- }
326
+ const arr = index.get(key);
327
+ if (arr) arr.push(value);
328
+ else index.set(key, [value]);
257
329
  }
258
330
 
259
- function addTrigramsToIndex(index, word, itemIndex) {
331
+ /**
332
+ * @param {Map<string, number[]>} index
333
+ * @param {string} word
334
+ * @param {number} idx
335
+ */
336
+ function indexTrigrams(index, word, idx) {
260
337
  if (word.length < 3) return;
261
-
262
338
  for (let i = 0; i <= word.length - 3; i++) {
263
- const trigram = word[i] + word[i + 1] + word[i + 2];
264
- const existing = index.get(trigram);
265
-
266
- if (!existing) {
267
- index.set(trigram, [itemIndex]);
268
- } else if (existing[existing.length - 1] !== itemIndex) {
269
- existing.push(itemIndex);
270
- }
339
+ const tri = word[i] + word[i + 1] + word[i + 2];
340
+ const arr = index.get(tri);
341
+ if (!arr) index.set(tri, [idx]);
342
+ else if (arr[arr.length - 1] !== idx) arr.push(idx);
271
343
  }
272
344
  }
273
345
 
274
- function intersectAll(arrays) {
346
+ /** @param {number[][]} arrays */
347
+ function intersect(arrays) {
275
348
  if (!arrays.length) return [];
276
349
 
277
- let smallestIndex = 0;
350
+ let si = 0;
278
351
  for (let i = 1; i < arrays.length; i++) {
279
- if (arrays[i].length < arrays[smallestIndex].length) {
280
- smallestIndex = i;
281
- }
352
+ if (arrays[i].length < arrays[si].length) si = i;
282
353
  }
283
354
 
284
- const result = arrays[smallestIndex].slice();
285
-
355
+ const result = arrays[si].slice();
286
356
  for (let i = 0; i < arrays.length && result.length > 0; i++) {
287
- if (i === smallestIndex) continue;
288
-
289
- let writeIndex = 0;
357
+ if (i === si) continue;
358
+ let w = 0;
290
359
  for (let j = 0; j < result.length; j++) {
291
- if (binarySearch(arrays[i], result[j])) {
292
- result[writeIndex++] = result[j];
293
- }
360
+ if (bsearch(arrays[i], result[j])) result[w++] = result[j];
294
361
  }
295
- result.length = writeIndex;
362
+ result.length = w;
296
363
  }
297
-
298
364
  return result;
299
365
  }
300
366
 
301
- function binarySearch(sortedArray, value) {
302
- let low = 0;
303
- let high = sortedArray.length - 1;
304
-
305
- while (low <= high) {
306
- const mid = (low + high) >> 1;
307
- const midValue = sortedArray[mid];
308
-
309
- if (midValue === value) return true;
310
- if (midValue < value) low = mid + 1;
311
- else high = mid - 1;
367
+ /**
368
+ * @param {number[]} arr
369
+ * @param {number} val
370
+ */
371
+ function bsearch(arr, val) {
372
+ let lo = 0,
373
+ hi = arr.length - 1;
374
+ while (lo <= hi) {
375
+ const mid = (lo + hi) >> 1;
376
+ if (arr[mid] === val) return true;
377
+ if (arr[mid] < val) lo = mid + 1;
378
+ else hi = mid - 1;
312
379
  }
313
-
314
380
  return false;
315
381
  }
316
382
 
317
- function pickTrigramPosition(wordLength, round) {
318
- const maxPosition = wordLength - 3;
319
- if (maxPosition < 0) return -1;
320
-
321
- if (round === 0) return 0;
322
- if (round === 1 && maxPosition > 0) return maxPosition;
323
- if (round === 2 && maxPosition > 1) return maxPosition >> 1;
324
- if (maxPosition <= 2) return -1;
325
-
326
- const middle = maxPosition >> 1;
327
- const offset = (round - 2) >> 1;
328
- const position = (round & 1) ? Math.max(0, middle - offset) : middle + offset;
329
-
330
- if (position === 0 || position >= maxPosition || position === middle) {
331
- return -1;
383
+ /**
384
+ * 2 = exact match, 1 = prefix match, 0 = no match
385
+ * @param {string} item
386
+ * @param {string[]} qwords
387
+ * @param {Uint8Array} sep
388
+ */
389
+ function prefixScore(item, qwords, sep) {
390
+ let qi = 0,
391
+ pos = 0;
392
+ const len = item.length;
393
+
394
+ while (qi < qwords.length) {
395
+ while (pos < len && sep[item.charCodeAt(pos)]) pos++;
396
+ if (pos >= len) return 0;
397
+
398
+ const ws = pos;
399
+ while (pos < len && !sep[item.charCodeAt(pos)]) pos++;
400
+
401
+ const qw = qwords[qi];
402
+ if (pos - ws !== qw.length) return 0;
403
+ for (let j = 0; j < qw.length; j++) {
404
+ if (item.charCodeAt(ws + j) !== qw.charCodeAt(j)) return 0;
405
+ }
406
+ qi++;
332
407
  }
333
408
 
334
- return position;
409
+ while (pos < len && sep[item.charCodeAt(pos)]) pos++;
410
+ return pos >= len ? 2 : 1;
411
+ }
412
+
413
+ /** @param {number} len @param {number} round */
414
+ function trigramPosition(len, round) {
415
+ const max = len - 3;
416
+ if (max < 0) return -1;
417
+ if (round === 0) return 0;
418
+ if (round === 1 && max > 0) return max;
419
+ if (round === 2 && max > 1) return max >> 1;
420
+ if (max <= 2) return -1;
421
+
422
+ const mid = max >> 1;
423
+ const off = (round - 2) >> 1;
424
+ const pos = round & 1 ? Math.max(0, mid - off) : mid + off;
425
+ return pos === 0 || pos >= max || pos === mid ? -1 : pos;
335
426
  }
package/src/lib.rs CHANGED
@@ -36,16 +36,16 @@ impl<'a> QuickMatch<'a> {
36
36
 
37
37
  for &item in items {
38
38
  max_query_len = max_query_len.max(item.len());
39
- let mut word_count = 0;
40
- for word in item.split(separators) {
41
- word_count += 1;
42
- if word.is_empty() {
43
- continue;
44
- }
39
+ let item_words: Vec<&str> = item.split(separators).filter(|w| !w.is_empty()).collect();
40
+ max_words = max_words.max(item_words.len());
45
41
 
46
- max_word_len = max_word_len.max(item.len());
42
+ for word in &item_words {
43
+ max_word_len = max_word_len.max(word.len());
47
44
 
48
- word_index.entry(word.to_string()).or_default().insert(item);
45
+ word_index
46
+ .entry(word.to_string())
47
+ .or_default()
48
+ .insert(item);
49
49
 
50
50
  if word.len() >= 3 {
51
51
  let chars = word.chars().collect::<Vec<_>>();
@@ -57,13 +57,18 @@ impl<'a> QuickMatch<'a> {
57
57
  }
58
58
  }
59
59
  }
60
- max_words = max_words.max(word_count);
60
+
61
+ // Index adjacent word pairs as compounds (e.g. "hash"+"rate" → "hashrate")
62
+ for pair in item_words.windows(2) {
63
+ let compound = format!("{}{}", pair[0], pair[1]);
64
+ word_index.entry(compound).or_default().insert(item);
65
+ }
61
66
  }
62
67
 
63
68
  Self {
64
69
  max_query_len: max_query_len + 6,
65
70
  max_word_len: max_word_len + 4,
66
- max_word_count: max_word_len + 2,
71
+ max_word_count: max_words + 2,
67
72
  word_index,
68
73
  trigram_index,
69
74
  config,
@@ -71,26 +76,15 @@ impl<'a> QuickMatch<'a> {
71
76
  }
72
77
  }
73
78
 
74
- ///
75
- /// `limit`: max number of returned matches
76
- ///
77
- /// `max_trigrams`: max number of processed trigrams in unknown words (0-10 recommended)
78
- ///
79
79
  pub fn matches(&self, query: &str) -> Vec<&'a str> {
80
80
  self.matches_with(query, &self.config)
81
81
  }
82
82
 
83
- ///
84
- /// `limit`: max number of returned matches
85
- ///
86
- /// `max_trigrams`: max number of processed trigrams in unknown words (0-10 recommended)
87
- ///
88
83
  pub fn matches_with(&self, query: &str, config: &QuickMatchConfig) -> Vec<&'a str> {
89
84
  let limit = config.limit();
90
85
  let trigram_budget = config.trigram_budget();
91
- let query_len = query.len();
92
86
 
93
- if query.is_empty() || query_len > self.max_query_len {
87
+ if query.is_empty() {
94
88
  return vec![];
95
89
  }
96
90
 
@@ -101,22 +95,30 @@ impl<'a> QuickMatch<'a> {
101
95
  .collect::<String>()
102
96
  .to_ascii_lowercase();
103
97
 
104
- let words = query
105
- .split(config.separators())
98
+ if query.is_empty() || query.len() > self.max_query_len {
99
+ return vec![];
100
+ }
101
+
102
+ let separators = config.separators();
103
+
104
+ let query_words: Vec<&str> = query
105
+ .split(separators)
106
106
  .filter(|w| !w.is_empty() && w.len() <= self.max_word_len)
107
- .collect::<FxHashSet<_>>();
107
+ .collect();
108
+
109
+ let words: FxHashSet<&str> = query_words.iter().copied().collect();
108
110
 
109
111
  if words.is_empty() || words.len() > self.max_word_count {
110
112
  return vec![];
111
113
  }
112
114
 
113
- let min_len = query_len.saturating_sub(3);
115
+ let min_len = query.len().saturating_sub(3);
114
116
 
115
117
  let mut pool: Option<FxHashSet<*const str>> = None;
116
118
  let mut unknown_words = Vec::new();
117
119
 
118
120
  let mut words_to_intersect = vec![];
119
- for word in words {
121
+ for &word in &words {
120
122
  if let Some(items) = self.word_index.get(word) {
121
123
  words_to_intersect.push(items)
122
124
  } else if word.len() >= 3 && unknown_words.len() < trigram_budget {
@@ -144,17 +146,23 @@ impl<'a> QuickMatch<'a> {
144
146
  let mut results: Vec<_> = pool
145
147
  .unwrap_or_default()
146
148
  .into_iter()
147
- .map(|item| unsafe { &*item as &str })
149
+ .map(|item| {
150
+ let s = unsafe { &*item as &str };
151
+ (s, prefix_score(s, &query_words, separators))
152
+ })
148
153
  .collect();
149
154
 
155
+ let cmp =
156
+ |a: &(&str, u8), b: &(&str, u8)| b.1.cmp(&a.1).then_with(|| a.0.len().cmp(&b.0.len()));
157
+
150
158
  if results.len() > limit {
151
- results.select_nth_unstable_by_key(limit, |item| item.len());
159
+ results.select_nth_unstable_by(limit, cmp);
152
160
  results.truncate(limit);
153
161
  }
154
162
 
155
- results.sort_unstable_by_key(|item| item.len());
163
+ results.sort_unstable_by(cmp);
156
164
 
157
- return results;
165
+ return results.into_iter().map(|(item, _)| item).collect();
158
166
  }
159
167
 
160
168
  let mut scores: FxHashMap<*const str, usize> = FxHashMap::default();
@@ -232,26 +240,52 @@ impl<'a> QuickMatch<'a> {
232
240
  }
233
241
  }
234
242
 
235
- let min_score = hit_count.div_ceil(2).max(1);
243
+ let min_score = hit_count.div_ceil(2).max(config.min_score());
236
244
  let mut results: Vec<_> = scores
237
245
  .into_iter()
238
246
  .filter(|(_, s)| *s >= min_score)
239
- .map(|(item, score)| (unsafe { &*item as &str }, score))
247
+ .map(|(item, score)| {
248
+ let s = unsafe { &*item as &str };
249
+ (s, score, prefix_score(s, &query_words, separators))
250
+ })
240
251
  .collect();
241
252
 
253
+ let cmp = |a: &(&str, usize, u8), b: &(&str, usize, u8)| {
254
+ b.2.cmp(&a.2)
255
+ .then_with(|| b.1.cmp(&a.1))
256
+ .then_with(|| a.0.len().cmp(&b.0.len()))
257
+ };
258
+
242
259
  if results.len() > limit {
243
- results.select_nth_unstable_by(limit, |a, b| {
244
- b.1.cmp(&a.1).then_with(|| a.0.len().cmp(&b.0.len()))
245
- });
260
+ results.select_nth_unstable_by(limit, cmp);
246
261
  results.truncate(limit);
247
262
  }
248
263
 
249
- results.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.len().cmp(&b.0.len())));
264
+ results.sort_unstable_by(cmp);
250
265
 
251
266
  results
252
267
  .into_iter()
253
268
  .take(limit)
254
- .map(|(item, _)| item)
269
+ .map(|(item, _, _)| item)
255
270
  .collect()
256
271
  }
257
272
  }
273
+
274
+ /// Score how well an item's word sequence matches the query as a prefix.
275
+ /// - 2: exact match (all words match, no extra words in item)
276
+ /// - 1: prefix match (item starts with query words but has more)
277
+ /// - 0: no prefix match
278
+ fn prefix_score(item: &str, query_words: &[&str], separators: &[char]) -> u8 {
279
+ let mut item_words = item.split(separators).filter(|w| !w.is_empty());
280
+ for &qw in query_words {
281
+ match item_words.next() {
282
+ Some(iw) if iw == qw => continue,
283
+ _ => return 0,
284
+ }
285
+ }
286
+ if item_words.next().is_none() {
287
+ 2
288
+ } else {
289
+ 1
290
+ }
291
+ }
package/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 quickmatch
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
package/README.md DELETED
@@ -1,26 +0,0 @@
1
- # quickmatch
2
-
3
- **Lightning-fast fuzzy string matching for Rust.**
4
-
5
- A high-performance string matching library optimized for interactive search experiences like autocomplete, command palettes, and search-as-you-type interfaces.
6
-
7
- [![Crates.io](https://img.shields.io/crates/v/quickmatch.svg)](https://crates.io/crates/quickmatch)
8
- [![Documentation](https://docs.rs/quickmatch/badge.svg)](https://docs.rs/quickmatch)
9
-
10
- ## Features
11
-
12
- - **Blazing fast** - Optimized for sub-millisecond search times
13
- - **Hybrid matching** - Word-level matching with trigram-based fuzzy fallback
14
- - **Memory efficient** - Zero-copy string storage with pointer-based indexing
15
- - **Ranked results** - Intelligent scoring based on match quality
16
- - **Zero external dependencies** - Only uses `rustc-hash` for fast hashing
17
-
18
- ## Installation
19
-
20
- ```bash
21
- # rust
22
- cargo add quickmatch
23
-
24
- # js
25
- npm install quickmatch-js
26
- ```