@yoch/minisearch 8.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2961 @@
1
+ 'use strict';
2
+
3
+ Object.defineProperty(exports, '__esModule', { value: true });
4
+
5
+ /** @ignore Iterator mode: each step yields a `[key, value]` pair (the fallback branch of `TreeIterator.result`). */
6
+ const ENTRIES = 'ENTRIES';
7
+ /** @ignore Iterator mode: each step yields only the key. */
8
+ const KEYS = 'KEYS';
9
+ /** @ignore Iterator mode: each step yields only the value. */
10
+ const VALUES = 'VALUES';
11
+ /** @ignore Reserved radix-tree edge (the empty string) under which a node stores the value of the key ending at that node. */
12
+ const LEAF = '';
13
/**
 * Depth-first iterator over the entries stored in a radix tree.
 *
 * The iterator keeps an explicit stack (`_path`) of `{ node, keys }` frames,
 * where `keys` holds the not-yet-visited edges of `node`. Edges are consumed
 * from the END of each `keys` array, so siblings are visited in reverse
 * insertion order.
 *
 * @private
 */
class TreeIterator {
  /**
   * @param set Object exposing `_tree` (root `Map`) and `_prefix` (string
   *   prepended to every yielded key)
   * @param type One of `ENTRIES`, `KEYS` or `VALUES`
   */
  constructor(set, type) {
    const root = set._tree;
    const rootKeys = Array.from(root.keys());
    this.set = set;
    this._type = type;
    // An empty tree starts with an empty stack, i.e. already exhausted.
    this._path = rootKeys.length > 0 ? [{ node: root, keys: rootKeys }] : [];
  }

  /** Standard iterator protocol step. */
  next() {
    const step = this.dive();
    this.backtrack();
    return step;
  }

  /**
   * Descends along the last pending edge of each frame until a LEAF edge is
   * on top of the stack, then returns the corresponding iterator result.
   */
  dive() {
    while (this._path.length > 0) {
      const { node, keys } = last$1(this._path);
      const edge = last$1(keys);
      if (edge === LEAF) {
        return { done: false, value: this.result() };
      }
      const child = node.get(edge);
      this._path.push({ node: child, keys: Array.from(child.keys()) });
    }
    return { done: true, value: undefined };
  }

  /**
   * Drops the edge that was just visited and pops every frame that has no
   * pending edges left.
   */
  backtrack() {
    while (this._path.length > 0) {
      const { keys } = last$1(this._path);
      keys.pop();
      if (keys.length > 0) {
        return;
      }
      this._path.pop();
    }
  }

  /** Full key of the current entry: view prefix plus all edges on the stack. */
  key() {
    const segments = [];
    for (const { keys } of this._path) {
      const edge = last$1(keys);
      if (edge !== LEAF) {
        segments.push(edge);
      }
    }
    return this.set._prefix + segments.join('');
  }

  /** Value stored under the LEAF edge of the node on top of the stack. */
  value() {
    const { node } = last$1(this._path);
    return node.get(LEAF);
  }

  /** Shapes the current entry according to the iteration mode. */
  result() {
    if (this._type === VALUES) {
      return this.value();
    }
    if (this._type === KEYS) {
      return this.key();
    }
    return [this.key(), this.value()];
  }

  [Symbol.iterator]() {
    return this;
  }
}
73
/** Returns the final element of `array`, or `undefined` when it is empty. */
const last$1 = (array) => array[array.length - 1];
76
+
77
+ /* eslint-disable no-labels */
78
/**
 * Collects the terms stored under `node` whose Levenshtein distance from
 * `query` is at most `maxDistance`.
 *
 * @ignore
 * @param node Root `Map` of the radix tree to search
 * @param query The search string; `undefined` yields an empty result
 * @param maxDistance Maximum accepted edit distance
 * @return A `Map` from matching term to `[value, distance]`
 */
const fuzzySearch = (node, query, maxDistance) => {
  const results = new Map();
  if (query !== undefined) {
    // Number of columns in the Levenshtein matrix.
    const n = query.length + 1;
    // Matching terms can never be longer than N + maxDistance.
    const m = n + maxDistance;
    // Every cell starts "out of reach"; then the first row and the first
    // column are filled with 0 1 2 3 ...
    const matrix = new Uint8Array(m * n).fill(maxDistance + 1);
    for (let col = 0; col < n; ++col) {
      matrix[col] = col;
    }
    for (let row = 1; row < m; ++row) {
      matrix[row * n] = row;
    }
    recurse(node, query, maxDistance, results, matrix, 1, n, '');
  }
  return results;
};
98
// Modified version of http://stevehanov.ca/blog/?id=114
//
// Walks the radix tree while maintaining a single shared Levenshtein matrix
// for the query. Each character of a tree edge adds one row; rows are simply
// overwritten when another branch is explored. Keeping the same matrix around
// is beneficial especially for larger edit distances.
//
//           k   a   t   e   <-- query
//       0   1   2   3   4
//   c   1   1   2   3   4
//   a   2   2   1   2   3
//   t   3   3   2   1  [2]  <-- edit distance
//   ^
//   ^ term in radix tree, rows are added and removed as needed
//
// `m` is the index of the next row to fill, `n` the number of columns and
// `prefix` the term spelled by the edges walked so far.
const recurse = (node, query, maxDistance, results, matrix, m, n, prefix) => {
  // Last cell of the row completed by the parent: distance of `prefix`.
  const leafCell = m * n - 1;
  for (const key of node.keys()) {
    if (key === LEAF) {
      // A term ends here; keep it when its distance is acceptable.
      const distance = matrix[leafCell];
      if (distance <= maxDistance) {
        results.set(prefix, [node.get(key), distance]);
      }
      continue;
    }
    // Add one matrix row per character of the edge. As soon as the smallest
    // value in a row exceeds the limit the whole subtree can be skipped,
    // because distances never decrease further down.
    let row = m;
    let withinReach = true;
    for (let pos = 0; pos < key.length; ++pos, ++row) {
      const char = key[pos];
      const rowStart = n * row;
      const aboveStart = rowStart - n;
      // The first column seeds the minimum of the current row.
      let rowMin = matrix[rowStart];
      // Only the diagonal band that can still be within reach is computed.
      const from = Math.max(0, row - maxDistance - 1);
      const to = Math.min(n - 1, row + maxDistance);
      for (let col = from; col < to; ++col) {
        // All three neighbours are read unconditionally to avoid
        // conditional reads in this hot loop.
        const substitution = matrix[aboveStart + col] + (char !== query[col] ? 1 : 0);
        const deletion = matrix[aboveStart + col + 1] + 1;
        const insertion = matrix[rowStart + col] + 1;
        const cell = Math.min(substitution, deletion, insertion);
        matrix[rowStart + col + 1] = cell;
        if (cell < rowMin) {
          rowMin = cell;
        }
      }
      if (rowMin > maxDistance) {
        withinReach = false;
        break;
      }
    }
    if (withinReach) {
      recurse(node.get(key), query, maxDistance, results, matrix, row, n, prefix + key);
    }
  }
};
159
+
160
+ /* eslint-disable no-labels */
161
/**
 * A class implementing the same interface as a standard JavaScript
 * [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map)
 * with string keys, but adding support for efficiently searching entries with
 * prefix or fuzzy search. This class is used internally by {@link MiniSearch}
 * as the inverted index data structure. The implementation is a radix tree
 * (compressed prefix tree).
 *
 * Although this class could be of general utility beyond _MiniSearch_, it is
 * internal to `@yoch/minisearch` (not a separate public entry point).
 *
 * @typeParam T The type of the values stored in the map.
 */
class SearchableMap {
  /**
   * The constructor is normally called without arguments, creating an empty
   * map. In order to create a {@link SearchableMap} from an iterable or from an
   * object, check {@link SearchableMap.from} and {@link
   * SearchableMap.fromObject}.
   *
   * The constructor arguments are for internal use, when creating derived
   * mutable views of a map at a prefix.
   *
   * @param tree Root `Map` of the radix tree (defaults to a new empty `Map`)
   * @param prefix String prepended to every key yielded by the iterators
   */
  constructor(tree = new Map(), prefix = '') {
    // Cached entry count; `undefined` means "not computed since the last
    // mutation".
    this._size = undefined;
    this._tree = tree;
    this._prefix = prefix;
  }

  /**
   * Root radix tree backing this map. Used when cloning or serializing the full
   * index so {@link Map} key insertion order (prefix / fuzzy / autoSuggest) is preserved.
   */
  get radixTree() {
    return this._tree;
  }

  /**
   * Creates and returns a view of this {@link SearchableMap} containing only
   * the entries that share the given prefix.
   *
   * ### Usage:
   *
   * ```javascript
   * let map = new SearchableMap()
   * map.set("unicorn", 1)
   * map.set("universe", 2)
   * map.set("university", 3)
   * map.set("unique", 4)
   * map.set("hello", 5)
   *
   * let univer = map.atPrefix("univer")
   * Array.from(univer.keys()) // contains "universe" and "university" only
   * ```
   *
   * The iterators of the view yield full keys (the prefix is prepended).
   *
   * NOTE(review): `get`, `has`, `set`, `delete`, `update` and `fetch` hand
   * the key to the view's subtree unchanged, without stripping the prefix, so
   * on a view they appear to expect the key suffix rather than the full key —
   * confirm the intended contract with callers.
   *
   * When the prefix ends in the middle of a tree edge, the view's root is a
   * newly created `Map` wrapping the shared child node. Entries added
   * directly at that root are therefore not reflected in the original map.
   *
   * @param prefix The prefix; must start with this map's own prefix
   * @return A {@link SearchableMap} representing a view of the original Map
   *   at the given prefix (empty when no key has that prefix)
   * @throws Error with message `Mismatched prefix` when `prefix` does not
   *   start with this map's prefix
   */
  atPrefix(prefix) {
    if (!prefix.startsWith(this._prefix)) {
      throw new Error('Mismatched prefix');
    }
    const [node, path] = trackDown(this._tree, prefix.slice(this._prefix.length));
    if (node === undefined) {
      // The prefix does not end on a node boundary: look for an edge of the
      // deepest reached node that continues the unmatched remainder.
      const [parentNode, key] = last(path);
      for (const k of parentNode.keys()) {
        if (k !== LEAF && k.startsWith(key)) {
          const node = new Map();
          node.set(k.slice(key.length), parentNode.get(k));
          return new SearchableMap(node, prefix);
        }
      }
    }
    // `node` may still be undefined here, in which case the constructor
    // default creates an empty tree.
    return new SearchableMap(node, prefix);
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/clear
   */
  clear() {
    this._size = undefined;
    this._tree.clear();
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/delete
   * @param key Key to delete
   * @return Whatever the module-level `remove` helper returns
   */
  delete(key) {
    this._size = undefined;
    return remove(this._tree, key);
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/entries
   * @return An iterator iterating through `[key, value]` entries.
   */
  entries() {
    return new TreeIterator(this, ENTRIES);
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/forEach
   * @param fn Iteration function, called as `fn(key, value, map)`. Note that
   *   the key comes first, unlike `Map.prototype.forEach`.
   */
  forEach(fn) {
    for (const [key, value] of this) {
      fn(key, value, this);
    }
  }

  /**
   * Returns a Map of all the entries that have a key within the given edit
   * distance from the search key. The keys of the returned Map are the matching
   * keys, while the values are two-element arrays where the first element is
   * the value associated to the key, and the second is the edit distance of the
   * key to the search key.
   *
   * ### Usage:
   *
   * ```javascript
   * let map = new SearchableMap()
   * map.set('hello', 'world')
   * map.set('hell', 'yeah')
   * map.set('ciao', 'mondo')
   *
   * // Get all entries that match the key 'hallo' with a maximum edit distance of 2
   * map.fuzzyGet('hallo', 2)
   * // => Map(2) { 'hello' => ['world', 1], 'hell' => ['yeah', 2] }
   *
   * // In the example, the "hello" key has value "world" and edit distance of 1
   * // (change "e" to "a"), the key "hell" has value "yeah" and edit distance of 2
   * // (change "e" to "a", delete "o")
   * ```
   *
   * @param key The search key
   * @param maxEditDistance The maximum edit distance (Levenshtein)
   * @return A Map of the matching keys to their value and edit distance
   */
  fuzzyGet(key, maxEditDistance) {
    return fuzzySearch(this._tree, key, maxEditDistance);
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/get
   * @param key Key to get
   * @return Value associated to the key, or `undefined` if the key is not
   * found.
   */
  get(key) {
    const node = lookup(this._tree, key);
    return node !== undefined ? node.get(LEAF) : undefined;
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/has
   * @param key Key
   * @return True if the key is in the map, false otherwise
   */
  has(key) {
    const node = lookup(this._tree, key);
    return node !== undefined && node.has(LEAF);
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/keys
   * @return An `Iterable` iterating through keys
   */
  keys() {
    return new TreeIterator(this, KEYS);
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/set
   * @param key Key to set
   * @param value Value to associate to the key
   * @return The {@link SearchableMap} itself, to allow chaining
   * @throws Error with message `key must be a string` for non-string keys
   */
  set(key, value) {
    if (typeof key !== 'string') {
      throw new Error('key must be a string');
    }
    this._size = undefined;
    const node = createPath(this._tree, key);
    node.set(LEAF, value);
    return this;
  }

  /**
   * Number of entries in the map. The count is computed by a full traversal
   * on first access and cached until the next mutating call.
   *
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/size
   */
  get size() {
    // Compare against `undefined` rather than using truthiness, so that a
    // cached count of 0 is reused instead of being recomputed every time.
    if (this._size !== undefined) {
      return this._size;
    }
    let count = 0;
    const iter = this.entries();
    while (!iter.next().done) {
      count += 1;
    }
    this._size = count;
    return count;
  }

  /**
   * Updates the value at the given key using the provided function. The function
   * is called with the current value at the key, and its return value is used as
   * the new value to be set.
   *
   * ### Example:
   *
   * ```javascript
   * // Increment the current value by one
   * searchableMap.update('somekey', (currentValue) => currentValue == null ? 0 : currentValue + 1)
   * ```
   *
   * If the value at the given key is or will be an object, it might not require
   * re-assignment. In that case it is better to use `fetch()`, because it is
   * faster.
   *
   * @param key The key to update
   * @param fn The function used to compute the new value from the current one
   * @return The {@link SearchableMap} itself, to allow chaining
   * @throws Error with message `key must be a string` for non-string keys
   */
  update(key, fn) {
    if (typeof key !== 'string') {
      throw new Error('key must be a string');
    }
    this._size = undefined;
    const node = createPath(this._tree, key);
    node.set(LEAF, fn(node.get(LEAF)));
    return this;
  }

  /**
   * Fetches the value of the given key. If the value does not exist, calls the
   * given function to create a new value, which is inserted at the given key
   * and subsequently returned.
   *
   * A stored value of `undefined` is treated as missing, so `initial` is
   * called again in that case.
   *
   * ### Example:
   *
   * ```javascript
   * const map = searchableMap.fetch('somekey', () => new Map())
   * map.set('foo', 'bar')
   * ```
   *
   * @param key The key to update
   * @param initial A function that creates a new value if the key does not exist
   * @return The existing or new value at the given key
   * @throws Error with message `key must be a string` for non-string keys
   */
  fetch(key, initial) {
    if (typeof key !== 'string') {
      throw new Error('key must be a string');
    }
    // createPath may add nodes even when the value already exists, so the
    // cached size is always invalidated.
    this._size = undefined;
    const node = createPath(this._tree, key);
    let value = node.get(LEAF);
    if (value === undefined) {
      node.set(LEAF, value = initial());
    }
    return value;
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/values
   * @return An `Iterable` iterating through values.
   */
  values() {
    return new TreeIterator(this, VALUES);
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/@@iterator
   */
  [Symbol.iterator]() {
    return this.entries();
  }

  /**
   * Creates a {@link SearchableMap} from an `Iterable` of entries
   *
   * @param entries Entries to be inserted in the {@link SearchableMap}
   * @return A new {@link SearchableMap} with the given entries
   */
  static from(entries) {
    const tree = new SearchableMap();
    for (const [key, value] of entries) {
      tree.set(key, value);
    }
    return tree;
  }

  /**
   * Creates a {@link SearchableMap} from the iterable properties of a JavaScript object
   *
   * @param object Object of entries for the {@link SearchableMap}
   * @return A new {@link SearchableMap} with the given entries
   */
  static fromObject(object) {
    return SearchableMap.from(Object.entries(object));
  }
}
452
/**
 * Walks the radix tree along `key`, recording every `[node, edge]` step.
 *
 * @param tree Root `Map` to start from
 * @param key Key (or remaining key suffix) to follow
 * @param path Array the steps are appended to (updated in place for performance)
 * @return `[node, path]`, where `node` is the node reached once the key is
 *   consumed, or `undefined` when no edge matches. In the latter case the
 *   last step of `path` holds the deepest node reached and the unmatched
 *   remainder of the key.
 */
const trackDown = (tree, key, path = []) => {
  let node = tree;
  let rest = key;
  while (rest.length !== 0 && node != null) {
    let edge;
    for (const k of node.keys()) {
      if (k !== LEAF && rest.startsWith(k)) {
        edge = k;
        break;
      }
    }
    if (edge === undefined) {
      path.push([node, rest]);
      return [undefined, path];
    }
    path.push([node, edge]);
    node = node.get(edge);
    rest = rest.slice(edge.length);
  }
  return [node, path];
};
465
/**
 * Finds the node reached by following `key` from `tree`.
 *
 * @param tree Root `Map` to start from (returned as is when nullish)
 * @param key Key to follow
 * @return The node at which `key` is fully consumed, or `undefined` when no
 *   edge matches along the way
 */
const lookup = (tree, key) => {
  let node = tree;
  let rest = key;
  descend: while (rest.length !== 0 && node != null) {
    for (const k of node.keys()) {
      if (k !== LEAF && rest.startsWith(k)) {
        node = node.get(k);
        rest = rest.slice(k.length);
        continue descend;
      }
    }
    return undefined;
  }
  return node;
};
475
// Creates the path for `key` in the radix tree rooted at `node` and returns
// the deepest node (the one that should receive the LEAF entry). Existing
// edges are reused and split where `key` diverges from them. This function
// is in the hot path for indexing: it compares characters in place and
// iterates instead of recursing.
const createPath = (node, key) => {
  const total = key.length;
  let pos = 0;
  while (node && pos < total) {
    // At most one edge can start with the next character of the key.
    let edge;
    for (const k of node.keys()) {
      if (k !== LEAF && key[pos] === k[0]) {
        edge = k;
        break;
      }
    }
    if (edge === undefined) {
      // Nothing to share: hang the remaining suffix on a brand new node.
      const tail = new Map();
      node.set(key.slice(pos), tail);
      return tail;
    }
    // Length of the common prefix of the edge and the remaining key.
    const limit = Math.min(total - pos, edge.length);
    let shared = 1;
    while (shared < limit && key[pos + shared] === edge[shared]) {
      ++shared;
    }
    const child = node.get(edge);
    if (shared === edge.length) {
      // The whole edge matches: simply descend.
      node = child;
    }
    else {
      // Partial match: insert an intermediate node holding the existing
      // subtree under the rest of the old edge.
      const intermediate = new Map();
      intermediate.set(edge.slice(shared), child);
      node.set(key.slice(pos, pos + shared), intermediate);
      node.delete(edge);
      node = intermediate;
    }
    pos += shared;
  }
  return node;
};
514
/**
 * Removes the entry stored at `key` and restores the compressed shape of the
 * tree: a node left empty is pruned, a node left with a single entry is
 * merged into its parent edge. Always returns `undefined`.
 */
const remove = (tree, key) => {
  const [node, path] = trackDown(tree, key);
  if (node === undefined) {
    return;
  }
  node.delete(LEAF);
  switch (node.size) {
    case 0:
      cleanup(path);
      break;
    case 1: {
      const [[onlyKey, onlyValue]] = node.entries();
      merge(path, onlyKey, onlyValue);
      break;
    }
  }
};
528
/**
 * Prunes the edge named by the last step of `path`, then keeps pruning
 * upwards while the ancestors become empty. When an ancestor is left with a
 * single non-LEAF edge, that edge is merged into the ancestor's own parent
 * edge.
 *
 * @param path Array of `[node, edge]` steps as produced by `trackDown`
 */
const cleanup = (path) => {
  let steps = path;
  while (steps.length > 0) {
    const [node, edge] = last(steps);
    node.delete(edge);
    const ancestors = steps.slice(0, -1);
    if (node.size === 0) {
      steps = ancestors;
      continue;
    }
    if (node.size === 1) {
      const [[onlyKey, onlyValue]] = node.entries();
      if (onlyKey !== LEAF) {
        merge(ancestors, onlyKey, onlyValue);
      }
    }
    return;
  }
};
544
/**
 * Replaces the edge named by the last step of `path` with the concatenation
 * of that edge and `key`, pointing directly at `value`. Does nothing for an
 * empty path.
 */
const merge = (path, key, value) => {
  if (path.length > 0) {
    const [parent, edge] = last(path);
    parent.set(edge + key, value);
    parent.delete(edge);
  }
};
552
/** Returns the final element of `array`, or `undefined` when it is empty. */
const last = (array) => array[array.length - 1];
555
+
556
+ const OR = 'or'; // Operator name; key of the union entry in `combinators` and the default for `combineResults`.
557
+ const AND = 'and'; // Operator name; key of the intersection entry in `combinators`.
558
+ const AND_NOT = 'and_not'; // Operator name; key of the difference entry in `combinators`.
559
+ const defaultBM25params = { k: 1.2, b: 0.7, d: 0.5 }; // Default `{ k, b, d }` coefficients, in the shape destructured by `calcBM25Score`.
560
/**
 * Computes the relevance score of one term occurrence list entry.
 *
 * @param termFreq Occurrences of the term in the field of the document
 * @param matchingCount Number of documents matching the term
 * @param totalCount Total number of documents
 * @param fieldLength Length of the field in this document
 * @param avgFieldLength Average length of the field across documents
 * @param bm25params Object with the `k`, `b` and `d` coefficients
 * @return The score: inverse document frequency multiplied by `d` plus the
 *   length-normalised, saturating term frequency
 */
const calcBM25Score = (termFreq, matchingCount, totalCount, fieldLength, avgFieldLength, bm25params) => {
  const { k, b, d } = bm25params;
  const invDocFreq = Math.log(1 + (totalCount - matchingCount + 0.5) / (matchingCount + 0.5));
  // `b` controls how strongly the relative field length affects the score.
  const lengthNorm = 1 - b + b * fieldLength / avgFieldLength;
  // `k` controls how quickly repeated occurrences stop adding score.
  const saturatedFreq = termFreq * (k + 1) / (termFreq + k * lengthNorm);
  return invDocFreq * (d + saturatedFreq);
};
565
/**
 * Reads `object[property]` only when it is an own property, so that members
 * inherited from the prototype chain are reported as `undefined`.
 */
const getOwnProperty = (object, property) => {
  if (Object.prototype.hasOwnProperty.call(object, property)) {
    return object[property];
  }
  return undefined;
};
566
/** Appends `term` to the `target` array unless it is already present. */
const assignUniqueTerm = (target, term) => {
  if (target.includes(term)) {
    return;
  }
  target.push(term);
};
570
/**
 * Appends to the `target` array every term of the iterable `source` that it
 * does not contain yet, preserving the order of `source`.
 */
const assignUniqueTerms = (target, source) => {
  for (const candidate of source) {
    if (target.includes(candidate)) {
      continue;
    }
    target.push(candidate);
  }
};
576
/** Sort comparator ordering objects by their `score` property, highest first. */
const byScore = (left, right) => right.score - left.score;
577
/**
 * Wrap Map<shortId, freq> as PostingListLike.
 *
 * The wrapper is a live view: `size` and `forEachDoc` always reflect the
 * current content of `freqs`.
 */
function mapPostingList(freqs) {
  return {
    get size() {
      return freqs.size;
    },
    forEachDoc(callback) {
      for (const entry of freqs) {
        callback(entry[0], entry[1]);
      }
    }
  };
}
588
/**
 * Wrap Map<fieldId, Map<shortId, freq>> as FieldTermDataLike.
 *
 * `get(fieldId)` returns a posting list wrapper for the field, or
 * `undefined` when the field has no entry (or a nullish one).
 */
function mapFieldTermData(data) {
  return {
    get(fieldId) {
      const freqs = data.get(fieldId);
      if (freqs == null) {
        return undefined;
      }
      return mapPostingList(freqs);
    }
  };
}
597
/**
 * Scores every document containing `derivedTerm` and accumulates the scores
 * into `results`.
 *
 * @param sourceTerm The query term as typed; recorded in each result's `terms`
 * @param derivedTerm The index term actually matched; used as key of `match`
 * @param termWeight Weight factor applied to the score
 * @param termBoost Boost factor applied to the score
 * @param fieldTermData Object whose `get(fieldId)` returns a posting list
 *   (`size`, `forEachDoc`) or a nullish value; when nullish itself, `results`
 *   is returned untouched
 * @param fieldBoosts Object mapping field names to boost factors; only the
 *   fields listed here are scored
 * @param context Index accessors (`fieldIds`, `avgFieldLength`,
 *   `documentCount`, `getFieldLength`, `getExternalId`, `getStoredFields`
 *   and the optional `isDocActive` / `onInactiveDoc`)
 * @param boostDocumentFn Optional per-document boost function; a falsy
 *   return value excludes the document
 * @param bm25params Coefficients forwarded to `calcBM25Score`
 * @param results Map from document id to `{ score, terms, match }`, updated
 *   in place
 * @return The `results` map
 */
function aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTermData, fieldBoosts, context, boostDocumentFn, bm25params, results = new Map()) {
  if (fieldTermData == null) {
    return results;
  }
  for (const field of Object.keys(fieldBoosts)) {
    const fieldBoost = fieldBoosts[field];
    const fieldId = context.fieldIds[field];
    const postingList = fieldTermData.get(fieldId);
    if (postingList == null) {
      continue;
    }
    // Starts at the posting list size and is decremented each time an
    // inactive document is met, so documents scored later in the same list
    // see the reduced count.
    let matchingFields = postingList.size;
    const avgFieldLength = context.avgFieldLength[fieldId];
    postingList.forEachDoc((docId, termFreq) => {
      if (context.isDocActive != null && !context.isDocActive(docId)) {
        const notifyInactive = context.onInactiveDoc;
        if (notifyInactive != null) {
          notifyInactive.call(context, docId, fieldId, derivedTerm);
        }
        matchingFields -= 1;
        return;
      }
      let docBoost = 1;
      if (boostDocumentFn) {
        docBoost = boostDocumentFn(context.getExternalId(docId), derivedTerm, context.getStoredFields(docId));
      }
      if (!docBoost) {
        return;
      }
      const fieldLength = context.getFieldLength(docId, fieldId);
      const rawScore = calcBM25Score(termFreq, matchingFields, context.documentCount, fieldLength, avgFieldLength, bm25params);
      const weightedScore = termWeight * termBoost * fieldBoost * docBoost * rawScore;
      const result = results.get(docId);
      if (!result) {
        results.set(docId, {
          score: weightedScore,
          terms: [sourceTerm],
          match: { [derivedTerm]: [field] }
        });
        return;
      }
      result.score += weightedScore;
      assignUniqueTerm(result.terms, sourceTerm);
      const matchedFields = getOwnProperty(result.match, derivedTerm);
      if (matchedFields) {
        matchedFields.push(field);
      }
      else {
        result.match[derivedTerm] = [field];
      }
    });
  }
  return results;
}
646
/**
 * Functions merging two raw result maps (`docId -> { score, terms, match }`),
 * keyed by operator name. Each one is used as a reducer by `combineResults`.
 */
const combinators = {
  // Union: documents in either map; shared documents add up their scores.
  // Updates and returns `a`.
  [OR]: (a, b) => {
    for (const docId of b.keys()) {
      const incoming = b.get(docId);
      const existing = a.get(docId);
      if (existing == null) {
        a.set(docId, incoming);
        continue;
      }
      existing.score = existing.score + incoming.score;
      existing.match = Object.assign(existing.match, incoming.match);
      assignUniqueTerms(existing.terms, incoming.terms);
    }
    return a;
  },
  // Intersection: only documents present in both maps. Returns a new map,
  // but reuses (and updates) the `terms` and `match` objects held by `a`.
  [AND]: (a, b) => {
    const combined = new Map();
    for (const docId of b.keys()) {
      const existing = a.get(docId);
      if (existing == null) {
        continue;
      }
      const incoming = b.get(docId);
      assignUniqueTerms(existing.terms, incoming.terms);
      combined.set(docId, {
        score: existing.score + incoming.score,
        terms: existing.terms,
        match: Object.assign(existing.match, incoming.match)
      });
    }
    return combined;
  },
  // Difference: removes from `a` every document present in `b`.
  [AND_NOT]: (a, b) => {
    for (const docId of b.keys()) {
      a.delete(docId);
    }
    return a;
  }
};
684
/**
 * Reduces a list of raw result maps into one, using the combinator selected
 * by `combineWith`.
 *
 * @param results Array of result maps (`docId -> { score, terms, match }`)
 * @param combineWith Operator name, case-insensitive (one of the keys of
 *   `combinators`); defaults to `OR`
 * @return The combined map; a new empty `Map` when `results` is empty
 * @throws Error with message `Invalid combination operator: …` when the
 *   operator is not a key of `combinators`
 */
function combineResults(results, combineWith = OR) {
  if (results.length === 0) {
    return new Map();
  }
  const operator = combineWith.toLowerCase();
  // Own-property lookup: a plain `combinators[operator]` would also resolve
  // names inherited from Object.prototype (e.g. "constructor"), letting an
  // invalid operator slip through validation.
  const combinator = getOwnProperty(combinators, operator);
  if (!combinator) {
    throw new Error(`Invalid combination operator: ${combineWith}`);
  }
  return results.reduce(combinator) || new Map();
}
694
/**
 * Turns raw per-document results into the final list of search results.
 *
 * @param params Object with:
 *   - `rawResults`: iterable of `[docId, { score, terms, match }]`
 *   - `getExternalId(docId)`: returns the id exposed in the result
 *   - `getStoredFields(docId)`: returns fields copied onto the result (they
 *     override the computed properties on name clash)
 *   - `filter`: optional predicate; results it rejects are dropped
 *   - `skipSort`: when truthy, results keep the iteration order of
 *     `rawResults` instead of being sorted by descending score
 * @return Array of result objects
 */
function finalizeSearchResults(params) {
  const { rawResults, getExternalId, getStoredFields, filter, skipSort } = params;
  const results = [];
  for (const [docId, raw] of rawResults) {
    // The score is scaled by the number of query terms matched (at least 1).
    const quality = raw.terms.length || 1;
    const result = Object.assign({
      id: getExternalId(docId),
      score: raw.score * quality,
      terms: Object.keys(raw.match),
      queryTerms: raw.terms,
      match: raw.match
    }, getStoredFields(docId));
    const accepted = filter == null || filter(result);
    if (accepted) {
      results.push(result);
    }
  }
  if (!skipSort) {
    results.sort(byScore);
  }
  return results;
}
716
/**
 * Builds a mapper, suitable for `Array.prototype.map`, that turns a query
 * term into a `{ term, fuzzy, prefix, termBoost }` spec.
 *
 * - `options.fuzzy`: a function is called with `(term, i, terms)`; any other
 *   value is used as is, with falsy values normalised to `false`
 * - `options.prefix`: a function is called with `(term, i, terms)`;
 *   otherwise only the literal `true` enables prefix search
 * - `options.boostTerm`: a function is called with `(term, i, terms)`;
 *   otherwise the boost is `1`
 */
const termToQuerySpec = (options) => (term, i, terms) => {
  let fuzzy;
  if (typeof options.fuzzy === 'function') {
    fuzzy = options.fuzzy(term, i, terms);
  }
  else {
    fuzzy = options.fuzzy || false;
  }
  let prefix;
  if (typeof options.prefix === 'function') {
    prefix = options.prefix(term, i, terms);
  }
  else {
    prefix = options.prefix === true;
  }
  let termBoost = 1;
  if (typeof options.boostTerm === 'function') {
    termBoost = options.boostTerm(term, i, terms);
  }
  return { term, fuzzy, prefix, termBoost };
};
728
+
729
+ const MAX_FREQ_UINT8 = 255; // Upper bound applied by `clampFreq`: the largest value an unsigned 8-bit slot can hold.
730
/**
 * View into global flat posting buffers (no per-list allocation).
 *
 * The list covers `length` consecutive slots starting at `offset`; slot `i`
 * pairs `docIds[i]` with `freqs[i]`.
 */
class SegmentPostingList {
  /**
   * @param docIds Indexable buffer of document ids shared by all lists
   * @param freqs Indexable buffer of term frequencies, parallel to `docIds`
   * @param offset Index of the first slot of this list
   * @param length Number of slots in this list
   */
  constructor(docIds, freqs, offset, length) {
    this._docIds = docIds;
    this._freqs = freqs;
    this._offset = offset;
    this._length = length;
  }

  /** Number of documents in this posting list. */
  get size() {
    return this._length;
  }

  /** Calls `callback(docId, termFreq)` for each slot, in buffer order. */
  forEachDoc(callback) {
    const docIds = this._docIds;
    const freqs = this._freqs;
    const start = this._offset;
    const end = start + this._length;
    for (let slot = start; slot < end; slot++) {
      callback(docIds[slot], freqs[slot]);
    }
  }
}
748
/**
 * Clamp term frequency to Uint8 for flat storage.
 * This intentionally caps tf at 255 in frozen indexes; see benchmark scenario
 * "overflow frequencies" to quantify the score drift for very large tf values.
 *
 * @param freq Term frequency
 * @return `freq`, or `MAX_FREQ_UINT8` when `freq` exceeds it
 */
function clampFreq(freq) {
  if (freq > MAX_FREQ_UINT8) {
    return MAX_FREQ_UINT8;
  }
  return freq;
}
756
/**
 * Exposes the flat posting buffers of one term as a field-to-posting-list
 * lookup.
 *
 * The offset and length tables hold `fieldCount` consecutive slots per term,
 * so the slot of `(termIndex, fieldId)` is `termIndex * fieldCount + fieldId`.
 *
 * @param termIndex Index of the term
 * @param fieldCount Number of slots per term
 * @param postingsOffsets Start offset of each slot's postings
 * @param postingsLengths Number of postings in each slot
 * @param allDocIds Shared buffer of document ids
 * @param allFreqs Shared buffer of term frequencies
 * @return Object whose `get(fieldId)` returns a `SegmentPostingList`, or
 *   `undefined` when the slot length is `0`
 */
function flatFieldTermData(termIndex, fieldCount, postingsOffsets, postingsLengths, allDocIds, allFreqs) {
  const base = termIndex * fieldCount;
  return {
    get(fieldId) {
      const slot = base + fieldId;
      const len = postingsLengths[slot];
      if (len === 0) {
        return undefined;
      }
      return new SegmentPostingList(allDocIds, allFreqs, postingsOffsets[slot], len);
    }
  };
}
768
+
769
+ /** Unicode space, newline, or punctuation — used by the default tokenizer. The trailing `+` makes a run of consecutive separators count as a single split point. */
770
+ const SPACE_OR_PUNCTUATION = /[\n\r\p{Z}\p{P}]+/u;
771
+ const defaultSearchOptions = { // NOTE(review): the code consuming these defaults is outside this excerpt; comments below describe only the literal values.
772
+ combineWith: OR, // term results are combined with the `or` operator by default
773
+ prefix: false,
774
+ fuzzy: false,
775
+ maxFuzzy: 6,
776
+ boost: {},
777
+ weights: { fuzzy: 0.45, prefix: 0.375 },
778
+ bm25: defaultBM25params // shared object reference, not a copy
779
+ };
780
+ const defaultAutoSuggestOptions = {
781
+ combineWith: AND,
782
+ prefix: (term, i, terms) => i === terms.length - 1 // true only for the last term of the query
783
+ };
784
+ /** Option defaults applied by {@link FrozenMiniSearch.loadBinary} before caller overrides */
785
+ const defaultFrozenLoadOptions = {
786
+ idField: 'id',
787
+ extractField: (document, fieldName) => document[fieldName], // plain property read
788
+ stringifyField: (fieldValue) => fieldValue.toString(), // throws a TypeError for null or undefined values
789
+ tokenize: (text) => text.split(SPACE_OR_PUNCTUATION), // may yield empty strings when text starts or ends with a separator
790
+ processTerm: (term) => term.toLowerCase(),
791
+ storeFields: [],
792
+ logger: () => { }, // no-op
793
+ autoVacuum: false
794
+ };
795
+
796
+ const BINARY_MAGIC_V1 = 'MSv1'; // NOTE(review): presumably the magic of the older layout read by `decodeMSv1` — its use is outside this excerpt.
797
+ const BINARY_VERSION_V1 = 1; // NOTE(review): use not visible in this excerpt.
798
+ const BINARY_MAGIC_V2 = 'MSv2'; // Written by `encodeFrozenSnapshot` as 4 ASCII bytes at header offset 0.
799
+ const BINARY_VERSION_V2 = 2; // Written by `encodeFrozenSnapshot` as a little-endian uint16 at header offset 4.
800
+ const HEADER_SIZE_V2 = 48; // Byte length of the v2 header; the first section starts at this offset.
801
+ const FREQ_UINT8 = 0; // NOTE(review): presumably a tag for 8-bit frequency storage — use not visible in this excerpt.
802
+ const FREQ_UINT16 = 1; // NOTE(review): presumably a tag for 16-bit frequency storage — use not visible in this excerpt.
803
/**
 * Copies the bytes covered by a typed-array view into a new `Buffer`.
 *
 * @param view Any ArrayBuffer view (`buffer`, `byteOffset`, `byteLength`)
 * @return A `Buffer` holding a copy of exactly the bytes of `view`
 */
function copyView(view) {
  const { buffer, byteOffset, byteLength } = view;
  const bytes = new Uint8Array(buffer, byteOffset, byteLength);
  // Buffer.from(typedArray) copies, so the result is independent of `view`.
  return Buffer.from(bytes);
}
806
/**
 * Returns the `length` bytes of `buf` starting at `offset`, as a view that
 * shares memory with `buf` (no copy).
 *
 * Despite its name, this function does not enforce any alignment: the
 * original code tested `offset % alignment` but returned the same slice in
 * both cases, so that dead branch has been removed. Callers that need an
 * aligned view must check `byteOffset` of the result themselves.
 *
 * @param buf Source `Buffer`
 * @param offset Start offset in bytes
 * @param length Number of bytes; `0` yields a new empty `Buffer`
 * @param alignment Unused; kept so existing call sites remain valid
 * @return A `Buffer` view over the requested range
 */
function alignedSlice(buf, offset, length, alignment) {
  if (length === 0) {
    return Buffer.alloc(0);
  }
  return buf.subarray(offset, offset + length);
}
814
/**
 * Reads `byteLength` bytes at `offset` of `buf` as unsigned 32-bit integers.
 *
 * When the range is fully inside `buf` and starts on a 4-byte boundary of
 * the underlying ArrayBuffer, the result is a zero-copy view; otherwise the
 * values are decoded one by one as little-endian into a new array.
 *
 * @param buf Source `Buffer`
 * @param offset Start offset in bytes
 * @param byteLength Number of bytes to read
 * @return A `Uint32Array` with `byteLength / 4` elements
 */
function readUint32Array(buf, offset, byteLength) {
  if (byteLength === 0) {
    return new Uint32Array(0);
  }
  const slice = alignedSlice(buf, offset, byteLength, 4);
  const canView = slice.byteOffset % 4 === 0 && slice.length === byteLength;
  if (canView) {
    return new Uint32Array(slice.buffer, slice.byteOffset, byteLength / 4);
  }
  const out = new Uint32Array(byteLength / 4);
  for (let i = 0, at = offset; i < out.length; i++, at += 4) {
    out[i] = buf.readUInt32LE(at);
  }
  return out;
}
826
/**
 * Returns `byteLength` bytes at `offset` of `buf` as a `Uint8Array` view
 * over the same underlying ArrayBuffer (no copy).
 *
 * @param buf Source `Buffer`
 * @param offset Start offset in bytes
 * @param byteLength Number of bytes; `0` yields a new empty array
 * @return A `Uint8Array` of `byteLength` elements
 */
function readUint8Array(buf, offset, byteLength) {
  if (byteLength === 0) {
    return new Uint8Array(0);
  }
  const { buffer, byteOffset } = alignedSlice(buf, offset, byteLength, 1);
  return new Uint8Array(buffer, byteOffset, byteLength);
}
832
/**
 * Reads `byteLength` bytes at `offset` of `buf` as 32-bit floats.
 *
 * When the range is fully inside `buf` and starts on a 4-byte boundary of
 * the underlying ArrayBuffer, the result is a zero-copy view; otherwise the
 * values are decoded one by one as little-endian into a new array.
 *
 * @param buf Source `Buffer`
 * @param offset Start offset in bytes
 * @param byteLength Number of bytes to read
 * @return A `Float32Array` with `byteLength / 4` elements
 */
function readFloat32Array(buf, offset, byteLength) {
  if (byteLength === 0) {
    return new Float32Array(0);
  }
  const slice = alignedSlice(buf, offset, byteLength, 4);
  const canView = slice.byteOffset % 4 === 0 && slice.length === byteLength;
  if (canView) {
    return new Float32Array(slice.buffer, slice.byteOffset, byteLength / 4);
  }
  const out = new Float32Array(byteLength / 4);
  for (let i = 0, at = offset; i < out.length; i++, at += 4) {
    out[i] = buf.readFloatLE(at);
  }
  return out;
}
844
/**
 * Serializes a frozen index snapshot into a single `Buffer` (v2 layout).
 *
 * Layout:
 * - header of `HEADER_SIZE_V2` bytes: magic (4 ASCII bytes), version
 *   (uint16 LE at 4), zero (uint16 LE at 6), then from byte 8 the start
 *   offset of each section (uint32 LE) followed by the end offset of the
 *   last section
 * - sections, in order: metadata JSON, `avgFieldLength`,
 *   `fieldLengthMatrix`, term dictionary, `postingsOffsets`,
 *   `postingsLengths`, `allDocIds`, `allFreqs`
 *
 * The term dictionary is the term count (uint32 LE), one byte length per
 * term (uint32 LE), then the UTF-8 bytes of all terms back to back.
 * Sections are not padded, so typed sections may start at unaligned offsets.
 *
 * @param snap Snapshot object providing the properties read below
 * @return The encoded `Buffer`
 */
function encodeFrozenSnapshot(snap) {
  const meta = {
    documentCount: snap.documentCount,
    nextId: snap.nextId,
    fieldCount: snap.fieldCount,
    fieldIds: snap.fieldIds,
    externalIds: snap.externalIds,
    storedFields: snap.storedFields,
    treeShape: snap.treeShape
  };
  const metaJson = Buffer.from(JSON.stringify(meta), 'utf8');
  const avgBuf = copyView(snap.avgFieldLength);
  const flBuf = copyView(snap.fieldLengthMatrix);

  // Term dictionary: count, per-term byte lengths, then the term bytes.
  const termBufs = snap.terms.map((term) => Buffer.from(term, 'utf8'));
  const dictHeader = Buffer.alloc(4 + snap.terms.length * 4);
  dictHeader.writeUInt32LE(snap.terms.length, 0);
  termBufs.forEach((termBuf, i) => {
    dictHeader.writeUInt32LE(termBuf.length, 4 + i * 4);
  });
  const dict = Buffer.concat([dictHeader, ...termBufs]);

  const sections = [
    metaJson,
    avgBuf,
    flBuf,
    dict,
    copyView(snap.postingsOffsets),
    copyView(snap.postingsLengths),
    copyView(snap.allDocIds),
    copyView(snap.allFreqs)
  ];

  const header = Buffer.alloc(HEADER_SIZE_V2);
  header.write(BINARY_MAGIC_V2, 0, 4, 'ascii');
  header.writeUInt16LE(BINARY_VERSION_V2, 4);
  header.writeUInt16LE(0, 6);
  // Offset table: one entry per section, plus the end of the last one.
  let cursor = HEADER_SIZE_V2;
  let slot = 8;
  for (const section of sections) {
    header.writeUInt32LE(cursor, slot);
    cursor += section.length;
    slot += 4;
  }
  header.writeUInt32LE(cursor, slot);
  return Buffer.concat([header, ...sections]);
}
880
/**
 * Decodes a v2 binary snapshot into its component tables.
 *
 * The header holds, from byte 8, nine little-endian uint32 values: the
 * start offsets of the metadata JSON, `avgFieldLength`,
 * `fieldLengthMatrix`, term dictionary, `postingsOffsets`,
 * `postingsLengths`, `allDocIds` and `allFreqs` sections, followed by the
 * end offset of the last one. No validation of magic, version or offsets is
 * performed here.
 *
 * @param buf `Buffer` holding the encoded snapshot
 * @return The snapshot object (scalars and JSON fields from the metadata,
 *   typed arrays for the numeric sections, `terms` as an array of strings)
 */
function decodeMSv2(buf) {
  const [
    metaOff, avgOff, flOff, dictOff, postOffOff, postLenOff, docIdsOff, freqsOff, endOff
  ] = [8, 12, 16, 20, 24, 28, 32, 36, 40].map((at) => buf.readUInt32LE(at));

  const meta = JSON.parse(buf.toString('utf8', metaOff, avgOff));
  const avgFieldLength = readFloat32Array(buf, avgOff, flOff - avgOff);
  const fieldLengthMatrix = readUint32Array(buf, flOff, dictOff - flOff);

  // Term dictionary: count, per-term byte lengths, then the term bytes.
  const termCount = buf.readUInt32LE(dictOff);
  const lengthsAt = dictOff + 4;
  const terms = [];
  let cursor = lengthsAt + termCount * 4;
  for (let i = 0; i < termCount; i++) {
    const end = cursor + buf.readUInt32LE(lengthsAt + i * 4);
    terms.push(buf.toString('utf8', cursor, end));
    cursor = end;
  }

  // One offset/length slot per (term, field) pair.
  const slotBytes = termCount * meta.fieldCount * 4;
  const postingsOffsets = readUint32Array(buf, postOffOff, slotBytes);
  const postingsLengths = readUint32Array(buf, postLenOff, slotBytes);
  const allDocIds = readUint32Array(buf, docIdsOff, freqsOff - docIdsOff);
  const allFreqs = readUint8Array(buf, freqsOff, endOff - freqsOff);

  const { documentCount, nextId, fieldIds, fieldCount, externalIds, storedFields, treeShape } = meta;
  return {
    documentCount,
    nextId,
    fieldIds,
    fieldCount,
    avgFieldLength,
    externalIds,
    storedFields,
    fieldLengthMatrix,
    terms,
    treeShape,
    postingsOffsets,
    postingsLengths,
    allDocIds,
    allFreqs
  };
}
923
/**
 * Decode a version-1 frozen snapshot into the same shape decodeMSv2 returns.
 *
 * Postings are stored per term: a uint16 count of encoded fields, then for
 * each field a uint32 that is read and discarded, a uint32 doc count, one
 * frequency-kind byte, the doc ids as uint32 values and the frequencies in the
 * width selected by the kind byte. Frequencies above 255 are clamped to 255 so
 * they fit the Uint8Array used for `allFreqs`.
 *
 * @param buf Buffer holding the whole snapshot.
 * @returns The decoded snapshot with flat postings arrays.
 */
function decodeMSv1(buf) {
    const [metaStart, avgStart, matrixStart, dictStart, postingsStart] =
        [8, 12, 16, 20, 24].map((pos) => buf.readUInt32LE(pos));
    const meta = JSON.parse(buf.toString('utf8', metaStart, avgStart));
    const avgFieldLength = readFloat32Array(buf, avgStart, matrixStart - avgStart);
    const fieldLengthMatrix = readUint32Array(buf, matrixStart, dictStart - matrixStart);
    // Dictionary: term count, one uint32 byte length per term, then the UTF-8 bytes.
    const termCount = buf.readUInt32LE(dictStart);
    const lengthTable = dictStart + 4;
    const terms = [];
    let cursor = lengthTable + termCount * 4;
    for (let t = 0; t < termCount; t++) {
        const end = cursor + buf.readUInt32LE(lengthTable + t * 4);
        terms.push(buf.toString('utf8', cursor, end));
        cursor = end;
    }
    const { fieldCount } = meta;
    const slotCount = termCount * fieldCount;
    const postingsOffsets = new Uint32Array(slotCount);
    const postingsLengths = new Uint32Array(slotCount);
    const flatDocIds = [];
    const flatFreqs = [];
    cursor = postingsStart;
    for (let ti = 0; ti < termCount; ti++) {
        const encodedFields = buf.readUInt16LE(cursor);
        cursor += 2;
        for (let f = 0; f < encodedFields; f++) {
            const slot = ti * fieldCount + f;
            // matchCount — same as docLen, so the value itself is not used.
            buf.readUInt32LE(cursor);
            const docLen = buf.readUInt32LE(cursor + 4);
            cursor += 8;
            postingsLengths[slot] = docLen;
            if (docLen === 0) {
                postingsOffsets[slot] = 0;
                // Presumably the kind byte, which is stored even for empty postings.
                cursor += 1;
                continue;
            }
            postingsOffsets[slot] = flatDocIds.length;
            const kind = buf.readUInt8(cursor);
            cursor += 1;
            for (let d = 0; d < docLen; d++) {
                flatDocIds.push(buf.readUInt32LE(cursor + d * 4));
            }
            cursor += docLen * 4;
            const freqWidth = kind === FREQ_UINT8 ? 1 : kind === FREQ_UINT16 ? 2 : 4;
            for (let d = 0; d < docLen; d++) {
                const at = cursor + d * freqWidth;
                const raw = freqWidth === 1
                    ? buf.readUInt8(at)
                    : freqWidth === 2
                        ? buf.readUInt16LE(at)
                        : buf.readUInt32LE(at);
                flatFreqs.push(Math.min(raw, 255));
            }
            cursor += docLen * freqWidth;
        }
    }
    return {
        documentCount: meta.documentCount,
        nextId: meta.nextId,
        fieldIds: meta.fieldIds,
        fieldCount,
        avgFieldLength,
        externalIds: meta.externalIds,
        storedFields: meta.storedFields,
        fieldLengthMatrix,
        terms,
        treeShape: meta.treeShape,
        postingsOffsets,
        postingsLengths,
        allDocIds: new Uint32Array(flatDocIds),
        allFreqs: new Uint8Array(flatFreqs)
    };
}
1000
/**
 * Decode a frozen snapshot, choosing the decoder from the header.
 *
 * The first four bytes are read as an ASCII magic string and bytes 4-5 as a
 * little-endian uint16 version.
 *
 * @param buf Buffer holding the whole snapshot.
 * @returns The decoded snapshot from decodeMSv2 or decodeMSv1.
 * @throws Error when the magic/version pair matches neither known format.
 */
function decodeFrozenSnapshot(buf) {
    const magic = buf.toString('ascii', 0, 4);
    const version = buf.readUInt16LE(4);
    const isFormat = (expectedMagic, expectedVersion) => magic === expectedMagic && version === expectedVersion;
    if (isFormat(BINARY_MAGIC_V2, BINARY_VERSION_V2)) {
        return decodeMSv2(buf);
    }
    if (isFormat(BINARY_MAGIC_V1, BINARY_VERSION_V1)) {
        return decodeMSv1(buf);
    }
    throw new Error(`Invalid frozen index: magic=${magic} version=${version}`);
}
1011
/**
 * Rebuild a radix tree of nested Maps from its serialized `[key, value]` pairs.
 *
 * A pair keyed by LEAF keeps its value as is; any other pair holds a nested
 * shape that is rebuilt recursively.
 *
 * @param shape Iterable of `[key, value]` pairs.
 * @returns A Map mirroring the shape.
 */
function deserializeTermIndexTree(shape) {
    const entries = Array.from(shape, ([key, value]) => [
        key,
        key === LEAF ? value : deserializeTermIndexTree(value)
    ]);
    return new Map(entries);
}
1023
/**
 * Flatten a radix tree of nested Maps into nested arrays of `[key, value]` pairs.
 *
 * The value stored under LEAF is copied as is; every other value is a child
 * Map that is serialized recursively.
 *
 * @param tree Map to serialize.
 * @returns Array of `[key, value]` pairs in the Map's iteration order.
 */
function serializeTermIndexTree(tree) {
    return Array.from(tree, ([key, val]) => [
        key,
        key === LEAF ? val : serializeTermIndexTree(val)
    ]);
}
1035
+
1036
/**
 * Merge user options over the defaults used when building a frozen index.
 *
 * `searchOptions` and `autoSuggestOptions` are merged one level deep with
 * their own defaults; all other keys are taken from `options` when present.
 *
 * @param options User options; `fields` is mandatory.
 * @returns A new options object with defaults applied.
 * @throws Error when `options` or `options.fields` is null or undefined.
 */
function resolveIndexingOptions(options) {
    const fields = options == null ? undefined : options.fields;
    if (fields == null) {
        throw new Error('MiniSearch: option "fields" must be provided');
    }
    const searchOptions = { ...defaultSearchOptions, ...(options.searchOptions || {}) };
    const autoSuggestOptions = { ...defaultAutoSuggestOptions, ...(options.autoSuggestOptions || {}) };
    return {
        ...defaultFrozenLoadOptions,
        ...options,
        searchOptions,
        autoSuggestOptions
    };
}
1047
/**
 * Map each field name to its position in the `fields` array.
 *
 * @param fields Ordered list of field names.
 * @returns Object keyed by field name; a repeated name keeps its last position.
 */
function buildFieldIds(fields) {
    const idsByName = {};
    for (const [position, name] of fields.entries()) {
        idsByName[name] = position;
    }
    return idsByName;
}
1054
/**
 * Count how often each processed term occurs among the tokens of one field.
 *
 * `processTerm` may return an array (every element is counted), a single
 * truthy term (counted once) or a falsy value (the token is dropped).
 *
 * @param tokens Iterable of raw tokens for the field.
 * @param fieldName Name of the field, forwarded to `processTerm`.
 * @param processTerm Term processor called as `processTerm(token, fieldName)`.
 * @returns Map from processed term to its occurrence count.
 */
function collectFieldTermFreqs(tokens, fieldName, processTerm) {
    const counts = new Map();
    const increment = (term) => {
        counts.set(term, (counts.get(term) || 0) + 1);
    };
    for (const token of tokens) {
        const processed = processTerm(token, fieldName);
        if (Array.isArray(processed)) {
            for (const expanded of processed) {
                increment(expanded);
            }
            continue;
        }
        if (processed) {
            increment(processed);
        }
    }
    return counts;
}
1070
/**
 * Fold one more field length into a running average, in place.
 *
 * Uses the same formula as the private addFieldLength of {@link MiniSearch}:
 * the previous average (0 when absent) is weighted by `count`, the new length
 * is added, and the sum is divided by `count + 1`.
 *
 * @param avgFieldLength Array of per-field averages; mutated.
 * @param fieldId Index of the field to update.
 * @param count Number of documents already included in the average.
 * @param length Field length of the document being added.
 */
function updateAvgFieldLength(avgFieldLength, fieldId, count, length) {
    const previousAverage = avgFieldLength[fieldId] || 0;
    const runningTotal = (previousAverage * count) + length;
    avgFieldLength[fieldId] = runningTotal / (count + 1);
}
1076
/**
 * Extract the fields that should be stored alongside a document.
 *
 * @param storeFields Names of the fields to store.
 * @param extractField Accessor called as `extractField(document, fieldName)`.
 * @param document Source document.
 * @returns `undefined` when `storeFields` is empty; otherwise an object with
 *   every field whose extracted value is not `undefined`.
 */
function saveStoredFieldsForDocument(storeFields, extractField, document) {
    if (storeFields.length === 0) {
        return undefined;
    }
    const kept = {};
    for (const name of storeFields) {
        const value = extractField(document, name);
        if (value === undefined) {
            continue;
        }
        kept[name] = value;
    }
    return kept;
}
1087
+
1088
/**
 * Look up the numeric index of a term, registering the term when it is new.
 *
 * A new term is appended to `builder.terms` and its position is recorded in
 * `builder.index`.
 *
 * @param builder Index builder holding `index` and `terms`.
 * @param term Processed term.
 * @returns The term's position in `builder.terms`.
 */
function getOrCreateTermIndex(builder, term) {
    const { index, terms } = builder;
    let termIndex = index.get(term);
    if (termIndex == null) {
        termIndex = terms.length;
        terms.push(term);
        index.set(term, termIndex);
    }
    return termIndex;
}
1097
/**
 * Record that a document contains a term in a given field.
 *
 * Postings are grouped per slot, where slot = termIndex * fieldCount + fieldId.
 * The doc id and the frequency (passed through clampFreq) are appended to the
 * slot's two parallel lists, which are created on first use.
 *
 * @param builder Index builder holding `fieldCount`, `postingsDocIds` and `postingsFreqs`.
 * @param termIndex Index of the term.
 * @param fieldId Index of the field.
 * @param docId Short id of the document.
 * @param freq Number of occurrences of the term in the field.
 */
function appendPosting(builder, termIndex, fieldId, docId, freq) {
    const slot = termIndex * builder.fieldCount + fieldId;
    if (builder.postingsDocIds[slot] == null) {
        builder.postingsDocIds[slot] = [];
        builder.postingsFreqs[slot] = [];
    }
    builder.postingsDocIds[slot].push(docId);
    builder.postingsFreqs[slot].push(clampFreq(freq));
}
1110
/**
 * Concatenate the builder's per-slot posting lists into flat typed arrays.
 *
 * Slots are visited in increasing order (term-major, field-minor). Each slot
 * gets the offset at which its postings start in the flat arrays and the
 * number of postings it holds; an empty slot has length 0.
 *
 * @param builder Index builder holding `terms`, `fieldCount`, `postingsDocIds` and `postingsFreqs`.
 * @returns `postingsOffsets`, `postingsLengths`, `allDocIds` and `allFreqs`.
 */
function finalizeFlatPostings(builder) {
    const slotCount = builder.terms.length * builder.fieldCount;
    const postingsOffsets = new Uint32Array(slotCount);
    const postingsLengths = new Uint32Array(slotCount);
    const flatDocIds = [];
    const flatFreqs = [];
    for (let slot = 0; slot < slotCount; slot++) {
        const docIds = builder.postingsDocIds[slot];
        const freqs = builder.postingsFreqs[slot];
        postingsOffsets[slot] = flatDocIds.length;
        if (docIds == null || docIds.length === 0) {
            // Typed arrays start zeroed, so the length is already 0.
            continue;
        }
        for (let i = 0; i < docIds.length; i++) {
            flatDocIds.push(docIds[i]);
            flatFreqs.push(freqs[i]);
        }
        postingsLengths[slot] = docIds.length;
    }
    return {
        postingsOffsets,
        postingsLengths,
        allDocIds: new Uint32Array(flatDocIds),
        allFreqs: new Uint8Array(flatFreqs)
    };
}
1143
/**
 * Index a single document into the builder under the given short id.
 *
 * Registers the external id and stored fields, then for every configured
 * field with a non-null value: tokenizes it, records the field length (the
 * number of distinct raw tokens), updates the running average length and
 * appends one posting per processed term.
 *
 * @param builder Index builder being filled.
 * @param document Document to index.
 * @param shortId Numeric id assigned to the document.
 * @throws Error when the document has no id or its id was already indexed.
 */
function indexDocument(builder, document, shortId) {
    const { extractField, stringifyField, tokenize, processTerm, fields, idField, storeFields } = builder.options;
    const id = extractField(document, idField);
    if (id == null) {
        throw new Error(`MiniSearch: document does not have ID field "${idField}"`);
    }
    if (builder.idToShortId.has(id)) {
        throw new Error(`MiniSearch: duplicate ID ${id}`);
    }
    builder.idToShortId.set(id, shortId);
    builder.externalIds[shortId] = id;
    builder.storedFields[shortId] = saveStoredFieldsForDocument(storeFields, extractField, document);
    // Short ids are assigned sequentially by the caller, so `shortId` documents precede this one.
    const precedingDocuments = shortId;
    const matrixRow = shortId * builder.fieldCount;
    for (const field of fields) {
        const rawValue = extractField(document, field);
        if (rawValue == null) {
            continue;
        }
        const tokens = tokenize(stringifyField(rawValue, field), field);
        const fieldId = builder.fieldIds[field];
        const distinctTokens = new Set(tokens).size;
        const termFreqs = collectFieldTermFreqs(tokens, field, processTerm);
        builder.fieldLengthMatrix[matrixRow + fieldId] = distinctTokens;
        updateAvgFieldLength(builder.avgFieldLength, fieldId, precedingDocuments, distinctTokens);
        termFreqs.forEach((freq, term) => {
            appendPosting(builder, getOrCreateTermIndex(builder, term), fieldId, shortId, freq);
        });
    }
}
1172
/**
 * Create an empty index builder for a known number of documents.
 *
 * @param options Resolved indexing options; `options.fields` defines the field ids.
 * @param documentCount Number of documents that will be indexed; sizes the
 *   per-document arrays and the field-length matrix.
 * @returns A builder with empty term and postings tables.
 */
function createBuilder(options, documentCount) {
    const { fields } = options;
    const fieldCount = fields.length;
    const builder = {
        options,
        fieldIds: buildFieldIds(fields),
        fieldCount,
        documentCount,
        index: new SearchableMap(),
        terms: [],
        postingsDocIds: [],
        postingsFreqs: [],
        externalIds: new Array(documentCount),
        idToShortId: new Map(),
        storedFields: new Array(documentCount),
        fieldLengthMatrix: new Uint32Array(documentCount * fieldCount),
        avgFieldLength: []
    };
    return builder;
}
1190
/**
 * Index a list of documents and return the parts needed to assemble a frozen index.
 *
 * Documents receive short ids equal to their position in `documents`.
 *
 * @param documents Array of documents to index.
 * @param options Indexing options; resolved with resolveIndexingOptions.
 * @returns Constructor parameters for the frozen index. Average field lengths
 *   are returned as a Float32Array with 0 for fields that never had a value.
 */
function buildFrozenParamsFromDocuments(documents, options) {
    const resolved = resolveIndexingOptions(options);
    const documentCount = documents.length;
    const builder = createBuilder(resolved, documentCount);
    for (let shortId = 0; shortId < documentCount; shortId++) {
        indexDocument(builder, documents[shortId], shortId);
    }
    const { postingsOffsets, postingsLengths, allDocIds, allFreqs } = finalizeFlatPostings(builder);
    const avgFieldLength = Float32Array.from({ length: builder.fieldCount }, (_, fieldId) => {
        const average = builder.avgFieldLength[fieldId];
        return average == null ? 0 : average;
    });
    return {
        options: resolved,
        documentCount,
        nextId: documentCount,
        fieldIds: builder.fieldIds,
        fieldCount: builder.fieldCount,
        externalIds: builder.externalIds,
        idToShortId: builder.idToShortId,
        storedFields: builder.storedFields,
        fieldLengthMatrix: builder.fieldLengthMatrix,
        avgFieldLength,
        index: builder.index,
        terms: builder.terms,
        postingsOffsets,
        postingsLengths,
        allDocIds,
        allFreqs
    };
}
1222
+
1223
/** Wildcard query symbol shared by MiniSearch and FrozenMiniSearch. */
const WILDCARD_QUERY = Symbol('*');

/** Message of the error raised by throwReadOnly. */
const READ_ONLY_MSG = 'FrozenMiniSearch is read-only. Rebuild from a mutable MiniSearch instance.';
/** Per-node byte cost assumed when estimating the size of a radix tree of Maps. */
const MAP_NODE_ESTIMATE_BYTES = 120;
/**
 * Reject a mutation attempt on a frozen index.
 *
 * @throws Error always, carrying READ_ONLY_MSG.
 */
function throwReadOnly() {
    const readOnlyError = new Error(READ_ONLY_MSG);
    throw readOnlyError;
}
1231
/**
 * Deep-copy a radix tree, replacing every leaf value with its term index.
 *
 * @param tree Source tree of nested Maps.
 * @param termIndexByLeaf Lookup from an original leaf value to its term index.
 * @returns A new tree with the same keys whose LEAF entries hold term indexes.
 * @throws Error when a leaf value has no entry in `termIndexByLeaf`.
 */
function cloneRadixTreeWithTermIndex(tree, termIndexByLeaf) {
    const copy = new Map();
    for (const [key, val] of tree) {
        if (key !== LEAF) {
            copy.set(key, cloneRadixTreeWithTermIndex(val, termIndexByLeaf));
            continue;
        }
        const termIndex = termIndexByLeaf.get(val);
        if (termIndex == null) {
            throw new Error('FrozenMiniSearch: missing term index while cloning tree');
        }
        copy.set(LEAF, termIndex);
    }
    return copy;
}
1247
/**
 * Count the Map nodes that make up a radix tree, including the root.
 *
 * Values stored under LEAF are not nodes and are not counted.
 *
 * @param tree Root Map of the tree.
 * @returns Number of Maps reachable from `tree`, itself included.
 */
function countRadixMapNodes(tree) {
    let nodeCount = 0;
    const pending = [tree];
    while (pending.length > 0) {
        const node = pending.pop();
        nodeCount += 1;
        for (const [key, child] of node) {
            if (key !== LEAF) {
                pending.push(child);
            }
        }
    }
    return nodeCount;
}
1255
/**
 * Function form of the `memoryBreakdown` method of a frozen index.
 *
 * @param frozen Object exposing a `memoryBreakdown()` method.
 * @returns Whatever `frozen.memoryBreakdown()` returns.
 */
function frozenMemoryBreakdown(frozen) {
    const breakdown = frozen.memoryBreakdown();
    return breakdown;
}
1258
/**
 * Instantiate {@link FrozenMiniSearch} from pre-built flat index parts.
 *
 * @param params Constructor parameters for the frozen index.
 * @returns The new FrozenMiniSearch instance.
 */
function assembleFrozen(params) {
    const frozen = new FrozenMiniSearch(params);
    return frozen;
}
1262
/**
 * Flatten the postings of a mutable index into typed arrays.
 *
 * Terms are numbered in the iteration order of `source._index`. For each
 * (term, field) slot the doc ids and clamped frequencies are appended to the
 * flat arrays, and the slot's start offset and posting count are recorded.
 *
 * @param source Mutable index exposing `_index` (iterable of `[term, fieldIndex]`
 *   pairs with a `radixTree` property).
 * @param fieldCount Number of indexed fields.
 * @param shortIdRemap Optional table mapping original short ids to dense ones;
 *   entries equal to 0xffffffff mark documents to leave out. Pass null to keep
 *   short ids unchanged.
 * @returns `terms`, the cloned `tree` holding term indexes, and the flat postings arrays.
 */
function buildFlatPostingsFromSource(source, fieldCount, shortIdRemap) {
    const terms = [];
    const fieldIndexes = [];
    const leafToIndex = new WeakMap();
    for (const [term, fieldIndex] of source._index) {
        leafToIndex.set(fieldIndex, terms.length);
        terms.push(term);
        fieldIndexes.push(fieldIndex);
    }
    const slotCount = terms.length * fieldCount;
    const postingsOffsets = new Uint32Array(slotCount);
    const postingsLengths = new Uint32Array(slotCount);
    const flatDocIds = [];
    const flatFreqs = [];
    for (const fieldIndex of fieldIndexes) {
        const firstSlot = leafToIndex.get(fieldIndex) * fieldCount;
        for (let f = 0; f < fieldCount; f++) {
            const slot = firstSlot + f;
            const start = flatDocIds.length;
            const freqs = fieldIndex.get(f);
            let kept = 0;
            if (freqs != null && freqs.size !== 0) {
                for (const [shortId, freq] of freqs) {
                    const docId = shortIdRemap != null ? shortIdRemap[shortId] : shortId;
                    // Skip discarded docs when dense remapping is enabled. This prevents
                    // invalid docIds (no externalId) from leaking into frozen search results.
                    if (docId === 0xffffffff) {
                        continue;
                    }
                    flatDocIds.push(docId);
                    flatFreqs.push(clampFreq(freq));
                    kept++;
                }
            }
            postingsOffsets[slot] = start;
            postingsLengths[slot] = kept;
        }
    }
    const allDocIds = new Uint32Array(flatDocIds);
    const allFreqs = new Uint8Array(flatFreqs);
    const tree = cloneRadixTreeWithTermIndex(source._index.radixTree, leafToIndex);
    return { terms, tree, postingsOffsets, postingsLengths, allDocIds, allFreqs };
}
1307
/**
 * Build a read-only frozen index from a mutable MiniSearch instance.
 *
 * When the source has fewer live documents than allocated short ids
 * (`_documentCount < _nextId`), live documents are renumbered densely from 0
 * in the iteration order of `_documentIds`, and postings of documents without
 * a dense id are dropped. Otherwise short ids are kept unchanged.
 *
 * The average field length array always covers every indexed field; fields
 * with no recorded average get 0, the same default used for missing field
 * lengths and by buildFrozenParamsFromDocuments.
 *
 * @param source Mutable index to freeze; it is only read.
 * @returns The assembled frozen index.
 */
function freezeFromMiniSearch(source) {
    const fieldCount = source._options.fields.length;
    const { _documentCount, _nextId } = source;
    const useDense = _documentCount < _nextId;
    const rowCount = useDense ? _documentCount : _nextId;
    const externalIds = new Array(rowCount);
    const storedFields = new Array(rowCount);
    const idToShortId = new Map();
    let shortIdRemap = null;
    if (useDense) {
        // 0xffffffff marks short ids that have no live document.
        shortIdRemap = new Uint32Array(_nextId);
        shortIdRemap.fill(0xffffffff);
    }
    let nextDenseId = 0;
    for (const [shortId, id] of source._documentIds) {
        let row = shortId;
        if (shortIdRemap != null) {
            row = nextDenseId++;
            shortIdRemap[shortId] = row;
        }
        externalIds[row] = id;
        idToShortId.set(id, row);
        storedFields[row] = source._storedFields.get(shortId);
    }
    const fieldLengthMatrix = new Uint32Array(rowCount * fieldCount);
    for (const [shortId, lengths] of source._fieldLength) {
        const row = shortIdRemap != null ? shortIdRemap[shortId] : shortId;
        if (row === 0xffffffff) {
            continue;
        }
        for (let f = 0; f < fieldCount; f++) {
            const length = lengths[f];
            fieldLengthMatrix[row * fieldCount + f] = length == null ? 0 : length;
        }
    }
    // Size by field count so every field has an entry, and default holes to 0
    // instead of letting `undefined` turn into NaN inside the Float32Array.
    const sourceAverages = source._avgFieldLength;
    const avgFieldLength = new Float32Array(Math.max(fieldCount, sourceAverages.length));
    for (let f = 0; f < sourceAverages.length; f++) {
        const average = sourceAverages[f];
        avgFieldLength[f] = average == null ? 0 : average;
    }
    const flat = buildFlatPostingsFromSource(source, fieldCount, shortIdRemap);
    const frozenIndex = new SearchableMap(flat.tree);
    return assembleFrozen({
        options: source._options,
        documentCount: _documentCount,
        nextId: rowCount,
        fieldIds: source._fieldIds,
        fieldCount,
        externalIds,
        idToShortId,
        storedFields,
        fieldLengthMatrix,
        avgFieldLength,
        index: frozenIndex,
        terms: flat.terms,
        postingsOffsets: flat.postingsOffsets,
        postingsLengths: flat.postingsLengths,
        allDocIds: flat.allDocIds,
        allFreqs: flat.allFreqs
    });
}
1370
/**
 * Build a frozen index directly from documents.
 *
 * @param documents Documents to index.
 * @param options Indexing options, forwarded to buildFrozenParamsFromDocuments.
 * @returns The frozen index produced by assembleFrozen.
 */
function buildFrozenFromDocuments(documents, options) {
    const params = buildFrozenParamsFromDocuments(documents, options);
    return assembleFrozen(params);
}
1373
/**
 * Read-only search index whose postings are stored in flat typed arrays.
 *
 * Instances are assembled from pre-built parts: by freezing a mutable index,
 * by indexing documents in one pass ({@link FrozenMiniSearch.fromDocuments}),
 * or by loading a binary snapshot ({@link FrozenMiniSearch.loadBinary}).
 * Every mutating method throws.
 */
class FrozenMiniSearch {
    /**
     * @param params Pre-built index parts; each one is stored as is, without copying.
     */
    constructor(params) {
        this._options = params.options;
        this._documentCount = params.documentCount;
        this._nextId = params.nextId;
        this._externalIds = params.externalIds;
        this._idToShortId = params.idToShortId;
        this._fieldIds = params.fieldIds;
        this._fieldCount = params.fieldCount;
        this._fieldLengthMatrix = params.fieldLengthMatrix;
        this._avgFieldLength = params.avgFieldLength;
        this._storedFields = params.storedFields;
        this._index = params.index;
        this._terms = params.terms;
        this._postingsOffsets = params.postingsOffsets;
        this._postingsLengths = params.postingsLengths;
        this._allDocIds = params.allDocIds;
        this._allFreqs = params.allFreqs;
    }
    /** Number of documents in the index. */
    get documentCount() {
        return this._documentCount;
    }
    /** Number of terms in the index. */
    get termCount() {
        return this._index.size;
    }
    /**
     * Report the size of the index structures.
     *
     * Typed-array figures are exact byte lengths. The radix tree, stored
     * fields (length of their JSON text) and id map figures are estimates.
     *
     * @returns An object with counts plus `postings`, `radixTree` and
     *   `documents` sections and the overall `estimatedStructuredBytes`.
     */
    memoryBreakdown() {
        const termCount = this.termCount;
        const slotCount = termCount * this._fieldCount;
        const postingsTyped = this._allDocIds.byteLength + this._allFreqs.byteLength +
            this._postingsOffsets.byteLength + this._postingsLengths.byteLength;
        let storedJson = 0;
        for (const row of this._storedFields) {
            if (row != null) {
                storedJson += JSON.stringify(row).length;
            }
        }
        const mapNodeCount = countRadixMapNodes(this._index.radixTree);
        const radixEst = mapNodeCount * MAP_NODE_ESTIMATE_BYTES;
        const estimatedStructuredBytes = postingsTyped +
            this._fieldLengthMatrix.byteLength +
            this._avgFieldLength.byteLength +
            radixEst +
            storedJson +
            this._idToShortId.size * 32;
        return {
            termCount,
            documentCount: this._documentCount,
            nextId: this._nextId,
            postings: {
                slotCount,
                allDocIdsBytes: this._allDocIds.byteLength,
                allFreqsBytes: this._allFreqs.byteLength,
                offsetsBytes: this._postingsOffsets.byteLength,
                lengthsBytes: this._postingsLengths.byteLength,
                totalTypedBytes: postingsTyped
            },
            radixTree: {
                mapNodeCount,
                estimatedBytes: radixEst
            },
            documents: {
                externalIdsSlots: this._externalIds.length,
                storedFieldsSlots: this._storedFields.length,
                idToShortIdEntries: this._idToShortId.size,
                fieldLengthMatrixBytes: this._fieldLengthMatrix.byteLength,
                avgFieldLengthBytes: this._avgFieldLength.byteLength,
                storedFieldsJsonBytes: storedJson
            },
            estimatedStructuredBytes
        };
    }
    /**
     * @param id External document id.
     * @returns Whether a document with this id is in the index.
     */
    has(id) {
        return this._idToShortId.has(id);
    }
    /**
     * @param id External document id.
     * @returns The stored fields of the document, or `undefined` for an unknown id.
     */
    getStoredFields(id) {
        const shortId = this._idToShortId.get(id);
        if (shortId == null) {
            return undefined;
        }
        return this._storedFields[shortId];
    }
    // Mutations are not supported on a frozen index: each of these always throws.
    add() { throwReadOnly(); }
    addAll() { throwReadOnly(); }
    addAllAsync() { throwReadOnly(); }
    remove() { throwReadOnly(); }
    removeAll() { throwReadOnly(); }
    discard() { throwReadOnly(); }
    discardAll() { throwReadOnly(); }
    replace() { throwReadOnly(); }
    vacuum() { throwReadOnly(); }
    /**
     * Search the index.
     *
     * @param query Query string, query tree with `queries`, or {@link FrozenMiniSearch.wildcard}.
     * @param searchOptions Per-call options layered over the configured search options.
     * @returns The results produced by finalizeSearchResults. Sorting is
     *   skipped for a wildcard query that has no `boostDocument`.
     */
    search(query, searchOptions = {}) {
        const mergedOptions = { ...this._options.searchOptions, ...searchOptions };
        const rawResults = this.executeQuery(query, searchOptions);
        const skipSort = query === FrozenMiniSearch.wildcard && mergedOptions.boostDocument == null;
        return finalizeSearchResults({
            rawResults,
            getExternalId: (docId) => this._externalIds[docId],
            getStoredFields: (docId) => this._storedFields[docId],
            filter: mergedOptions.filter,
            skipSort
        });
    }
    /**
     * Suggest search phrases for a query.
     *
     * Results sharing the same matched terms (joined with a space) are grouped
     * into one suggestion whose score is the mean score of the group.
     *
     * @param queryString Query to complete.
     * @param options Per-call options layered over the configured auto-suggest options.
     * @returns Suggestions sorted by descending score.
     */
    autoSuggest(queryString, options = {}) {
        const suggestOptions = { ...this._options.autoSuggestOptions, ...options };
        const byPhrase = new Map();
        for (const { score, terms } of this.search(queryString, suggestOptions)) {
            const phrase = terms.join(' ');
            const entry = byPhrase.get(phrase);
            if (entry == null) {
                byPhrase.set(phrase, { score, terms, count: 1 });
            }
            else {
                entry.score += score;
                entry.count += 1;
            }
        }
        return Array.from(byPhrase, ([suggestion, { score, terms, count }]) => ({
            suggestion,
            terms,
            score: score / count
        })).sort((a, b) => b.score - a.score);
    }
    /**
     * Serialize the index with encodeFrozenSnapshot.
     *
     * @returns The encoded snapshot.
     */
    saveBinary() {
        return encodeFrozenSnapshot({
            documentCount: this._documentCount,
            nextId: this._nextId,
            fieldIds: this._fieldIds,
            fieldCount: this._fieldCount,
            avgFieldLength: this._avgFieldLength,
            externalIds: this._externalIds,
            storedFields: this._storedFields,
            fieldLengthMatrix: this._fieldLengthMatrix,
            terms: this._terms,
            treeShape: serializeTermIndexTree(this._index.radixTree),
            postingsOffsets: this._postingsOffsets,
            postingsLengths: this._postingsLengths,
            allDocIds: this._allDocIds,
            allFreqs: this._allFreqs
        });
    }
    /**
     * Load a frozen index from a binary snapshot.
     *
     * @param buffer Snapshot bytes, decoded with decodeFrozenSnapshot.
     * @param options Options for the loaded index; `fields` is mandatory and
     *   every listed field must exist in the snapshot.
     * @returns The loaded frozen index.
     * @throws Error when `fields` is missing or names a field absent from the snapshot.
     */
    static loadBinary(buffer, options) {
        if (options == null || options.fields == null) {
            throw new Error('FrozenMiniSearch: option "fields" must be provided');
        }
        const snap = decodeFrozenSnapshot(buffer);
        for (const name of options.fields) {
            if (snap.fieldIds[name] === undefined) {
                throw new Error(`FrozenMiniSearch: field "${name}" not found in frozen index`);
            }
        }
        const opts = {
            ...defaultFrozenLoadOptions,
            ...options,
            searchOptions: {
                ...defaultSearchOptions,
                ...(options.searchOptions || {})
            },
            autoSuggestOptions: { ...defaultAutoSuggestOptions, ...(options.autoSuggestOptions || {}) }
        };
        const index = new SearchableMap(deserializeTermIndexTree(snap.treeShape));
        const idToShortId = new Map();
        for (let shortId = 0; shortId < snap.externalIds.length; shortId++) {
            const id = snap.externalIds[shortId];
            // External ids are decoded from JSON, where an empty slot is `null`
            // rather than `undefined`; neither is a valid document id.
            if (id != null) {
                idToShortId.set(id, shortId);
            }
        }
        return assembleFrozen({
            options: opts,
            documentCount: snap.documentCount,
            nextId: snap.nextId,
            fieldIds: snap.fieldIds,
            fieldCount: snap.fieldCount,
            externalIds: snap.externalIds,
            idToShortId,
            storedFields: snap.storedFields,
            fieldLengthMatrix: snap.fieldLengthMatrix,
            avgFieldLength: snap.avgFieldLength,
            index,
            terms: snap.terms,
            postingsOffsets: snap.postingsOffsets,
            postingsLengths: snap.postingsLengths,
            allDocIds: snap.allDocIds,
            allFreqs: snap.allFreqs
        });
    }
    /**
     * Build a read-only index in one pass from documents (no mutable MiniSearch step).
     *
     * Use {@link MiniSearch} + {@link MiniSearch#freeze} when you need remove, discard, or
     * incremental updates before freezing.
     */
    static fromDocuments(documents, options) {
        return buildFrozenFromDocuments(documents, options);
    }
    /**
     * @param docId Short document id.
     * @param fieldId Field index.
     * @returns The recorded field length, or 0 when the cell is outside the matrix.
     */
    getFieldLength(docId, fieldId) {
        const length = this._fieldLengthMatrix[docId * this._fieldCount + fieldId];
        return length == null ? 0 : length;
    }
    /**
     * @param termIndex Index of a term.
     * @returns The per-field posting data for the term, as built by flatFieldTermData.
     */
    fieldTermDataFor(termIndex) {
        return flatFieldTermData(termIndex, this._fieldCount, this._postingsOffsets, this._postingsLengths, this._allDocIds, this._allFreqs);
    }
    /**
     * @returns The document statistics and accessors handed to aggregateTerm.
     */
    aggregateContext() {
        return {
            documentCount: this._documentCount,
            avgFieldLength: this._avgFieldLength,
            fieldIds: this._fieldIds,
            getFieldLength: (docId, fieldId) => this.getFieldLength(docId, fieldId),
            getExternalId: (docId) => this._externalIds[docId],
            getStoredFields: (docId) => this._storedFields[docId]
        };
    }
    /**
     * Score the documents matching one indexed term and merge them into `results`.
     *
     * @returns `results` untouched when `termIndex` is null or undefined,
     *   otherwise the value returned by aggregateTerm.
     */
    termResults(sourceTerm, derivedTerm, termWeight, termBoost, termIndex, fieldBoosts, boostDocumentFn, bm25params, results = new Map()) {
        if (termIndex == null) {
            return results;
        }
        return aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, this.fieldTermDataFor(termIndex), fieldBoosts, this.aggregateContext(), boostDocumentFn, bm25params, results);
    }
    /**
     * Run a query and return the raw results keyed by short document id.
     *
     * A wildcard query matches every document. A query object is evaluated by
     * running each of its `queries` with the object's own options layered over
     * `searchOptions`. A string is tokenized and processed into terms, and
     * each term is run as its own query spec. Partial results are merged with
     * combineResults.
     */
    executeQuery(query, searchOptions = {}) {
        if (query === FrozenMiniSearch.wildcard) {
            return this.executeWildcardQuery(searchOptions);
        }
        if (typeof query !== 'string') {
            const nestedOptions = { ...searchOptions, ...query, queries: undefined };
            const nestedResults = query.queries.map((subquery) => this.executeQuery(subquery, nestedOptions));
            return combineResults(nestedResults, nestedOptions.combineWith);
        }
        const { tokenize, processTerm, searchOptions: globalSearchOptions } = this._options;
        const options = { tokenize, processTerm, ...globalSearchOptions, ...searchOptions };
        const { tokenize: searchTokenize, processTerm: searchProcessTerm } = options;
        const terms = searchTokenize(query)
            .flatMap((term) => searchProcessTerm(term))
            .filter((term) => !!term);
        const toQuerySpec = termToQuerySpec(options);
        const perTermResults = terms
            .map(toQuerySpec)
            .map((spec) => this.executeQuerySpec(spec, options));
        return combineResults(perTermResults, options.combineWith);
    }
    /**
     * Run a single-term query spec: the exact term first, then prefix matches
     * and fuzzy matches when the spec enables them.
     *
     * Prefix matches are weighted by `weights.prefix` (default 0.375) and
     * fuzzy matches by `weights.fuzzy` (default 0.45). A term found both ways
     * is scored as a prefix match only.
     */
    executeQuerySpec(query, searchOptions) {
        const options = { ...this._options.searchOptions, ...searchOptions };
        const boosts = (options.fields || this._options.fields).reduce((acc, field) => ({ ...acc, [field]: getOwnProperty(options.boost, field) || 1 }), {});
        const { boostDocument, weights, maxFuzzy, bm25: bm25params } = options;
        const fuzzyWeight = weights != null && weights.fuzzy != null ? weights.fuzzy : 0.45;
        const prefixWeight = weights != null && weights.prefix != null ? weights.prefix : 0.375;
        const exactTermIndex = this._index.get(query.term);
        const results = this.termResults(query.term, query.term, 1, query.termBoost, exactTermIndex, boosts, boostDocument, bm25params);
        const prefixMatches = query.prefix ? this._index.atPrefix(query.term) : undefined;
        let fuzzyMatches;
        if (query.fuzzy) {
            const fuzzy = (query.fuzzy === true) ? 0.2 : query.fuzzy;
            // Below 1 the value is a fraction of the term length, capped by maxFuzzy.
            const maxDistance = fuzzy < 1
                ? Math.min(maxFuzzy, Math.round(query.term.length * fuzzy))
                : fuzzy;
            if (maxDistance) {
                fuzzyMatches = this._index.fuzzyGet(query.term, maxDistance);
            }
        }
        if (prefixMatches) {
            for (const [term, ti] of prefixMatches) {
                const distance = term.length - query.term.length;
                if (!distance) {
                    // The exact term has already been scored.
                    continue;
                }
                if (fuzzyMatches != null) {
                    fuzzyMatches.delete(term);
                }
                const weight = prefixWeight * term.length / (term.length + 0.3 * distance);
                this.termResults(query.term, term, weight, query.termBoost, ti, boosts, boostDocument, bm25params, results);
            }
        }
        if (fuzzyMatches) {
            for (const [term, [ti, distance]] of fuzzyMatches) {
                if (!distance) {
                    continue;
                }
                const weight = fuzzyWeight * term.length / (term.length + distance);
                this.termResults(query.term, term, weight, query.termBoost, ti, boosts, boostDocument, bm25params, results);
            }
        }
        return results;
    }
    /**
     * Match every document in the index.
     *
     * Slots without an external id are skipped, whether the slot is
     * `undefined` or the `null` that a JSON-decoded snapshot holds. Each
     * document scores 1 unless `boostDocument` is set, in which case its
     * return value is the score.
     */
    executeWildcardQuery(searchOptions) {
        const results = new Map();
        const options = { ...this._options.searchOptions, ...searchOptions };
        for (let shortId = 0; shortId < this._nextId; shortId++) {
            const id = this._externalIds[shortId];
            if (id == null) {
                continue;
            }
            const score = options.boostDocument
                ? options.boostDocument(id, '', this._storedFields[shortId])
                : 1;
            results.set(shortId, { score, terms: [], match: {} });
        }
        return results;
    }
}
1662
/** Wildcard query symbol exposed as a static member; the same symbol MiniSearch uses. */
FrozenMiniSearch.wildcard = WILDCARD_QUERY;
1663
+
1664
+ /**
1665
+ * {@link MiniSearch} is the main entrypoint class, implementing a full-text
1666
+ * search engine in memory.
1667
+ *
1668
+ * @typeParam T The type of the documents being indexed.
1669
+ *
1670
+ * ### Basic example:
1671
+ *
1672
+ * ```javascript
1673
+ * const documents = [
1674
+ * {
1675
+ * id: 1,
1676
+ * title: 'Moby Dick',
1677
+ * text: 'Call me Ishmael. Some years ago...',
1678
+ * category: 'fiction'
1679
+ * },
1680
+ * {
1681
+ * id: 2,
1682
+ * title: 'Zen and the Art of Motorcycle Maintenance',
1683
+ * text: 'I can see by my watch...',
1684
+ * category: 'fiction'
1685
+ * },
1686
+ * {
1687
+ * id: 3,
1688
+ * title: 'Neuromancer',
1689
+ * text: 'The sky above the port was...',
1690
+ * category: 'fiction'
1691
+ * },
1692
+ * {
1693
+ * id: 4,
1694
+ * title: 'Zen and the Art of Archery',
1695
+ * text: 'At first sight it must seem...',
1696
+ * category: 'non-fiction'
1697
+ * },
1698
+ * // ...and more
1699
+ * ]
1700
+ *
1701
+ * // Create a search engine that indexes the 'title' and 'text' fields for
1702
+ * // full-text search. Search results will include 'title' and 'category' (plus the
1703
+ * // id field, that is always stored and returned)
1704
+ * const miniSearch = new MiniSearch({
1705
+ * fields: ['title', 'text'],
1706
+ * storeFields: ['title', 'category']
1707
+ * })
1708
+ *
1709
+ * // Add documents to the index
1710
+ * miniSearch.addAll(documents)
1711
+ *
1712
+ * // Search for documents:
1713
+ * let results = miniSearch.search('zen art motorcycle')
1714
+ * // => [
1715
+ * // { id: 2, title: 'Zen and the Art of Motorcycle Maintenance', category: 'fiction', score: 2.77258 },
1716
+ * // { id: 4, title: 'Zen and the Art of Archery', category: 'non-fiction', score: 1.38629 }
1717
+ * // ]
1718
+ * ```
1719
+ */
1720
+ class MiniSearch {
1721
+ /**
1722
+ * @param options Configuration options
1723
+ *
1724
+ * ### Examples:
1725
+ *
1726
+ * ```javascript
1727
+ * // Create a search engine that indexes the 'title' and 'text' fields of your
1728
+ * // documents:
1729
+ * const miniSearch = new MiniSearch({ fields: ['title', 'text'] })
1730
+ * ```
1731
+ *
1732
+ * ### ID Field:
1733
+ *
1734
+ * ```javascript
1735
+ * // Your documents are assumed to include a unique 'id' field, but if you want
1736
+ * // to use a different field for document identification, you can set the
1737
+ * // 'idField' option:
1738
+ * const miniSearch = new MiniSearch({ idField: 'key', fields: ['title', 'text'] })
1739
+ * ```
1740
+ *
1741
+ * ### Options and defaults:
1742
+ *
1743
+ * ```javascript
1744
+ * // The full set of options (here with their default value) is:
1745
+ * const miniSearch = new MiniSearch({
1746
+ * // idField: field that uniquely identifies a document
1747
+ * idField: 'id',
1748
+ *
1749
+ * // extractField: function used to get the value of a field in a document.
1750
+ * // By default, it assumes the document is a flat object with field names as
1751
+ * // property keys and field values as string property values, but custom logic
1752
+ * // can be implemented by setting this option to a custom extractor function.
1753
+ * extractField: (document, fieldName) => document[fieldName],
1754
+ *
1755
+ * // tokenize: function used to split fields into individual terms. By
1756
+ * // default, it is also used to tokenize search queries, unless a specific
1757
+ * // `tokenize` search option is supplied. When tokenizing an indexed field,
1758
+ * // the field name is passed as the second argument.
1759
+ * tokenize: (string, _fieldName) => string.split(SPACE_OR_PUNCTUATION),
1760
+ *
1761
+ * // processTerm: function used to process each tokenized term before
1762
+ * // indexing. It can be used for stemming and normalization. Return a falsy
1763
+ * // value in order to discard a term. By default, it is also used to process
1764
+ * // search queries, unless a specific `processTerm` option is supplied as a
1765
+ * // search option. When processing a term from an indexed field, the field
1766
+ * // name is passed as the second argument.
1767
+ * processTerm: (term, _fieldName) => term.toLowerCase(),
1768
+ *
1769
+ * // searchOptions: default search options, see the `search` method for
1770
+ * // details
1771
+ * searchOptions: undefined,
1772
+ *
1773
+ * // fields: document fields to be indexed. Mandatory, but not set by default
1774
+ * fields: undefined,
1775
+ *
1776
+ * // storeFields: document fields to be stored and returned as part of the
1777
+ * // search results.
1778
+ * storeFields: []
1779
+ * })
1780
+ * ```
1781
+ */
1782
+ constructor(options) {
1783
+ if ((options === null || options === void 0 ? void 0 : options.fields) == null) {
1784
+ throw new Error('MiniSearch: option "fields" must be provided');
1785
+ }
1786
+ const autoVacuum = (options.autoVacuum == null || options.autoVacuum === true) ? defaultAutoVacuumOptions : options.autoVacuum;
1787
+ this._options = {
1788
+ ...defaultOptions,
1789
+ ...options,
1790
+ autoVacuum,
1791
+ searchOptions: { ...defaultSearchOptions, ...(options.searchOptions || {}) },
1792
+ autoSuggestOptions: { ...defaultAutoSuggestOptions, ...(options.autoSuggestOptions || {}) }
1793
+ };
1794
+ this._index = new SearchableMap();
1795
+ this._documentCount = 0;
1796
+ this._documentIds = new Map();
1797
+ this._idToShortId = new Map();
1798
+ // Fields are defined during initialization, don't change, are few in
1799
+ // number, rarely need iterating over, and have string keys. Therefore in
1800
+ // this case an object is a better candidate than a Map to store the mapping
1801
+ // from field key to ID.
1802
+ this._fieldIds = {};
1803
+ this._fieldLength = new Map();
1804
+ this._avgFieldLength = [];
1805
+ this._nextId = 0;
1806
+ this._storedFields = new Map();
1807
+ this._dirtCount = 0;
1808
+ this._currentVacuum = null;
1809
+ this._enqueuedVacuum = null;
1810
+ this._enqueuedVacuumConditions = defaultVacuumConditions;
1811
+ this.addFields(this._options.fields);
1812
+ }
1813
+ /**
1814
+ * Adds a document to the index
1815
+ *
1816
+ * @param document The document to be indexed
1817
+ */
1818
+ add(document) {
1819
+ const { extractField, stringifyField, tokenize, processTerm, fields, idField } = this._options;
1820
+ const id = extractField(document, idField);
1821
+ if (id == null) {
1822
+ throw new Error(`MiniSearch: document does not have ID field "${idField}"`);
1823
+ }
1824
+ if (this._idToShortId.has(id)) {
1825
+ throw new Error(`MiniSearch: duplicate ID ${id}`);
1826
+ }
1827
+ const shortDocumentId = this.addDocumentId(id);
1828
+ const stored = saveStoredFieldsForDocument(this._options.storeFields, extractField, document);
1829
+ if (stored != null)
1830
+ this._storedFields.set(shortDocumentId, stored);
1831
+ for (const field of fields) {
1832
+ const fieldValue = extractField(document, field);
1833
+ if (fieldValue == null)
1834
+ continue;
1835
+ const tokens = tokenize(stringifyField(fieldValue, field), field);
1836
+ const fieldId = this._fieldIds[field];
1837
+ const uniqueTerms = new Set(tokens).size;
1838
+ const localFreqs = collectFieldTermFreqs(tokens, field, processTerm);
1839
+ this.addFieldLength(shortDocumentId, fieldId, this._documentCount - 1, uniqueTerms);
1840
+ for (const [term] of localFreqs) {
1841
+ const freq = localFreqs.get(term);
1842
+ for (let i = 0; i < freq; i++) {
1843
+ this.addTerm(fieldId, shortDocumentId, term);
1844
+ }
1845
+ }
1846
+ }
1847
+ }
1848
+ /**
1849
+ * Adds all the given documents to the index
1850
+ *
1851
+ * @param documents An array of documents to be indexed
1852
+ */
1853
+ addAll(documents) {
1854
+ for (const document of documents)
1855
+ this.add(document);
1856
+ }
1857
+ /**
1858
+ * Adds all the given documents to the index asynchronously.
1859
+ *
1860
+ * Returns a promise that resolves (to `undefined`) when the indexing is done.
1861
+ * This method is useful when indexing many documents, to avoid blocking the main
1862
+ * thread. The indexing is performed asynchronously and in chunks.
1863
+ *
1864
+ * @param documents An array of documents to be indexed
1865
+ * @param options Configuration options
1866
+ * @return A promise resolving to `undefined` when the indexing is done
1867
+ */
1868
+ addAllAsync(documents, options = {}) {
1869
+ const { chunkSize = 10 } = options;
1870
+ const acc = { chunk: [], promise: Promise.resolve() };
1871
+ const { chunk, promise } = documents.reduce(({ chunk, promise }, document, i) => {
1872
+ chunk.push(document);
1873
+ if ((i + 1) % chunkSize === 0) {
1874
+ return {
1875
+ chunk: [],
1876
+ promise: promise
1877
+ .then(() => new Promise(resolve => setTimeout(resolve, 0)))
1878
+ .then(() => this.addAll(chunk))
1879
+ };
1880
+ }
1881
+ else {
1882
+ return { chunk, promise };
1883
+ }
1884
+ }, acc);
1885
+ return promise.then(() => this.addAll(chunk));
1886
+ }
1887
+ /**
1888
+ * Removes the given document from the index.
1889
+ *
1890
+ * The document to remove must NOT have changed between indexing and removal,
1891
+ * otherwise the index will be corrupted.
1892
+ *
1893
+ * This method requires passing the full document to be removed (not just the
1894
+ * ID), and immediately removes the document from the inverted index, allowing
1895
+ * memory to be released. A convenient alternative is {@link
1896
+ * MiniSearch#discard}, which needs only the document ID, and has the same
1897
+ * visible effect, but delays cleaning up the index until the next vacuuming.
1898
+ *
1899
+ * @param document The document to be removed
1900
+ */
1901
+ remove(document) {
1902
+ const { tokenize, processTerm, extractField, stringifyField, fields, idField } = this._options;
1903
+ const id = extractField(document, idField);
1904
+ if (id == null) {
1905
+ throw new Error(`MiniSearch: document does not have ID field "${idField}"`);
1906
+ }
1907
+ const shortId = this._idToShortId.get(id);
1908
+ if (shortId == null) {
1909
+ throw new Error(`MiniSearch: cannot remove document with ID ${id}: it is not in the index`);
1910
+ }
1911
+ for (const field of fields) {
1912
+ const fieldValue = extractField(document, field);
1913
+ if (fieldValue == null)
1914
+ continue;
1915
+ const tokens = tokenize(stringifyField(fieldValue, field), field);
1916
+ const fieldId = this._fieldIds[field];
1917
+ const uniqueTerms = new Set(tokens).size;
1918
+ this.removeFieldLength(shortId, fieldId, this._documentCount, uniqueTerms);
1919
+ for (const term of tokens) {
1920
+ const processedTerm = processTerm(term, field);
1921
+ if (Array.isArray(processedTerm)) {
1922
+ for (const t of processedTerm) {
1923
+ this.removeTerm(fieldId, shortId, t);
1924
+ }
1925
+ }
1926
+ else if (processedTerm) {
1927
+ this.removeTerm(fieldId, shortId, processedTerm);
1928
+ }
1929
+ }
1930
+ }
1931
+ this._storedFields.delete(shortId);
1932
+ this._documentIds.delete(shortId);
1933
+ this._idToShortId.delete(id);
1934
+ this._fieldLength.delete(shortId);
1935
+ this._documentCount -= 1;
1936
+ }
1937
+ /**
1938
+ * Removes all the given documents from the index. If called with no arguments,
1939
+ * it removes _all_ documents from the index.
1940
+ *
1941
+ * @param documents The documents to be removed. If this argument is omitted,
1942
+ * all documents are removed. Note that, for removing all documents, it is
1943
+ * more efficient to call this method with no arguments than to pass all
1944
+ * documents.
1945
+ */
1946
+ removeAll(documents) {
1947
+ if (documents) {
1948
+ for (const document of documents)
1949
+ this.remove(document);
1950
+ }
1951
+ else if (arguments.length > 0) {
1952
+ throw new Error('Expected documents to be present. Omit the argument to remove all documents.');
1953
+ }
1954
+ else {
1955
+ this._index = new SearchableMap();
1956
+ this._documentCount = 0;
1957
+ this._documentIds = new Map();
1958
+ this._idToShortId = new Map();
1959
+ this._fieldLength = new Map();
1960
+ this._avgFieldLength = [];
1961
+ this._storedFields = new Map();
1962
+ this._nextId = 0;
1963
+ }
1964
+ }
1965
+ /**
1966
+ * Discards the document with the given ID, so it won't appear in search results
1967
+ *
1968
+ * It has the same visible effect of {@link MiniSearch.remove} (both cause the
1969
+ * document to stop appearing in searches), but a different effect on the
1970
+ * internal data structures:
1971
+ *
1972
+ * - {@link MiniSearch#remove} requires passing the full document to be
1973
+ * removed as argument, and removes it from the inverted index immediately.
1974
+ *
1975
+ * - {@link MiniSearch#discard} instead only needs the document ID, and
1976
+ * works by marking the current version of the document as discarded, so it
1977
+ * is immediately ignored by searches. This is faster and more convenient
1978
+ * than {@link MiniSearch#remove}, but the index is not immediately
1979
+ * modified. To take care of that, vacuuming is performed after a certain
1980
+ * number of documents are discarded, cleaning up the index and allowing
1981
+ * memory to be released.
1982
+ *
1983
+ * After discarding a document, it is possible to re-add a new version, and
1984
+ * only the new version will appear in searches. In other words, discarding
1985
+ * and re-adding a document works exactly like removing and re-adding it. The
1986
+ * {@link MiniSearch.replace} method can also be used to replace a document
1987
+ * with a new version.
1988
+ *
1989
+ * #### Details about vacuuming
1990
+ *
1991
+ * Repeated calls to this method would leave obsolete document references in
1992
+ * the index, invisible to searches. Two mechanisms take care of cleaning up:
1993
+ * clean up during search, and vacuuming.
1994
+ *
1995
+ * - Upon search, whenever a discarded ID is found (and ignored for the
1996
+ * results), references to the discarded document are removed from the
1997
+ * inverted index entries for the search terms. This ensures that subsequent
1998
+ * searches for the same terms do not need to skip these obsolete references
1999
+ * again.
2000
+ *
2001
+ * - In addition, vacuuming is performed automatically by default (see the
2002
+ * `autoVacuum` field in {@link Options}) after a certain number of
2003
+ * documents are discarded. Vacuuming traverses all terms in the index,
2004
+ * cleaning up all references to discarded documents. Vacuuming can also be
2005
+ * triggered manually by calling {@link MiniSearch#vacuum}.
2006
+ *
2007
+ * @param id The ID of the document to be discarded
2008
+ */
2009
+ discard(id) {
2010
+ const shortId = this._idToShortId.get(id);
2011
+ if (shortId == null) {
2012
+ throw new Error(`MiniSearch: cannot discard document with ID ${id}: it is not in the index`);
2013
+ }
2014
+ this._idToShortId.delete(id);
2015
+ this._documentIds.delete(shortId);
2016
+ this._storedFields.delete(shortId);
2017
+ (this._fieldLength.get(shortId) || []).forEach((fieldLength, fieldId) => {
2018
+ this.removeFieldLength(shortId, fieldId, this._documentCount, fieldLength);
2019
+ });
2020
+ this._fieldLength.delete(shortId);
2021
+ this._documentCount -= 1;
2022
+ this._dirtCount += 1;
2023
+ this.maybeAutoVacuum();
2024
+ }
2025
+ maybeAutoVacuum() {
2026
+ if (this._options.autoVacuum === false) {
2027
+ return;
2028
+ }
2029
+ const { minDirtFactor, minDirtCount, batchSize, batchWait } = this._options.autoVacuum;
2030
+ this.conditionalVacuum({ batchSize, batchWait }, { minDirtCount, minDirtFactor });
2031
+ }
2032
+ /**
2033
+ * Discards the documents with the given IDs, so they won't appear in search
2034
+ * results
2035
+ *
2036
+ * It is equivalent to calling {@link MiniSearch#discard} for all the given
2037
+ * IDs, but with the optimization of triggering at most one automatic
2038
+ * vacuuming at the end.
2039
+ *
2040
+ * Note: to remove all documents from the index, it is faster and more
2041
+ * convenient to call {@link MiniSearch.removeAll} with no argument, instead
2042
+ * of passing all IDs to this method.
2043
+ */
2044
+ discardAll(ids) {
2045
+ const autoVacuum = this._options.autoVacuum;
2046
+ try {
2047
+ this._options.autoVacuum = false;
2048
+ for (const id of ids) {
2049
+ this.discard(id);
2050
+ }
2051
+ }
2052
+ finally {
2053
+ this._options.autoVacuum = autoVacuum;
2054
+ }
2055
+ this.maybeAutoVacuum();
2056
+ }
2057
+ /**
2058
+ * It replaces an existing document with the given updated version
2059
+ *
2060
+ * It works by discarding the current version and adding the updated one, so
2061
+ * it is functionally equivalent to calling {@link MiniSearch#discard}
2062
+ * followed by {@link MiniSearch#add}. The ID of the updated document should
2063
+ * be the same as the original one.
2064
+ *
2065
+ * Since it uses {@link MiniSearch#discard} internally, this method relies on
2066
+ * vacuuming to clean up obsolete document references from the index, allowing
2067
+ * memory to be released (see {@link MiniSearch#discard}).
2068
+ *
2069
+ * @param updatedDocument The updated document to replace the old version
2070
+ * with
2071
+ */
2072
+ replace(updatedDocument) {
2073
+ const { idField, extractField } = this._options;
2074
+ const id = extractField(updatedDocument, idField);
2075
+ this.discard(id);
2076
+ this.add(updatedDocument);
2077
+ }
2078
+ /**
2079
+ * Triggers a manual vacuuming, cleaning up references to discarded documents
2080
+ * from the inverted index
2081
+ *
2082
+ * Vacuuming is only useful for applications that use the {@link
2083
+ * MiniSearch#discard} or {@link MiniSearch#replace} methods.
2084
+ *
2085
+ * By default, vacuuming is performed automatically when needed (controlled by
2086
+ * the `autoVacuum` field in {@link Options}), so there is usually no need to
2087
+ * call this method, unless one wants to make sure to perform vacuuming at a
2088
+ * specific moment.
2089
+ *
2090
+ * Vacuuming traverses all terms in the inverted index in batches, and cleans
2091
+ * up references to discarded documents from the posting list, allowing memory
2092
+ * to be released.
2093
+ *
2094
+ * The method takes an optional object as argument with the following keys:
2095
+ *
2096
+ * - `batchSize`: the size of each batch (1000 by default)
2097
+ *
2098
+ * - `batchWait`: the number of milliseconds to wait between batches (10 by
2099
+ * default)
2100
+ *
2101
+ * On large indexes, vacuuming could have a non-negligible cost: batching
2102
+ * avoids blocking the thread for long, diluting this cost so that it is not
2103
+ * negatively affecting the application. Nonetheless, this method should only
2104
+ * be called when necessary, and relying on automatic vacuuming is usually
2105
+ * better.
2106
+ *
2107
+ * It returns a promise that resolves (to undefined) when the clean up is
2108
+ * completed. If vacuuming is already ongoing at the time this method is
2109
+ * called, a new one is enqueued immediately after the ongoing one, and a
2110
+ * corresponding promise is returned. However, no more than one vacuuming is
2111
+ * enqueued on top of the ongoing one, even if this method is called more
2112
+ * times (enqueuing multiple ones would be useless).
2113
+ *
2114
+ * @param options Configuration options for the batch size and delay. See
2115
+ * {@link VacuumOptions}.
2116
+ */
2117
+ vacuum(options = {}) {
2118
+ return this.conditionalVacuum(options);
2119
+ }
2120
+ conditionalVacuum(options, conditions) {
2121
+ // If a vacuum is already ongoing, schedule another as soon as it finishes,
2122
+ // unless there's already one enqueued. If one was already enqueued, do not
2123
+ // enqueue another on top, but make sure that the conditions are the
2124
+ // broadest.
2125
+ if (this._currentVacuum) {
2126
+ this._enqueuedVacuumConditions = this._enqueuedVacuumConditions && conditions;
2127
+ if (this._enqueuedVacuum != null) {
2128
+ return this._enqueuedVacuum;
2129
+ }
2130
+ this._enqueuedVacuum = this._currentVacuum.then(() => {
2131
+ const conditions = this._enqueuedVacuumConditions;
2132
+ this._enqueuedVacuumConditions = defaultVacuumConditions;
2133
+ return this.performVacuuming(options, conditions);
2134
+ });
2135
+ return this._enqueuedVacuum;
2136
+ }
2137
+ if (this.vacuumConditionsMet(conditions) === false) {
2138
+ return Promise.resolve();
2139
+ }
2140
+ this._currentVacuum = this.performVacuuming(options);
2141
+ return this._currentVacuum;
2142
+ }
2143
+ async performVacuuming(options, conditions) {
2144
+ const initialDirtCount = this._dirtCount;
2145
+ if (this.vacuumConditionsMet(conditions)) {
2146
+ const batchSize = options.batchSize || defaultVacuumOptions.batchSize;
2147
+ const batchWait = options.batchWait || defaultVacuumOptions.batchWait;
2148
+ let i = 1;
2149
+ for (const [term, fieldsData] of this._index) {
2150
+ for (const [fieldId, fieldIndex] of fieldsData) {
2151
+ for (const [shortId] of fieldIndex) {
2152
+ if (this._documentIds.has(shortId)) {
2153
+ continue;
2154
+ }
2155
+ if (fieldIndex.size <= 1) {
2156
+ fieldsData.delete(fieldId);
2157
+ }
2158
+ else {
2159
+ fieldIndex.delete(shortId);
2160
+ }
2161
+ }
2162
+ }
2163
+ if (this._index.get(term).size === 0) {
2164
+ this._index.delete(term);
2165
+ }
2166
+ if (i % batchSize === 0) {
2167
+ await new Promise((resolve) => setTimeout(resolve, batchWait));
2168
+ }
2169
+ i += 1;
2170
+ }
2171
+ this._dirtCount -= initialDirtCount;
2172
+ }
2173
+ // Make the next lines always async, so they execute after this function returns
2174
+ await null;
2175
+ this._currentVacuum = this._enqueuedVacuum;
2176
+ this._enqueuedVacuum = null;
2177
+ }
2178
+ vacuumConditionsMet(conditions) {
2179
+ if (conditions == null) {
2180
+ return true;
2181
+ }
2182
+ let { minDirtCount, minDirtFactor } = conditions;
2183
+ minDirtCount = minDirtCount || defaultAutoVacuumOptions.minDirtCount;
2184
+ minDirtFactor = minDirtFactor || defaultAutoVacuumOptions.minDirtFactor;
2185
+ return this.dirtCount >= minDirtCount && this.dirtFactor >= minDirtFactor;
2186
+ }
2187
+ /**
2188
+ * Is `true` if a vacuuming operation is ongoing, `false` otherwise
2189
+ */
2190
+ get isVacuuming() {
2191
+ return this._currentVacuum != null;
2192
+ }
2193
+ /**
2194
+ * The number of documents discarded since the most recent vacuuming
2195
+ */
2196
+ get dirtCount() {
2197
+ return this._dirtCount;
2198
+ }
2199
+ /**
2200
+ * A number between 0 and 1 giving an indication about the proportion of
2201
+ * documents that are discarded, and can therefore be cleaned up by vacuuming.
2202
+ * A value close to 0 means that the index is relatively clean, while a higher
2203
+ * value means that the index is relatively dirty, and vacuuming could release
2204
+ * memory.
2205
+ */
2206
+ get dirtFactor() {
2207
+ return this._dirtCount / (1 + this._documentCount + this._dirtCount);
2208
+ }
2209
+ /**
2210
+ * Returns `true` if a document with the given ID is present in the index and
2211
+ * available for search, `false` otherwise
2212
+ *
2213
+ * @param id The document ID
2214
+ */
2215
+ has(id) {
2216
+ return this._idToShortId.has(id);
2217
+ }
2218
+ /**
2219
+ * Returns the stored fields (as configured in the `storeFields` constructor
2220
+ * option) for the given document ID. Returns `undefined` if the document is
2221
+ * not present in the index.
2222
+ *
2223
+ * @param id The document ID
2224
+ */
2225
+ getStoredFields(id) {
2226
+ const shortId = this._idToShortId.get(id);
2227
+ if (shortId == null) {
2228
+ return undefined;
2229
+ }
2230
+ return this._storedFields.get(shortId);
2231
+ }
2232
+ /**
2233
+ * Search for documents matching the given search query.
2234
+ *
2235
+ * The result is a list of scored document IDs matching the query, sorted by
2236
+ * descending score, and each including data about which terms were matched and
2237
+ * in which fields.
2238
+ *
2239
+ * ### Basic usage:
2240
+ *
2241
+ * ```javascript
2242
+ * // Search for "zen art motorcycle" with default options: terms have to match
2243
+ * // exactly, and individual terms are joined with OR
2244
+ * miniSearch.search('zen art motorcycle')
2245
+ * // => [ { id: 2, score: 2.77258, match: { ... } }, { id: 4, score: 1.38629, match: { ... } } ]
2246
+ * ```
2247
+ *
2248
+ * ### Restrict search to specific fields:
2249
+ *
2250
+ * ```javascript
2251
+ * // Search only in the 'title' field
2252
+ * miniSearch.search('zen', { fields: ['title'] })
2253
+ * ```
2254
+ *
2255
+ * ### Field boosting:
2256
+ *
2257
+ * ```javascript
2258
+ * // Boost a field
2259
+ * miniSearch.search('zen', { boost: { title: 2 } })
2260
+ * ```
2261
+ *
2262
+ * ### Prefix search:
2263
+ *
2264
+ * ```javascript
2265
+ * // Search for "moto" with prefix search (it will match documents
2266
+ * // containing terms that start with "moto" or "neuro")
2267
+ * miniSearch.search('moto neuro', { prefix: true })
2268
+ * ```
2269
+ *
2270
+ * ### Fuzzy search:
2271
+ *
2272
+ * ```javascript
2273
+ * // Search for "ismael" with fuzzy search (it will match documents containing
2274
+ * // terms similar to "ismael", with a maximum edit distance of 0.2 term.length
2275
+ * // (rounded to nearest integer)
2276
+ * miniSearch.search('ismael', { fuzzy: 0.2 })
2277
+ * ```
2278
+ *
2279
+ * ### Combining strategies:
2280
+ *
2281
+ * ```javascript
2282
+ * // Mix of exact match, prefix search, and fuzzy search
2283
+ * miniSearch.search('ismael mob', {
2284
+ * prefix: true,
2285
+ * fuzzy: 0.2
2286
+ * })
2287
+ * ```
2288
+ *
2289
+ * ### Advanced prefix and fuzzy search:
2290
+ *
2291
+ * ```javascript
2292
+ * // Perform fuzzy and prefix search depending on the search term. Here
2293
+ * // performing prefix and fuzzy search only on terms longer than 3 characters
2294
+ * miniSearch.search('ismael mob', {
2295
+ * prefix: term => term.length > 3,
2296
+ * fuzzy: term => term.length > 3 ? 0.2 : null
2297
+ * })
2298
+ * ```
2299
+ *
2300
+ * ### Combine with AND:
2301
+ *
2302
+ * ```javascript
2303
+ * // Combine search terms with AND (to match only documents that contain both
2304
+ * // "motorcycle" and "art")
2305
+ * miniSearch.search('motorcycle art', { combineWith: 'AND' })
2306
+ * ```
2307
+ *
2308
+ * ### Combine with AND_NOT:
2309
+ *
2310
+ * There is also an AND_NOT combinator, that finds documents that match the
2311
+ * first term, but do not match any of the other terms. This combinator is
2312
+ * rarely useful with simple queries, and is meant to be used with advanced
2313
+ * query combinations (see later for more details).
2314
+ *
2315
+ * ### Filtering results:
2316
+ *
2317
+ * ```javascript
2318
+ * // Filter only results in the 'fiction' category (assuming that 'category'
2319
+ * // is a stored field)
2320
+ * miniSearch.search('motorcycle art', {
2321
+ * filter: (result) => result.category === 'fiction'
2322
+ * })
2323
+ * ```
2324
+ *
2325
+ * ### Wildcard query
2326
+ *
2327
+ * Searching for an empty string (assuming the default tokenizer) returns no
2328
+ * results. Sometimes though, one needs to match all documents, like in a
2329
+ * "wildcard" search. This is possible by passing the special value
2330
+ * {@link MiniSearch.wildcard} as the query:
2331
+ *
2332
+ * ```javascript
2333
+ * // Return search results for all documents
2334
+ * miniSearch.search(MiniSearch.wildcard)
2335
+ * ```
2336
+ *
2337
+ * Note that search options such as `filter` and `boostDocument` are still
2338
+ * applied, influencing which results are returned, and their order:
2339
+ *
2340
+ * ```javascript
2341
+ * // Return search results for all documents in the 'fiction' category
2342
+ * miniSearch.search(MiniSearch.wildcard, {
2343
+ * filter: (result) => result.category === 'fiction'
2344
+ * })
2345
+ * ```
2346
+ *
2347
+ * ### Advanced combination of queries:
2348
+ *
2349
+ * It is possible to combine different subqueries with OR, AND, and AND_NOT,
2350
+ * and even with different search options, by passing a query expression
2351
+ * tree object as the first argument, instead of a string.
2352
+ *
2353
+ * ```javascript
2354
+ * // Search for documents that contain "zen" and ("motorcycle" or "archery")
2355
+ * miniSearch.search({
2356
+ * combineWith: 'AND',
2357
+ * queries: [
2358
+ * 'zen',
2359
+ * {
2360
+ * combineWith: 'OR',
2361
+ * queries: ['motorcycle', 'archery']
2362
+ * }
2363
+ * ]
2364
+ * })
2365
+ *
2366
+ * // Search for documents that contain ("apple" or "pear") but not "juice" and
2367
+ * // not "tree"
2368
+ * miniSearch.search({
2369
+ * combineWith: 'AND_NOT',
2370
+ * queries: [
2371
+ * {
2372
+ * combineWith: 'OR',
2373
+ * queries: ['apple', 'pear']
2374
+ * },
2375
+ * 'juice',
2376
+ * 'tree'
2377
+ * ]
2378
+ * })
2379
+ * ```
2380
+ *
2381
+ * Each node in the expression tree can be either a string, or an object that
2382
+ * supports all {@link SearchOptions} fields, plus a `queries` array field for
2383
+ * subqueries.
2384
+ *
2385
+ * Note that, while this can become complicated to do by hand for complex or
2386
+ * deeply nested queries, it provides a formalized expression tree API for
2387
+ * external libraries that implement a parser for custom query languages.
2388
+ *
2389
+ * @param query Search query
2390
+ * @param searchOptions Search options. Each option, if not given, defaults to the corresponding value of `searchOptions` given to the constructor, or to the library default.
2391
+ */
2392
+ search(query, searchOptions = {}) {
2393
+ const { searchOptions: globalSearchOptions } = this._options;
2394
+ const searchOptionsWithDefaults = { ...globalSearchOptions, ...searchOptions };
2395
+ const rawResults = this.executeQuery(query, searchOptions);
2396
+ const skipSort = query === MiniSearch.wildcard && searchOptionsWithDefaults.boostDocument == null;
2397
+ return finalizeSearchResults({
2398
+ rawResults,
2399
+ getExternalId: (docId) => this._documentIds.get(docId),
2400
+ getStoredFields: (docId) => this._storedFields.get(docId),
2401
+ filter: searchOptionsWithDefaults.filter,
2402
+ skipSort
2403
+ });
2404
+ }
2405
+ /**
2406
+ * Provide suggestions for the given search query
2407
+ *
2408
+ * The result is a list of suggested modified search queries, derived from the
2409
+ * given search query, each with a relevance score, sorted by descending score.
2410
+ *
2411
+ * By default, it uses the same options used for search, except that by
2412
+ * default it performs prefix search on the last term of the query, and
2413
+ * combine terms with `'AND'` (requiring all query terms to match). Custom
2414
+ * options can be passed as a second argument. Defaults can be changed upon
2415
+ * calling the {@link MiniSearch} constructor, by passing a
2416
+ * `autoSuggestOptions` option.
2417
+ *
2418
+ * ### Basic usage:
2419
+ *
2420
+ * ```javascript
2421
+ * // Get suggestions for 'neuro':
2422
+ * miniSearch.autoSuggest('neuro')
2423
+ * // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 0.46240 } ]
2424
+ * ```
2425
+ *
2426
+ * ### Multiple words:
2427
+ *
2428
+ * ```javascript
2429
+ * // Get suggestions for 'zen ar':
2430
+ * miniSearch.autoSuggest('zen ar')
2431
+ * // => [
2432
+ * // { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 },
2433
+ * // { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 }
2434
+ * // ]
2435
+ * ```
2436
+ *
2437
+ * ### Fuzzy suggestions:
2438
+ *
2439
+ * ```javascript
2440
+ * // Correct spelling mistakes using fuzzy search:
2441
+ * miniSearch.autoSuggest('neromancer', { fuzzy: 0.2 })
2442
+ * // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 1.03998 } ]
2443
+ * ```
2444
+ *
2445
+ * ### Filtering:
2446
+ *
2447
+ * ```javascript
2448
+ * // Get suggestions for 'zen ar', but only within the 'fiction' category
2449
+ * // (assuming that 'category' is a stored field):
2450
+ * miniSearch.autoSuggest('zen ar', {
2451
+ * filter: (result) => result.category === 'fiction'
2452
+ * })
2453
+ * // => [
2454
+ * // { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 },
2455
+ * // { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 }
2456
+ * // ]
2457
+ * ```
2458
+ *
2459
+ * @param queryString Query string to be expanded into suggestions
2460
+ * @param options Search options. The supported options and default values
2461
+ * are the same as for the {@link MiniSearch#search} method, except that by
2462
+ * default prefix search is performed on the last term in the query, and terms
2463
+ * are combined with `'AND'`.
2464
+ * @return A sorted array of suggestions sorted by relevance score.
2465
+ */
2466
+ autoSuggest(queryString, options = {}) {
2467
+ options = { ...this._options.autoSuggestOptions, ...options };
2468
+ const suggestions = new Map();
2469
+ for (const { score, terms } of this.search(queryString, options)) {
2470
+ const phrase = terms.join(' ');
2471
+ const suggestion = suggestions.get(phrase);
2472
+ if (suggestion != null) {
2473
+ suggestion.score += score;
2474
+ suggestion.count += 1;
2475
+ }
2476
+ else {
2477
+ suggestions.set(phrase, { score, terms, count: 1 });
2478
+ }
2479
+ }
2480
+ const results = [];
2481
+ for (const [suggestion, { score, terms, count }] of suggestions) {
2482
+ results.push({ suggestion, terms, score: score / count });
2483
+ }
2484
+ results.sort(byScore);
2485
+ return results;
2486
+ }
2487
+ /**
2488
+ * Total number of documents available to search
2489
+ */
2490
+ get documentCount() {
2491
+ return this._documentCount;
2492
+ }
2493
+ /**
2494
+ * Number of terms in the index
2495
+ */
2496
+ get termCount() {
2497
+ return this._index.size;
2498
+ }
2499
+ /**
2500
+ * Deserializes a JSON index (serialized with `JSON.stringify(miniSearch)`)
2501
+ * and instantiates a MiniSearch instance. It should be given the same options
2502
+ * originally used when serializing the index.
2503
+ *
2504
+ * ### Usage:
2505
+ *
2506
+ * ```javascript
2507
+ * // If the index was serialized with:
2508
+ * let miniSearch = new MiniSearch({ fields: ['title', 'text'] })
2509
+ * miniSearch.addAll(documents)
2510
+ *
2511
+ * const json = JSON.stringify(miniSearch)
2512
+ * // It can later be deserialized like this:
2513
+ * miniSearch = MiniSearch.loadJSON(json, { fields: ['title', 'text'] })
2514
+ * ```
2515
+ *
2516
+ * @param json JSON-serialized index
2517
+ * @param options configuration options, same as the constructor
2518
+ * @return An instance of MiniSearch deserialized from the given JSON.
2519
+ */
2520
+ static loadJSON(json, options) {
2521
+ if (options == null) {
2522
+ throw new Error('MiniSearch: loadJSON should be given the same options used when serializing the index');
2523
+ }
2524
+ return this.loadJS(JSON.parse(json), options);
2525
+ }
2526
+ /**
2527
+ * Async equivalent of {@link MiniSearch.loadJSON}
2528
+ *
2529
+ * This function is an alternative to {@link MiniSearch.loadJSON} that returns
2530
+ * a promise, and loads the index in batches, leaving pauses between them to avoid
2531
+ * blocking the main thread. It tends to be slower than the synchronous
2532
+ * version, but does not block the main thread, so it can be a better choice
2533
+ * when deserializing very large indexes.
2534
+ *
2535
+ * @param json JSON-serialized index
2536
+ * @param options configuration options, same as the constructor
2537
+ * @return A Promise that will resolve to an instance of MiniSearch deserialized from the given JSON.
2538
+ */
2539
+ static async loadJSONAsync(json, options) {
2540
+ if (options == null) {
2541
+ throw new Error('MiniSearch: loadJSON should be given the same options used when serializing the index');
2542
+ }
2543
+ return this.loadJSAsync(JSON.parse(json), options);
2544
+ }
2545
+ /**
2546
+ * Returns the default value of an option. It will throw an error if no option
2547
+ * with the given name exists.
2548
+ *
2549
+ * @param optionName Name of the option
2550
+ * @return The default value of the given option
2551
+ *
2552
+ * ### Usage:
2553
+ *
2554
+ * ```javascript
2555
+ * // Get default tokenizer
2556
+ * MiniSearch.getDefault('tokenize')
2557
+ *
2558
+ * // Get default term processor
2559
+ * MiniSearch.getDefault('processTerm')
2560
+ *
2561
+ * // Unknown options will throw an error
2562
+ * MiniSearch.getDefault('notExisting')
2563
+ * // => throws 'MiniSearch: unknown option "notExisting"'
2564
+ * ```
2565
+ */
2566
+ static getDefault(optionName) {
2567
+ if (defaultOptions.hasOwnProperty(optionName)) {
2568
+ return getOwnProperty(defaultOptions, optionName);
2569
+ }
2570
+ else {
2571
+ throw new Error(`MiniSearch: unknown option "${optionName}"`);
2572
+ }
2573
+ }
2574
+ /**
2575
+ * @ignore
2576
+ */
2577
+ static loadJS(js, options) {
2578
+ const { index, documentIds, fieldLength, storedFields, serializationVersion } = js;
2579
+ const miniSearch = this.instantiateMiniSearch(js, options);
2580
+ miniSearch._documentIds = objectToNumericMap(documentIds);
2581
+ miniSearch._fieldLength = objectToNumericMap(fieldLength);
2582
+ miniSearch._storedFields = objectToNumericMap(storedFields);
2583
+ for (const [shortId, id] of miniSearch._documentIds) {
2584
+ miniSearch._idToShortId.set(id, shortId);
2585
+ }
2586
+ for (const [term, data] of index) {
2587
+ const dataMap = new Map();
2588
+ for (const fieldId of Object.keys(data)) {
2589
+ let indexEntry = data[fieldId];
2590
+ // Version 1 used to nest the index entry inside a field called ds
2591
+ if (serializationVersion === 1) {
2592
+ indexEntry = indexEntry.ds;
2593
+ }
2594
+ dataMap.set(parseInt(fieldId, 10), objectToNumericMap(indexEntry));
2595
+ }
2596
+ miniSearch._index.set(term, dataMap);
2597
+ }
2598
+ return miniSearch;
2599
+ }
2600
+ /**
2601
+ * @ignore
2602
+ */
2603
+ static async loadJSAsync(js, options) {
2604
+ const { index, documentIds, fieldLength, storedFields, serializationVersion } = js;
2605
+ const miniSearch = this.instantiateMiniSearch(js, options);
2606
+ miniSearch._documentIds = await objectToNumericMapAsync(documentIds);
2607
+ miniSearch._fieldLength = await objectToNumericMapAsync(fieldLength);
2608
+ miniSearch._storedFields = await objectToNumericMapAsync(storedFields);
2609
+ for (const [shortId, id] of miniSearch._documentIds) {
2610
+ miniSearch._idToShortId.set(id, shortId);
2611
+ }
2612
+ let count = 0;
2613
+ for (const [term, data] of index) {
2614
+ const dataMap = new Map();
2615
+ for (const fieldId of Object.keys(data)) {
2616
+ let indexEntry = data[fieldId];
2617
+ // Version 1 used to nest the index entry inside a field called ds
2618
+ if (serializationVersion === 1) {
2619
+ indexEntry = indexEntry.ds;
2620
+ }
2621
+ dataMap.set(parseInt(fieldId, 10), await objectToNumericMapAsync(indexEntry));
2622
+ }
2623
+ if (++count % 1000 === 0)
2624
+ await wait(0);
2625
+ miniSearch._index.set(term, dataMap);
2626
+ }
2627
+ return miniSearch;
2628
+ }
2629
+ /**
2630
+ * @ignore
2631
+ */
2632
+ static instantiateMiniSearch(js, options) {
2633
+ const { documentCount, nextId, fieldIds, averageFieldLength, dirtCount, serializationVersion } = js;
2634
+ if (serializationVersion !== 1 && serializationVersion !== 2) {
2635
+ throw new Error('MiniSearch: cannot deserialize an index created with an incompatible version');
2636
+ }
2637
+ const miniSearch = new MiniSearch(options);
2638
+ miniSearch._documentCount = documentCount;
2639
+ miniSearch._nextId = nextId;
2640
+ miniSearch._idToShortId = new Map();
2641
+ miniSearch._fieldIds = fieldIds;
2642
+ miniSearch._avgFieldLength = averageFieldLength;
2643
+ miniSearch._dirtCount = dirtCount || 0;
2644
+ miniSearch._index = new SearchableMap();
2645
+ return miniSearch;
2646
+ }
2647
+ /**
2648
+ * @ignore
2649
+ */
2650
+ executeQuery(query, searchOptions = {}) {
2651
+ if (query === MiniSearch.wildcard) {
2652
+ return this.executeWildcardQuery(searchOptions);
2653
+ }
2654
+ if (typeof query !== 'string') {
2655
+ const options = { ...searchOptions, ...query, queries: undefined };
2656
+ const results = query.queries.map((subquery) => this.executeQuery(subquery, options));
2657
+ return this.combineResults(results, options.combineWith);
2658
+ }
2659
+ const { tokenize, processTerm, searchOptions: globalSearchOptions } = this._options;
2660
+ const options = { tokenize, processTerm, ...globalSearchOptions, ...searchOptions };
2661
+ const { tokenize: searchTokenize, processTerm: searchProcessTerm } = options;
2662
+ const terms = searchTokenize(query)
2663
+ .flatMap((term) => searchProcessTerm(term))
2664
+ .filter((term) => !!term);
2665
+ const queries = terms.map(termToQuerySpec(options));
2666
+ const results = queries.map(query => this.executeQuerySpec(query, options));
2667
+ return this.combineResults(results, options.combineWith);
2668
+ }
2669
+ /**
2670
+ * @ignore
2671
+ */
2672
+ executeQuerySpec(query, searchOptions) {
2673
+ const options = { ...this._options.searchOptions, ...searchOptions };
2674
+ const boosts = (options.fields || this._options.fields).reduce((boosts, field) => ({ ...boosts, [field]: getOwnProperty(options.boost, field) || 1 }), {});
2675
+ const { boostDocument, weights, maxFuzzy, bm25: bm25params } = options;
2676
+ const { fuzzy: fuzzyWeight, prefix: prefixWeight } = { ...defaultSearchOptions.weights, ...weights };
2677
+ const data = this._index.get(query.term);
2678
+ const results = this.termResults(query.term, query.term, 1, query.termBoost, data, boosts, boostDocument, bm25params);
2679
+ let prefixMatches;
2680
+ let fuzzyMatches;
2681
+ if (query.prefix) {
2682
+ prefixMatches = this._index.atPrefix(query.term);
2683
+ }
2684
+ if (query.fuzzy) {
2685
+ const fuzzy = (query.fuzzy === true) ? 0.2 : query.fuzzy;
2686
+ const maxDistance = fuzzy < 1 ? Math.min(maxFuzzy, Math.round(query.term.length * fuzzy)) : fuzzy;
2687
+ if (maxDistance)
2688
+ fuzzyMatches = this._index.fuzzyGet(query.term, maxDistance);
2689
+ }
2690
+ if (prefixMatches) {
2691
+ for (const [term, data] of prefixMatches) {
2692
+ const distance = term.length - query.term.length;
2693
+ if (!distance) {
2694
+ continue;
2695
+ } // Skip exact match.
2696
+ // Delete the term from fuzzy results (if present) if it is also a
2697
+ // prefix result. This entry will always be scored as a prefix result.
2698
+ fuzzyMatches === null || fuzzyMatches === void 0 ? void 0 : fuzzyMatches.delete(term);
2699
+ // Weight gradually approaches 0 as distance goes to infinity, with the
2700
+ // weight for the hypothetical distance 0 being equal to prefixWeight.
2701
+ // The rate of change is much lower than that of fuzzy matches to
2702
+ // account for the fact that prefix matches stay more relevant than
2703
+ // fuzzy matches for longer distances.
2704
+ const weight = prefixWeight * term.length / (term.length + 0.3 * distance);
2705
+ this.termResults(query.term, term, weight, query.termBoost, data, boosts, boostDocument, bm25params, results);
2706
+ }
2707
+ }
2708
+ if (fuzzyMatches) {
2709
+ for (const term of fuzzyMatches.keys()) {
2710
+ const [data, distance] = fuzzyMatches.get(term);
2711
+ if (!distance) {
2712
+ continue;
2713
+ } // Skip exact match.
2714
+ // Weight gradually approaches 0 as distance goes to infinity, with the
2715
+ // weight for the hypothetical distance 0 being equal to fuzzyWeight.
2716
+ const weight = fuzzyWeight * term.length / (term.length + distance);
2717
+ this.termResults(query.term, term, weight, query.termBoost, data, boosts, boostDocument, bm25params, results);
2718
+ }
2719
+ }
2720
+ return results;
2721
+ }
2722
+ /**
2723
+ * @ignore
2724
+ */
2725
+ executeWildcardQuery(searchOptions) {
2726
+ const results = new Map();
2727
+ const options = { ...this._options.searchOptions, ...searchOptions };
2728
+ for (const [shortId, id] of this._documentIds) {
2729
+ const score = options.boostDocument ? options.boostDocument(id, '', this._storedFields.get(shortId)) : 1;
2730
+ results.set(shortId, {
2731
+ score,
2732
+ terms: [],
2733
+ match: {}
2734
+ });
2735
+ }
2736
+ return results;
2737
+ }
2738
+ /**
2739
+ * @ignore
2740
+ */
2741
+ combineResults(results, combineWith = OR) {
2742
+ return combineResults(results, combineWith);
2743
+ }
2744
+ /**
2745
+ * Build a read-only {@link FrozenMiniSearch} snapshot optimized for RAM and search CPU.
2746
+ */
2747
+ freeze() {
2748
+ return freezeFromMiniSearch(this);
2749
+ }
2750
+ /**
2751
+ * Allows serialization of the index to JSON, to possibly store it and later
2752
+ * deserialize it with {@link MiniSearch.loadJSON}.
2753
+ *
2754
+ * Normally one does not directly call this method, but rather call the
2755
+ * standard JavaScript `JSON.stringify()` passing the {@link MiniSearch}
2756
+ * instance, and JavaScript will internally call this method. Upon
2757
+ * deserialization, one must pass to {@link MiniSearch.loadJSON} the same
2758
+ * options used to create the original instance that was serialized.
2759
+ *
2760
+ * ### Usage:
2761
+ *
2762
+ * ```javascript
2763
+ * // Serialize the index:
2764
+ * let miniSearch = new MiniSearch({ fields: ['title', 'text'] })
2765
+ * miniSearch.addAll(documents)
2766
+ * const json = JSON.stringify(miniSearch)
2767
+ *
2768
+ * // Later, to deserialize it:
2769
+ * miniSearch = MiniSearch.loadJSON(json, { fields: ['title', 'text'] })
2770
+ * ```
2771
+ *
2772
+ * @return A plain-object serializable representation of the search index.
2773
+ */
2774
+ toJSON() {
2775
+ const index = [];
2776
+ for (const [term, fieldIndex] of this._index) {
2777
+ const data = {};
2778
+ for (const [fieldId, freqs] of fieldIndex) {
2779
+ data[fieldId] = Object.fromEntries(freqs);
2780
+ }
2781
+ index.push([term, data]);
2782
+ }
2783
+ return {
2784
+ documentCount: this._documentCount,
2785
+ nextId: this._nextId,
2786
+ documentIds: Object.fromEntries(this._documentIds),
2787
+ fieldIds: this._fieldIds,
2788
+ fieldLength: Object.fromEntries(this._fieldLength),
2789
+ averageFieldLength: this._avgFieldLength,
2790
+ storedFields: Object.fromEntries(this._storedFields),
2791
+ dirtCount: this._dirtCount,
2792
+ index,
2793
+ serializationVersion: 2
2794
+ };
2795
+ }
2796
+ /**
2797
+ * @ignore
2798
+ */
2799
+ termResults(sourceTerm, derivedTerm, termWeight, termBoost, fieldTermData, fieldBoosts, boostDocumentFn, bm25params, results = new Map()) {
2800
+ return aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTermData == null ? undefined : mapFieldTermData(fieldTermData), fieldBoosts, {
2801
+ documentCount: this._documentCount,
2802
+ avgFieldLength: this._avgFieldLength,
2803
+ fieldIds: this._fieldIds,
2804
+ getFieldLength: (docId, fieldId) => this._fieldLength.get(docId)[fieldId],
2805
+ getExternalId: (docId) => this._documentIds.get(docId),
2806
+ getStoredFields: (docId) => this._storedFields.get(docId),
2807
+ isDocActive: (docId) => this._documentIds.has(docId),
2808
+ onInactiveDoc: (docId, fieldId, term) => this.removeTerm(fieldId, docId, term)
2809
+ }, boostDocumentFn, bm25params, results);
2810
+ }
2811
+ /**
2812
+ * @ignore
2813
+ */
2814
+ addTerm(fieldId, documentId, term) {
2815
+ const indexData = this._index.fetch(term, createMap);
2816
+ let fieldIndex = indexData.get(fieldId);
2817
+ if (fieldIndex == null) {
2818
+ fieldIndex = new Map();
2819
+ fieldIndex.set(documentId, 1);
2820
+ indexData.set(fieldId, fieldIndex);
2821
+ }
2822
+ else {
2823
+ const docs = fieldIndex.get(documentId);
2824
+ fieldIndex.set(documentId, (docs || 0) + 1);
2825
+ }
2826
+ }
2827
+ /**
2828
+ * @ignore
2829
+ */
2830
+ removeTerm(fieldId, documentId, term) {
2831
+ if (!this._index.has(term)) {
2832
+ this.warnDocumentChanged(documentId, fieldId, term);
2833
+ return;
2834
+ }
2835
+ const indexData = this._index.fetch(term, createMap);
2836
+ const fieldIndex = indexData.get(fieldId);
2837
+ if (fieldIndex == null || fieldIndex.get(documentId) == null) {
2838
+ this.warnDocumentChanged(documentId, fieldId, term);
2839
+ }
2840
+ else if (fieldIndex.get(documentId) <= 1) {
2841
+ if (fieldIndex.size <= 1) {
2842
+ indexData.delete(fieldId);
2843
+ }
2844
+ else {
2845
+ fieldIndex.delete(documentId);
2846
+ }
2847
+ }
2848
+ else {
2849
+ fieldIndex.set(documentId, fieldIndex.get(documentId) - 1);
2850
+ }
2851
+ if (this._index.get(term).size === 0) {
2852
+ this._index.delete(term);
2853
+ }
2854
+ }
2855
+ /**
2856
+ * @ignore
2857
+ */
2858
+ warnDocumentChanged(shortDocumentId, fieldId, term) {
2859
+ for (const fieldName of Object.keys(this._fieldIds)) {
2860
+ if (this._fieldIds[fieldName] === fieldId) {
2861
+ this._options.logger('warn', `MiniSearch: document with ID ${this._documentIds.get(shortDocumentId)} has changed before removal: term "${term}" was not present in field "${fieldName}". Removing a document after it has changed can corrupt the index!`, 'version_conflict');
2862
+ return;
2863
+ }
2864
+ }
2865
+ }
2866
+ /**
2867
+ * @ignore
2868
+ */
2869
+ addDocumentId(documentId) {
2870
+ const shortDocumentId = this._nextId;
2871
+ this._idToShortId.set(documentId, shortDocumentId);
2872
+ this._documentIds.set(shortDocumentId, documentId);
2873
+ this._documentCount += 1;
2874
+ this._nextId += 1;
2875
+ return shortDocumentId;
2876
+ }
2877
+ /**
2878
+ * @ignore
2879
+ */
2880
+ addFields(fields) {
2881
+ for (let i = 0; i < fields.length; i++) {
2882
+ this._fieldIds[fields[i]] = i;
2883
+ }
2884
+ }
2885
+ /**
2886
+ * @ignore
2887
+ */
2888
+ addFieldLength(documentId, fieldId, count, length) {
2889
+ let fieldLengths = this._fieldLength.get(documentId);
2890
+ if (fieldLengths == null)
2891
+ this._fieldLength.set(documentId, fieldLengths = []);
2892
+ fieldLengths[fieldId] = length;
2893
+ const averageFieldLength = this._avgFieldLength[fieldId] || 0;
2894
+ const totalFieldLength = (averageFieldLength * count) + length;
2895
+ this._avgFieldLength[fieldId] = totalFieldLength / (count + 1);
2896
+ }
2897
+ /**
2898
+ * @ignore
2899
+ */
2900
+ removeFieldLength(documentId, fieldId, count, length) {
2901
+ if (count === 1) {
2902
+ this._avgFieldLength[fieldId] = 0;
2903
+ return;
2904
+ }
2905
+ const totalFieldLength = (this._avgFieldLength[fieldId] * count) - length;
2906
+ this._avgFieldLength[fieldId] = totalFieldLength / (count - 1);
2907
+ }
2908
+ }
2909
+ /**
2910
+ * The special wildcard symbol that can be passed to {@link MiniSearch#search}
2911
+ * to match all documents. `executeQuery` recognizes it by strict equality (`===`).
2912
+ */
2913
+ MiniSearch.wildcard = WILDCARD_QUERY;
2914
+ const defaultOptions = {
2915
+ idField: 'id',
2916
+ extractField: (document, fieldName) => document[fieldName],
2917
+ stringifyField: (fieldValue, fieldName) => fieldValue.toString(),
2918
+ tokenize: (text) => text.split(SPACE_OR_PUNCTUATION),
2919
+ processTerm: (term) => term.toLowerCase(),
2920
+ fields: undefined,
2921
+ searchOptions: undefined,
2922
+ storeFields: [],
2923
+ logger: (level, message) => {
2924
+ if (typeof (console === null || console === void 0 ? void 0 : console[level]) === 'function')
2925
+ console[level](message);
2926
+ },
2927
+ autoVacuum: true
2928
+ };
2929
+ const defaultVacuumOptions = { batchSize: 1000, batchWait: 10 };
2930
+ const defaultVacuumConditions = { minDirtFactor: 0.1, minDirtCount: 20 };
2931
+ const defaultAutoVacuumOptions = { ...defaultVacuumOptions, ...defaultVacuumConditions };
2932
+ const createMap = () => new Map();
2933
+ const objectToNumericMap = (object) => {
2934
+ const map = new Map();
2935
+ for (const key of Object.keys(object)) {
2936
+ map.set(parseInt(key, 10), object[key]);
2937
+ }
2938
+ return map;
2939
+ };
2940
+ const objectToNumericMapAsync = async (object) => {
2941
+ const map = new Map();
2942
+ let count = 0;
2943
+ for (const key of Object.keys(object)) {
2944
+ map.set(parseInt(key, 10), object[key]);
2945
+ if (++count % 1000 === 0) {
2946
+ await wait(0);
2947
+ }
2948
+ }
2949
+ return map;
2950
+ };
2951
+ const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
2952
+
2953
+ exports.AND = AND;
2954
+ exports.AND_NOT = AND_NOT;
2955
+ exports.FrozenMiniSearch = FrozenMiniSearch;
2956
+ exports.OR = OR;
2957
+ exports.assembleFrozen = assembleFrozen;
2958
+ exports.buildFrozenFromDocuments = buildFrozenFromDocuments;
2959
+ exports.default = MiniSearch;
2960
+ exports.freezeFromMiniSearch = freezeFromMiniSearch;
2961
+ exports.frozenMemoryBreakdown = frozenMemoryBreakdown;