@mgks/docmd 0.3.6 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2013 @@
1
+ (function (global, factory) {
2
+ typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
3
+ typeof define === 'function' && define.amd ? define(factory) :
4
+ (global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.MiniSearch = factory());
5
+ })(this, (function () { 'use strict';
6
+
7
/**
 * Iteration mode for {@link TreeIterator}: each step yields a `[key, value]`
 * pair.
 * @ignore
 */
const ENTRIES = 'ENTRIES';

/**
 * Iteration mode for {@link TreeIterator}: each step yields the key only.
 * @ignore
 */
const KEYS = 'KEYS';

/**
 * Iteration mode for {@link TreeIterator}: each step yields the value only.
 * @ignore
 */
const VALUES = 'VALUES';

/**
 * Reserved edge label (the empty string) under which a radix tree node stores
 * the value of the key that terminates at that node.
 * @ignore
 */
const LEAF = '';
15
/**
 * Depth-first iterator over a radix tree stored as nested `Map`s.
 *
 * The iterator keeps a stack of frames (`_path`), one per tree level currently
 * being visited. Each frame holds the node and a private copy of its edge
 * labels that have not been consumed yet. Labels are consumed from the end of
 * that copy, so the children of a node are visited in reverse key order.
 *
 * @private
 */
class TreeIterator {
    /**
     * @param set Object exposing the tree root as `_tree` and the key prefix as
     * `_prefix`
     * @param type One of `ENTRIES`, `KEYS` or `VALUES`; selects what `next()`
     * yields
     */
    constructor(set, type) {
        const root = set._tree;
        const rootKeys = Array.from(root.keys());
        this.set = set;
        this._type = type;
        // An empty tree starts with an empty stack, i.e. already exhausted.
        this._path = rootKeys.length > 0 ? [{ node: root, keys: rootKeys }] : [];
    }

    /**
     * Produces the next iteration result and advances past it.
     *
     * @return An iterator result object (`{ done, value }`)
     */
    next() {
        const step = this.dive();
        this.backtrack();
        return step;
    }

    /**
     * Descends along the last pending edge of each frame until a leaf edge is
     * on top, then returns the result for that leaf. Returns a `done` result
     * when the stack is empty.
     */
    dive() {
        while (this._path.length > 0) {
            const { node, keys } = last$1(this._path);
            const edge = last$1(keys);
            if (edge === LEAF) {
                return { done: false, value: this.result() };
            }
            const child = node.get(edge);
            this._path.push({ node: child, keys: Array.from(child.keys()) });
        }
        return { done: true, value: undefined };
    }

    /**
     * Drops the edge that was just visited, popping every frame that has no
     * pending edges left.
     */
    backtrack() {
        while (this._path.length > 0) {
            const pending = last$1(this._path).keys;
            pending.pop();
            if (pending.length > 0) {
                return;
            }
            this._path.pop();
        }
    }

    /**
     * Builds the full key of the current position: the set's prefix followed by
     * the current edge label of every frame, leaf markers excluded.
     */
    key() {
        const labels = [];
        for (const { keys } of this._path) {
            const label = last$1(keys);
            if (label !== LEAF) {
                labels.push(label);
            }
        }
        return this.set._prefix + labels.join('');
    }

    /**
     * Reads the value stored under the leaf marker of the top frame's node.
     */
    value() {
        const { node } = last$1(this._path);
        return node.get(LEAF);
    }

    /**
     * Shapes the current position according to the iteration type: the value,
     * the key, or (for any other type) a `[key, value]` pair.
     */
    result() {
        if (this._type === VALUES) {
            return this.value();
        }
        if (this._type === KEYS) {
            return this.key();
        }
        return [this.key(), this.value()];
    }

    /** The iterator is its own iterable. */
    [Symbol.iterator]() {
        return this;
    }
}
75
// Returns the final element of `array`, or `undefined` when it is empty.
const last$1 = (array) => {
    const finalIndex = array.length - 1;
    return array[finalIndex];
};
78
+
79
+ /* eslint-disable no-labels */
80
/**
 * Collects every key in the radix tree rooted at `node` whose edit distance
 * from `query` is at most `maxDistance`.
 *
 * Allocates the Levenshtein matrix, seeds its first row and first column, and
 * delegates the tree walk to `recurse`.
 *
 * @param node Root of the radix tree (nested `Map`s)
 * @param query The search key; `undefined` yields an empty result
 * @param maxDistance The maximum accepted edit distance
 * @return A `Map` from matching key to `[value, distance]`
 * @ignore
 */
const fuzzySearch = (node, query, maxDistance) => {
    const matches = new Map();
    if (query !== undefined) {
        // One matrix column per query character, plus the leading column.
        const columns = query.length + 1;
        // Matching terms can never be longer than the query plus maxDistance,
        // which bounds the number of rows.
        const rows = columns + maxDistance;
        // Every cell starts above the threshold; the first row and the first
        // column are then seeded with 0 1 2 3 ...
        const matrix = new Uint8Array(rows * columns).fill(maxDistance + 1);
        for (let col = 0; col < columns; ++col) {
            matrix[col] = col;
        }
        for (let row = 1; row < rows; ++row) {
            matrix[row * columns] = row;
        }
        recurse(node, query, maxDistance, matches, matrix, 1, columns, '');
    }
    return matches;
};
100
// Modified version of http://stevehanov.ca/blog/?id=114
//
// Walks the radix tree while maintaining a single Levenshtein matrix for the
// query. One row is (re)computed per character of the edge being followed, and
// a subtree is abandoned as soon as the smallest value in its latest row
// exceeds the maximum edit distance. Reusing the same matrix pays off
// especially for larger edit distances.
//
//           k   a   t   e   <-- query
//       0   1   2   3   4
//   c   1   1   2   3   4
//   a   2   2   1   2   3
//   t   3   3   2   1  [2]  <-- edit distance
//   ^
//   ^ term in radix tree, rows are added and removed as needed
//
// Parameters:
//   node        current radix tree node (a Map of edge label -> child)
//   query       the search string
//   maxDistance maximum accepted edit distance
//   results     Map receiving `key -> [value, distance]` for each match
//   matrix      flat row-major matrix with `n` columns
//   m           index of the first matrix row to fill for this node's edges
//   n           number of matrix columns (query length + 1)
//   prefix      the key spelled out by the path from the root to `node`
const recurse = (node, query, maxDistance, results, matrix, m, n, prefix) => {
    const nextRowStart = m * n;
    for (const edge of node.keys()) {
        if (edge === LEAF) {
            // A key ends here: its distance is the last cell of the row
            // preceding row `m`. Record it when it is within the threshold.
            const distance = matrix[nextRowStart - 1];
            if (distance <= maxDistance) {
                results.set(prefix, [node.get(edge), distance]);
            }
            continue;
        }
        // Fill one matrix row per character of the edge label. If any row's
        // minimum exceeds the threshold, no key below this edge can match.
        let row = m;
        let viable = true;
        for (let pos = 0; pos < edge.length; ++pos, ++row) {
            const char = edge[pos];
            const rowStart = n * row;
            const aboveStart = rowStart - n;
            // The first column is pre-seeded; it also initializes the row minimum.
            let rowMin = matrix[rowStart];
            // Only columns within `maxDistance` of the diagonal are computed.
            const firstCol = Math.max(0, row - maxDistance - 1);
            const endCol = Math.min(n - 1, row + maxDistance);
            for (let col = firstCol; col < endCol; ++col) {
                // All three neighbours are read unconditionally: conditional
                // reads are avoided for performance reasons.
                const fromDiagonal = matrix[aboveStart + col] + (char !== query[col] ? 1 : 0);
                const fromAbove = matrix[aboveStart + col + 1] + 1;
                const fromLeft = matrix[rowStart + col] + 1;
                const cell = Math.min(fromDiagonal, fromAbove, fromLeft);
                matrix[rowStart + col + 1] = cell;
                if (cell < rowMin) {
                    rowMin = cell;
                }
            }
            // Distances never decrease further down, so stop following this edge.
            if (rowMin > maxDistance) {
                viable = false;
                break;
            }
        }
        if (viable) {
            recurse(node.get(edge), query, maxDistance, results, matrix, row, n, prefix + edge);
        }
    }
};
161
+
162
+ /* eslint-disable no-labels */
163
/**
 * A class implementing the same interface as a standard JavaScript
 * [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map)
 * with string keys, but adding support for efficiently searching entries with
 * prefix or fuzzy search. This class is used internally by {@link MiniSearch}
 * as the inverted index data structure. The implementation is a radix tree
 * (compressed prefix tree).
 *
 * Since this class can be of general utility beyond _MiniSearch_, it is
 * exported by the `minisearch` package and can be imported (or required) as
 * `minisearch/SearchableMap`.
 *
 * @typeParam T The type of the values stored in the map.
 */
class SearchableMap {
    /**
     * The constructor is normally called without arguments, creating an empty
     * map. In order to create a {@link SearchableMap} from an iterable or from an
     * object, check {@link SearchableMap.from} and {@link
     * SearchableMap.fromObject}.
     *
     * The constructor arguments are for internal use, when creating derived
     * mutable views of a map at a prefix.
     *
     * @param tree Root node of the radix tree (defaults to a new empty `Map`)
     * @param prefix Prefix prepended to every key produced by iteration
     */
    constructor(tree = new Map(), prefix = '') {
        // Cached entry count; `undefined` means "not computed yet".
        this._size = undefined;
        this._tree = tree;
        this._prefix = prefix;
    }

    /**
     * Creates and returns a mutable view of this {@link SearchableMap},
     * containing only entries that share the given prefix.
     *
     * Iterating the view (`keys()`, `entries()`, ...) yields full keys,
     * including the prefix.
     *
     * NOTE(review): `get`, `has`, `set`, `delete`, `update` and `fetch` pass
     * their key to the view's subtree unchanged, so on a view they address
     * entries by the part of the key that follows the prefix. Confirm whether
     * that is the intended contract.
     *
     * When the prefix ends in the middle of an edge label, the view is rooted
     * at a newly created `Map` that references the matching subtree. When no
     * key matches the prefix, the view is a new empty map.
     *
     * ### Usage:
     *
     * ```javascript
     * let map = new SearchableMap()
     * map.set("unicorn", 1)
     * map.set("universe", 2)
     * map.set("university", 3)
     * map.set("unique", 4)
     * map.set("hello", 5)
     *
     * let uni = map.atPrefix("uni")
     * Array.from(uni.keys()).sort() // => ["unicorn", "unique", "universe", "university"]
     * uni.get("que") // => 4 (key remainder after the prefix)
     * uni.has("hello") // => false
     *
     * let univer = map.atPrefix("univer")
     * Array.from(univer.keys()).sort() // => ["universe", "university"]
     * ```
     *
     * @param prefix The prefix
     * @return A {@link SearchableMap} representing a mutable view of the original
     * Map at the given prefix
     * @throws Error if `prefix` does not start with this map's own prefix
     */
    atPrefix(prefix) {
        if (!prefix.startsWith(this._prefix)) {
            throw new Error('Mismatched prefix');
        }
        const [node, path] = trackDown(this._tree, prefix.slice(this._prefix.length));
        if (node === undefined) {
            // The prefix does not end on a node boundary: look for an edge of
            // the last visited node that extends the unmatched remainder.
            const [parentNode, key] = last(path);
            for (const k of parentNode.keys()) {
                if (k !== LEAF && k.startsWith(key)) {
                    const subtree = new Map();
                    subtree.set(k.slice(key.length), parentNode.get(k));
                    return new SearchableMap(subtree, prefix);
                }
            }
        }
        // When `node` is still undefined here, the constructor default creates
        // a new empty tree.
        return new SearchableMap(node, prefix);
    }

    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/clear
     */
    clear() {
        this._size = undefined;
        this._tree.clear();
    }

    /**
     * Removes the entry at the given key, if present.
     *
     * Unlike `Map.prototype.delete`, this does not report whether an entry was
     * removed: it returns the result of the internal `remove` helper, which has
     * no return value.
     *
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/delete
     * @param key Key to delete
     */
    delete(key) {
        this._size = undefined;
        return remove(this._tree, key);
    }

    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/entries
     * @return An iterator iterating through `[key, value]` entries.
     */
    entries() {
        return new TreeIterator(this, ENTRIES);
    }

    /**
     * Calls `fn` once per entry.
     *
     * Note the argument order: `fn` receives `(key, value, map)`, whereas the
     * callback of `Map.prototype.forEach` receives `(value, key, map)`.
     *
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/forEach
     * @param fn Iteration function
     */
    forEach(fn) {
        for (const [key, value] of this) {
            fn(key, value, this);
        }
    }

    /**
     * Returns a Map of all the entries that have a key within the given edit
     * distance from the search key. The keys of the returned Map are the matching
     * keys, while the values are two-element arrays where the first element is
     * the value associated to the key, and the second is the edit distance of the
     * key to the search key.
     *
     * ### Usage:
     *
     * ```javascript
     * let map = new SearchableMap()
     * map.set('hello', 'world')
     * map.set('hell', 'yeah')
     * map.set('ciao', 'mondo')
     *
     * // Get all entries that match the key 'hallo' with a maximum edit distance of 2
     * map.fuzzyGet('hallo', 2)
     * // => Map(2) { 'hello' => ['world', 1], 'hell' => ['yeah', 2] }
     *
     * // In the example, the "hello" key has value "world" and edit distance of 1
     * // (change "e" to "a"), the key "hell" has value "yeah" and edit distance of 2
     * // (change "e" to "a", delete "o")
     * ```
     *
     * @param key The search key
     * @param maxEditDistance The maximum edit distance (Levenshtein)
     * @return A Map of the matching keys to their value and edit distance
     */
    fuzzyGet(key, maxEditDistance) {
        return fuzzySearch(this._tree, key, maxEditDistance);
    }

    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/get
     * @param key Key to get
     * @return Value associated to the key, or `undefined` if the key is not
     * found.
     */
    get(key) {
        const node = lookup(this._tree, key);
        return node !== undefined ? node.get(LEAF) : undefined;
    }

    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/has
     * @param key Key
     * @return True if the key is in the map, false otherwise
     */
    has(key) {
        const node = lookup(this._tree, key);
        return node !== undefined && node.has(LEAF);
    }

    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/keys
     * @return An `Iterable` iterating through keys
     */
    keys() {
        return new TreeIterator(this, KEYS);
    }

    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/set
     * @param key Key to set
     * @param value Value to associate to the key
     * @return The {@link SearchableMap} itself, to allow chaining
     * @throws Error if `key` is not a string
     */
    set(key, value) {
        if (typeof key !== 'string') {
            throw new Error('key must be a string');
        }
        this._size = undefined;
        const node = createPath(this._tree, key);
        node.set(LEAF, value);
        return this;
    }

    /**
     * The number of entries in the map.
     *
     * The count is computed by iterating all entries and is then cached. The
     * cache is reset by the mutating methods of this instance (`set`, `update`,
     * `fetch`, `delete`, `clear`).
     *
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/size
     */
    get size() {
        // Compare against `undefined` explicitly: a cached size of 0 is a valid
        // cache hit and must not trigger a new traversal.
        if (this._size !== undefined) {
            return this._size;
        }
        /** @ignore */
        this._size = 0;
        const iter = this.entries();
        while (!iter.next().done) {
            this._size += 1;
        }
        return this._size;
    }

    /**
     * Updates the value at the given key using the provided function. The function
     * is called with the current value at the key, and its return value is used as
     * the new value to be set.
     *
     * ### Example:
     *
     * ```javascript
     * // Increment the current value by one
     * searchableMap.update('somekey', (currentValue) => currentValue == null ? 0 : currentValue + 1)
     * ```
     *
     * If the value at the given key is or will be an object, it might not require
     * re-assignment. In that case it is better to use `fetch()`, because it is
     * faster.
     *
     * @param key The key to update
     * @param fn The function used to compute the new value from the current one
     * @return The {@link SearchableMap} itself, to allow chaining
     * @throws Error if `key` is not a string
     */
    update(key, fn) {
        if (typeof key !== 'string') {
            throw new Error('key must be a string');
        }
        this._size = undefined;
        const node = createPath(this._tree, key);
        node.set(LEAF, fn(node.get(LEAF)));
        return this;
    }

    /**
     * Fetches the value of the given key. If the value does not exist, calls the
     * given function to create a new value, which is inserted at the given key
     * and subsequently returned.
     *
     * ### Example:
     *
     * ```javascript
     * const map = searchableMap.fetch('somekey', () => new Map())
     * map.set('foo', 'bar')
     * ```
     *
     * @param key The key to update
     * @param initial A function that creates a new value if the key does not exist
     * @return The existing or new value at the given key
     * @throws Error if `key` is not a string
     */
    fetch(key, initial) {
        if (typeof key !== 'string') {
            throw new Error('key must be a string');
        }
        this._size = undefined;
        const node = createPath(this._tree, key);
        let value = node.get(LEAF);
        if (value === undefined) {
            node.set(LEAF, value = initial());
        }
        return value;
    }

    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/values
     * @return An `Iterable` iterating through values.
     */
    values() {
        return new TreeIterator(this, VALUES);
    }

    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/@@iterator
     */
    [Symbol.iterator]() {
        return this.entries();
    }

    /**
     * Creates a {@link SearchableMap} from an `Iterable` of entries
     *
     * @param entries Entries to be inserted in the {@link SearchableMap}
     * @return A new {@link SearchableMap} with the given entries
     */
    static from(entries) {
        const tree = new SearchableMap();
        for (const [key, value] of entries) {
            tree.set(key, value);
        }
        return tree;
    }

    /**
     * Creates a {@link SearchableMap} from the iterable properties of a JavaScript object
     *
     * @param object Object of entries for the {@link SearchableMap}
     * @return A new {@link SearchableMap} with the given entries
     */
    static fromObject(object) {
        return SearchableMap.from(Object.entries(object));
    }
}
448
// Follows `key` down the radix tree starting at `tree`, recording each step.
//
// Returns `[node, path]`. `path` lists the `[parentNode, edgeLabel]` pairs that
// were followed; it is appended to in place for performance. `node` is the
// node reached once the whole key is consumed. If the key cannot be matched
// further, `node` is `undefined` and the last path element is
// `[lastNodeReached, unmatchedRemainder]`.
const trackDown = (tree, key, path = []) => {
    let current = tree;
    let remaining = key;
    while (remaining.length !== 0 && current != null) {
        let matchedEdge;
        for (const edge of current.keys()) {
            if (edge !== LEAF && remaining.startsWith(edge)) {
                matchedEdge = edge;
                break;
            }
        }
        if (matchedEdge === undefined) {
            // Dead end: record where the walk stopped and what was left over.
            path.push([current, remaining]);
            return [undefined, path];
        }
        path.push([current, matchedEdge]);
        current = current.get(matchedEdge);
        remaining = remaining.slice(matchedEdge.length);
    }
    return [current, path];
};
461
// Finds the radix tree node reached by following `key` from `tree`.
//
// Returns the node where the key ends, `undefined` when no edge matches the
// rest of the key, or `tree` itself when the key is empty or `tree` is
// null/undefined.
const lookup = (tree, key) => {
    let current = tree;
    let remaining = key;
    while (remaining.length !== 0 && current != null) {
        let matchedEdge;
        for (const edge of current.keys()) {
            if (edge !== LEAF && remaining.startsWith(edge)) {
                matchedEdge = edge;
                break;
            }
        }
        if (matchedEdge === undefined) {
            return undefined;
        }
        current = current.get(matchedEdge);
        remaining = remaining.slice(matchedEdge.length);
    }
    return current;
};
471
// Ensures the radix tree contains a path spelling `key` and returns the node
// at the end of that path, creating or splitting nodes as needed. This function
// is on the hot path for indexing, so it works with character comparisons and
// a loop rather than string operations and recursion.
const createPath = (node, key) => {
    const total = key.length;
    let cursor = 0;
    while (node && cursor < total) {
        let advanced = false;
        for (const edge of node.keys()) {
            // Only an edge starting with the next key character is a candidate.
            if (edge === LEAF || key[cursor] !== edge[0]) {
                continue;
            }
            // Measure how many leading characters the edge shares with the
            // unconsumed part of the key.
            const limit = Math.min(total - cursor, edge.length);
            let shared = 1;
            while (shared < limit && key[cursor + shared] === edge[shared]) {
                shared += 1;
            }
            const subtree = node.get(edge);
            if (shared === edge.length) {
                // The whole edge label matches: just descend.
                node = subtree;
            }
            else {
                // Partial match: split the edge with an intermediate node that
                // holds the existing subtree under the unshared remainder.
                const intermediate = new Map();
                intermediate.set(edge.slice(shared), subtree);
                node.set(key.slice(cursor, cursor + shared), intermediate);
                node.delete(edge);
                node = intermediate;
            }
            cursor += shared;
            advanced = true;
            break;
        }
        if (!advanced) {
            // No edge continues the key: hang the rest of it off a new child.
            const tail = new Map();
            node.set(key.slice(cursor), tail);
            return tail;
        }
    }
    return node;
};
510
// Removes the value stored at `key` from the radix tree rooted at `tree`, then
// restores the tree's compact shape: a node left empty is pruned, and a node
// left with a single entry is merged into its parent edge. Does nothing when
// the key does not lead to a node. Has no return value.
const remove = (tree, key) => {
    const [target, trail] = trackDown(tree, key);
    if (target === undefined) {
        return;
    }
    target.delete(LEAF);
    switch (target.size) {
        case 0:
            cleanup(trail);
            break;
        case 1: {
            const [soleKey, soleValue] = target.entries().next().value;
            merge(trail, soleKey, soleValue);
            break;
        }
        default:
            break;
    }
};
524
// Prunes the edge named by the last element of `path`, then walks upward:
// every ancestor left empty is pruned in turn. The walk stops at the first
// ancestor that still has entries; if that ancestor has exactly one entry and
// it is not a leaf value, the entry is merged into the ancestor's own parent
// edge.
const cleanup = (path) => {
    let depth = path.length;
    while (depth > 0) {
        const [node, edge] = path[depth - 1];
        node.delete(edge);
        if (node.size === 0) {
            // This node is now empty too: prune it from its parent next.
            depth -= 1;
            continue;
        }
        if (node.size === 1) {
            const [soleKey, soleValue] = node.entries().next().value;
            if (soleKey !== LEAF) {
                merge(path.slice(0, depth - 1), soleKey, soleValue);
            }
        }
        return;
    }
};
540
// Collapses a single-child node into its parent edge: in the parent named by
// the last element of `path`, the edge label is extended with `key` and made
// to point directly at `value`, and the old edge is removed. Does nothing when
// `path` is empty.
const merge = (path, key, value) => {
    if (path.length > 0) {
        const [parent, edge] = last(path);
        parent.set(edge + key, value);
        parent.delete(edge);
    }
};
548
// Returns the final element of `array`, or `undefined` when it is empty.
const last = (array) => {
    const finalIndex = array.length - 1;
    return array[finalIndex];
};
551
+
552
// String tags naming three combination modes.
// NOTE(review): presumably these are the accepted values for how MiniSearch
// combines per-term search results; their use lies outside this excerpt,
// so confirm there.
const OR = 'or';
const AND = 'and';
const AND_NOT = 'and_not';
555
+ /**
556
+ * {@link MiniSearch} is the main entrypoint class, implementing a full-text
557
+ * search engine in memory.
558
+ *
559
+ * @typeParam T The type of the documents being indexed.
560
+ *
561
+ * ### Basic example:
562
+ *
563
+ * ```javascript
564
+ * const documents = [
565
+ * {
566
+ * id: 1,
567
+ * title: 'Moby Dick',
568
+ * text: 'Call me Ishmael. Some years ago...',
569
+ * category: 'fiction'
570
+ * },
571
+ * {
572
+ * id: 2,
573
+ * title: 'Zen and the Art of Motorcycle Maintenance',
574
+ * text: 'I can see by my watch...',
575
+ * category: 'fiction'
576
+ * },
577
+ * {
578
+ * id: 3,
579
+ * title: 'Neuromancer',
580
+ * text: 'The sky above the port was...',
581
+ * category: 'fiction'
582
+ * },
583
+ * {
584
+ * id: 4,
585
+ * title: 'Zen and the Art of Archery',
586
+ * text: 'At first sight it must seem...',
587
+ * category: 'non-fiction'
588
+ * },
589
+ * // ...and more
590
+ * ]
591
+ *
592
+ * // Create a search engine that indexes the 'title' and 'text' fields for
593
+ * // full-text search. Search results will include 'title' and 'category' (plus the
594
+ * // id field, that is always stored and returned)
595
+ * const miniSearch = new MiniSearch({
596
+ * fields: ['title', 'text'],
597
+ * storeFields: ['title', 'category']
598
+ * })
599
+ *
600
+ * // Add documents to the index
601
+ * miniSearch.addAll(documents)
602
+ *
603
+ * // Search for documents:
604
+ * let results = miniSearch.search('zen art motorcycle')
605
+ * // => [
606
+ * // { id: 2, title: 'Zen and the Art of Motorcycle Maintenance', category: 'fiction', score: 2.77258 },
607
+ * // { id: 4, title: 'Zen and the Art of Archery', category: 'non-fiction', score: 1.38629 }
608
+ * // ]
609
+ * ```
610
+ */
611
+ class MiniSearch {
612
+ /**
613
+ * @param options Configuration options
614
+ *
615
+ * ### Examples:
616
+ *
617
+ * ```javascript
618
+ * // Create a search engine that indexes the 'title' and 'text' fields of your
619
+ * // documents:
620
+ * const miniSearch = new MiniSearch({ fields: ['title', 'text'] })
621
+ * ```
622
+ *
623
+ * ### ID Field:
624
+ *
625
+ * ```javascript
626
+ * // Your documents are assumed to include a unique 'id' field, but if you want
627
+ * // to use a different field for document identification, you can set the
628
+ * // 'idField' option:
629
+ * const miniSearch = new MiniSearch({ idField: 'key', fields: ['title', 'text'] })
630
+ * ```
631
+ *
632
+ * ### Options and defaults:
633
+ *
634
+ * ```javascript
635
+ * // The full set of options (here with their default value) is:
636
+ * const miniSearch = new MiniSearch({
637
+ * // idField: field that uniquely identifies a document
638
+ * idField: 'id',
639
+ *
640
+ * // extractField: function used to get the value of a field in a document.
641
+ * // By default, it assumes the document is a flat object with field names as
642
+ * // property keys and field values as string property values, but custom logic
643
+ * // can be implemented by setting this option to a custom extractor function.
644
+ * extractField: (document, fieldName) => document[fieldName],
645
+ *
646
+ * // tokenize: function used to split fields into individual terms. By
647
+ * // default, it is also used to tokenize search queries, unless a specific
648
+ * // `tokenize` search option is supplied. When tokenizing an indexed field,
649
+ * // the field name is passed as the second argument.
650
+ * tokenize: (string, _fieldName) => string.split(SPACE_OR_PUNCTUATION),
651
+ *
652
+ * // processTerm: function used to process each tokenized term before
653
+ * // indexing. It can be used for stemming and normalization. Return a falsy
654
+ * // value in order to discard a term. By default, it is also used to process
655
+ * // search queries, unless a specific `processTerm` option is supplied as a
656
+ * // search option. When processing a term from a indexed field, the field
657
+ * // name is passed as the second argument.
658
+ * processTerm: (term, _fieldName) => term.toLowerCase(),
659
+ *
660
+ * // searchOptions: default search options, see the `search` method for
661
+ * // details
662
+ * searchOptions: undefined,
663
+ *
664
+ * // fields: document fields to be indexed. Mandatory, but not set by default
665
+ * fields: undefined
666
+ *
667
+ * // storeFields: document fields to be stored and returned as part of the
668
+ * // search results.
669
+ * storeFields: []
670
+ * })
671
+ * ```
672
+ */
673
+ constructor(options) {
674
+ if ((options === null || options === void 0 ? void 0 : options.fields) == null) {
675
+ throw new Error('MiniSearch: option "fields" must be provided');
676
+ }
677
+ const autoVacuum = (options.autoVacuum == null || options.autoVacuum === true) ? defaultAutoVacuumOptions : options.autoVacuum;
678
+ this._options = {
679
+ ...defaultOptions,
680
+ ...options,
681
+ autoVacuum,
682
+ searchOptions: { ...defaultSearchOptions, ...(options.searchOptions || {}) },
683
+ autoSuggestOptions: { ...defaultAutoSuggestOptions, ...(options.autoSuggestOptions || {}) }
684
+ };
685
+ this._index = new SearchableMap();
686
+ this._documentCount = 0;
687
+ this._documentIds = new Map();
688
+ this._idToShortId = new Map();
689
+ // Fields are defined during initialization, don't change, are few in
690
+ // number, rarely need iterating over, and have string keys. Therefore in
691
+ // this case an object is a better candidate than a Map to store the mapping
692
+ // from field key to ID.
693
+ this._fieldIds = {};
694
+ this._fieldLength = new Map();
695
+ this._avgFieldLength = [];
696
+ this._nextId = 0;
697
+ this._storedFields = new Map();
698
+ this._dirtCount = 0;
699
+ this._currentVacuum = null;
700
+ this._enqueuedVacuum = null;
701
+ this._enqueuedVacuumConditions = defaultVacuumConditions;
702
+ this.addFields(this._options.fields);
703
+ }
704
+ /**
705
+ * Adds a document to the index
706
+ *
707
+ * @param document The document to be indexed
708
+ */
709
+ add(document) {
710
+ const { extractField, stringifyField, tokenize, processTerm, fields, idField } = this._options;
711
+ const id = extractField(document, idField);
712
+ if (id == null) {
713
+ throw new Error(`MiniSearch: document does not have ID field "${idField}"`);
714
+ }
715
+ if (this._idToShortId.has(id)) {
716
+ throw new Error(`MiniSearch: duplicate ID ${id}`);
717
+ }
718
+ const shortDocumentId = this.addDocumentId(id);
719
+ this.saveStoredFields(shortDocumentId, document);
720
+ for (const field of fields) {
721
+ const fieldValue = extractField(document, field);
722
+ if (fieldValue == null)
723
+ continue;
724
+ const tokens = tokenize(stringifyField(fieldValue, field), field);
725
+ const fieldId = this._fieldIds[field];
726
+ const uniqueTerms = new Set(tokens).size;
727
+ this.addFieldLength(shortDocumentId, fieldId, this._documentCount - 1, uniqueTerms);
728
+ for (const term of tokens) {
729
+ const processedTerm = processTerm(term, field);
730
+ if (Array.isArray(processedTerm)) {
731
+ for (const t of processedTerm) {
732
+ this.addTerm(fieldId, shortDocumentId, t);
733
+ }
734
+ }
735
+ else if (processedTerm) {
736
+ this.addTerm(fieldId, shortDocumentId, processedTerm);
737
+ }
738
+ }
739
+ }
740
+ }
741
+ /**
742
+ * Adds all the given documents to the index
743
+ *
744
+ * @param documents An array of documents to be indexed
745
+ */
746
+ addAll(documents) {
747
+ for (const document of documents)
748
+ this.add(document);
749
+ }
750
+ /**
751
+ * Adds all the given documents to the index asynchronously.
752
+ *
753
+ * Returns a promise that resolves (to `undefined`) when the indexing is done.
754
+ * This method is useful when index many documents, to avoid blocking the main
755
+ * thread. The indexing is performed asynchronously and in chunks.
756
+ *
757
+ * @param documents An array of documents to be indexed
758
+ * @param options Configuration options
759
+ * @return A promise resolving to `undefined` when the indexing is done
760
+ */
761
+ addAllAsync(documents, options = {}) {
762
+ const { chunkSize = 10 } = options;
763
+ const acc = { chunk: [], promise: Promise.resolve() };
764
+ const { chunk, promise } = documents.reduce(({ chunk, promise }, document, i) => {
765
+ chunk.push(document);
766
+ if ((i + 1) % chunkSize === 0) {
767
+ return {
768
+ chunk: [],
769
+ promise: promise
770
+ .then(() => new Promise(resolve => setTimeout(resolve, 0)))
771
+ .then(() => this.addAll(chunk))
772
+ };
773
+ }
774
+ else {
775
+ return { chunk, promise };
776
+ }
777
+ }, acc);
778
+ return promise.then(() => this.addAll(chunk));
779
+ }
780
+ /**
781
+ * Removes the given document from the index.
782
+ *
783
+ * The document to remove must NOT have changed between indexing and removal,
784
+ * otherwise the index will be corrupted.
785
+ *
786
+ * This method requires passing the full document to be removed (not just the
787
+ * ID), and immediately removes the document from the inverted index, allowing
788
+ * memory to be released. A convenient alternative is {@link
789
+ * MiniSearch#discard}, which needs only the document ID, and has the same
790
+ * visible effect, but delays cleaning up the index until the next vacuuming.
791
+ *
792
+ * @param document The document to be removed
793
+ */
794
+ remove(document) {
795
+ const { tokenize, processTerm, extractField, stringifyField, fields, idField } = this._options;
796
+ const id = extractField(document, idField);
797
+ if (id == null) {
798
+ throw new Error(`MiniSearch: document does not have ID field "${idField}"`);
799
+ }
800
+ const shortId = this._idToShortId.get(id);
801
+ if (shortId == null) {
802
+ throw new Error(`MiniSearch: cannot remove document with ID ${id}: it is not in the index`);
803
+ }
804
+ for (const field of fields) {
805
+ const fieldValue = extractField(document, field);
806
+ if (fieldValue == null)
807
+ continue;
808
+ const tokens = tokenize(stringifyField(fieldValue, field), field);
809
+ const fieldId = this._fieldIds[field];
810
+ const uniqueTerms = new Set(tokens).size;
811
+ this.removeFieldLength(shortId, fieldId, this._documentCount, uniqueTerms);
812
+ for (const term of tokens) {
813
+ const processedTerm = processTerm(term, field);
814
+ if (Array.isArray(processedTerm)) {
815
+ for (const t of processedTerm) {
816
+ this.removeTerm(fieldId, shortId, t);
817
+ }
818
+ }
819
+ else if (processedTerm) {
820
+ this.removeTerm(fieldId, shortId, processedTerm);
821
+ }
822
+ }
823
+ }
824
+ this._storedFields.delete(shortId);
825
+ this._documentIds.delete(shortId);
826
+ this._idToShortId.delete(id);
827
+ this._fieldLength.delete(shortId);
828
+ this._documentCount -= 1;
829
+ }
830
+ /**
831
+ * Removes all the given documents from the index. If called with no arguments,
832
+ * it removes _all_ documents from the index.
833
+ *
834
+ * @param documents The documents to be removed. If this argument is omitted,
835
+ * all documents are removed. Note that, for removing all documents, it is
836
+ * more efficient to call this method with no arguments than to pass all
837
+ * documents.
838
+ */
839
+ removeAll(documents) {
840
+ if (documents) {
841
+ for (const document of documents)
842
+ this.remove(document);
843
+ }
844
+ else if (arguments.length > 0) {
845
+ throw new Error('Expected documents to be present. Omit the argument to remove all documents.');
846
+ }
847
+ else {
848
+ this._index = new SearchableMap();
849
+ this._documentCount = 0;
850
+ this._documentIds = new Map();
851
+ this._idToShortId = new Map();
852
+ this._fieldLength = new Map();
853
+ this._avgFieldLength = [];
854
+ this._storedFields = new Map();
855
+ this._nextId = 0;
856
+ }
857
+ }
858
+ /**
859
+ * Discards the document with the given ID, so it won't appear in search results
860
+ *
861
+ * It has the same visible effect of {@link MiniSearch.remove} (both cause the
862
+ * document to stop appearing in searches), but a different effect on the
863
+ * internal data structures:
864
+ *
865
+ * - {@link MiniSearch#remove} requires passing the full document to be
866
+ * removed as argument, and removes it from the inverted index immediately.
867
+ *
868
+ * - {@link MiniSearch#discard} instead only needs the document ID, and
869
+ * works by marking the current version of the document as discarded, so it
870
+ * is immediately ignored by searches. This is faster and more convenient
871
+ * than {@link MiniSearch#remove}, but the index is not immediately
872
+ * modified. To take care of that, vacuuming is performed after a certain
873
+ * number of documents are discarded, cleaning up the index and allowing
874
+ * memory to be released.
875
+ *
876
+ * After discarding a document, it is possible to re-add a new version, and
877
+ * only the new version will appear in searches. In other words, discarding
878
+ * and re-adding a document works exactly like removing and re-adding it. The
879
+ * {@link MiniSearch.replace} method can also be used to replace a document
880
+ * with a new version.
881
+ *
882
+ * #### Details about vacuuming
883
+ *
884
+ * Repeated calls to this method would leave obsolete document references in
885
+ * the index, invisible to searches. Two mechanisms take care of cleaning up:
886
+ * clean up during search, and vacuuming.
887
+ *
888
+ * - Upon search, whenever a discarded ID is found (and ignored for the
889
+ * results), references to the discarded document are removed from the
890
+ * inverted index entries for the search terms. This ensures that subsequent
891
+ * searches for the same terms do not need to skip these obsolete references
892
+ * again.
893
+ *
894
+ * - In addition, vacuuming is performed automatically by default (see the
895
+ * `autoVacuum` field in {@link Options}) after a certain number of
896
+ * documents are discarded. Vacuuming traverses all terms in the index,
897
+ * cleaning up all references to discarded documents. Vacuuming can also be
898
+ * triggered manually by calling {@link MiniSearch#vacuum}.
899
+ *
900
+ * @param id The ID of the document to be discarded
901
+ */
902
+ discard(id) {
903
+ const shortId = this._idToShortId.get(id);
904
+ if (shortId == null) {
905
+ throw new Error(`MiniSearch: cannot discard document with ID ${id}: it is not in the index`);
906
+ }
907
+ this._idToShortId.delete(id);
908
+ this._documentIds.delete(shortId);
909
+ this._storedFields.delete(shortId);
910
+ (this._fieldLength.get(shortId) || []).forEach((fieldLength, fieldId) => {
911
+ this.removeFieldLength(shortId, fieldId, this._documentCount, fieldLength);
912
+ });
913
+ this._fieldLength.delete(shortId);
914
+ this._documentCount -= 1;
915
+ this._dirtCount += 1;
916
+ this.maybeAutoVacuum();
917
+ }
918
+ maybeAutoVacuum() {
919
+ if (this._options.autoVacuum === false) {
920
+ return;
921
+ }
922
+ const { minDirtFactor, minDirtCount, batchSize, batchWait } = this._options.autoVacuum;
923
+ this.conditionalVacuum({ batchSize, batchWait }, { minDirtCount, minDirtFactor });
924
+ }
925
+ /**
926
+ * Discards the documents with the given IDs, so they won't appear in search
927
+ * results
928
+ *
929
+ * It is equivalent to calling {@link MiniSearch#discard} for all the given
930
+ * IDs, but with the optimization of triggering at most one automatic
931
+ * vacuuming at the end.
932
+ *
933
+ * Note: to remove all documents from the index, it is faster and more
934
+ * convenient to call {@link MiniSearch.removeAll} with no argument, instead
935
+ * of passing all IDs to this method.
936
+ */
937
+ discardAll(ids) {
938
+ const autoVacuum = this._options.autoVacuum;
939
+ try {
940
+ this._options.autoVacuum = false;
941
+ for (const id of ids) {
942
+ this.discard(id);
943
+ }
944
+ }
945
+ finally {
946
+ this._options.autoVacuum = autoVacuum;
947
+ }
948
+ this.maybeAutoVacuum();
949
+ }
950
+ /**
951
+ * It replaces an existing document with the given updated version
952
+ *
953
+ * It works by discarding the current version and adding the updated one, so
954
+ * it is functionally equivalent to calling {@link MiniSearch#discard}
955
+ * followed by {@link MiniSearch#add}. The ID of the updated document should
956
+ * be the same as the original one.
957
+ *
958
+ * Since it uses {@link MiniSearch#discard} internally, this method relies on
959
+ * vacuuming to clean up obsolete document references from the index, allowing
960
+ * memory to be released (see {@link MiniSearch#discard}).
961
+ *
962
+ * @param updatedDocument The updated document to replace the old version
963
+ * with
964
+ */
965
+ replace(updatedDocument) {
966
+ const { idField, extractField } = this._options;
967
+ const id = extractField(updatedDocument, idField);
968
+ this.discard(id);
969
+ this.add(updatedDocument);
970
+ }
971
+ /**
972
+ * Triggers a manual vacuuming, cleaning up references to discarded documents
973
+ * from the inverted index
974
+ *
975
+ * Vacuuming is only useful for applications that use the {@link
976
+ * MiniSearch#discard} or {@link MiniSearch#replace} methods.
977
+ *
978
+ * By default, vacuuming is performed automatically when needed (controlled by
979
+ * the `autoVacuum` field in {@link Options}), so there is usually no need to
980
+ * call this method, unless one wants to make sure to perform vacuuming at a
981
+ * specific moment.
982
+ *
983
+ * Vacuuming traverses all terms in the inverted index in batches, and cleans
984
+ * up references to discarded documents from the posting list, allowing memory
985
+ * to be released.
986
+ *
987
+ * The method takes an optional object as argument with the following keys:
988
+ *
989
+ * - `batchSize`: the size of each batch (1000 by default)
990
+ *
991
+ * - `batchWait`: the number of milliseconds to wait between batches (10 by
992
+ * default)
993
+ *
994
+ * On large indexes, vacuuming could have a non-negligible cost: batching
995
+ * avoids blocking the thread for long, diluting this cost so that it is not
996
+ * negatively affecting the application. Nonetheless, this method should only
997
+ * be called when necessary, and relying on automatic vacuuming is usually
998
+ * better.
999
+ *
1000
+ * It returns a promise that resolves (to undefined) when the clean up is
1001
+ * completed. If vacuuming is already ongoing at the time this method is
1002
+ * called, a new one is enqueued immediately after the ongoing one, and a
1003
+ * corresponding promise is returned. However, no more than one vacuuming is
1004
+ * enqueued on top of the ongoing one, even if this method is called more
1005
+ * times (enqueuing multiple ones would be useless).
1006
+ *
1007
+ * @param options Configuration options for the batch size and delay. See
1008
+ * {@link VacuumOptions}.
1009
+ */
1010
+ vacuum(options = {}) {
1011
+ return this.conditionalVacuum(options);
1012
+ }
1013
+ conditionalVacuum(options, conditions) {
1014
+ // If a vacuum is already ongoing, schedule another as soon as it finishes,
1015
+ // unless there's already one enqueued. If one was already enqueued, do not
1016
+ // enqueue another on top, but make sure that the conditions are the
1017
+ // broadest.
1018
+ if (this._currentVacuum) {
1019
+ this._enqueuedVacuumConditions = this._enqueuedVacuumConditions && conditions;
1020
+ if (this._enqueuedVacuum != null) {
1021
+ return this._enqueuedVacuum;
1022
+ }
1023
+ this._enqueuedVacuum = this._currentVacuum.then(() => {
1024
+ const conditions = this._enqueuedVacuumConditions;
1025
+ this._enqueuedVacuumConditions = defaultVacuumConditions;
1026
+ return this.performVacuuming(options, conditions);
1027
+ });
1028
+ return this._enqueuedVacuum;
1029
+ }
1030
+ if (this.vacuumConditionsMet(conditions) === false) {
1031
+ return Promise.resolve();
1032
+ }
1033
+ this._currentVacuum = this.performVacuuming(options);
1034
+ return this._currentVacuum;
1035
+ }
1036
+ async performVacuuming(options, conditions) {
1037
+ const initialDirtCount = this._dirtCount;
1038
+ if (this.vacuumConditionsMet(conditions)) {
1039
+ const batchSize = options.batchSize || defaultVacuumOptions.batchSize;
1040
+ const batchWait = options.batchWait || defaultVacuumOptions.batchWait;
1041
+ let i = 1;
1042
+ for (const [term, fieldsData] of this._index) {
1043
+ for (const [fieldId, fieldIndex] of fieldsData) {
1044
+ for (const [shortId] of fieldIndex) {
1045
+ if (this._documentIds.has(shortId)) {
1046
+ continue;
1047
+ }
1048
+ if (fieldIndex.size <= 1) {
1049
+ fieldsData.delete(fieldId);
1050
+ }
1051
+ else {
1052
+ fieldIndex.delete(shortId);
1053
+ }
1054
+ }
1055
+ }
1056
+ if (this._index.get(term).size === 0) {
1057
+ this._index.delete(term);
1058
+ }
1059
+ if (i % batchSize === 0) {
1060
+ await new Promise((resolve) => setTimeout(resolve, batchWait));
1061
+ }
1062
+ i += 1;
1063
+ }
1064
+ this._dirtCount -= initialDirtCount;
1065
+ }
1066
+ // Make the next lines always async, so they execute after this function returns
1067
+ await null;
1068
+ this._currentVacuum = this._enqueuedVacuum;
1069
+ this._enqueuedVacuum = null;
1070
+ }
1071
+ vacuumConditionsMet(conditions) {
1072
+ if (conditions == null) {
1073
+ return true;
1074
+ }
1075
+ let { minDirtCount, minDirtFactor } = conditions;
1076
+ minDirtCount = minDirtCount || defaultAutoVacuumOptions.minDirtCount;
1077
+ minDirtFactor = minDirtFactor || defaultAutoVacuumOptions.minDirtFactor;
1078
+ return this.dirtCount >= minDirtCount && this.dirtFactor >= minDirtFactor;
1079
+ }
1080
+ /**
1081
+ * Is `true` if a vacuuming operation is ongoing, `false` otherwise
1082
+ */
1083
+ get isVacuuming() {
1084
+ return this._currentVacuum != null;
1085
+ }
1086
+ /**
1087
+ * The number of documents discarded since the most recent vacuuming
1088
+ */
1089
+ get dirtCount() {
1090
+ return this._dirtCount;
1091
+ }
1092
+ /**
1093
+ * A number between 0 and 1 giving an indication about the proportion of
1094
+ * documents that are discarded, and can therefore be cleaned up by vacuuming.
1095
+ * A value close to 0 means that the index is relatively clean, while a higher
1096
+ * value means that the index is relatively dirty, and vacuuming could release
1097
+ * memory.
1098
+ */
1099
+ get dirtFactor() {
1100
+ return this._dirtCount / (1 + this._documentCount + this._dirtCount);
1101
+ }
1102
+ /**
1103
+ * Returns `true` if a document with the given ID is present in the index and
1104
+ * available for search, `false` otherwise
1105
+ *
1106
+ * @param id The document ID
1107
+ */
1108
+ has(id) {
1109
+ return this._idToShortId.has(id);
1110
+ }
1111
+ /**
1112
+ * Returns the stored fields (as configured in the `storeFields` constructor
1113
+ * option) for the given document ID. Returns `undefined` if the document is
1114
+ * not present in the index.
1115
+ *
1116
+ * @param id The document ID
1117
+ */
1118
+ getStoredFields(id) {
1119
+ const shortId = this._idToShortId.get(id);
1120
+ if (shortId == null) {
1121
+ return undefined;
1122
+ }
1123
+ return this._storedFields.get(shortId);
1124
+ }
1125
+ /**
1126
+ * Search for documents matching the given search query.
1127
+ *
1128
+ * The result is a list of scored document IDs matching the query, sorted by
1129
+ * descending score, and each including data about which terms were matched and
1130
+ * in which fields.
1131
+ *
1132
+ * ### Basic usage:
1133
+ *
1134
+ * ```javascript
1135
+ * // Search for "zen art motorcycle" with default options: terms have to match
1136
+ * // exactly, and individual terms are joined with OR
1137
+ * miniSearch.search('zen art motorcycle')
1138
+ * // => [ { id: 2, score: 2.77258, match: { ... } }, { id: 4, score: 1.38629, match: { ... } } ]
1139
+ * ```
1140
+ *
1141
+ * ### Restrict search to specific fields:
1142
+ *
1143
+ * ```javascript
1144
+ * // Search only in the 'title' field
1145
+ * miniSearch.search('zen', { fields: ['title'] })
1146
+ * ```
1147
+ *
1148
+ * ### Field boosting:
1149
+ *
1150
+ * ```javascript
1151
+ * // Boost a field
1152
+ * miniSearch.search('zen', { boost: { title: 2 } })
1153
+ * ```
1154
+ *
1155
+ * ### Prefix search:
1156
+ *
1157
+ * ```javascript
1158
+ * // Search for "moto neuro" with prefix search (it will match documents
1159
+ * // containing terms that start with "moto" or "neuro")
1160
+ * miniSearch.search('moto neuro', { prefix: true })
1161
+ * ```
1162
+ *
1163
+ * ### Fuzzy search:
1164
+ *
1165
+ * ```javascript
1166
+ * // Search for "ismael" with fuzzy search (it will match documents containing
1167
+ * // terms similar to "ismael", with a maximum edit distance of 0.2 * term.length
1168
+ * // (rounded to nearest integer))
1169
+ * miniSearch.search('ismael', { fuzzy: 0.2 })
1170
+ * ```
1171
+ *
1172
+ * ### Combining strategies:
1173
+ *
1174
+ * ```javascript
1175
+ * // Mix of exact match, prefix search, and fuzzy search
1176
+ * miniSearch.search('ismael mob', {
1177
+ * prefix: true,
1178
+ * fuzzy: 0.2
1179
+ * })
1180
+ * ```
1181
+ *
1182
+ * ### Advanced prefix and fuzzy search:
1183
+ *
1184
+ * ```javascript
1185
+ * // Perform fuzzy and prefix search depending on the search term. Here
1186
+ * // performing prefix and fuzzy search only on terms longer than 3 characters
1187
+ * miniSearch.search('ismael mob', {
1188
+ * prefix: term => term.length > 3,
1189
+ * fuzzy: term => term.length > 3 ? 0.2 : null
1190
+ * })
1191
+ * ```
1192
+ *
1193
+ * ### Combine with AND:
1194
+ *
1195
+ * ```javascript
1196
+ * // Combine search terms with AND (to match only documents that contain both
1197
+ * // "motorcycle" and "art")
1198
+ * miniSearch.search('motorcycle art', { combineWith: 'AND' })
1199
+ * ```
1200
+ *
1201
+ * ### Combine with AND_NOT:
1202
+ *
1203
+ * There is also an AND_NOT combinator, that finds documents that match the
1204
+ * first term, but do not match any of the other terms. This combinator is
1205
+ * rarely useful with simple queries, and is meant to be used with advanced
1206
+ * query combinations (see later for more details).
1207
+ *
1208
+ * ### Filtering results:
1209
+ *
1210
+ * ```javascript
1211
+ * // Filter only results in the 'fiction' category (assuming that 'category'
1212
+ * // is a stored field)
1213
+ * miniSearch.search('motorcycle art', {
1214
+ * filter: (result) => result.category === 'fiction'
1215
+ * })
1216
+ * ```
1217
+ *
1218
+ * ### Wildcard query
1219
+ *
1220
+ * Searching for an empty string (assuming the default tokenizer) returns no
1221
+ * results. Sometimes though, one needs to match all documents, like in a
1222
+ * "wildcard" search. This is possible by passing the special value
1223
+ * {@link MiniSearch.wildcard} as the query:
1224
+ *
1225
+ * ```javascript
1226
+ * // Return search results for all documents
1227
+ * miniSearch.search(MiniSearch.wildcard)
1228
+ * ```
1229
+ *
1230
+ * Note that search options such as `filter` and `boostDocument` are still
1231
+ * applied, influencing which results are returned, and their order:
1232
+ *
1233
+ * ```javascript
1234
+ * // Return search results for all documents in the 'fiction' category
1235
+ * miniSearch.search(MiniSearch.wildcard, {
1236
+ * filter: (result) => result.category === 'fiction'
1237
+ * })
1238
+ * ```
1239
+ *
1240
+ * ### Advanced combination of queries:
1241
+ *
1242
+ * It is possible to combine different subqueries with OR, AND, and AND_NOT,
1243
+ * and even with different search options, by passing a query expression
1244
+ * tree object as the first argument, instead of a string.
1245
+ *
1246
+ * ```javascript
1247
+ * // Search for documents that contain "zen" and ("motorcycle" or "archery")
1248
+ * miniSearch.search({
1249
+ * combineWith: 'AND',
1250
+ * queries: [
1251
+ * 'zen',
1252
+ * {
1253
+ * combineWith: 'OR',
1254
+ * queries: ['motorcycle', 'archery']
1255
+ * }
1256
+ * ]
1257
+ * })
1258
+ *
1259
+ * // Search for documents that contain ("apple" or "pear") but not "juice" and
1260
+ * // not "tree"
1261
+ * miniSearch.search({
1262
+ * combineWith: 'AND_NOT',
1263
+ * queries: [
1264
+ * {
1265
+ * combineWith: 'OR',
1266
+ * queries: ['apple', 'pear']
1267
+ * },
1268
+ * 'juice',
1269
+ * 'tree'
1270
+ * ]
1271
+ * })
1272
+ * ```
1273
+ *
1274
+ * Each node in the expression tree can be either a string, or an object that
1275
+ * supports all {@link SearchOptions} fields, plus a `queries` array field for
1276
+ * subqueries.
1277
+ *
1278
+ * Note that, while this can become complicated to do by hand for complex or
1279
+ * deeply nested queries, it provides a formalized expression tree API for
1280
+ * external libraries that implement a parser for custom query languages.
1281
+ *
1282
+ * @param query Search query
1283
+ * @param searchOptions Search options. Each option, if not given, defaults to the corresponding value of `searchOptions` given to the constructor, or to the library default.
1284
+ */
1285
+ search(query, searchOptions = {}) {
1286
+ const { searchOptions: globalSearchOptions } = this._options;
1287
+ const searchOptionsWithDefaults = { ...globalSearchOptions, ...searchOptions };
1288
+ const rawResults = this.executeQuery(query, searchOptions);
1289
+ const results = [];
1290
+ for (const [docId, { score, terms, match }] of rawResults) {
1291
+ // terms are the matched query terms, which will be returned to the user
1292
+ // as queryTerms. The quality is calculated based on them, as opposed to
1293
+ // the matched terms in the document (which can be different due to
1294
+ // prefix and fuzzy match)
1295
+ const quality = terms.length || 1;
1296
+ const result = {
1297
+ id: this._documentIds.get(docId),
1298
+ score: score * quality,
1299
+ terms: Object.keys(match),
1300
+ queryTerms: terms,
1301
+ match
1302
+ };
1303
+ Object.assign(result, this._storedFields.get(docId));
1304
+ if (searchOptionsWithDefaults.filter == null || searchOptionsWithDefaults.filter(result)) {
1305
+ results.push(result);
1306
+ }
1307
+ }
1308
+ // If it's a wildcard query, and no document boost is applied, skip sorting
1309
+ // the results, as all results have the same score of 1
1310
+ if (query === MiniSearch.wildcard && searchOptionsWithDefaults.boostDocument == null) {
1311
+ return results;
1312
+ }
1313
+ results.sort(byScore);
1314
+ return results;
1315
+ }
1316
+ /**
1317
+ * Provide suggestions for the given search query
1318
+ *
1319
+ * The result is a list of suggested modified search queries, derived from the
1320
+ * given search query, each with a relevance score, sorted by descending score.
1321
+ *
1322
+ * By default, it uses the same options used for search, except that by
1323
+ * default it performs prefix search on the last term of the query, and
1324
+ * combines terms with `'AND'` (requiring all query terms to match). Custom
1325
+ * options can be passed as a second argument. Defaults can be changed upon
1326
+ * calling the {@link MiniSearch} constructor, by passing a
1327
+ * `autoSuggestOptions` option.
1328
+ *
1329
+ * ### Basic usage:
1330
+ *
1331
+ * ```javascript
1332
+ * // Get suggestions for 'neuro':
1333
+ * miniSearch.autoSuggest('neuro')
1334
+ * // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 0.46240 } ]
1335
+ * ```
1336
+ *
1337
+ * ### Multiple words:
1338
+ *
1339
+ * ```javascript
1340
+ * // Get suggestions for 'zen ar':
1341
+ * miniSearch.autoSuggest('zen ar')
1342
+ * // => [
1343
+ * // { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 },
1344
+ * // { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 }
1345
+ * // ]
1346
+ * ```
1347
+ *
1348
+ * ### Fuzzy suggestions:
1349
+ *
1350
+ * ```javascript
1351
+ * // Correct spelling mistakes using fuzzy search:
1352
+ * miniSearch.autoSuggest('neromancer', { fuzzy: 0.2 })
1353
+ * // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 1.03998 } ]
1354
+ * ```
1355
+ *
1356
+ * ### Filtering:
1357
+ *
1358
+ * ```javascript
1359
+ * // Get suggestions for 'zen ar', but only within the 'fiction' category
1360
+ * // (assuming that 'category' is a stored field):
1361
+ * miniSearch.autoSuggest('zen ar', {
1362
+ * filter: (result) => result.category === 'fiction'
1363
+ * })
1364
+ * // => [
1365
+ * // { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 },
1366
+ * // { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 }
1367
+ * // ]
1368
+ * ```
1369
+ *
1370
+ * @param queryString Query string to be expanded into suggestions
1371
+ * @param options Search options. The supported options and default values
1372
+ * are the same as for the {@link MiniSearch#search} method, except that by
1373
+ * default prefix search is performed on the last term in the query, and terms
1374
+ * are combined with `'AND'`.
1375
+ * @return An array of suggestions sorted by descending relevance score.
1376
+ */
1377
+ autoSuggest(queryString, options = {}) {
1378
+ options = { ...this._options.autoSuggestOptions, ...options };
1379
+ const suggestions = new Map();
1380
+ for (const { score, terms } of this.search(queryString, options)) {
1381
+ const phrase = terms.join(' ');
1382
+ const suggestion = suggestions.get(phrase);
1383
+ if (suggestion != null) {
1384
+ suggestion.score += score;
1385
+ suggestion.count += 1;
1386
+ }
1387
+ else {
1388
+ suggestions.set(phrase, { score, terms, count: 1 });
1389
+ }
1390
+ }
1391
+ const results = [];
1392
+ for (const [suggestion, { score, terms, count }] of suggestions) {
1393
+ results.push({ suggestion, terms, score: score / count });
1394
+ }
1395
+ results.sort(byScore);
1396
+ return results;
1397
+ }
1398
+ /**
1399
+ * Total number of documents available to search
1400
+ */
1401
+ get documentCount() {
1402
+ return this._documentCount;
1403
+ }
1404
+ /**
1405
+ * Number of terms in the index
1406
+ */
1407
+ get termCount() {
1408
+ return this._index.size;
1409
+ }
1410
+ /**
1411
+ * Deserializes a JSON index (serialized with `JSON.stringify(miniSearch)`)
1412
+ * and instantiates a MiniSearch instance. It should be given the same options
1413
+ * originally used when serializing the index.
1414
+ *
1415
+ * ### Usage:
1416
+ *
1417
+ * ```javascript
1418
+ * // If the index was serialized with:
1419
+ * let miniSearch = new MiniSearch({ fields: ['title', 'text'] })
1420
+ * miniSearch.addAll(documents)
1421
+ *
1422
+ * const json = JSON.stringify(miniSearch)
1423
+ * // It can later be deserialized like this:
1424
+ * miniSearch = MiniSearch.loadJSON(json, { fields: ['title', 'text'] })
1425
+ * ```
1426
+ *
1427
+ * @param json JSON-serialized index
1428
+ * @param options configuration options, same as the constructor
1429
+ * @return An instance of MiniSearch deserialized from the given JSON.
1430
+ */
1431
+ static loadJSON(json, options) {
1432
+ if (options == null) {
1433
+ throw new Error('MiniSearch: loadJSON should be given the same options used when serializing the index');
1434
+ }
1435
+ return this.loadJS(JSON.parse(json), options);
1436
+ }
1437
+ /**
1438
+ * Async equivalent of {@link MiniSearch.loadJSON}
1439
+ *
1440
+ * This function is an alternative to {@link MiniSearch.loadJSON} that returns
1441
+ * a promise, and loads the index in batches, leaving pauses between them to avoid
1442
+ * blocking the main thread. It tends to be slower than the synchronous
1443
+ * version, but does not block the main thread, so it can be a better choice
1444
+ * when deserializing very large indexes.
1445
+ *
1446
+ * @param json JSON-serialized index
1447
+ * @param options configuration options, same as the constructor
1448
+ * @return A Promise that will resolve to an instance of MiniSearch deserialized from the given JSON.
1449
+ */
1450
+ static async loadJSONAsync(json, options) {
1451
+ if (options == null) {
1452
+ throw new Error('MiniSearch: loadJSON should be given the same options used when serializing the index');
1453
+ }
1454
+ return this.loadJSAsync(JSON.parse(json), options);
1455
+ }
1456
+ /**
1457
+ * Returns the default value of an option. It will throw an error if no option
1458
+ * with the given name exists.
1459
+ *
1460
+ * @param optionName Name of the option
1461
+ * @return The default value of the given option
1462
+ *
1463
+ * ### Usage:
1464
+ *
1465
+ * ```javascript
1466
+ * // Get default tokenizer
1467
+ * MiniSearch.getDefault('tokenize')
1468
+ *
1469
+ * // Get default term processor
1470
+ * MiniSearch.getDefault('processTerm')
1471
+ *
1472
+ * // Unknown options will throw an error
1473
+ * MiniSearch.getDefault('notExisting')
1474
+ * // => throws 'MiniSearch: unknown option "notExisting"'
1475
+ * ```
1476
+ */
1477
+ static getDefault(optionName) {
1478
+ if (defaultOptions.hasOwnProperty(optionName)) {
1479
+ return getOwnProperty(defaultOptions, optionName);
1480
+ }
1481
+ else {
1482
+ throw new Error(`MiniSearch: unknown option "${optionName}"`);
1483
+ }
1484
+ }
1485
+ /**
1486
+ * @ignore
1487
+ */
1488
+ static loadJS(js, options) {
1489
+ const { index, documentIds, fieldLength, storedFields, serializationVersion } = js;
1490
+ const miniSearch = this.instantiateMiniSearch(js, options);
1491
+ miniSearch._documentIds = objectToNumericMap(documentIds);
1492
+ miniSearch._fieldLength = objectToNumericMap(fieldLength);
1493
+ miniSearch._storedFields = objectToNumericMap(storedFields);
1494
+ for (const [shortId, id] of miniSearch._documentIds) {
1495
+ miniSearch._idToShortId.set(id, shortId);
1496
+ }
1497
+ for (const [term, data] of index) {
1498
+ const dataMap = new Map();
1499
+ for (const fieldId of Object.keys(data)) {
1500
+ let indexEntry = data[fieldId];
1501
+ // Version 1 used to nest the index entry inside a field called ds
1502
+ if (serializationVersion === 1) {
1503
+ indexEntry = indexEntry.ds;
1504
+ }
1505
+ dataMap.set(parseInt(fieldId, 10), objectToNumericMap(indexEntry));
1506
+ }
1507
+ miniSearch._index.set(term, dataMap);
1508
+ }
1509
+ return miniSearch;
1510
+ }
1511
+ /**
1512
+ * @ignore
1513
+ */
1514
+ static async loadJSAsync(js, options) {
1515
+ const { index, documentIds, fieldLength, storedFields, serializationVersion } = js;
1516
+ const miniSearch = this.instantiateMiniSearch(js, options);
1517
+ miniSearch._documentIds = await objectToNumericMapAsync(documentIds);
1518
+ miniSearch._fieldLength = await objectToNumericMapAsync(fieldLength);
1519
+ miniSearch._storedFields = await objectToNumericMapAsync(storedFields);
1520
+ for (const [shortId, id] of miniSearch._documentIds) {
1521
+ miniSearch._idToShortId.set(id, shortId);
1522
+ }
1523
+ let count = 0;
1524
+ for (const [term, data] of index) {
1525
+ const dataMap = new Map();
1526
+ for (const fieldId of Object.keys(data)) {
1527
+ let indexEntry = data[fieldId];
1528
+ // Version 1 used to nest the index entry inside a field called ds
1529
+ if (serializationVersion === 1) {
1530
+ indexEntry = indexEntry.ds;
1531
+ }
1532
+ dataMap.set(parseInt(fieldId, 10), await objectToNumericMapAsync(indexEntry));
1533
+ }
1534
+ if (++count % 1000 === 0)
1535
+ await wait(0);
1536
+ miniSearch._index.set(term, dataMap);
1537
+ }
1538
+ return miniSearch;
1539
+ }
1540
+ /**
1541
+ * @ignore
1542
+ */
1543
+ static instantiateMiniSearch(js, options) {
1544
+ const { documentCount, nextId, fieldIds, averageFieldLength, dirtCount, serializationVersion } = js;
1545
+ if (serializationVersion !== 1 && serializationVersion !== 2) {
1546
+ throw new Error('MiniSearch: cannot deserialize an index created with an incompatible version');
1547
+ }
1548
+ const miniSearch = new MiniSearch(options);
1549
+ miniSearch._documentCount = documentCount;
1550
+ miniSearch._nextId = nextId;
1551
+ miniSearch._idToShortId = new Map();
1552
+ miniSearch._fieldIds = fieldIds;
1553
+ miniSearch._avgFieldLength = averageFieldLength;
1554
+ miniSearch._dirtCount = dirtCount || 0;
1555
+ miniSearch._index = new SearchableMap();
1556
+ return miniSearch;
1557
+ }
1558
+ /**
1559
+ * @ignore
1560
+ */
1561
+ executeQuery(query, searchOptions = {}) {
1562
+ if (query === MiniSearch.wildcard) {
1563
+ return this.executeWildcardQuery(searchOptions);
1564
+ }
1565
+ if (typeof query !== 'string') {
1566
+ const options = { ...searchOptions, ...query, queries: undefined };
1567
+ const results = query.queries.map((subquery) => this.executeQuery(subquery, options));
1568
+ return this.combineResults(results, options.combineWith);
1569
+ }
1570
+ const { tokenize, processTerm, searchOptions: globalSearchOptions } = this._options;
1571
+ const options = { tokenize, processTerm, ...globalSearchOptions, ...searchOptions };
1572
+ const { tokenize: searchTokenize, processTerm: searchProcessTerm } = options;
1573
+ const terms = searchTokenize(query)
1574
+ .flatMap((term) => searchProcessTerm(term))
1575
+ .filter((term) => !!term);
1576
+ const queries = terms.map(termToQuerySpec(options));
1577
+ const results = queries.map(query => this.executeQuerySpec(query, options));
1578
+ return this.combineResults(results, options.combineWith);
1579
+ }
1580
+ /**
1581
+ * @ignore
1582
+ */
1583
+ executeQuerySpec(query, searchOptions) {
1584
+ const options = { ...this._options.searchOptions, ...searchOptions };
1585
+ const boosts = (options.fields || this._options.fields).reduce((boosts, field) => ({ ...boosts, [field]: getOwnProperty(options.boost, field) || 1 }), {});
1586
+ const { boostDocument, weights, maxFuzzy, bm25: bm25params } = options;
1587
+ const { fuzzy: fuzzyWeight, prefix: prefixWeight } = { ...defaultSearchOptions.weights, ...weights };
1588
+ const data = this._index.get(query.term);
1589
+ const results = this.termResults(query.term, query.term, 1, query.termBoost, data, boosts, boostDocument, bm25params);
1590
+ let prefixMatches;
1591
+ let fuzzyMatches;
1592
+ if (query.prefix) {
1593
+ prefixMatches = this._index.atPrefix(query.term);
1594
+ }
1595
+ if (query.fuzzy) {
1596
+ const fuzzy = (query.fuzzy === true) ? 0.2 : query.fuzzy;
1597
+ const maxDistance = fuzzy < 1 ? Math.min(maxFuzzy, Math.round(query.term.length * fuzzy)) : fuzzy;
1598
+ if (maxDistance)
1599
+ fuzzyMatches = this._index.fuzzyGet(query.term, maxDistance);
1600
+ }
1601
+ if (prefixMatches) {
1602
+ for (const [term, data] of prefixMatches) {
1603
+ const distance = term.length - query.term.length;
1604
+ if (!distance) {
1605
+ continue;
1606
+ } // Skip exact match.
1607
+ // Delete the term from fuzzy results (if present) if it is also a
1608
+ // prefix result. This entry will always be scored as a prefix result.
1609
+ fuzzyMatches === null || fuzzyMatches === void 0 ? void 0 : fuzzyMatches.delete(term);
1610
+ // Weight gradually approaches 0 as distance goes to infinity, with the
1611
+ // weight for the hypothetical distance 0 being equal to prefixWeight.
1612
+ // The rate of change is much lower than that of fuzzy matches to
1613
+ // account for the fact that prefix matches stay more relevant than
1614
+ // fuzzy matches for longer distances.
1615
+ const weight = prefixWeight * term.length / (term.length + 0.3 * distance);
1616
+ this.termResults(query.term, term, weight, query.termBoost, data, boosts, boostDocument, bm25params, results);
1617
+ }
1618
+ }
1619
+ if (fuzzyMatches) {
1620
+ for (const term of fuzzyMatches.keys()) {
1621
+ const [data, distance] = fuzzyMatches.get(term);
1622
+ if (!distance) {
1623
+ continue;
1624
+ } // Skip exact match.
1625
+ // Weight gradually approaches 0 as distance goes to infinity, with the
1626
+ // weight for the hypothetical distance 0 being equal to fuzzyWeight.
1627
+ const weight = fuzzyWeight * term.length / (term.length + distance);
1628
+ this.termResults(query.term, term, weight, query.termBoost, data, boosts, boostDocument, bm25params, results);
1629
+ }
1630
+ }
1631
+ return results;
1632
+ }
1633
+ /**
1634
+ * @ignore
1635
+ */
1636
+ executeWildcardQuery(searchOptions) {
1637
+ const results = new Map();
1638
+ const options = { ...this._options.searchOptions, ...searchOptions };
1639
+ for (const [shortId, id] of this._documentIds) {
1640
+ const score = options.boostDocument ? options.boostDocument(id, '', this._storedFields.get(shortId)) : 1;
1641
+ results.set(shortId, {
1642
+ score,
1643
+ terms: [],
1644
+ match: {}
1645
+ });
1646
+ }
1647
+ return results;
1648
+ }
1649
+ /**
1650
+ * @ignore
1651
+ */
1652
+ combineResults(results, combineWith = OR) {
1653
+ if (results.length === 0) {
1654
+ return new Map();
1655
+ }
1656
+ const operator = combineWith.toLowerCase();
1657
+ const combinator = combinators[operator];
1658
+ if (!combinator) {
1659
+ throw new Error(`Invalid combination operator: ${combineWith}`);
1660
+ }
1661
+ return results.reduce(combinator) || new Map();
1662
+ }
1663
+ /**
1664
+ * Allows serialization of the index to JSON, to possibly store it and later
1665
+ * deserialize it with {@link MiniSearch.loadJSON}.
1666
+ *
1667
+ * Normally one does not directly call this method, but rather call the
1668
+ * standard JavaScript `JSON.stringify()` passing the {@link MiniSearch}
1669
+ * instance, and JavaScript will internally call this method. Upon
1670
+ * deserialization, one must pass to {@link MiniSearch.loadJSON} the same
1671
+ * options used to create the original instance that was serialized.
1672
+ *
1673
+ * ### Usage:
1674
+ *
1675
+ * ```javascript
1676
+ * // Serialize the index:
1677
+ * let miniSearch = new MiniSearch({ fields: ['title', 'text'] })
1678
+ * miniSearch.addAll(documents)
1679
+ * const json = JSON.stringify(miniSearch)
1680
+ *
1681
+ * // Later, to deserialize it:
1682
+ * miniSearch = MiniSearch.loadJSON(json, { fields: ['title', 'text'] })
1683
+ * ```
1684
+ *
1685
+ * @return A plain-object serializable representation of the search index.
1686
+ */
1687
+ toJSON() {
1688
+ const index = [];
1689
+ for (const [term, fieldIndex] of this._index) {
1690
+ const data = {};
1691
+ for (const [fieldId, freqs] of fieldIndex) {
1692
+ data[fieldId] = Object.fromEntries(freqs);
1693
+ }
1694
+ index.push([term, data]);
1695
+ }
1696
+ return {
1697
+ documentCount: this._documentCount,
1698
+ nextId: this._nextId,
1699
+ documentIds: Object.fromEntries(this._documentIds),
1700
+ fieldIds: this._fieldIds,
1701
+ fieldLength: Object.fromEntries(this._fieldLength),
1702
+ averageFieldLength: this._avgFieldLength,
1703
+ storedFields: Object.fromEntries(this._storedFields),
1704
+ dirtCount: this._dirtCount,
1705
+ index,
1706
+ serializationVersion: 2
1707
+ };
1708
+ }
1709
+ /**
1710
+ * @ignore
1711
+ */
1712
+ termResults(sourceTerm, derivedTerm, termWeight, termBoost, fieldTermData, fieldBoosts, boostDocumentFn, bm25params, results = new Map()) {
1713
+ if (fieldTermData == null)
1714
+ return results;
1715
+ for (const field of Object.keys(fieldBoosts)) {
1716
+ const fieldBoost = fieldBoosts[field];
1717
+ const fieldId = this._fieldIds[field];
1718
+ const fieldTermFreqs = fieldTermData.get(fieldId);
1719
+ if (fieldTermFreqs == null)
1720
+ continue;
1721
+ let matchingFields = fieldTermFreqs.size;
1722
+ const avgFieldLength = this._avgFieldLength[fieldId];
1723
+ for (const docId of fieldTermFreqs.keys()) {
1724
+ if (!this._documentIds.has(docId)) {
1725
+ this.removeTerm(fieldId, docId, derivedTerm);
1726
+ matchingFields -= 1;
1727
+ continue;
1728
+ }
1729
+ const docBoost = boostDocumentFn ? boostDocumentFn(this._documentIds.get(docId), derivedTerm, this._storedFields.get(docId)) : 1;
1730
+ if (!docBoost)
1731
+ continue;
1732
+ const termFreq = fieldTermFreqs.get(docId);
1733
+ const fieldLength = this._fieldLength.get(docId)[fieldId];
1734
+ // NOTE: The total number of fields is set to the number of documents
1735
+ // `this._documentCount`. It could also make sense to use the number of
1736
+ // documents where the current field is non-blank as a normalization
1737
+ // factor. This will make a difference in scoring if the field is rarely
1738
+ // present. This is currently not supported, and may require further
1739
+ // analysis to see if it is a valid use case.
1740
+ const rawScore = calcBM25Score(termFreq, matchingFields, this._documentCount, fieldLength, avgFieldLength, bm25params);
1741
+ const weightedScore = termWeight * termBoost * fieldBoost * docBoost * rawScore;
1742
+ const result = results.get(docId);
1743
+ if (result) {
1744
+ result.score += weightedScore;
1745
+ assignUniqueTerm(result.terms, sourceTerm);
1746
+ const match = getOwnProperty(result.match, derivedTerm);
1747
+ if (match) {
1748
+ match.push(field);
1749
+ }
1750
+ else {
1751
+ result.match[derivedTerm] = [field];
1752
+ }
1753
+ }
1754
+ else {
1755
+ results.set(docId, {
1756
+ score: weightedScore,
1757
+ terms: [sourceTerm],
1758
+ match: { [derivedTerm]: [field] }
1759
+ });
1760
+ }
1761
+ }
1762
+ }
1763
+ return results;
1764
+ }
1765
+ /**
1766
+ * @ignore
1767
+ */
1768
+ addTerm(fieldId, documentId, term) {
1769
+ const indexData = this._index.fetch(term, createMap);
1770
+ let fieldIndex = indexData.get(fieldId);
1771
+ if (fieldIndex == null) {
1772
+ fieldIndex = new Map();
1773
+ fieldIndex.set(documentId, 1);
1774
+ indexData.set(fieldId, fieldIndex);
1775
+ }
1776
+ else {
1777
+ const docs = fieldIndex.get(documentId);
1778
+ fieldIndex.set(documentId, (docs || 0) + 1);
1779
+ }
1780
+ }
1781
+ /**
1782
+ * @ignore
1783
+ */
1784
+ removeTerm(fieldId, documentId, term) {
1785
+ if (!this._index.has(term)) {
1786
+ this.warnDocumentChanged(documentId, fieldId, term);
1787
+ return;
1788
+ }
1789
+ const indexData = this._index.fetch(term, createMap);
1790
+ const fieldIndex = indexData.get(fieldId);
1791
+ if (fieldIndex == null || fieldIndex.get(documentId) == null) {
1792
+ this.warnDocumentChanged(documentId, fieldId, term);
1793
+ }
1794
+ else if (fieldIndex.get(documentId) <= 1) {
1795
+ if (fieldIndex.size <= 1) {
1796
+ indexData.delete(fieldId);
1797
+ }
1798
+ else {
1799
+ fieldIndex.delete(documentId);
1800
+ }
1801
+ }
1802
+ else {
1803
+ fieldIndex.set(documentId, fieldIndex.get(documentId) - 1);
1804
+ }
1805
+ if (this._index.get(term).size === 0) {
1806
+ this._index.delete(term);
1807
+ }
1808
+ }
1809
+ /**
1810
+ * @ignore
1811
+ */
1812
+ warnDocumentChanged(shortDocumentId, fieldId, term) {
1813
+ for (const fieldName of Object.keys(this._fieldIds)) {
1814
+ if (this._fieldIds[fieldName] === fieldId) {
1815
+ this._options.logger('warn', `MiniSearch: document with ID ${this._documentIds.get(shortDocumentId)} has changed before removal: term "${term}" was not present in field "${fieldName}". Removing a document after it has changed can corrupt the index!`, 'version_conflict');
1816
+ return;
1817
+ }
1818
+ }
1819
+ }
1820
+ /**
1821
+ * @ignore
1822
+ */
1823
+ addDocumentId(documentId) {
1824
+ const shortDocumentId = this._nextId;
1825
+ this._idToShortId.set(documentId, shortDocumentId);
1826
+ this._documentIds.set(shortDocumentId, documentId);
1827
+ this._documentCount += 1;
1828
+ this._nextId += 1;
1829
+ return shortDocumentId;
1830
+ }
1831
+ /**
1832
+ * @ignore
1833
+ */
1834
+ addFields(fields) {
1835
+ for (let i = 0; i < fields.length; i++) {
1836
+ this._fieldIds[fields[i]] = i;
1837
+ }
1838
+ }
1839
+ /**
1840
+ * @ignore
1841
+ */
1842
+ addFieldLength(documentId, fieldId, count, length) {
1843
+ let fieldLengths = this._fieldLength.get(documentId);
1844
+ if (fieldLengths == null)
1845
+ this._fieldLength.set(documentId, fieldLengths = []);
1846
+ fieldLengths[fieldId] = length;
1847
+ const averageFieldLength = this._avgFieldLength[fieldId] || 0;
1848
+ const totalFieldLength = (averageFieldLength * count) + length;
1849
+ this._avgFieldLength[fieldId] = totalFieldLength / (count + 1);
1850
+ }
1851
+ /**
1852
+ * @ignore
1853
+ */
1854
+ removeFieldLength(documentId, fieldId, count, length) {
1855
+ if (count === 1) {
1856
+ this._avgFieldLength[fieldId] = 0;
1857
+ return;
1858
+ }
1859
+ const totalFieldLength = (this._avgFieldLength[fieldId] * count) - length;
1860
+ this._avgFieldLength[fieldId] = totalFieldLength / (count - 1);
1861
+ }
1862
+ /**
1863
+ * @ignore
1864
+ */
1865
+ saveStoredFields(documentId, doc) {
1866
+ const { storeFields, extractField } = this._options;
1867
+ if (storeFields == null || storeFields.length === 0) {
1868
+ return;
1869
+ }
1870
+ let documentFields = this._storedFields.get(documentId);
1871
+ if (documentFields == null)
1872
+ this._storedFields.set(documentId, documentFields = {});
1873
+ for (const fieldName of storeFields) {
1874
+ const fieldValue = extractField(doc, fieldName);
1875
+ if (fieldValue !== undefined)
1876
+ documentFields[fieldName] = fieldValue;
1877
+ }
1878
+ }
1879
+ }
1880
/**
 * The special wildcard symbol that can be passed to {@link MiniSearch#search}
 * to match all documents.
 *
 * It is a `Symbol` created with the description `'*'`.
 */
MiniSearch.wildcard = Symbol('*');
1885
/**
 * Reads `property` from `object` only when it is an own property.
 *
 * @param object The object to read from.
 * @param property The property key to look up.
 * @return The property value, or `undefined` when `object` has no own
 * property with that key (inherited properties are ignored).
 */
const getOwnProperty = (object, property) => {
    if (!Object.prototype.hasOwnProperty.call(object, property)) {
        return undefined;
    }
    return object[property];
};
1886
/**
 * Reducer functions used to merge two result maps, keyed by operator name.
 * Each takes the accumulated map `a` and the next map `b`.
 */
const combinators = {
    // Union: entries only in `b` are added to `a`; entries in both have their
    // scores summed and their matches and terms merged. Mutates and returns `a`.
    [OR]: (a, b) => {
        for (const [docId, incoming] of b) {
            const existing = a.get(docId);
            if (existing == null) {
                a.set(docId, incoming);
                continue;
            }
            existing.score += incoming.score;
            existing.match = Object.assign(existing.match, incoming.match);
            assignUniqueTerms(existing.terms, incoming.terms);
        }
        return a;
    },
    // Intersection: returns a new map holding only the documents present in
    // both maps, with summed scores and merged terms and matches.
    [AND]: (a, b) => {
        const combined = new Map();
        for (const [docId, incoming] of b) {
            const existing = a.get(docId);
            if (existing == null) {
                continue;
            }
            assignUniqueTerms(existing.terms, incoming.terms);
            combined.set(docId, {
                score: existing.score + incoming.score,
                terms: existing.terms,
                match: Object.assign(existing.match, incoming.match)
            });
        }
        return combined;
    },
    // Difference: removes from `a` every document present in `b`. Mutates and
    // returns `a`.
    [AND_NOT]: (a, b) => {
        for (const docId of b.keys()) {
            a.delete(docId);
        }
        return a;
    }
};
1924
/** Default values for the `k`, `b` and `d` parameters read by `calcBM25Score`. */
const defaultBM25params = { k: 1.2, b: 0.7, d: 0.5 };
/**
 * Computes the relevance score of a term for one field of one document.
 *
 * @param termFreq Frequency of the term in the field.
 * @param matchingCount Number of entries that contain the term.
 * @param totalCount Total count used to normalize `matchingCount`.
 * @param fieldLength Length of the field in this document.
 * @param avgFieldLength Average length of the field.
 * @param bm25params Object providing the `k`, `b` and `d` parameters.
 * @return The score as a number.
 */
const calcBM25Score = (termFreq, matchingCount, totalCount, fieldLength, avgFieldLength, bm25params) => {
    const { k, b, d } = bm25params;
    // Inverse document frequency: terms found in fewer entries weigh more.
    const rarity = (totalCount - matchingCount + 0.5) / (matchingCount + 0.5);
    const invDocFreq = Math.log(1 + rarity);
    // Field length relative to the average, blended in by `b`.
    const lengthFactor = 1 - b + b * fieldLength / avgFieldLength;
    const saturatedFreq = termFreq * (k + 1) / (termFreq + k * lengthFactor);
    return invDocFreq * (d + saturatedFreq);
};
1930
/**
 * Builds a mapper that converts a query term into a query spec object of the
 * form `{ term, fuzzy, prefix, termBoost }`.
 *
 * For each of `options.fuzzy`, `options.prefix` and `options.boostTerm`, a
 * function value is called with `(term, i, terms)`. Otherwise `fuzzy` falls
 * back to the option value or `false`, `prefix` is `true` only when the
 * option is exactly `true`, and `termBoost` is `1`.
 *
 * @param options The search options to read from.
 * @return A function suitable as an `Array.prototype.map` callback.
 */
const termToQuerySpec = (options) => (term, i, terms) => {
    // Calls the named option when it is a function, otherwise derives the
    // value from the raw option through `fallback`.
    const resolveOption = (name, fallback) => {
        if (typeof options[name] === 'function') {
            return options[name](term, i, terms);
        }
        return fallback(options[name]);
    };
    const fuzzy = resolveOption('fuzzy', (value) => value || false);
    const prefix = resolveOption('prefix', (value) => value === true);
    const termBoost = resolveOption('boostTerm', () => 1);
    return { term, fuzzy, prefix, termBoost };
};
1942
/**
 * Default values for the constructor options: field extraction,
 * stringification, tokenization, term processing and logging.
 */
const defaultOptions = {
    idField: 'id',
    // Reads the field straight off the document object.
    extractField: (document, fieldName) => document[fieldName],
    stringifyField: (fieldValue, fieldName) => fieldValue.toString(),
    // Splits on runs of whitespace and punctuation.
    tokenize: (text) => text.split(SPACE_OR_PUNCTUATION),
    processTerm: (term) => term.toLowerCase(),
    fields: undefined,
    searchOptions: undefined,
    storeFields: [],
    // Forwards to console[level] when such a method exists; otherwise a no-op.
    logger: (level, message) => {
        if (console === null || console === undefined) {
            return;
        }
        if (typeof console[level] !== 'function') {
            return;
        }
        console[level](message);
    },
    autoVacuum: true
};
1957
/** Default search options: OR combination, no prefix or fuzzy matching. */
const defaultSearchOptions = {
    combineWith: OR,
    prefix: false,
    fuzzy: false,
    maxFuzzy: 6,
    boost: {},
    weights: { fuzzy: 0.45, prefix: 0.375 },
    bm25: defaultBM25params
};
/**
 * Default auto-suggest options: AND combination, with prefix matching
 * enabled only for the last term of the query.
 */
const defaultAutoSuggestOptions = {
    combineWith: AND,
    prefix: (term, i, terms) => {
        const lastIndex = terms.length - 1;
        return i === lastIndex;
    }
};
1970
/** Default batching options for vacuuming. */
const defaultVacuumOptions = {
    batchSize: 1000,
    batchWait: 10
};
/** Default dirt thresholds for vacuuming. */
const defaultVacuumConditions = {
    minDirtFactor: 0.1,
    minDirtCount: 20
};
/** Auto-vacuum defaults: the batching options followed by the conditions. */
const defaultAutoVacuumOptions = Object.assign({}, defaultVacuumOptions, defaultVacuumConditions);
1973
/**
 * Appends `term` to `target` in place, unless `target` already includes it.
 *
 * @param target The array to append to.
 * @param term The term to add.
 */
const assignUniqueTerm = (target, term) => {
    // Duplicates are never added.
    if (target.includes(term)) {
        return;
    }
    target.push(term);
};
1978
/**
 * Appends to `target`, in place and in order, every term of `source` that
 * `target` does not already include.
 *
 * @param target The array to append to.
 * @param source The terms to merge in.
 */
const assignUniqueTerms = (target, source) => {
    for (const candidate of source) {
        // Duplicates are never added.
        const alreadyPresent = target.includes(candidate);
        if (alreadyPresent) {
            continue;
        }
        target.push(candidate);
    }
};
1985
/** Sort comparator that orders objects by their `score`, highest first. */
const byScore = (left, right) => right.score - left.score;
1986
/** Factory returning a fresh, empty `Map` on every call. */
const createMap = () => {
    return new Map();
};
1987
/**
 * Converts a plain object into a `Map` whose keys are the object's own
 * enumerable keys parsed as base-10 integers.
 *
 * @param object The object to convert.
 * @return A new `Map` from numeric key to the corresponding value.
 */
const objectToNumericMap = (object) => {
    const numericEntries = Object.entries(object).map(([key, value]) => [parseInt(key, 10), value]);
    return new Map(numericEntries);
};
1994
/**
 * Asynchronous variant of the object-to-numeric-map conversion: builds a
 * `Map` whose keys are the object's own enumerable keys parsed as base-10
 * integers, awaiting `wait(0)` after every 1000 processed keys.
 *
 * @param object The object to convert.
 * @return A promise resolving to the new `Map`.
 */
const objectToNumericMapAsync = async (object) => {
    const result = new Map();
    const keys = Object.keys(object);
    for (let processed = 1; processed <= keys.length; processed += 1) {
        const key = keys[processed - 1];
        result.set(parseInt(key, 10), object[key]);
        // Pause after every full batch of 1000 keys.
        if (processed % 1000 === 0) {
            await wait(0);
        }
    }
    return result;
};
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds,
 * scheduled through `setTimeout`.
 */
const wait = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
});
2006
// This regular expression matches a run of one or more Unicode separator
// (\p{Z}), newline (\n, \r), or punctuation (\p{P}) characters. The `u` flag
// is what enables the \p{...} Unicode property escapes.
const SPACE_OR_PUNCTUATION = /[\n\r\p{Z}\p{P}]+/u;
2009
+
2010
+ return MiniSearch;
2011
+
2012
+ }));
2013
+ //# sourceMappingURL=index.js.map