@yoch/minisearch 8.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1635 @@
1
+ type LeafType = '' & {
2
+ readonly __tag: unique symbol;
3
+ };
4
+ interface RadixTree<T> extends Map<string, T | RadixTree<T>> {
5
+ get(key: LeafType): T | undefined;
6
+ get(key: string): RadixTree<T> | undefined;
7
+ set(key: LeafType, value: T): this;
8
+ set(key: string, value: RadixTree<T>): this;
9
+ }
10
+ type Entry<T> = [string, T];
11
+
12
+ interface Iterators<T> {
13
+ ENTRIES: Entry<T>;
14
+ KEYS: string;
15
+ VALUES: T;
16
+ }
17
+ type Kind<T> = keyof Iterators<T>;
18
+ type Result<T, K extends keyof Iterators<T>> = Iterators<T>[K];
19
+ type IteratorPath<T> = {
20
+ node: RadixTree<T>;
21
+ keys: string[];
22
+ }[];
23
+ type IterableSet<T> = {
24
+ _tree: RadixTree<T>;
25
+ _prefix: string;
26
+ };
27
+ /**
28
+ * @private
29
+ */
30
+ declare class TreeIterator<T, K extends Kind<T>> implements Iterator<Result<T, K>> {
31
+ set: IterableSet<T>;
32
+ _type: K;
33
+ _path: IteratorPath<T>;
34
+ constructor(set: IterableSet<T>, type: K);
35
+ next(): IteratorResult<Result<T, K>>;
36
+ dive(): IteratorResult<Result<T, K>>;
37
+ backtrack(): void;
38
+ key(): string;
39
+ value(): T;
40
+ result(): Result<T, K>;
41
+ [Symbol.iterator](): this;
42
+ }
43
+
44
+ type FuzzyResult<T> = [T, number];
45
+ type FuzzyResults<T> = Map<string, FuzzyResult<T>>;
46
+
47
+ /**
48
+ * A class implementing the same interface as a standard JavaScript
49
+ * [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map)
50
+ * with string keys, but adding support for efficiently searching entries with
51
+ * prefix or fuzzy search. This class is used internally by {@link MiniSearch}
52
+ * as the inverted index data structure. The implementation is a radix tree
53
+ * (compressed prefix tree).
54
+ *
55
+ * Although this class can be of general utility beyond _MiniSearch_, it is
56
+ * internal to `@yoch/minisearch` (not a separate public entry point).
57
+ *
58
+ * @typeParam T The type of the values stored in the map.
59
+ */
60
+ declare class SearchableMap<T = any> {
61
+ /**
62
+ * @internal
63
+ */
64
+ _tree: RadixTree<T>;
65
+ /**
66
+ * @internal
67
+ */
68
+ _prefix: string;
69
+ private _size;
70
+ /**
71
+ * The constructor is normally called without arguments, creating an empty
72
+ * map. In order to create a {@link SearchableMap} from an iterable or from an
73
+ * object, check {@link SearchableMap.from} and {@link
74
+ * SearchableMap.fromObject}.
75
+ *
76
+ * The constructor arguments are for internal use, when creating derived
77
+ * mutable views of a map at a prefix.
78
+ */
79
+ constructor(tree?: RadixTree<T>, prefix?: string);
80
+ /**
81
+ * Root radix tree backing this map. Used when cloning or serializing the full
82
+ * index so {@link Map} key insertion order (prefix / fuzzy / autoSuggest) is preserved.
83
+ */
84
+ get radixTree(): RadixTree<T>;
85
+ /**
86
+ * Creates and returns a mutable view of this {@link SearchableMap},
87
+ * containing only entries that share the given prefix.
88
+ *
89
+ * ### Usage:
90
+ *
91
+ * ```javascript
92
+ * let map = new SearchableMap()
93
+ * map.set("unicorn", 1)
94
+ * map.set("universe", 2)
95
+ * map.set("university", 3)
96
+ * map.set("unique", 4)
97
+ * map.set("hello", 5)
98
+ *
99
+ * let uni = map.atPrefix("uni")
100
+ * uni.get("unique") // => 4
101
+ * uni.get("unicorn") // => 1
102
+ * uni.get("hello") // => undefined
103
+ *
104
+ * let univer = map.atPrefix("univer")
105
+ * univer.get("unique") // => undefined
106
+ * univer.get("universe") // => 2
107
+ * univer.get("university") // => 3
108
+ * ```
109
+ *
110
+ * @param prefix The prefix
111
+ * @return A {@link SearchableMap} representing a mutable view of the original
112
+ * Map at the given prefix
113
+ */
114
+ atPrefix(prefix: string): SearchableMap<T>;
115
+ /**
116
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/clear
117
+ */
118
+ clear(): void;
119
+ /**
120
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/delete
121
+ * @param key Key to delete
122
+ */
123
+ delete(key: string): void;
124
+ /**
125
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/entries
126
+ * @return An iterator iterating through `[key, value]` entries.
127
+ */
128
+ entries(): TreeIterator<T, "ENTRIES">;
129
+ /**
130
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/forEach
131
+ * @param fn Iteration function; it receives `(key, value, map)` — key first, unlike `Map.prototype.forEach`
132
+ */
133
+ forEach(fn: (key: string, value: T, map: SearchableMap<T>) => void): void;
134
+ /**
135
+ * Returns a Map of all the entries that have a key within the given edit
136
+ * distance from the search key. The keys of the returned Map are the matching
137
+ * keys, while the values are two-element arrays where the first element is
138
+ * the value associated to the key, and the second is the edit distance of the
139
+ * key to the search key.
140
+ *
141
+ * ### Usage:
142
+ *
143
+ * ```javascript
144
+ * let map = new SearchableMap()
145
+ * map.set('hello', 'world')
146
+ * map.set('hell', 'yeah')
147
+ * map.set('ciao', 'mondo')
148
+ *
149
+ * // Get all entries that match the key 'hallo' with a maximum edit distance of 2
150
+ * map.fuzzyGet('hallo', 2)
151
+ * // => Map(2) { 'hello' => ['world', 1], 'hell' => ['yeah', 2] }
152
+ *
153
+ * // In the example, the "hello" key has value "world" and edit distance of 1
154
+ * // (change "e" to "a"), the key "hell" has value "yeah" and edit distance of 2
155
+ * // (change "e" to "a", delete "o")
156
+ * ```
157
+ *
158
+ * @param key The search key
159
+ * @param maxEditDistance The maximum edit distance (Levenshtein)
160
+ * @return A Map of the matching keys to their value and edit distance
161
+ */
162
+ fuzzyGet(key: string, maxEditDistance: number): FuzzyResults<T>;
163
+ /**
164
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/get
165
+ * @param key Key to get
166
+ * @return Value associated to the key, or `undefined` if the key is not
167
+ * found.
168
+ */
169
+ get(key: string): T | undefined;
170
+ /**
171
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/has
172
+ * @param key Key
173
+ * @return True if the key is in the map, false otherwise
174
+ */
175
+ has(key: string): boolean;
176
+ /**
177
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/keys
178
+ * @return An `Iterable` iterating through keys
179
+ */
180
+ keys(): TreeIterator<T, "KEYS">;
181
+ /**
182
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/set
183
+ * @param key Key to set
184
+ * @param value Value to associate to the key
185
+ * @return The {@link SearchableMap} itself, to allow chaining
186
+ */
187
+ set(key: string, value: T): SearchableMap<T>;
188
+ /**
189
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/size
190
+ */
191
+ get size(): number;
192
+ /**
193
+ * Updates the value at the given key using the provided function. The function
194
+ * is called with the current value at the key, and its return value is used as
195
+ * the new value to be set.
196
+ *
197
+ * ### Example:
198
+ *
199
+ * ```javascript
200
+ * // Increment the current value by one
201
+ * searchableMap.update('somekey', (currentValue) => currentValue == null ? 0 : currentValue + 1)
202
+ * ```
203
+ *
204
+ * If the value at the given key is or will be an object, it might not require
205
+ * re-assignment. In that case it is better to use `fetch()`, because it is
206
+ * faster.
207
+ *
208
+ * @param key The key to update
209
+ * @param fn The function used to compute the new value from the current one
210
+ * @return The {@link SearchableMap} itself, to allow chaining
211
+ */
212
+ update(key: string, fn: (value: T | undefined) => T): SearchableMap<T>;
213
+ /**
214
+ * Fetches the value of the given key. If the value does not exist, calls the
215
+ * given function to create a new value, which is inserted at the given key
216
+ * and subsequently returned.
217
+ *
218
+ * ### Example:
219
+ *
220
+ * ```javascript
221
+ * const map = searchableMap.fetch('somekey', () => new Map())
222
+ * map.set('foo', 'bar')
223
+ * ```
224
+ *
225
+ * @param key The key to fetch
226
+ * @param initial A function that creates a new value if the key does not exist
227
+ * @return The existing or new value at the given key
228
+ */
229
+ fetch(key: string, initial: () => T): T;
230
+ /**
231
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/values
232
+ * @return An `Iterable` iterating through values.
233
+ */
234
+ values(): TreeIterator<T, "VALUES">;
235
+ /**
236
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/@@iterator
237
+ */
238
+ [Symbol.iterator](): TreeIterator<T, "ENTRIES">;
239
+ /**
240
+ * Creates a {@link SearchableMap} from an `Iterable` of entries
241
+ *
242
+ * @param entries Entries to be inserted in the {@link SearchableMap}
243
+ * @return A new {@link SearchableMap} with the given entries, with values typed as `T`
244
+ */
245
+ static from<T = any>(entries: Iterable<Entry<T>> | Entry<T>[]): SearchableMap<T>;
246
+ /**
247
+ * Creates a {@link SearchableMap} from the iterable properties of a JavaScript object
248
+ *
249
+ * @param object Object of entries for the {@link SearchableMap}
250
+ * @return A new {@link SearchableMap} with the given entries, with values typed as `T`
251
+ */
252
+ static fromObject<T = any>(object: {
253
+ [key: string]: T;
254
+ }): SearchableMap<T>;
255
+ }
256
+
257
+ /** Shared wildcard query symbol for MiniSearch and FrozenMiniSearch */
258
+ declare const WILDCARD_QUERY: unique symbol;
259
+
260
+ type SearchOptionsWithDefaults$1 = SearchOptions & {
261
+ boost: {
262
+ [fieldName: string]: number;
263
+ };
264
+ weights: {
265
+ fuzzy: number;
266
+ prefix: number;
267
+ };
268
+ prefix: boolean | ((term: string, index: number, terms: string[]) => boolean);
269
+ fuzzy: boolean | number | ((term: string, index: number, terms: string[]) => boolean | number);
270
+ maxFuzzy: number;
271
+ combineWith: CombinationOperator;
272
+ bm25: BM25Params;
273
+ };
274
+ type OptionsWithDefaults$1<T> = Options<T> & {
275
+ storeFields: string[];
276
+ idField: string;
277
+ extractField: (document: T, fieldName: string) => any;
278
+ stringifyField: (fieldValue: any, fieldName: string) => string;
279
+ tokenize: (text: string, fieldName: string) => string[];
280
+ processTerm: (term: string, fieldName: string) => string | string[] | null | undefined | false;
281
+ searchOptions: SearchOptionsWithDefaults$1;
282
+ autoSuggestOptions: SearchOptions;
283
+ };
284
+ interface FreezeSource<T = any> {
285
+ _options: OptionsWithDefaults$1<T>;
286
+ _index: SearchableMap<Map<number, Map<number, number>>>;
287
+ _documentCount: number;
288
+ _nextId: number;
289
+ _documentIds: Map<number, any>;
290
+ _fieldIds: {
291
+ [key: string]: number;
292
+ };
293
+ _fieldLength: Map<number, number[]>;
294
+ _avgFieldLength: number[];
295
+ _storedFields: Map<number, Record<string, unknown>>;
296
+ }
297
+ interface FrozenMemoryBreakdown {
298
+ termCount: number;
299
+ documentCount: number;
300
+ nextId: number;
301
+ postings: {
302
+ slotCount: number;
303
+ allDocIdsBytes: number;
304
+ allFreqsBytes: number;
305
+ offsetsBytes: number;
306
+ lengthsBytes: number;
307
+ totalTypedBytes: number;
308
+ };
309
+ radixTree: {
310
+ mapNodeCount: number;
311
+ estimatedBytes: number;
312
+ };
313
+ documents: {
314
+ externalIdsSlots: number;
315
+ storedFieldsSlots: number;
316
+ idToShortIdEntries: number;
317
+ fieldLengthMatrixBytes: number;
318
+ avgFieldLengthBytes: number;
319
+ storedFieldsJsonBytes: number;
320
+ };
321
+ estimatedStructuredBytes: number;
322
+ }
323
+ declare function frozenMemoryBreakdown(frozen: FrozenMiniSearch): FrozenMemoryBreakdown;
324
+ interface FrozenAssembleParams<T = any> {
325
+ options: OptionsWithDefaults$1<T>;
326
+ documentCount: number;
327
+ nextId: number;
328
+ fieldIds: {
329
+ [field: string]: number;
330
+ };
331
+ fieldCount: number;
332
+ externalIds: unknown[];
333
+ idToShortId: Map<unknown, number>;
334
+ storedFields: (Record<string, unknown> | undefined)[];
335
+ fieldLengthMatrix: Uint32Array;
336
+ avgFieldLength: Float32Array;
337
+ index: SearchableMap<number>;
338
+ terms: string[];
339
+ postingsOffsets: Uint32Array;
340
+ postingsLengths: Uint32Array;
341
+ allDocIds: Uint32Array;
342
+ allFreqs: Uint8Array;
343
+ }
344
+ /** Instantiate {@link FrozenMiniSearch} from pre-built flat index parts. */
345
+ declare function assembleFrozen<T>(params: FrozenAssembleParams<T>): FrozenMiniSearch<T>;
346
+ declare function freezeFromMiniSearch<T>(source: FreezeSource<T>): FrozenMiniSearch<T>;
347
+ declare function buildFrozenFromDocuments<T>(documents: readonly T[], options: Options<T>): FrozenMiniSearch<T>;
348
+ declare class FrozenMiniSearch<T = any> {
349
+ private readonly _options;
350
+ private readonly _index;
351
+ private readonly _documentCount;
352
+ private readonly _nextId;
353
+ private readonly _externalIds;
354
+ private readonly _idToShortId;
355
+ private readonly _fieldIds;
356
+ private readonly _fieldCount;
357
+ private readonly _fieldLengthMatrix;
358
+ private readonly _avgFieldLength;
359
+ private readonly _storedFields;
360
+ private readonly _terms;
361
+ private readonly _postingsOffsets;
362
+ private readonly _postingsLengths;
363
+ private readonly _allDocIds;
364
+ private readonly _allFreqs;
365
+ constructor(params: {
366
+ options: OptionsWithDefaults$1<T>;
367
+ documentCount: number;
368
+ nextId: number;
369
+ fieldIds: {
370
+ [field: string]: number;
371
+ };
372
+ fieldCount: number;
373
+ externalIds: unknown[];
374
+ idToShortId: Map<unknown, number>;
375
+ storedFields: (Record<string, unknown> | undefined)[];
376
+ fieldLengthMatrix: Uint32Array;
377
+ avgFieldLength: Float32Array;
378
+ index: SearchableMap<number>;
379
+ terms: string[];
380
+ postingsOffsets: Uint32Array;
381
+ postingsLengths: Uint32Array;
382
+ allDocIds: Uint32Array;
383
+ allFreqs: Uint8Array;
384
+ });
385
+ static readonly wildcard: typeof WILDCARD_QUERY;
386
+ get documentCount(): number;
387
+ get termCount(): number;
388
+ memoryBreakdown(): FrozenMemoryBreakdown;
389
+ has(id: unknown): boolean;
390
+ getStoredFields(id: unknown): Record<string, unknown> | undefined;
391
+ add(): void;
392
+ addAll(): void;
393
+ addAllAsync(): Promise<void>;
394
+ remove(): void;
395
+ removeAll(): void;
396
+ discard(): void;
397
+ discardAll(): void;
398
+ replace(): void;
399
+ vacuum(): Promise<void>;
400
+ search(query: Query, searchOptions?: SearchOptions): SearchResult[];
401
+ autoSuggest(queryString: string, options?: SearchOptions): Suggestion[];
402
+ saveBinary(): Buffer;
403
+ static loadBinary<T>(buffer: Buffer, options: Options<T>): FrozenMiniSearch<T>;
404
+ /**
405
+ * Build a read-only index in one pass from documents (no mutable MiniSearch step).
406
+ *
407
+ * Use {@link MiniSearch} + {@link MiniSearch#freeze} when you need remove, discard, or
408
+ * incremental updates before freezing.
409
+ */
410
+ static fromDocuments<T>(documents: readonly T[], options: Options<T>): FrozenMiniSearch<T>;
411
+ private getFieldLength;
412
+ private fieldTermDataFor;
413
+ private aggregateContext;
414
+ private termResults;
415
+ private executeQuery;
416
+ private executeQuerySpec;
417
+ private executeWildcardQuery;
418
+ }
419
+
420
+ type BM25Params = {
421
+ k: number;
422
+ b: number;
423
+ d: number;
424
+ };
425
+ type LowercaseCombinationOperator$1 = 'or' | 'and' | 'and_not';
426
+ declare const OR: LowercaseCombinationOperator$1;
427
+ declare const AND: LowercaseCombinationOperator$1;
428
+ declare const AND_NOT: LowercaseCombinationOperator$1;
429
+
430
+ type LowercaseCombinationOperator = 'or' | 'and' | 'and_not';
431
+ type CombinationOperator = LowercaseCombinationOperator | Uppercase<LowercaseCombinationOperator> | Capitalize<LowercaseCombinationOperator>;
432
+
433
+ /**
434
+ * Search options to customize the search behavior.
435
+ */
436
+ type SearchOptions = {
437
+ /**
438
+ * Names of the fields to search in. If omitted, all fields are searched.
439
+ */
440
+ fields?: string[];
441
+ /**
442
+ * Function used to filter search results, for example on the basis of stored
443
+ * fields. It takes as argument each search result and should return a boolean
444
+ * to indicate if the result should be kept or not.
445
+ */
446
+ filter?: (result: SearchResult) => boolean;
447
+ /**
448
+ * Key-value object of field names to boosting values. By default, fields are
449
+ * assigned a boosting factor of 1. If one assigns to a field a boosting value
450
+ * of 2, a result that matches the query in that field is assigned a score
451
+ * twice as high as a result matching the query in another field, all else
452
+ * being equal.
453
+ */
454
+ boost?: {
455
+ [fieldName: string]: number;
456
+ };
457
+ /**
458
+ * Function to calculate a boost factor for each term.
459
+ *
460
+ * This function, if provided, is called for each query term (as split by
461
+ * `tokenize` and processed by `processTerm`). The arguments passed to the
462
+ * function are the query term, the positional index of the term in the query,
463
+ * and the array of all query terms. It is expected to return a numeric boost
464
+ * factor for the term. A factor lower than 1 reduces the importance of the
465
+ * term, a factor greater than 1 increases it. A factor of exactly 1 is
466
+ * neutral, and does not affect the term's importance.
467
+ */
468
+ boostTerm?: (term: string, i: number, terms: string[]) => number;
469
+ /**
470
+ * Relative weights to assign to prefix search results and fuzzy search
471
+ * results. Exact matches are assigned a weight of 1.
472
+ */
473
+ weights?: {
474
+ fuzzy: number;
475
+ prefix: number;
476
+ };
477
+ /**
478
+ * Function to calculate a boost factor for documents. It takes as arguments
479
+ * the document ID, and a term that matches the search in that document, and
480
+ * the value of the stored fields for the document (if any). It should return
481
+ * a boosting factor: a number higher than 1 increases the computed score, a
482
+ * number lower than 1 decreases the score, and a falsy value skips the search
483
+ * result completely.
484
+ */
485
+ boostDocument?: (documentId: any, term: string, storedFields?: Record<string, unknown>) => number;
486
+ /**
487
+ * Controls whether to perform prefix search. It can be a simple boolean, or a
488
+ * function.
489
+ *
490
+ * If a boolean is passed, prefix search is performed if true.
491
+ *
492
+ * If a function is passed, it is called upon search with a search term, the
493
+ * positional index of that search term in the tokenized search query, and the
494
+ * tokenized search query. The function should return a boolean to indicate
495
+ * whether to perform prefix search for that search term.
496
+ */
497
+ prefix?: boolean | ((term: string, index: number, terms: string[]) => boolean);
498
+ /**
499
+ * Controls whether to perform fuzzy search. It can be a simple boolean, or a
500
+ * number, or a function.
501
+ *
502
+ * If a boolean is given, fuzzy search with a default fuzziness parameter is
503
+ * performed if true.
504
+ *
505
+ * If a number higher or equal to 1 is given, fuzzy search is performed, with
506
+ * a maximum edit distance (Levenshtein) equal to the number.
507
+ *
508
+ * If a number between 0 and 1 is given, fuzzy search is performed within a
509
+ * maximum edit distance corresponding to that fraction of the term length,
510
+ * approximated to the nearest integer. For example, 0.2 would mean an edit
511
+ * distance of 20% of the term length, so 1 character in a 5-character term.
512
+ * The calculated fuzziness value is limited by the `maxFuzzy` option, to
513
+ * prevent slowdown for very long queries.
514
+ *
515
+ * If a function is passed, the function is called upon search with a search
516
+ * term, a positional index of that term in the tokenized search query, and
517
+ * the tokenized search query. It should return a boolean or a number, with
518
+ * the meaning documented above.
519
+ */
520
+ fuzzy?: boolean | number | ((term: string, index: number, terms: string[]) => boolean | number);
521
+ /**
522
+ * Controls the maximum fuzziness when using a fractional fuzzy value. This is
523
+ * set to 6 by default. Very high edit distances usually don't produce
524
+ * meaningful results, but can excessively impact search performance.
525
+ */
526
+ maxFuzzy?: number;
527
+ /**
528
+ * The operator used to combine partial results for each term. By default it is
529
+ * "OR", so results matching _any_ of the search terms are returned by a
530
+ * search. If "AND" is given, only results matching _all_ the search terms are
531
+ * returned by a search.
532
+ */
533
+ combineWith?: CombinationOperator;
534
+ /**
535
+ * Function to tokenize the search query. By default, the same tokenizer used
536
+ * for indexing is used also for search.
537
+ *
538
+ * @remarks This function is called after `extractField` extracts a truthy
539
+ * value from a field. This function is then expected to split the extracted
540
+ * `text` document into tokens (more commonly referred to as "terms" in this
541
+ * context). The resulting split might be simple, like for example on word
542
+ * boundaries, or it might be more complex, taking into account certain
543
+ * encoding, or parsing needs, or even just special cases. Think about how one
544
+ * might need to go about indexing the term "short-term". You would likely
545
+ * want to treat this case specially, and return two terms instead, `[
546
+ * "short", "term" ]`.
547
+ *
548
+ * Or, you could let such a case be handled by the `processTerm` function,
549
+ * which is designed to turn each token/term into whole terms or sub-terms. In
550
+ * any case, the purpose of this function is to split apart the provided
551
+ * `text` document into parts that can be processed by the `processTerm`
552
+ * function.
553
+ */
554
+ tokenize?: (text: string) => string[];
555
+ /**
556
+ * Function to process or normalize terms in the search query. By default, the
557
+ * same term processor used for indexing is used also for search.
558
+ *
559
+ * @remarks
560
+ * During the document indexing phase, the first step is to call the
561
+ * `extractField` function to fetch the requested value/field from the
562
+ * document. This is then passed off to the `tokenize` function, which will
563
+ * break apart each value into "terms". These terms are then individually
564
+ * passed through this function to compute each term individually. A term
565
+ * might for example be something like "lbs", in which case one would likely
566
+ * want to return `[ "lbs", "lb", "pound", "pounds" ]`. You may also return
567
+ * just a single string, or a falsy value if you would like to skip indexing
568
+ * entirely for a specific term.
569
+ *
570
+ * Truthy return value(s) are then fed to the indexer as positive matches for
571
+ * this document. In our example above, all four of the `[ "lbs", "lb",
572
+ * "pound", "pounds" ]` terms would be added to the indexing engine, matching
573
+ * against the current document being computed.
574
+ *
575
+ * *Note: Whatever values are returned from this function will receive no
576
+ * further processing before being indexed. This means for example, if you
577
+ * include whitespace at the beginning or end of a word, it will also be
578
+ * indexed that way, with the included whitespace.*
579
+ */
580
+ processTerm?: (term: string) => string | string[] | null | undefined | false;
581
+ /**
582
+ * BM25+ algorithm parameters. Customizing these is almost never necessary,
583
+ * and finetuning them requires an understanding of the BM25 scoring model. In
584
+ * most cases, it is best to omit this option to use defaults, and instead use
585
+ * boosting to tweak scoring for specific use cases.
586
+ */
587
+ bm25?: BM25Params;
588
+ };
589
+ type SearchOptionsWithDefaults = SearchOptions & {
590
+ boost: {
591
+ [fieldName: string]: number;
592
+ };
593
+ weights: {
594
+ fuzzy: number;
595
+ prefix: number;
596
+ };
597
+ prefix: boolean | ((term: string, index: number, terms: string[]) => boolean);
598
+ fuzzy: boolean | number | ((term: string, index: number, terms: string[]) => boolean | number);
599
+ maxFuzzy: number;
600
+ combineWith: CombinationOperator;
601
+ bm25: BM25Params;
602
+ };
603
+ /**
604
+ * Configuration options passed to the {@link MiniSearch} constructor
605
+ *
606
+ * @typeParam T The type of documents being indexed.
607
+ */
608
+ type Options<T = any> = {
609
+ /**
610
+ * Names of the document fields to be indexed.
611
+ */
612
+ fields: string[];
613
+ /**
614
+ * Name of the ID field, uniquely identifying a document.
615
+ */
616
+ idField?: string;
617
+ /**
618
+ * Names of fields to store, so that search results would include them. By
619
+ * default none, so results would only contain the id field.
620
+ */
621
+ storeFields?: string[];
622
+ /**
623
+ * Function used to extract the value of each field in documents. By default,
624
+ * the documents are assumed to be plain objects with field names as keys,
625
+ * but by specifying a custom `extractField` function one can completely
626
+ * customize how the fields are extracted.
627
+ *
628
+ * The function takes as arguments the document, and the name of the field to
629
+ * extract from it. It should return the field value as a string.
630
+ *
631
+ * @remarks
632
+ * The returned string is fed into the `tokenize` function to split it up
633
+ * into tokens.
634
+ */
635
+ extractField?: (document: T, fieldName: string) => any;
636
+ /**
637
+ * Function used to turn field values into strings for indexing
638
+ *
639
+ * The function takes as arguments the field value, and the name of the field
640
+ * to stringify, so that its logic can be customized on specific fields. By
641
+ * default, it simply calls `toString()` on the field value (which in many
642
+ * cases is already a string).
643
+ *
644
+ * ### Example:
645
+ *
646
+ * ```javascript
647
+ * // Custom stringifier that formats dates as "Tuesday, September 16, 2025"
648
+ * const miniSearch = new MiniSearch({
649
+ * fields: ['title', 'date'],
650
+ * stringifyField: (fieldValue, _fieldName) => {
651
+ * if (fieldValue instanceof Date) {
652
+ * return fieldValue.toLocaleDateString('en-US', {
653
+ * weekday: 'long',
654
+ * year: 'numeric',
655
+ * month: 'long',
656
+ * day: 'numeric'
657
+ * })
658
+ * } else {
659
+ * return fieldValue.toString()
660
+ * }
661
+ * }
662
+ * })
663
+ * ```
664
+ */
665
+ stringifyField?: (fieldValue: any, fieldName: string) => string;
666
+ /**
667
+ * Function used to split a field value into individual terms to be indexed.
668
+ * The default tokenizer separates terms by space or punctuation, but a
669
+ * custom tokenizer can be provided for custom logic.
670
+ *
671
+ * The function takes as arguments the string to tokenize, and the name of the
672
+ * field it comes from. It should return the terms as an array of strings.
673
+ * When used for tokenizing a search query instead of a document field, the
674
+ * `fieldName` is undefined.
675
+ *
676
+ * @remarks
677
+ * This function is called after `extractField` extracts a truthy value from a
678
+ * field. This function is then expected to split the extracted `text` document
679
+ * into tokens (more commonly referred to as "terms" in this context). The resulting
680
+ * split might be simple, like for example on word boundaries, or it might be more
681
+ * complex, taking into account certain encoding, or parsing needs, or even just
682
+ * special cases. Think about how one might need to go about indexing the term
683
+ * "short-term". You would likely want to treat this case specially, and return two
684
+ * terms instead, `[ "short", "term" ]`.
685
+ *
686
+ * Or, you could let such a case be handled by the `processTerm` function,
687
+ * which is designed to turn each token/term into whole terms or sub-terms. In any
688
+ * case, the purpose of this function is to split apart the provided `text` document
689
+ * into parts that can be processed by the `processTerm` function.
690
+ */
691
+ tokenize?: (text: string, fieldName?: string) => string[];
692
+ /**
693
+ * Function used to process a term before indexing or search. This can be
694
+ * used for normalization (such as stemming). By default, terms are
695
+ * downcased, and otherwise no other normalization is performed.
696
+ *
697
+ * The function takes as arguments a term to process, and the name of the
698
+ * field it comes from. It should return the processed term as a string, or a
699
+ * falsy value to reject the term entirely.
700
+ *
701
+ * It can also return an array of strings, in which case each string in the
702
+ * returned array is indexed as a separate term.
703
+ *
704
+ * @remarks
705
+ * During the document indexing phase, the first step is to call the `extractField`
706
+ * function to fetch the requested value/field from the document. This is then
707
+ * passed off to the `tokenize` function, which will break apart each value into "terms".
708
+ * These terms are then individually passed through the `processTerm` function
709
+ * to compute each term individually. A term might for example be something
710
+ * like "lbs", in which case one would likely want to return
711
+ * `[ "lbs", "lb", "pound", "pounds" ]`. You may also return a single string value,
712
+ * or a falsy value if you would like to skip indexing entirely for a specific term.
713
+ *
714
+ * Truthy return value(s) are then fed to the indexer as positive matches for this
715
+ * document. In our example above, all four of the `[ "lbs", "lb", "pound", "pounds" ]`
716
+ * terms would be added to the indexing engine, matching against the current document
717
+ * being computed.
718
+ *
719
+ * *Note: Whatever values are returned from this function will receive no further
720
+ * processing before being indexed. This means for example, if you include whitespace
721
+ * at the beginning or end of a word, it will also be indexed that way, with the
722
+ * included whitespace.*
723
+ */
724
+ processTerm?: (term: string, fieldName?: string) => string | string[] | null | undefined | false;
725
+ /**
726
+ * Function called to log messages. Arguments are a log level ('debug',
727
+ * 'info', 'warn', or 'error'), a log message, and an optional string code
728
+ * that identifies the reason for the log.
729
+ *
730
+ * The default implementation uses `console`, if defined.
731
+ */
732
+ logger?: (level: LogLevel, message: string, code?: string) => void;
733
+ /**
734
+ * If `true` (the default), vacuuming is performed automatically as soon as
735
+ * {@link MiniSearch#discard} is called a certain number of times, cleaning up
736
+ * obsolete references from the index. If `false`, no automatic vacuuming is
737
+ * performed. Custom settings controlling auto vacuuming thresholds, as well
738
+ * as batching behavior, can be passed as an object (see the {@link
739
+ * AutoVacuumOptions} type).
740
+ */
741
+ autoVacuum?: boolean | AutoVacuumOptions;
742
+ /**
743
+ * Default search options (see the {@link SearchOptions} type and the {@link
744
+ * MiniSearch#search} method for details)
745
+ */
746
+ searchOptions?: SearchOptions;
747
+ /**
748
+ * Default auto suggest options (see the {@link SearchOptions} type and the
749
+ * {@link MiniSearch#autoSuggest} method for details)
750
+ */
751
+ autoSuggestOptions?: SearchOptions;
752
+ };
753
+ /** {@link Options} intersected with an object type in which the fields below are all required (presumably the shape of the options once defaults are filled in — confirm against the constructor). */ type OptionsWithDefaults<T = any> = Options<T> & {
754
+ storeFields: string[];
755
+ idField: string;
756
+ extractField: (document: T, fieldName: string) => any;
757
+ stringifyField: (fieldValue: any, fieldName: string) => string;
758
+ tokenize: (text: string, fieldName: string) => string[];
759
+ processTerm: (term: string, fieldName: string) => string | string[] | null | undefined | false;
760
+ logger: (level: LogLevel, message: string, code?: string) => void;
761
+ autoVacuum: false | AutoVacuumOptions; // narrower than `Options.autoVacuum`: `true` is not part of this type
762
+ searchOptions: SearchOptionsWithDefaults;
763
+ autoSuggestOptions: SearchOptions;
764
+ };
765
+ /** Severity level passed as the first argument to the `logger` option. */ type LogLevel = 'debug' | 'info' | 'warn' | 'error';
766
+ /**
767
+ * A single auto-suggestion, as returned by {@link MiniSearch#autoSuggest}
768
+ */
769
+ type Suggestion = {
770
+ /**
771
+ * The suggested query, as a single string
772
+ */
773
+ suggestion: string;
774
+ /**
775
+ * The same suggestion, as an array of terms
776
+ */
777
+ terms: string[];
778
+ /**
779
+ * Relevance score for the suggestion
780
+ */
781
+ score: number;
782
+ };
783
+ /**
784
+ * Match information for a search result. It is a key-value object whose keys
785
+ * are the terms that matched, and whose values are the lists of fields in
786
+ * which each term was found.
787
+ */
788
+ type MatchInfo = {
789
+ [term: string]: string[];
790
+ };
791
+ /**
792
+ * A single search result, as returned by {@link MiniSearch#search}. It carries
793
+ * the document ID, the terms that matched, the match information, the score,
794
+ * and all the stored fields.
795
+ */
796
+ type SearchResult = {
797
+ /**
798
+ * The document ID
799
+ */
800
+ id: any;
801
+ /**
802
+ * List of document terms that matched. For example, if a prefix search for
803
+ * `"moto"` matches `"motorcycle"`, `terms` will contain `"motorcycle"`.
804
+ */
805
+ terms: string[];
806
+ /**
807
+ * List of query terms that matched. For example, if a prefix search for
808
+ * `"moto"` matches `"motorcycle"`, `queryTerms` will contain `"moto"`.
809
+ */
810
+ queryTerms: string[];
811
+ /**
812
+ * Score of this search result
813
+ */
814
+ score: number;
815
+ /**
816
+ * Match information, see {@link MatchInfo}
817
+ */
818
+ match: MatchInfo;
819
+ /**
820
+ * Stored fields (see the `storeFields` constructor option), exposed as
+ * additional top-level properties of the result
821
+ */
822
+ [key: string]: any;
823
+ };
824
+ /**
825
+ * Plain-object representation of the index: the return type of `toJSON` and
+ * the input type of `loadJS` / `loadJSAsync`.
+ *
+ * @ignore
826
+ */
827
+ type AsPlainObject = {
828
+ documentCount: number;
829
+ nextId: number;
830
+ documentIds: {
831
+ [shortId: string]: any;
832
+ };
833
+ fieldIds: {
834
+ [fieldName: string]: number;
835
+ };
836
+ fieldLength: {
837
+ [shortId: string]: number[];
838
+ };
839
+ averageFieldLength: number[];
840
+ storedFields: {
841
+ [shortId: string]: any;
842
+ };
843
+ dirtCount?: number;
844
+ index: [string, {
845
+ [fieldId: string]: SerializedIndexEntry;
846
+ }][];
847
+ serializationVersion: number;
848
+ };
849
+ /** A node of a query expression tree: all {@link SearchOptions} fields, plus a `queries` array of subqueries. */ type QueryCombination = SearchOptions & {
850
+ queries: Query[];
851
+ };
852
+ /**
853
+ * Wildcard query, used to match all documents (see {@link MiniSearch.wildcard})
854
+ */
855
+ type Wildcard = typeof MiniSearch.wildcard;
856
+ /**
857
+ * Search query expression: a query string, the {@link MiniSearch.wildcard}
858
+ * value, or an expression tree combining several queries with AND, OR, or
+ * AND_NOT.
859
+ */
860
+ type Query = QueryCombination | string | Wildcard;
861
+ /**
862
+ * Options to control vacuuming behavior.
863
+ *
864
+ * Vacuuming cleans up document references made obsolete by {@link
865
+ * MiniSearch#discard} from the index. On large indexes, vacuuming is
866
+ * potentially costly, because it has to traverse the whole inverted index.
867
+ * Therefore, in order to dilute this cost so it does not negatively affect the
868
+ * application, vacuuming is performed in batches, with a delay between each
869
+ * batch. These options are used to configure the batch size and the delay
870
+ * between batches.
871
+ */
872
+ type VacuumOptions = {
873
+ /**
874
+ * Size of each vacuuming batch (the number of terms in the index that will be
875
+ * traversed in each batch). Defaults to 1000.
876
+ */
877
+ batchSize?: number;
878
+ /**
879
+ * Wait time between each vacuuming batch in milliseconds. Defaults to 10.
880
+ */
881
+ batchWait?: number;
882
+ };
883
+ /**
884
+ * Minimum thresholds for {@link MiniSearch#dirtCount} and
885
+ * {@link MiniSearch#dirtFactor} below which automatic vacuuming is not triggered.
886
+ */
887
+ type VacuumConditions = {
888
+ /**
889
+ * Minimum `dirtCount` (number of discarded documents since the last vacuuming)
890
+ * under which auto vacuum is not triggered. It defaults to 20.
891
+ */
892
+ minDirtCount?: number;
893
+ /**
894
+ * Minimum `dirtFactor` (proportion of discarded documents over the total)
895
+ * under which auto vacuum is not triggered. It defaults to 0.1.
896
+ */
897
+ minDirtFactor?: number;
898
+ };
899
+ /**
900
+ * Options to control auto vacuum behavior, passed as the `autoVacuum` field of
+ * {@link Options}. When discarding a document with
901
+ * {@link MiniSearch#discard}, a vacuuming operation is automatically started if
902
+ * the `dirtCount` and `dirtFactor` are above the `minDirtCount` and
903
+ * `minDirtFactor` thresholds defined by this configuration. See {@link
904
+ * VacuumConditions} for details on these.
905
+ *
906
+ * Also, `batchSize` and `batchWait` can be specified, controlling batching
907
+ * behavior (see {@link VacuumOptions}).
908
+ */
909
+ type AutoVacuumOptions = VacuumOptions & VacuumConditions;
910
+ /** NOTE(review): presumably maps a document's short numeric ID to the term's frequency in that document — confirm against the implementation. */ type DocumentTermFreqs = Map<number, number>;
911
+ /** Value type stored per term in the `_index` map. NOTE(review): the numeric key is presumably a field ID — confirm against the implementation. */ type FieldTermData = Map<number, DocumentTermFreqs>;
912
+ /**
913
+ * {@link MiniSearch} is the main entrypoint class, implementing a full-text
914
+ * search engine in memory.
915
+ *
916
+ * @typeParam T The type of the documents being indexed.
917
+ *
918
+ * ### Basic example:
919
+ *
920
+ * ```javascript
921
+ * const documents = [
922
+ * {
923
+ * id: 1,
924
+ * title: 'Moby Dick',
925
+ * text: 'Call me Ishmael. Some years ago...',
926
+ * category: 'fiction'
927
+ * },
928
+ * {
929
+ * id: 2,
930
+ * title: 'Zen and the Art of Motorcycle Maintenance',
931
+ * text: 'I can see by my watch...',
932
+ * category: 'fiction'
933
+ * },
934
+ * {
935
+ * id: 3,
936
+ * title: 'Neuromancer',
937
+ * text: 'The sky above the port was...',
938
+ * category: 'fiction'
939
+ * },
940
+ * {
941
+ * id: 4,
942
+ * title: 'Zen and the Art of Archery',
943
+ * text: 'At first sight it must seem...',
944
+ * category: 'non-fiction'
945
+ * },
946
+ * // ...and more
947
+ * ]
948
+ *
949
+ * // Create a search engine that indexes the 'title' and 'text' fields for
950
+ * // full-text search. Search results will include 'title' and 'category' (plus the
951
+ * // id field, that is always stored and returned)
952
+ * const miniSearch = new MiniSearch({
953
+ * fields: ['title', 'text'],
954
+ * storeFields: ['title', 'category']
955
+ * })
956
+ *
957
+ * // Add documents to the index
958
+ * miniSearch.addAll(documents)
959
+ *
960
+ * // Search for documents:
961
+ * let results = miniSearch.search('zen art motorcycle')
962
+ * // => [
963
+ * // { id: 2, title: 'Zen and the Art of Motorcycle Maintenance', category: 'fiction', score: 2.77258 },
964
+ * // { id: 4, title: 'Zen and the Art of Archery', category: 'non-fiction', score: 1.38629 }
965
+ * // ]
966
+ * ```
967
+ */
968
+ declare class MiniSearch<T = any> {
969
+ protected _options: OptionsWithDefaults<T>;
970
+ protected _index: SearchableMap<FieldTermData>;
971
+ protected _documentCount: number;
972
+ protected _documentIds: Map<number, any>;
973
+ protected _idToShortId: Map<any, number>;
974
+ protected _fieldIds: {
975
+ [key: string]: number;
976
+ };
977
+ protected _fieldLength: Map<number, number[]>;
978
+ protected _avgFieldLength: number[];
979
+ protected _nextId: number;
980
+ protected _storedFields: Map<number, Record<string, unknown>>;
981
+ protected _dirtCount: number;
982
+ private _currentVacuum;
983
+ private _enqueuedVacuum;
984
+ private _enqueuedVacuumConditions;
985
+ /**
986
+ * The special wildcard symbol that can be passed to {@link MiniSearch#search}
987
+ * to match all documents
988
+ */
989
+ static readonly wildcard: typeof WILDCARD_QUERY;
990
+ /**
991
+ * @param options Configuration options
992
+ *
993
+ * ### Examples:
994
+ *
995
+ * ```javascript
996
+ * // Create a search engine that indexes the 'title' and 'text' fields of your
997
+ * // documents:
998
+ * const miniSearch = new MiniSearch({ fields: ['title', 'text'] })
999
+ * ```
1000
+ *
1001
+ * ### ID Field:
1002
+ *
1003
+ * ```javascript
1004
+ * // Your documents are assumed to include a unique 'id' field, but if you want
1005
+ * // to use a different field for document identification, you can set the
1006
+ * // 'idField' option:
1007
+ * const miniSearch = new MiniSearch({ idField: 'key', fields: ['title', 'text'] })
1008
+ * ```
1009
+ *
1010
+ * ### Options and defaults:
1011
+ *
1012
+ * ```javascript
1013
+ * // The full set of options (here with their default value) is:
1014
+ * const miniSearch = new MiniSearch({
1015
+ * // idField: field that uniquely identifies a document
1016
+ * idField: 'id',
1017
+ *
1018
+ * // extractField: function used to get the value of a field in a document.
1019
+ * // By default, it assumes the document is a flat object with field names as
1020
+ * // property keys and field values as string property values, but custom logic
1021
+ * // can be implemented by setting this option to a custom extractor function.
1022
+ * extractField: (document, fieldName) => document[fieldName],
1023
+ *
1024
+ * // tokenize: function used to split fields into individual terms. By
1025
+ * // default, it is also used to tokenize search queries, unless a specific
1026
+ * // `tokenize` search option is supplied. When tokenizing an indexed field,
1027
+ * // the field name is passed as the second argument.
1028
+ * tokenize: (string, _fieldName) => string.split(SPACE_OR_PUNCTUATION),
1029
+ *
1030
+ * // processTerm: function used to process each tokenized term before
1031
+ * // indexing. It can be used for stemming and normalization. Return a falsy
1032
+ * // value in order to discard a term. By default, it is also used to process
1033
+ * // search queries, unless a specific `processTerm` option is supplied as a
1034
+ * // search option. When processing a term from an indexed field, the field
1035
+ * // name is passed as the second argument.
1036
+ * processTerm: (term, _fieldName) => term.toLowerCase(),
1037
+ *
1038
+ * // searchOptions: default search options, see the `search` method for
1039
+ * // details
1040
+ * searchOptions: undefined,
1041
+ *
1042
+ * // fields: document fields to be indexed. Mandatory, but not set by default
1043
+ * fields: undefined,
1044
+ *
1045
+ * // storeFields: document fields to be stored and returned as part of the
1046
+ * // search results.
1047
+ * storeFields: []
1048
+ * })
1049
+ * ```
1050
+ */
1051
+ constructor(options: Options<T>);
1052
+ /**
1053
+ * Adds a document to the index
1054
+ *
1055
+ * @param document The document to be indexed
1056
+ */
1057
+ add(document: T): void;
1058
+ /**
1059
+ * Adds all the given documents to the index
1060
+ *
1061
+ * @param documents An array of documents to be indexed
1062
+ */
1063
+ addAll(documents: readonly T[]): void;
1064
+ /**
1065
+ * Adds all the given documents to the index asynchronously.
1066
+ *
1067
+ * Returns a promise that resolves (to `undefined`) when the indexing is done.
1068
+ * This method is useful when indexing many documents, to avoid blocking the main
1069
+ * thread. The indexing is performed asynchronously and in chunks.
1070
+ *
1071
+ * @param documents An array of documents to be indexed
1072
+ * @param options Configuration options
1073
+ * @return A promise resolving to `undefined` when the indexing is done
1074
+ */
1075
+ addAllAsync(documents: readonly T[], options?: {
1076
+ chunkSize?: number;
1077
+ }): Promise<void>;
1078
+ /**
1079
+ * Removes the given document from the index.
1080
+ *
1081
+ * The document to remove must NOT have changed between indexing and removal,
1082
+ * otherwise the index will be corrupted.
1083
+ *
1084
+ * This method requires passing the full document to be removed (not just the
1085
+ * ID), and immediately removes the document from the inverted index, allowing
1086
+ * memory to be released. A convenient alternative is {@link
1087
+ * MiniSearch#discard}, which needs only the document ID, and has the same
1088
+ * visible effect, but delays cleaning up the index until the next vacuuming.
1089
+ *
1090
+ * @param document The document to be removed
1091
+ */
1092
+ remove(document: T): void;
1093
+ /**
1094
+ * Removes all the given documents from the index. If called with no arguments,
1095
+ * it removes _all_ documents from the index.
1096
+ *
1097
+ * @param documents The documents to be removed. If this argument is omitted,
1098
+ * all documents are removed. Note that, for removing all documents, it is
1099
+ * more efficient to call this method with no arguments than to pass all
1100
+ * documents.
1101
+ */
1102
+ removeAll(documents?: readonly T[]): void;
1103
+ /**
1104
+ * Discards the document with the given ID, so it won't appear in search results
1105
+ *
1106
+ * It has the same visible effect as {@link MiniSearch#remove} (both cause the
1107
+ * document to stop appearing in searches), but a different effect on the
1108
+ * internal data structures:
1109
+ *
1110
+ * - {@link MiniSearch#remove} requires passing the full document to be
1111
+ * removed as argument, and removes it from the inverted index immediately.
1112
+ *
1113
+ * - {@link MiniSearch#discard} instead only needs the document ID, and
1114
+ * works by marking the current version of the document as discarded, so it
1115
+ * is immediately ignored by searches. This is faster and more convenient
1116
+ * than {@link MiniSearch#remove}, but the index is not immediately
1117
+ * modified. To take care of that, vacuuming is performed after a certain
1118
+ * number of documents are discarded, cleaning up the index and allowing
1119
+ * memory to be released.
1120
+ *
1121
+ * After discarding a document, it is possible to re-add a new version, and
1122
+ * only the new version will appear in searches. In other words, discarding
1123
+ * and re-adding a document works exactly like removing and re-adding it. The
1124
+ * {@link MiniSearch.replace} method can also be used to replace a document
1125
+ * with a new version.
1126
+ *
1127
+ * #### Details about vacuuming
1128
+ *
1129
+ * Repeated calls to this method would leave obsolete document references in
1130
+ * the index, invisible to searches. Two mechanisms take care of cleaning up:
1131
+ * clean up during search, and vacuuming.
1132
+ *
1133
+ * - Upon search, whenever a discarded ID is found (and ignored for the
1134
+ * results), references to the discarded document are removed from the
1135
+ * inverted index entries for the search terms. This ensures that subsequent
1136
+ * searches for the same terms do not need to skip these obsolete references
1137
+ * again.
1138
+ *
1139
+ * - In addition, vacuuming is performed automatically by default (see the
1140
+ * `autoVacuum` field in {@link Options}) after a certain number of
1141
+ * documents are discarded. Vacuuming traverses all terms in the index,
1142
+ * cleaning up all references to discarded documents. Vacuuming can also be
1143
+ * triggered manually by calling {@link MiniSearch#vacuum}.
1144
+ *
1145
+ * @param id The ID of the document to be discarded
1146
+ */
1147
+ discard(id: any): void;
1148
+ private maybeAutoVacuum;
1149
+ /**
1150
+ * Discards the documents with the given IDs, so they won't appear in search
1151
+ * results
1152
+ *
1153
+ * It is equivalent to calling {@link MiniSearch#discard} for all the given
1154
+ * IDs, but with the optimization of triggering at most one automatic
1155
+ * vacuuming at the end.
1156
+ *
1157
+ * Note: to remove all documents from the index, it is faster and more
1158
+ * convenient to call {@link MiniSearch.removeAll} with no argument, instead
1159
+ * of passing all IDs to this method.
1160
+ */
1161
+ discardAll(ids: readonly any[]): void;
1162
+ /**
1163
+ * It replaces an existing document with the given updated version
1164
+ *
1165
+ * It works by discarding the current version and adding the updated one, so
1166
+ * it is functionally equivalent to calling {@link MiniSearch#discard}
1167
+ * followed by {@link MiniSearch#add}. The ID of the updated document should
1168
+ * be the same as the original one.
1169
+ *
1170
+ * Since it uses {@link MiniSearch#discard} internally, this method relies on
1171
+ * vacuuming to clean up obsolete document references from the index, allowing
1172
+ * memory to be released (see {@link MiniSearch#discard}).
1173
+ *
1174
+ * @param updatedDocument The updated document to replace the old version
1175
+ * with
1176
+ */
1177
+ replace(updatedDocument: T): void;
1178
+ /**
1179
+ * Triggers a manual vacuuming, cleaning up references to discarded documents
1180
+ * from the inverted index
1181
+ *
1182
+ * Vacuuming is only useful for applications that use the {@link
1183
+ * MiniSearch#discard} or {@link MiniSearch#replace} methods.
1184
+ *
1185
+ * By default, vacuuming is performed automatically when needed (controlled by
1186
+ * the `autoVacuum` field in {@link Options}), so there is usually no need to
1187
+ * call this method, unless one wants to make sure to perform vacuuming at a
1188
+ * specific moment.
1189
+ *
1190
+ * Vacuuming traverses all terms in the inverted index in batches, and cleans
1191
+ * up references to discarded documents from the posting list, allowing memory
1192
+ * to be released.
1193
+ *
1194
+ * The method takes an optional object as argument with the following keys:
1195
+ *
1196
+ * - `batchSize`: the size of each batch (1000 by default)
1197
+ *
1198
+ * - `batchWait`: the number of milliseconds to wait between batches (10 by
1199
+ * default)
1200
+ *
1201
+ * On large indexes, vacuuming could have a non-negligible cost: batching
1202
+ * avoids blocking the thread for long, diluting this cost so that it is not
1203
+ * negatively affecting the application. Nonetheless, this method should only
1204
+ * be called when necessary, and relying on automatic vacuuming is usually
1205
+ * better.
1206
+ *
1207
+ * It returns a promise that resolves (to undefined) when the clean up is
1208
+ * completed. If vacuuming is already ongoing at the time this method is
1209
+ * called, a new one is enqueued immediately after the ongoing one, and a
1210
+ * corresponding promise is returned. However, no more than one vacuuming is
1211
+ * enqueued on top of the ongoing one, even if this method is called more
1212
+ * times (enqueuing multiple ones would be useless).
1213
+ *
1214
+ * @param options Configuration options for the batch size and delay. See
1215
+ * {@link VacuumOptions}.
1216
+ */
1217
+ vacuum(options?: VacuumOptions): Promise<void>;
1218
+ private conditionalVacuum;
1219
+ private performVacuuming;
1220
+ private vacuumConditionsMet;
1221
+ /**
1222
+ * Is `true` if a vacuuming operation is ongoing, `false` otherwise
1223
+ */
1224
+ get isVacuuming(): boolean;
1225
+ /**
1226
+ * The number of documents discarded since the most recent vacuuming
1227
+ */
1228
+ get dirtCount(): number;
1229
+ /**
1230
+ * A number between 0 and 1 giving an indication about the proportion of
1231
+ * documents that are discarded, and can therefore be cleaned up by vacuuming.
1232
+ * A value close to 0 means that the index is relatively clean, while a higher
1233
+ * value means that the index is relatively dirty, and vacuuming could release
1234
+ * memory.
1235
+ */
1236
+ get dirtFactor(): number;
1237
+ /**
1238
+ * Returns `true` if a document with the given ID is present in the index and
1239
+ * available for search, `false` otherwise
1240
+ *
1241
+ * @param id The document ID
1242
+ */
1243
+ has(id: any): boolean;
1244
+ /**
1245
+ * Returns the stored fields (as configured in the `storeFields` constructor
1246
+ * option) for the given document ID. Returns `undefined` if the document is
1247
+ * not present in the index.
1248
+ *
1249
+ * @param id The document ID
1250
+ */
1251
+ getStoredFields(id: any): Record<string, unknown> | undefined;
1252
+ /**
1253
+ * Search for documents matching the given search query.
1254
+ *
1255
+ * The result is a list of scored document IDs matching the query, sorted by
1256
+ * descending score, and each including data about which terms were matched and
1257
+ * in which fields.
1258
+ *
1259
+ * ### Basic usage:
1260
+ *
1261
+ * ```javascript
1262
+ * // Search for "zen art motorcycle" with default options: terms have to match
1263
+ * // exactly, and individual terms are joined with OR
1264
+ * miniSearch.search('zen art motorcycle')
1265
+ * // => [ { id: 2, score: 2.77258, match: { ... } }, { id: 4, score: 1.38629, match: { ... } } ]
1266
+ * ```
1267
+ *
1268
+ * ### Restrict search to specific fields:
1269
+ *
1270
+ * ```javascript
1271
+ * // Search only in the 'title' field
1272
+ * miniSearch.search('zen', { fields: ['title'] })
1273
+ * ```
1274
+ *
1275
+ * ### Field boosting:
1276
+ *
1277
+ * ```javascript
1278
+ * // Boost a field
1279
+ * miniSearch.search('zen', { boost: { title: 2 } })
1280
+ * ```
1281
+ *
1282
+ * ### Prefix search:
1283
+ *
1284
+ * ```javascript
1285
+ * // Search for "moto neuro" with prefix search (it will match documents
1286
+ * // containing terms that start with "moto" or "neuro")
1287
+ * miniSearch.search('moto neuro', { prefix: true })
1288
+ * ```
1289
+ *
1290
+ * ### Fuzzy search:
1291
+ *
1292
+ * ```javascript
1293
+ * // Search for "ismael" with fuzzy search (it will match documents containing
1294
+ * // terms similar to "ismael", with a maximum edit distance of 0.2 * term.length
1295
+ * // (rounded to nearest integer))
1296
+ * miniSearch.search('ismael', { fuzzy: 0.2 })
1297
+ * ```
1298
+ *
1299
+ * ### Combining strategies:
1300
+ *
1301
+ * ```javascript
1302
+ * // Mix of exact match, prefix search, and fuzzy search
1303
+ * miniSearch.search('ismael mob', {
1304
+ * prefix: true,
1305
+ * fuzzy: 0.2
1306
+ * })
1307
+ * ```
1308
+ *
1309
+ * ### Advanced prefix and fuzzy search:
1310
+ *
1311
+ * ```javascript
1312
+ * // Perform fuzzy and prefix search depending on the search term. Here
1313
+ * // performing prefix and fuzzy search only on terms longer than 3 characters
1314
+ * miniSearch.search('ismael mob', {
1315
+ * prefix: term => term.length > 3,
1316
+ * fuzzy: term => term.length > 3 ? 0.2 : null
1317
+ * })
1318
+ * ```
1319
+ *
1320
+ * ### Combine with AND:
1321
+ *
1322
+ * ```javascript
1323
+ * // Combine search terms with AND (to match only documents that contain both
1324
+ * // "motorcycle" and "art")
1325
+ * miniSearch.search('motorcycle art', { combineWith: 'AND' })
1326
+ * ```
1327
+ *
1328
+ * ### Combine with AND_NOT:
1329
+ *
1330
+ * There is also an AND_NOT combinator, that finds documents that match the
1331
+ * first term, but do not match any of the other terms. This combinator is
1332
+ * rarely useful with simple queries, and is meant to be used with advanced
1333
+ * query combinations (see later for more details).
1334
+ *
1335
+ * ### Filtering results:
1336
+ *
1337
+ * ```javascript
1338
+ * // Filter only results in the 'fiction' category (assuming that 'category'
1339
+ * // is a stored field)
1340
+ * miniSearch.search('motorcycle art', {
1341
+ * filter: (result) => result.category === 'fiction'
1342
+ * })
1343
+ * ```
1344
+ *
1345
+ * ### Wildcard query
1346
+ *
1347
+ * Searching for an empty string (assuming the default tokenizer) returns no
1348
+ * results. Sometimes though, one needs to match all documents, like in a
1349
+ * "wildcard" search. This is possible by passing the special value
1350
+ * {@link MiniSearch.wildcard} as the query:
1351
+ *
1352
+ * ```javascript
1353
+ * // Return search results for all documents
1354
+ * miniSearch.search(MiniSearch.wildcard)
1355
+ * ```
1356
+ *
1357
+ * Note that search options such as `filter` and `boostDocument` are still
1358
+ * applied, influencing which results are returned, and their order:
1359
+ *
1360
+ * ```javascript
1361
+ * // Return search results for all documents in the 'fiction' category
1362
+ * miniSearch.search(MiniSearch.wildcard, {
1363
+ * filter: (result) => result.category === 'fiction'
1364
+ * })
1365
+ * ```
1366
+ *
1367
+ * ### Advanced combination of queries:
1368
+ *
1369
+ * It is possible to combine different subqueries with OR, AND, and AND_NOT,
1370
+ * and even with different search options, by passing a query expression
1371
+ * tree object as the first argument, instead of a string.
1372
+ *
1373
+ * ```javascript
1374
+ * // Search for documents that contain "zen" and ("motorcycle" or "archery")
1375
+ * miniSearch.search({
1376
+ * combineWith: 'AND',
1377
+ * queries: [
1378
+ * 'zen',
1379
+ * {
1380
+ * combineWith: 'OR',
1381
+ * queries: ['motorcycle', 'archery']
1382
+ * }
1383
+ * ]
1384
+ * })
1385
+ *
1386
+ * // Search for documents that contain ("apple" or "pear") but not "juice" and
1387
+ * // not "tree"
1388
+ * miniSearch.search({
1389
+ * combineWith: 'AND_NOT',
1390
+ * queries: [
1391
+ * {
1392
+ * combineWith: 'OR',
1393
+ * queries: ['apple', 'pear']
1394
+ * },
1395
+ * 'juice',
1396
+ * 'tree'
1397
+ * ]
1398
+ * })
1399
+ * ```
1400
+ *
1401
+ * Each node in the expression tree can be either a string, or an object that
1402
+ * supports all {@link SearchOptions} fields, plus a `queries` array field for
1403
+ * subqueries.
1404
+ *
1405
+ * Note that, while this can become complicated to do by hand for complex or
1406
+ * deeply nested queries, it provides a formalized expression tree API for
1407
+ * external libraries that implement a parser for custom query languages.
1408
+ *
1409
+ * @param query Search query
1410
+ * @param searchOptions Search options. Each option, if not given, defaults to the corresponding value of `searchOptions` given to the constructor, or to the library default.
1411
+ */
1412
+ search(query: Query, searchOptions?: SearchOptions): SearchResult[];
1413
+ /**
1414
+ * Provide suggestions for the given search query
1415
+ *
1416
+ * The result is a list of suggested modified search queries, derived from the
1417
+ * given search query, each with a relevance score, sorted by descending score.
1418
+ *
1419
+ * By default, it uses the same options used for search, except that by
1420
+ * default it performs prefix search on the last term of the query, and
1421
+ * combines terms with `'AND'` (requiring all query terms to match). Custom
1422
+ * options can be passed as a second argument. Defaults can be changed upon
1423
+ * calling the {@link MiniSearch} constructor, by passing a
1424
+ * `autoSuggestOptions` option.
1425
+ *
1426
+ * ### Basic usage:
1427
+ *
1428
+ * ```javascript
1429
+ * // Get suggestions for 'neuro':
1430
+ * miniSearch.autoSuggest('neuro')
1431
+ * // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 0.46240 } ]
1432
+ * ```
1433
+ *
1434
+ * ### Multiple words:
1435
+ *
1436
+ * ```javascript
1437
+ * // Get suggestions for 'zen ar':
1438
+ * miniSearch.autoSuggest('zen ar')
1439
+ * // => [
1440
+ * // { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 },
1441
+ * // { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 }
1442
+ * // ]
1443
+ * ```
1444
+ *
1445
+ * ### Fuzzy suggestions:
1446
+ *
1447
+ * ```javascript
1448
+ * // Correct spelling mistakes using fuzzy search:
1449
+ * miniSearch.autoSuggest('neromancer', { fuzzy: 0.2 })
1450
+ * // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 1.03998 } ]
1451
+ * ```
1452
+ *
1453
+ * ### Filtering:
1454
+ *
1455
+ * ```javascript
1456
+ * // Get suggestions for 'zen ar', but only within the 'fiction' category
1457
+ * // (assuming that 'category' is a stored field):
1458
+ * miniSearch.autoSuggest('zen ar', {
1459
+ * filter: (result) => result.category === 'fiction'
1460
+ * })
1461
+ * // => [
1462
+ * // { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 },
1463
+ * // { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 }
1464
+ * // ]
1465
+ * ```
1466
+ *
1467
+ * @param queryString Query string to be expanded into suggestions
1468
+ * @param options Search options. The supported options and default values
1469
+ * are the same as for the {@link MiniSearch#search} method, except that by
1470
+ * default prefix search is performed on the last term in the query, and terms
1471
+ * are combined with `'AND'`.
1472
+ * @return An array of suggestions, sorted by descending relevance score.
1473
+ */
1474
+ autoSuggest(queryString: string, options?: SearchOptions): Suggestion[];
1475
+ /**
1476
+ * Total number of documents available to search
1477
+ */
1478
+ get documentCount(): number;
1479
+ /**
1480
+ * Number of terms in the index
1481
+ */
1482
+ get termCount(): number;
1483
+ /**
1484
+ * Deserializes a JSON index (serialized with `JSON.stringify(miniSearch)`)
1485
+ * and instantiates a MiniSearch instance. It should be given the same options
1486
+ * originally used when serializing the index.
1487
+ *
1488
+ * ### Usage:
1489
+ *
1490
+ * ```javascript
1491
+ * // If the index was serialized with:
1492
+ * let miniSearch = new MiniSearch({ fields: ['title', 'text'] })
1493
+ * miniSearch.addAll(documents)
1494
+ *
1495
+ * const json = JSON.stringify(miniSearch)
1496
+ * // It can later be deserialized like this:
1497
+ * miniSearch = MiniSearch.loadJSON(json, { fields: ['title', 'text'] })
1498
+ * ```
1499
+ *
1500
+ * @param json JSON-serialized index
1501
+ * @param options configuration options, same as the constructor
1502
+ * @return An instance of MiniSearch deserialized from the given JSON.
1503
+ */
1504
+ static loadJSON<T = any>(json: string, options: Options<T>): MiniSearch<T>;
1505
+ /**
1506
+ * Async equivalent of {@link MiniSearch.loadJSON}
1507
+ *
1508
+ * This function is an alternative to {@link MiniSearch.loadJSON} that returns
1509
+ * a promise, and loads the index in batches, leaving pauses between them to avoid
1510
+ * blocking the main thread. It tends to be slower than the synchronous
1511
+ * version, but does not block the main thread, so it can be a better choice
1512
+ * when deserializing very large indexes.
1513
+ *
1514
+ * @param json JSON-serialized index
1515
+ * @param options configuration options, same as the constructor
1516
+ * @return A Promise that will resolve to an instance of MiniSearch deserialized from the given JSON.
1517
+ */
1518
+ static loadJSONAsync<T = any>(json: string, options: Options<T>): Promise<MiniSearch<T>>;
1519
+ /**
1520
+ * Returns the default value of an option. It will throw an error if no option
1521
+ * with the given name exists.
1522
+ *
1523
+ * @param optionName Name of the option
1524
+ * @return The default value of the given option
1525
+ *
1526
+ * ### Usage:
1527
+ *
1528
+ * ```javascript
1529
+ * // Get default tokenizer
1530
+ * MiniSearch.getDefault('tokenize')
1531
+ *
1532
+ * // Get default term processor
1533
+ * MiniSearch.getDefault('processTerm')
1534
+ *
1535
+ * // Unknown options will throw an error
1536
+ * MiniSearch.getDefault('notExisting')
1537
+ * // => throws 'MiniSearch: unknown option "notExisting"'
1538
+ * ```
1539
+ */
1540
+ static getDefault(optionName: string): any;
1541
+ /**
1542
+ * @ignore
1543
+ */
1544
+ static loadJS<T = any>(js: AsPlainObject, options: Options<T>): MiniSearch<T>;
1545
+ /**
1546
+ * @ignore
1547
+ */
1548
+ static loadJSAsync<T = any>(js: AsPlainObject, options: Options<T>): Promise<MiniSearch<T>>;
1549
+ /**
1550
+ * @ignore
1551
+ */
1552
+ private static instantiateMiniSearch;
1553
+ /** Private member; this declaration gives only its name, with no type or signature.
1554
+ * @ignore
1555
+ */
1556
+ private executeQuery;
1557
+ /** Private member; this declaration gives only its name, with no type or signature.
1558
+ * @ignore
1559
+ */
1560
+ private executeQuerySpec;
1561
+ /** Private member; this declaration gives only its name, with no type or signature.
1562
+ * @ignore
1563
+ */
1564
+ private executeWildcardQuery;
1565
+ /** Private member; this declaration gives only its name, with no type or signature.
1566
+ * @ignore
1567
+ */
1568
+ private combineResults;
1569
+ /**
1570
+ * Builds and returns a read-only {@link FrozenMiniSearch} snapshot optimized for RAM usage and search CPU time.
1571
+ */
1572
+ freeze(): FrozenMiniSearch;
1573
+ /**
1574
+ * Serializes the index to a plain object suitable for JSON, so that it can be
1575
+ * stored and later deserialized with {@link MiniSearch.loadJSON}.
1576
+ *
1577
+ * Normally one does not call this method directly, but rather calls the
1578
+ * standard JavaScript `JSON.stringify()`, passing the {@link MiniSearch}
1579
+ * instance, and JavaScript will internally call this method. Upon
1580
+ * deserialization, one must pass to {@link MiniSearch.loadJSON} the same
1581
+ * options used to create the original instance that was serialized.
1582
+ *
1583
+ * ### Usage:
1584
+ *
1585
+ * ```javascript
1586
+ * // Serialize the index:
1587
+ * let miniSearch = new MiniSearch({ fields: ['title', 'text'] })
1588
+ * miniSearch.addAll(documents)
1589
+ * const json = JSON.stringify(miniSearch)
1590
+ *
1591
+ * // Later, to deserialize it:
1592
+ * miniSearch = MiniSearch.loadJSON(json, { fields: ['title', 'text'] })
1593
+ * ```
1594
+ *
1595
+ * @return A plain-object serializable representation of the search index.
1596
+ */
1597
+ toJSON(): AsPlainObject;
1598
+ /** Private member; this declaration gives only its name, with no type or signature.
1599
+ * @ignore
1600
+ */
1601
+ private termResults;
1602
+ /** Private member; this declaration gives only its name, with no type or signature.
1603
+ * @ignore
1604
+ */
1605
+ private addTerm;
1606
+ /** Private member; this declaration gives only its name, with no type or signature.
1607
+ * @ignore
1608
+ */
1609
+ private removeTerm;
1610
+ /** Private member; this declaration gives only its name, with no type or signature.
1611
+ * @ignore
1612
+ */
1613
+ private warnDocumentChanged;
1614
+ /** Private member; this declaration gives only its name, with no type or signature.
1615
+ * @ignore
1616
+ */
1617
+ private addDocumentId;
1618
+ /** Private member; this declaration gives only its name, with no type or signature.
1619
+ * @ignore
1620
+ */
1621
+ private addFields;
1622
+ /** Private member; this declaration gives only its name, with no type or signature.
1623
+ * @ignore
1624
+ */
1625
+ private addFieldLength;
1626
+ /** Private member; this declaration gives only its name, with no type or signature.
1627
+ * @ignore
1628
+ */
1629
+ private removeFieldLength;
1630
+ }
1631
+ interface SerializedIndexEntry { // Dictionary shape; not listed in the export statement at the end of this file.
1632
+ [key: string]: number; // Any string key maps to a number; what the keys and numbers represent is not shown here - TODO confirm.
1633
+ }
1634
+
1635
+ export { AND, AND_NOT, type AsPlainObject, type AutoVacuumOptions, type BM25Params, type CombinationOperator, type FrozenAssembleParams, type FrozenMemoryBreakdown, FrozenMiniSearch, type LowercaseCombinationOperator, type MatchInfo, OR, type Options, type Query, type QueryCombination, type SearchOptions, type SearchResult, type Suggestion, type VacuumConditions, type VacuumOptions, type Wildcard, assembleFrozen, buildFrozenFromDocuments, MiniSearch as default, freezeFromMiniSearch, frozenMemoryBreakdown };