@yoch/frozenminisearch 1.2.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +30 -17
- package/dist/browser/index.d.ts +693 -0
- package/dist/browser/index.js +1 -0
- package/dist/cjs/index.cjs +3348 -3201
- package/dist/es/index.d.ts +105 -58
- package/dist/es/index.js +3345 -3202
- package/package.json +12 -3
|
@@ -0,0 +1,693 @@
|
|
|
1
|
+
/**
 * Unique symbol standing for the wildcard query (matches all documents).
 *
 * Use {@link FrozenMiniSearch.wildcard} in application code.
 */
declare const WILDCARD_QUERY: unique symbol;
|
|
6
|
+
|
|
7
|
+
/**
 * BM25+ algorithm parameters.
 *
 * NOTE(review): the individual roles of `k`, `b` and `d` are not described in
 * this declaration file; presumably they follow the usual BM25+ naming —
 * confirm against the scoring implementation.
 */
type BM25Params = {
  k: number;
  b: number;
  d: number;
};
|
|
15
|
+
/** Combination operators in their lower-case spelling. */
type LowercaseCombinationOperator = 'or' | 'and' | 'and_not';
/**
 * Any accepted spelling of a combination operator: lower-case, fully
 * upper-case (e.g. `'AND_NOT'`) or first-letter-capitalized (e.g. `'And_not'`).
 */
type CombinationOperator = LowercaseCombinationOperator | Uppercase<LowercaseCombinationOperator> | Capitalize<LowercaseCombinationOperator>;
|
|
17
|
+
/** Severity level passed as the first argument of the `logger` callback in `Options`. */
type LogLevel = 'debug' | 'info' | 'warn' | 'error';
|
|
18
|
+
/**
 * Search options to customize the search behavior.
 */
type SearchOptions = {
  /**
   * Names of the fields to search in. If omitted, all fields are searched.
   */
  fields?: string[];
  /**
   * Function used to filter search results, for example on the basis of stored
   * fields. It receives each search result and returns a boolean indicating
   * whether the result should be kept.
   */
  filter?: (result: SearchResult) => boolean;
  /**
   * Key-value object of field names to boosting values. By default, fields are
   * assigned a boosting factor of 1. If one assigns to a field a boosting value
   * of 2, a result that matches the query in that field is assigned a score
   * twice as high as a result matching the query in another field, all else
   * being equal.
   */
  boost?: {
    [fieldName: string]: number;
  };
  /**
   * Function to calculate a boost factor for each query term. Returning a
   * factor lower than 1 reduces the importance of the term, greater than 1
   * increases it, and exactly 1 is neutral.
   */
  boostTerm?: (term: string, i: number, terms: string[]) => number;
  /**
   * Relative weights to assign to prefix search results and fuzzy search
   * results. Exact matches are assigned a weight of 1.
   */
  weights?: {
    fuzzy: number;
    prefix: number;
  };
  /**
   * Function to calculate a boost factor for documents. It takes as arguments
   * the document ID, a term that matches the search in that document, and
   * the value of the stored fields for the document (if any). A falsy value
   * skips the search result completely.
   */
  boostDocument?: (documentId: any, term: string, storedFields?: Record<string, unknown>) => number;
  /**
   * Controls whether to perform prefix search. Either a boolean, or a function
   * called per query term that returns a boolean.
   */
  prefix?: boolean | ((term: string, index: number, terms: string[]) => boolean);
  /**
   * Controls whether to perform fuzzy search. Either a boolean (default
   * fuzziness), a number (explicit edit distance ≥ 1, or fractional 0–1 of the
   * term length), or a function returning either.
   */
  fuzzy?: boolean | number | ((term: string, index: number, terms: string[]) => boolean | number);
  /**
   * Maximum fuzziness when using a fractional fuzzy value. Defaults to 6.
   */
  maxFuzzy?: number;
  /**
   * The operator used to combine partial results for each term. Defaults to "OR".
   */
  combineWith?: CombinationOperator;
  /**
   * Function to tokenize the search query. By default, the same tokenizer used
   * for indexing is also used for search.
   */
  tokenize?: (text: string) => string[];
  /**
   * Function to process or normalize terms in the search query. By default, the
   * same term processor used for indexing is also used for search.
   */
  processTerm?: (term: string) => string | string[] | null | undefined | false;
  /**
   * BM25+ algorithm parameters. Customizing these is almost never necessary.
   */
  bm25?: BM25Params;
};
|
|
97
|
+
/**
 * `SearchOptions` with library defaults filled in. Used as the canonical shape
 * resolved by `MiniSearch` / `FrozenMiniSearch` before passing options around.
 *
 * The members listed here are the ones that are no longer optional once the
 * defaults have been applied.
 */
type SearchOptionsWithDefaults = SearchOptions & {
  boost: {
    [fieldName: string]: number;
  };
  weights: {
    fuzzy: number;
    prefix: number;
  };
  prefix: boolean | ((term: string, index: number, terms: string[]) => boolean);
  fuzzy: boolean | number | ((term: string, index: number, terms: string[]) => boolean | number);
  maxFuzzy: number;
  combineWith: CombinationOperator;
  bm25: BM25Params;
};
|
|
115
|
+
/**
 * Configuration options compatible with the MiniSearch constructor.
 *
 * @typeParam T The type of documents being indexed.
 */
type Options<T = any> = {
  /** Names of the document fields to be indexed. */
  fields: string[];
  /** Name of the ID field, uniquely identifying a document. Defaults to `"id"`. */
  idField?: string;
  /** Names of fields to store, so that search results would include them. */
  storeFields?: string[];
  /** Function used to extract the value of each field in documents. */
  extractField?: (document: T, fieldName: string) => any;
  /** Function used to turn field values into strings for indexing. */
  stringifyField?: (fieldValue: any, fieldName: string) => string;
  /** Function used to split a field value into individual terms to be indexed. */
  tokenize?: (text: string, fieldName?: string) => string[];
  /** Function used to process a term before indexing or search (e.g. stemming). */
  processTerm?: (term: string, fieldName?: string) => string | string[] | null | undefined | false;
  /** Function called to log messages from the library. */
  logger?: (level: LogLevel, message: string, code?: string) => void;
  /** Auto-vacuum behaviour after MiniSearch `discard`; defaults to `true`. */
  autoVacuum?: boolean | AutoVacuumOptions;
  /** Default search options. */
  searchOptions?: SearchOptions;
  /** Default auto-suggest options. */
  autoSuggestOptions?: SearchOptions;
};
|
|
144
|
+
/**
 * Canonical `Options<T>` with defaults filled in. Shared by `MiniSearch`,
 * `FrozenMiniSearch`, the frozen builder, and the binary load path so they
 * cannot drift.
 *
 * @typeParam T The type of documents being indexed.
 */
type OptionsWithDefaults<T = any> = Options<T> & {
  storeFields: string[];
  idField: string;
  extractField: (document: T, fieldName: string) => any;
  stringifyField: (fieldValue: any, fieldName: string) => string;
  tokenize: (text: string, fieldName: string) => string[];
  processTerm: (term: string, fieldName: string) => string | string[] | null | undefined | false;
  logger: (level: LogLevel, message: string, code?: string) => void;
  autoVacuum: false | AutoVacuumOptions;
  searchOptions: SearchOptionsWithDefaults;
  autoSuggestOptions: SearchOptions;
};
|
|
161
|
+
/** Browser snapshot compression (`zstd` not supported). */
type BrowserBinaryCompression = 'auto' | 'raw' | 'zlib';
|
|
163
|
+
/** Browser `saveBinaryAsync()` options (no zstd). */
type BrowserSaveBinaryAsyncOptions = {
  /** Compression mode for the saved snapshot; optional. */
  compression?: BrowserBinaryCompression;
};
|
|
167
|
+
/**
 * A search-completion suggestion.
 */
type Suggestion = {
  /** The suggested phrase. */
  suggestion: string;
  /** The suggestion as an array of terms. */
  terms: string[];
  /** Score for the suggestion. */
  score: number;
};
|
|
178
|
+
/**
 * Match information for a search result: keys are terms that matched, values
 * are the list of fields each term was found in.
 */
type MatchInfo = {
  [term: string]: string[];
};
|
|
185
|
+
/**
 * A single search result, including the document ID, terms that matched, the
 * match information, the score, and all the stored fields.
 */
type SearchResult = {
  /** The document ID. */
  id: any;
  /** Document terms that matched (e.g. `"motorcycle"` for prefix `"moto"`). */
  terms: string[];
  /** Query terms that matched (e.g. `"moto"` for prefix `"moto"`). */
  queryTerms: string[];
  /** Score of the search result. */
  score: number;
  /** Match information, see {@link MatchInfo}. */
  match: MatchInfo;
  /** Stored fields are merged onto the result. */
  [key: string]: any;
};
|
|
203
|
+
/**
 * A boolean combination of sub-queries. Being intersected with
 * `SearchOptions`, it can also carry search options of its own.
 */
type QueryCombination = SearchOptions & {
  /** The sub-queries being combined. */
  queries: Query[];
};
|
|
207
|
+
/**
 * Wildcard query symbol, used to match all documents.
 * Use {@link FrozenMiniSearch.wildcard}.
 */
type Wildcard = typeof WILDCARD_QUERY;
|
|
212
|
+
/**
 * Search query expression: a query string, an expression tree combining
 * several queries with `AND`/`OR`/`AND_NOT`, or the wildcard symbol.
 */
type Query = QueryCombination | string | Wildcard;
|
|
217
|
+
/**
 * Options controlling vacuuming behaviour.
 */
type VacuumOptions = {
  /** Number of terms traversed per batch. Defaults to 1000. */
  batchSize?: number;
  /** Wait time between batches in milliseconds. Defaults to 10. */
  batchWait?: number;
};
|
|
226
|
+
/**
 * Minimum thresholds for `dirtCount` and `dirtFactor` triggering an automatic
 * vacuum.
 */
type VacuumConditions = {
  /** Minimum dirt count; defaults to 20. */
  minDirtCount?: number;
  /** Minimum dirt factor; defaults to 0.1. */
  minDirtFactor?: number;
};
|
|
236
|
+
/**
 * Options controlling auto-vacuum behaviour. Combines {@link VacuumOptions} and
 * {@link VacuumConditions}.
 */
type AutoVacuumOptions = VacuumOptions & VacuumConditions;
|
|
241
|
+
|
|
242
|
+
/**
 * Runtime stored fields, discriminated by `kind`:
 *
 * - `'none'`   — carries no data.
 * - `'single'` — a single store field kept as one column (`values`), with no
 *   per-document `Record` at rest.
 * - `'multi'`  — one optional `Record` per row.
 *
 * Wire format stays row JSON; encode/decode can skip intermediate row arrays
 * when the layout is known.
 */
type StoredFieldsLayout = {
  kind: 'none';
} | {
  kind: 'single';
  field: string;
  values: unknown[];
} | {
  kind: 'multi';
  rows: (Record<string, unknown> | undefined)[];
};
|
|
256
|
+
|
|
257
|
+
/**
 * Exported combination-operator constants.
 *
 * NOTE(review): each is declared with the whole `LowercaseCombinationOperator`
 * union rather than its own literal, so the type does not reveal which string
 * a given constant holds — presumably `'or'`, `'and'` and `'and_not'`
 * respectively; confirm against the implementation.
 */
declare const OR: LowercaseCombinationOperator;
declare const AND: LowercaseCombinationOperator;
declare const AND_NOT: LowercaseCombinationOperator;
|
|
260
|
+
/** One raw (not yet finalized) hit: its score, matched terms and match info. */
interface RawResultValue {
  score: number;
  terms: string[];
  match: MatchInfo;
}
/**
 * Raw hits keyed by a numeric id.
 *
 * NOTE(review): presumably the key is the internal document id that
 * `getExternalId(docId)` later maps to the public ID — confirm.
 */
type RawResult = Map<number, RawResultValue>;
|
|
266
|
+
/** Posting list for one (term, field): docId -> term frequency */
interface PostingListLike {
  /** Size of the posting list (presumably the number of documents in it — confirm). */
  readonly size: number;
  /** Invokes `callback` with a document id and its term frequency. */
  forEachDoc(callback: (docId: number, termFreq: number) => void): void;
}
|
|
271
|
+
/** term -> fieldId -> posting list */
interface FieldTermDataLike {
  /** Posting list for the given field id, or `undefined` when there is none. */
  get(fieldId: number): PostingListLike | undefined;
}
|
|
275
|
+
/** Parameter object accepted by `finalizeSearchResults`. */
interface FinalizeSearchParams {
  /** Raw hits to turn into public search results. */
  rawResults: RawResult;
  /** Maps a numeric document id to the externally visible ID. */
  getExternalId: (docId: number) => unknown;
  /** Optional accessor for a document's stored fields. */
  getStoredFields?: (docId: number) => Record<string, unknown> | undefined;
  /** When set, copies stored fields in place (no per-doc row allocation for single-column layouts). */
  storedFieldsLayout?: StoredFieldsLayout;
  /** Optional predicate deciding which results are kept. */
  filter?: (result: SearchResult) => boolean;
  /** NOTE(review): presumably skips ordering results by score when `true` — confirm. */
  skipSort?: boolean;
}
|
|
284
|
+
/** Merge search options, apply wildcard skipSort, then {@link finalizeSearchResults}. */
declare function finalizeRawSearchResults(rawResults: RawResult, query: Query, searchOptions: SearchOptions, globalSearchOptions: SearchOptionsWithDefaults, getExternalId: (docId: number) => unknown, getStoredFields?: (docId: number) => Record<string, unknown> | undefined, storedFieldsLayout?: StoredFieldsLayout): SearchResult[];
/** Turns raw hits into public {@link SearchResult} objects as directed by `params`. */
declare function finalizeSearchResults(params: FinalizeSearchParams): SearchResult[];
|
|
287
|
+
|
|
288
|
+
/** The part of a search result needed to build suggestions: `score` and `terms`. */
type SuggestionHit = Pick<SearchResult, 'score' | 'terms'>;
/** Aggregate search hits into ranked phrase suggestions. */
declare function suggestFromSearchResults(hits: Iterable<SuggestionHit>): Suggestion[];
/** Build suggestions from raw search hits without materializing full public results. */
declare function suggestFromRawResults(rawResults: RawResult): Suggestion[];
|
|
293
|
+
|
|
294
|
+
/**
 * Smallest unsigned typed array that can hold the structure's indices. Widths
 * are chosen adaptively at build time (`packedIndexArray` in `layout.ts`); reads via
 * `arr[i]` are width-agnostic, so query code never branches on the concrete type.
 */
type PackedIndexArray = Uint8Array | Uint16Array | Uint32Array;
|
|
300
|
+
/**
 * Reference to a term by index instead of by string. The string can be
 * obtained on demand via `termByIndex(termIndex)` on the packed radix map.
 */
type PackedTermRef = {
  /** Index of the term. */
  termIndex: number;
  /** NOTE(review): presumably the length of the referenced term — confirm. */
  length: number;
};
|
|
304
|
+
/** A {@link PackedTermRef} produced by a fuzzy lookup, with its edit distance attached. */
type PackedFuzzyRef = PackedTermRef & {
  /** Distance of the match (fuzzy lookups are bounded by a `maxDistance` edit distance). */
  distance: number;
};
|
|
307
|
+
/** In-memory packed string radix map (term → payload). */
interface PackedStringRadixMap<V = number> {
  readonly size: number;
  /** Payload stored for `term`, or `undefined` when absent. */
  get(term: string): V | undefined;
  /** All `[term, payload]` pairs. */
  entries(): Iterable<[string, V]>;
  /** Term references for the given `prefix`. */
  prefixRefs(prefix: string): Iterable<PackedTermRef>;
  /** Lazy generator; see `packedRadixFuzzyRefs` in `fuzzy.ts` for rationale. */
  fuzzyRefs(term: string, maxDistance: number): Iterable<PackedFuzzyRef>;
  /** Term string for a term index. */
  termByIndex(termIndex: number): string;
  /** Length of the term at a term index. */
  termLengthByIndex(termIndex: number): number;
  /** @deprecated Internal benchmark/compat wrapper. Prefer `prefixRefs` + `termByIndex`. */
  prefixEntries(prefix: string): Iterable<[string, V]>;
  /**
   * @deprecated Internal benchmark/compat wrapper. Prefer `fuzzyRefs` + `termByIndex`.
   *
   * Fuzzy matches for `term` within `maxDistance` edit distance. Yields every matching
   * `[term, value, distance]`; iteration order is implementation-defined (compare sets, not order).
   */
  fuzzyEntries(term: string, maxDistance: number): Iterable<[string, V, number]>;
  packedByteLength(): number;
  packedNodeCount(): number;
  packedEdgeCount(): number;
}
|
|
330
|
+
/** Column data of a packed radix tree, as accepted by `PackedRadixTree.fromData`. */
interface PackedRadixTreeData {
  readonly size: number;
  readonly nodeCount: number;
  readonly edgeCount: number;
  /** NOTE(review): presumably the concatenated edge labels addressed by `edgeLabelStart`/`edgeLabelLength` — confirm. */
  readonly labelHeap: string;
  /**
   * CSR edge offsets, length `nodeCount + 1`. Node `n` owns edges
   * `[nodeEdgeOffset[n], nodeEdgeOffset[n + 1])`; the final entry equals
   * `edgeCount`. Replaces the former `nodeFirstEdge`/`nodeEdgeCount` pair: edges
   * are laid out contiguously in node order, so the per-node first index is just
   * the prefix sum of the counts and need not be stored separately.
   */
  readonly nodeEdgeOffset: PackedIndexArray;
  /**
   * Leaf payload per node (term index for a frozen index). Meaningful only when
   * the node has a leaf, i.e. `nodeLeafOrder[n] !== 0`; otherwise the cell is
   * unused (stored as `0`). Width adapts to the largest payload.
   */
  readonly nodeValue: PackedIndexArray;
  /**
   * Leaf slot among a node's siblings, encoded as `slot + 1` with `0` meaning
   * "no leaf". This avoids a wide sentinel: the column adapts to the largest
   * child count instead of forcing `Uint32`. Decode with {@link decodeLeafSlot}.
   */
  readonly nodeLeafOrder: PackedIndexArray;
  readonly edgeLabelStart: PackedIndexArray;
  readonly edgeLabelLength: PackedIndexArray;
  readonly edgeChild: PackedIndexArray;
}
|
|
359
|
+
|
|
360
|
+
/**
 * Node-indexed parent pointers enabling per-term reconstruction without ever
 * materializing a global path blob. Reconstruction climbs leaf → root in
 * O(depth) and touches only the terms actually requested, which is the whole
 * point of deferring string materialization.
 *
 * - `leafNodeByTermIndex[ti]` is the node carrying term `ti`'s leaf.
 * - `parentNode[node]` is `node`'s parent (root = node `0`, value unused).
 * - `parentEdge[node]` is the edge index linking `parentNode[node]` to `node`.
 *
 * All three are built by cheap linear scans (O(nodeCount + edgeCount)), with no
 * recursion and no per-term allocation, so building them is far cheaper than the
 * former whole-tree DFS + path-edge blob.
 */
type PackedLazyTermMetadata = {
  leafNodeByTermIndex: PackedIndexArray;
  parentNode: PackedIndexArray;
  parentEdge: PackedIndexArray;
};
|
|
379
|
+
|
|
380
|
+
/**
 * Packed radix tree mapping terms to numeric payloads. The constructor is
 * private; instances are obtained through {@link PackedRadixTree.fromData}.
 */
declare class PackedRadixTree implements PackedStringRadixMap<number>, PackedRadixTreeData {
  readonly size: number;
  readonly nodeCount: number;
  readonly edgeCount: number;
  readonly labelHeap: string;
  readonly nodeEdgeOffset: PackedIndexArray;
  readonly nodeValue: PackedIndexArray;
  readonly nodeLeafOrder: PackedIndexArray;
  readonly edgeLabelStart: PackedIndexArray;
  readonly edgeLabelLength: PackedIndexArray;
  readonly edgeChild: PackedIndexArray;
  private _lazyTermMetadata;
  private constructor();
  /** Creates a tree from previously built column data. */
  static fromData(data: PackedRadixTreeData): PackedRadixTree;
  private findEdge;
  get(term: string): number | undefined;
  entries(): IterableIterator<[string, number]>;
  /** @deprecated Internal benchmark/compat wrapper. Prefer `prefixRefs` + `termByIndex`. */
  prefixEntries(prefix: string): IterableIterator<[string, number]>;
  prefixRefs(prefix: string): IterableIterator<PackedTermRef>;
  /**
   * Walk `prefix` to the subtree root; returns accumulated heap label prefix string.
   * `null` when no terms share the prefix.
   */
  private resolvePrefixWalk;
  private resolvePrefixWalkRef;
  /**
   * Follow `key` from the root. Shared by exact lookup and prefix iteration.
   * Mid-edge stop uses the full edge label in `prefix` (SearchableMap parity).
   */
  private walkKey;
  /**
   * Depth-first traversal matching {@link SearchableMap}'s `TreeIterator`, which
   * visits siblings in reverse Map-insertion order (last key first). The leaf, if
   * any, sits at `nodeLeafOrder` among the original sibling slots; everything else
   * is an edge. Exact order matters for prefix iteration and autoSuggest parity.
   */
  private emitSubtree;
  private emitSubtreeRefs;
  /** @deprecated Internal benchmark/compat wrapper. Prefer `fuzzyRefs` + `termByIndex`. */
  fuzzyEntries(term: string, maxDistance: number): Iterable<[string, number, number]>;
  fuzzyRefs(term: string, maxDistance: number): Iterable<PackedFuzzyRef>;
  /** Parent-pointer metadata used for per-term reconstruction; see {@link PackedLazyTermMetadata}. */
  lazyTermMetadata(): PackedLazyTermMetadata;
  termLengthByIndex(termIndex: number): number;
  termByIndex(termIndex: number): string;
  packedByteLength(): number;
  packedNodeCount(): number;
  packedEdgeCount(): number;
}
|
|
429
|
+
|
|
430
|
+
/** Frozen term index used by {@link FrozenMiniSearch} (packed radix tree). */
type FrozenTermIndex = PackedRadixTree;
|
|
432
|
+
|
|
433
|
+
/** Strategy names reported by {@link IdToShortIdLookup}. */
type IdLookupMode = 'identity' | 'lazy-map';
/**
 * Lookup from an external document ID to a number.
 *
 * NOTE(review): by its name the returned number is the internal "short id";
 * how each `mode` resolves IDs is not visible in this declaration — confirm.
 */
interface IdToShortIdLookup {
  readonly mode: IdLookupMode;
  readonly mapEntryCount: number;
  /** Whether `id` is known to the lookup. */
  has(id: unknown): boolean;
  /** Number associated with `id`, or `undefined` when there is none. */
  get(id: unknown): number | undefined;
}
|
|
440
|
+
|
|
441
|
+
/** Unsigned column for document ids (16- or 32-bit elements). */
type DocIdArray = Uint16Array | Uint32Array;
/** Adaptive-width unsigned column for term frequencies (u8 or u16; never u32). */
type FreqArray = Uint8Array | Uint16Array;

/** Unsigned column for field ids (8- or 16-bit elements). */
type FieldIdArray = Uint8Array | Uint16Array;
/** Names of the two postings layouts. */
type PostingsLayoutKind = 'dense' | 'sparse';
|
|
447
|
+
/**
 * Typed-array columns describing the postings of a frozen index.
 *
 * NOTE(review): presumably the `dense*` columns are populated (and the
 * `sparse*` ones `null`) when `layout` is `'dense'`, and the reverse for
 * `'sparse'`; only `sparseFieldIdWidth` is documented that way — confirm.
 */
interface FrozenPostingsLayout {
  fieldCount: number;
  termCount: number;
  nextId: number;
  layout: PostingsLayoutKind;
  docIdWidth: 16 | 32;
  /** Width of sparse field id column; null when layout is dense. */
  sparseFieldIdWidth: 8 | 16 | null;
  allDocIds: DocIdArray;
  allFreqs: FreqArray;
  denseOffsets: Uint32Array | null;
  denseLengths: Uint32Array | null;
  sparseTermStarts: Uint32Array | null;
  sparseFieldIds: FieldIdArray | null;
  sparseOffsets: Uint32Array | null;
  sparseLengths: Uint32Array | null;
}
|
|
464
|
+
/** Single rebindable {@link FieldTermDataLike} per frozen index (O(1) RAM). */
type FrozenFieldTermFlyweight = FieldTermDataLike & {
  /** Rebinds the flyweight to the term at `termIndex`; returns a flyweight so calls can be chained. */
  bind(termIndex: number): FrozenFieldTermFlyweight;
};
|
|
468
|
+
|
|
469
|
+
/** Adaptive-width unsigned column (1/2/4 bytes per element) for field lengths and packed radix columns. */
type FieldLengthArray = PackedIndexArray;
|
|
471
|
+
|
|
472
|
+
/**
 * Memory-usage report for a frozen index, as returned by `memoryBreakdown()`
 * and `frozenMemoryBreakdown()`. Figures are grouped by postings, radix tree
 * and per-document data.
 */
interface FrozenMemoryBreakdown {
  termCount: number;
  documentCount: number;
  nextId: number;
  postings: {
    slotCount: number;
    layout: string;
    docIdWidth: number;
    allDocIdsBytes: number;
    allFreqsBytes: number;
    offsetsBytes: number;
    lengthsBytes: number;
    totalTypedBytes: number;
  };
  radixTree: {
    nodeCount: number;
    edgeCount: number;
    estimatedBytes: number;
  };
  documents: {
    externalIdsSlots: number;
    storedFieldsSlots: number;
    idLookupMode: string;
    idToShortIdEntries: number;
    fieldLengthMatrixBytes: number;
    avgFieldLengthBytes: number;
    storedFieldsJsonBytes: number;
  };
  estimatedStructuredBytes: number;
}
|
|
502
|
+
/**
 * Low-level parameters for {@link assembleFrozen} (custom frozen index pipelines).
 * Field types are part of the public surface for advanced assembly; typical apps use
 * {@link buildFrozenFromDocuments}, {@link FrozenMiniSearch.fromJson}, or binary load instead.
 *
 * @typeParam T The type of documents being indexed.
 */
interface FrozenAssembleParams<T = any> {
  options: OptionsWithDefaults<T>;
  documentCount: number;
  nextId: number;
  fieldIds: {
    [field: string]: number;
  };
  fieldCount: number;
  externalIds: unknown[];
  idLookup: IdToShortIdLookup;
  storedFields: StoredFieldsLayout;
  fieldLengthMatrix: FieldLengthArray;
  avgFieldLength: Float32Array;
  index: FrozenTermIndex;
  termCount: number;
  postings: FrozenPostingsLayout;
}
|
|
524
|
+
|
|
525
|
+
/**
 * String-keyed numeric entries used inside the `index` of a
 * {@link MiniSearchSnapshot}.
 *
 * NOTE(review): presumably short document id → term frequency — confirm.
 */
type SerializedIndexEntry = Record<string, number>;
/** MiniSearch JSON snapshot (`toJSON` wire format, `serializationVersion` 1 or 2). */
type MiniSearchSnapshot = {
  documentCount: number;
  nextId: number;
  documentIds: {
    [shortId: string]: unknown;
  };
  fieldIds: {
    [fieldName: string]: number;
  };
  fieldLength: {
    [shortId: string]: number[];
  };
  averageFieldLength: number[];
  storedFields: {
    [shortId: string]: Record<string, unknown> | undefined;
  };
  dirtCount?: number;
  /**
   * `[term, perField]` pairs; each per-field value is either a bare
   * {@link SerializedIndexEntry} or one wrapped as `{ ds: … }`.
   */
  index: [string, {
    [fieldId: string]: SerializedIndexEntry | {
      ds: SerializedIndexEntry;
    };
  }][];
  serializationVersion: number;
};
|
|
551
|
+
|
|
552
|
+
/** Optional sizing hints for a `FrozenIndexBuilder`. */
interface FrozenIndexBuilderHints {
  /** Pre-size per-document arrays when the final document count is known. */
  estimatedDocumentCount?: number;
  /** Hint for initial growable posting column capacity per (term, field) slot. */
  estimatedPostingsPerSlot?: number;
}
|
|
558
|
+
/**
 * Incremental builder for {@link FrozenMiniSearch} without materializing a full `documents[]` array.
 *
 * @typeParam T The type of documents being indexed.
 */
declare class FrozenIndexBuilder<T> {
  private readonly _options;
  private readonly _fieldIds;
  private readonly _fieldCount;
  private _index;
  private readonly _postings;
  private readonly _termCount;
  private readonly _externalIds;
  private readonly _storedFields;
  private readonly _fieldLengthData;
  private readonly _avgFieldLength;
  private readonly _seenIds;
  private readonly _fieldTermFreqScratch;
  private readonly _rawTokenScratch;
  private readonly _tokenScratch;
  private _nextId;
  private _frozen;
  /**
   * @param options Index configuration
   * @param hints Optional sizing hints
   */
  constructor(options: Options<T>, hints?: FrozenIndexBuilderHints);
  /** Number of documents indexed so far (not yet frozen). */
  get documentCount(): number;
  /**
   * Adds a single document to the index.
   *
   * @param document The document to be indexed
   */
  add(document: T): void;
  /**
   * Adds all the given documents to the index.
   *
   * @param documents An array of documents to be indexed
   */
  addAll(documents: readonly T[]): void;
  /**
   * Adds all the given documents to the index asynchronously.
   *
   * Returns a promise that resolves (to `undefined`) when the indexing is done.
   * This method is useful when indexing many documents, to avoid blocking the main
   * thread. The indexing is performed asynchronously and in chunks. Finalize with
   * {@link freezeFrozenIndexBuilder} when done.
   *
   * @param documents An array of documents to be indexed
   * @param options Configuration options
   * @return A promise resolving to `undefined` when the indexing is done
   */
  addAllAsync(documents: readonly T[], options?: {
    chunkSize?: number;
  }): Promise<void>;
  /**
   * Finalize this builder into assembly params. Call {@link assembleFrozen} or
   * {@link freezeFrozenIndexBuilder} to obtain a {@link FrozenMiniSearch} instance.
   */
  freezeParams(): FrozenAssembleParams<T>;
}
|
|
607
|
+
/**
 * Create an incremental builder for {@link FrozenMiniSearch}.
 *
 * @param options Index configuration
 * @param hints Optional sizing hints
 */
declare function createFrozenIndexBuilder<T>(options: Options<T>, hints?: FrozenIndexBuilderHints): FrozenIndexBuilder<T>;
|
|
609
|
+
|
|
610
|
+
/** Returns the {@link FrozenMemoryBreakdown} report for the given frozen index. */
declare function frozenMemoryBreakdown(frozen: FrozenMiniSearchCore): FrozenMemoryBreakdown;
|
|
611
|
+
/**
 * Constructor signature taking `FrozenAssembleParams<T>` and producing `I`.
 * Used as the `this` type of the static factories on `FrozenMiniSearchCore`,
 * so a factory called on a subclass is typed as returning that subclass.
 */
type FrozenMiniSearchCtor<T, I extends FrozenMiniSearchCore<T>> = new (params: FrozenAssembleParams<T>) => I;
|
|
612
|
+
/**
 * Read-only search index exposing `search` and `autoSuggest`, plus static
 * factories that build an index from documents, MiniSearch snapshots, or an
 * async stream of documents.
 *
 * @typeParam T The type of documents being indexed.
 */
declare class FrozenMiniSearchCore<T = any> {
  protected readonly _options: OptionsWithDefaults<T>;
  protected readonly _index: FrozenTermIndex;
  protected readonly _documentCount: number;
  protected readonly _nextId: number;
  protected readonly _externalIds: unknown[];
  protected readonly _idLookup: IdToShortIdLookup;
  protected readonly _fieldIds: {
    [field: string]: number;
  };
  protected readonly _fieldCount: number;
  protected readonly _fieldLengthMatrix: FieldLengthArray;
  protected readonly _avgFieldLength: Float32Array;
  protected readonly _storedFields: StoredFieldsLayout;
  protected readonly _termCount: number;
  protected readonly _postings: FrozenPostingsLayout;
  protected readonly _fieldTermFlyweight: FrozenFieldTermFlyweight;
  private readonly _aggregateContext;
  private readonly _queryEngineParams;
  private readonly _hasStoredFields;
  /** @param params Low-level assembly parameters; see {@link FrozenAssembleParams}. */
  constructor(params: FrozenAssembleParams<T>);
  /** Wildcard query symbol (matches all documents). */
  static readonly wildcard: typeof WILDCARD_QUERY;
  get documentCount(): number;
  get termCount(): number;
  /** Memory-usage report for this index; see {@link FrozenMemoryBreakdown}. */
  memoryBreakdown(): FrozenMemoryBreakdown;
  /** Whether a document with the given ID is known to the index. */
  has(id: unknown): boolean;
  /** Stored fields for the given document ID, or `undefined` when there are none. */
  getStoredFields(id: unknown): Record<string, unknown> | undefined;
  /**
   * Runs `query` against the index.
   *
   * @param query Query string, query combination, or the wildcard symbol
   * @param searchOptions Optional per-call search options
   */
  search(query: Query, searchOptions?: SearchOptions): SearchResult[];
  /**
   * Without a `filter`, aggregates suggestions from raw query hits (no full result materialization).
   * With a `filter`, uses {@link search} so stored fields are available to the predicate.
   */
  autoSuggest(queryString: string, options?: SearchOptions): Suggestion[];
  /** Build a read-only index in one pass from documents. */
  static fromDocuments<T, I extends FrozenMiniSearchCore<T>>(this: FrozenMiniSearchCtor<T, I>, documents: readonly T[], options: Options<T>): I;
  /**
   * Export this index as a MiniSearch wire snapshot (`serializationVersion: 2`).
   * Use for migration or interchange with the `minisearch` package (`JSON.stringify` works via this method).
   * Term order in `index` may differ from MiniSearch native `toJSON`; search scores stay equivalent.
   */
  toJSON(): MiniSearchSnapshot;
  /**
   * Build a new frozen index **from** a MiniSearch JSON snapshot string (import / migration).
   * Accepts the wire format produced by MiniSearch `toJSON` or by {@link toJSON} on this class.
   * No runtime dependency on the `minisearch` package.
   */
  static fromJson<T, I extends FrozenMiniSearchCore<T>>(this: FrozenMiniSearchCtor<T, I>, json: string, options?: Options<T>): I;
  /**
   * Same as {@link fromJson} with a pre-parsed snapshot object.
   * `storedFields` are shallow-copied; callers must not mutate nested values
   * after load if they intend to keep the index immutable.
   */
  static fromMiniSearchSnapshot<T, I extends FrozenMiniSearchCore<T>>(this: FrozenMiniSearchCtor<T, I>, snapshot: MiniSearchSnapshot, options?: Options<T>): I;
  /** Accepts any object exposing `toJSON()` in MiniSearch snapshot shape. */
  static fromMiniSearch<T, I extends FrozenMiniSearchCore<T>>(this: FrozenMiniSearchCtor<T, I>, source: {
    toJSON(): MiniSearchSnapshot;
  }, options?: Options<T>): I;
  /**
   * Build a read-only index from an async stream of documents (e.g. CSV parser).
   * For sync iterables, use {@link createFrozenIndexBuilder} with `for...of` instead.
   *
   * @param hints Optional builder hints; `estimatedDocumentCount` pre-allocates
   * per-document arrays when the final document count is known upfront.
   */
  static fromAsyncIterable<T, I extends FrozenMiniSearchCore<T>>(this: FrozenMiniSearchCtor<T, I>, iterable: AsyncIterable<T>, options: Options<T>, hints?: FrozenIndexBuilderHints): Promise<I>;
  private getFieldLength;
  private executeQuery;
}
|
|
680
|
+
|
|
681
|
+
/** Creates a `FrozenMiniSearchBrowser` from low-level assembly parameters. */
declare function assembleFrozen<T>(params: FrozenAssembleParams<T>): FrozenMiniSearchBrowser<T>;
|
|
682
|
+
/**
 * Browser flavour of the frozen index: adds asynchronous binary save/load on
 * top of {@link FrozenMiniSearchCore}. Exported as `FrozenMiniSearch` and as
 * the default export.
 *
 * @typeParam T The type of documents being indexed.
 */
declare class FrozenMiniSearchBrowser<T = any> extends FrozenMiniSearchCore<T> {
  /**
   * Serializes the index to bytes.
   *
   * @param saveOptions Optional compression choice; see {@link BrowserSaveBinaryAsyncOptions}
   */
  saveBinaryAsync(saveOptions?: BrowserSaveBinaryAsyncOptions): Promise<Uint8Array>;
  private binarySnapshotInput;
  /**
   * Loads an index from bytes.
   *
   * NOTE(review): presumably `buffer` must be the output of
   * {@link saveBinaryAsync} — confirm accepted formats.
   */
  static loadBinaryAsync<T>(buffer: Uint8Array, options?: Options<T>): Promise<FrozenMiniSearchBrowser<T>>;
  private static fromBinarySnapshot;
}
|
|
688
|
+
|
|
689
|
+
/** Builds a `FrozenMiniSearchBrowser` from an array of documents and index options. */
declare function buildFrozenFromDocuments<T>(documents: readonly T[], options: Options<T>): FrozenMiniSearchBrowser<T>;
|
|
690
|
+
/** Finalizes a `FrozenIndexBuilder` into a `FrozenMiniSearchBrowser` instance. */
declare function freezeFrozenIndexBuilder<T>(builder: FrozenIndexBuilder<T>): FrozenMiniSearchBrowser<T>;
|
|
691
|
+
|
|
692
|
+
// Public value exports. `FrozenMiniSearchBrowser` is exposed both as the named
// export `FrozenMiniSearch` and as the default export.
export { AND, AND_NOT, FrozenIndexBuilder, FrozenMiniSearchBrowser as FrozenMiniSearch, OR, assembleFrozen, buildFrozenFromDocuments, createFrozenIndexBuilder, FrozenMiniSearchBrowser as default, finalizeRawSearchResults, finalizeSearchResults, freezeFrozenIndexBuilder, frozenMemoryBreakdown, suggestFromRawResults, suggestFromSearchResults };
// Public type-only exports.
export type { BM25Params, BrowserBinaryCompression, BrowserSaveBinaryAsyncOptions, CombinationOperator, FrozenAssembleParams, FrozenIndexBuilderHints, FrozenMemoryBreakdown, LogLevel, LowercaseCombinationOperator, MatchInfo, MiniSearchSnapshot, Options, Query, QueryCombination, SearchOptions, SearchResult, SerializedIndexEntry, Suggestion, Wildcard };
|