@yoch/frozenminisearch 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4350 @@
1
+ 'use strict';
2
+
3
+ Object.defineProperty(exports, '__esModule', { value: true });
4
+
5
+ var zlib = require('node:zlib');
6
+
7
/** Largest term frequency kept in frozen flat storage (the Uint16 maximum). */
const MAX_FREQ = 0xffff;
8
/**
 * Read one document id out of a doc-id buffer.
 *
 * @param docIds Indexable buffer of document ids
 * @param index Position to read
 * @return The value stored at `index` (`undefined` when out of range)
 */
function readDocId(docIds, index) {
    const docId = docIds[index];
    return docId;
}
11
/**
 * Allocate a zero-filled frequency buffer of the given length.
 *
 * @param length Number of cells to allocate
 * @param maxValue Largest frequency the buffer has to hold
 * @return A `Uint8Array` when `maxValue` fits in one byte, otherwise a `Uint16Array`
 */
function allocateFreqs(length, maxValue) {
    const fitsInOneByte = maxValue <= 0xff;
    const FreqArray = fitsInOneByte ? Uint8Array : Uint16Array;
    return new FreqArray(length);
}
16
/**
 * Cap a term frequency at {@link MAX_FREQ} so it fits frozen flat storage (max Uint16).
 * Larger values are rare, and the BM25+ contribution is already flat well below that.
 *
 * @param freq Raw term frequency
 * @return `freq` unchanged, or {@link MAX_FREQ} when `freq` exceeds it
 */
function clampFreq(freq) {
    if (freq > MAX_FREQ) {
        return MAX_FREQ;
    }
    return freq;
}
23
/**
 * A window (`offset`, `length`) over the shared flat posting buffers.
 * The view holds references to the buffers and never copies them.
 */
class SegmentPostingList {
    /**
     * @param docIds Shared buffer of document ids
     * @param freqs Shared buffer of term frequencies, parallel to `docIds`
     * @param offset Index of the first posting of this segment
     * @param length Number of postings in this segment
     */
    constructor(docIds, freqs, offset, length) {
        this.docIds = docIds;
        this.freqs = freqs;
        this._offset = offset;
        this._length = length;
    }
    /** Index of the first posting of this segment in the shared buffers. */
    get offset() {
        return this._offset;
    }
    /** Number of postings in this segment. */
    get length() {
        return this._length;
    }
    /** Same as {@link length}. */
    get size() {
        return this._length;
    }
    /**
     * Point this view at another segment of the same buffers (flyweight use).
     *
     * @param offset New first index
     * @param length New posting count
     * @return This view, to allow chaining
     */
    rebind(offset, length) {
        this._offset = offset;
        this._length = length;
        return this;
    }
    /**
     * Invoke `callback(docId, termFreq)` for every posting of the segment, in buffer order.
     * The segment bounds are captured once before the first callback.
     *
     * @param callback Function receiving the document id and its term frequency
     */
    forEachDoc(callback) {
        const ids = this.docIds;
        const frequencies = this.freqs;
        const start = this.offset;
        const end = start + this.length;
        for (let pos = start; pos < end; pos++) {
            callback(readDocId(ids, pos), frequencies[pos]);
        }
    }
}
53
+
54
/**
 * Symbol standing for the wildcard query (matches all documents).
 * Application code should use {@link FrozenMiniSearch.wildcard}.
 */
const WILDCARD_QUERY = Symbol('*');
/**
 * Tell whether `query` is this package's wildcard symbol.
 * The check is by identity: another symbol with the same description does not match.
 *
 * @param query Any query value
 * @return `true` only when `query` is {@link WILDCARD_QUERY}
 */
function isWildcardQuery(query) {
    return WILDCARD_QUERY === query;
}
63
+
64
/** Combination operator: union of the per-term results. */
const OR = 'or';
/** Combination operator: intersection of the per-term results. */
const AND = 'and';
/** Combination operator: first result minus the documents of the following ones. */
const AND_NOT = 'and_not';
/** Default BM25+ parameters (`k`, `b`, `d`) used for scoring. */
const defaultBM25params = {
    k: 1.2,
    b: 0.7,
    d: 0.5,
};
68
/**
 * Precompute the per-field values of the BM25+ formula that do not depend on
 * the document being scored.
 *
 * @param bm25params Object carrying `k`, `b` and `d`
 * @param avgFieldLength Average length of the field being scored
 * @return `{ k, d, k1, oneMinusB, bOverAvg }` where `k1 = k + 1`,
 * `oneMinusB = 1 - b` and `bOverAvg = b / avgFieldLength`
 */
function bm25FieldConstants(bm25params, avgFieldLength) {
    const k = bm25params.k;
    const b = bm25params.b;
    const d = bm25params.d;
    const k1 = k + 1;
    const oneMinusB = 1 - b;
    const bOverAvg = b / avgFieldLength;
    return { k, d, k1, oneMinusB, bOverAvg };
}
72
/**
 * Compute the BM25+ score of one term occurrence from precomputed field constants.
 *
 * @param termFreq Frequency of the term in the document field
 * @param matchingCount Number of documents matching the term
 * @param totalCount Total number of documents
 * @param fieldLength Length of the field in the scored document
 * @param constants Values produced by `bm25FieldConstants`
 * @return Inverse document frequency times the saturated term-frequency part plus `d`
 */
function calcBM25ScoreWithConstants(termFreq, matchingCount, totalCount, fieldLength, constants) {
    const idfRatio = (totalCount - matchingCount + 0.5) / (matchingCount + 0.5);
    const invDocFreq = Math.log(1 + idfRatio);
    // Length normalization: (1 - b) + b * fieldLength / avgFieldLength.
    const lengthNorm = constants.oneMinusB + constants.bOverAvg * fieldLength;
    const saturatedFreq = termFreq * constants.k1 / (termFreq + constants.k * lengthNorm);
    return invDocFreq * (constants.d + saturatedFreq);
}
77
/**
 * Read `property` from `object` only when it is an own property.
 *
 * @param object Object to read from
 * @param property Property name
 * @return The own property value, or `undefined` for missing or inherited properties
 */
const getOwnProperty = (object, property) => {
    if (Object.prototype.hasOwnProperty.call(object, property)) {
        return object[property];
    }
    return undefined;
};
78
/**
 * Resolve the fields a query searches and the boost applied to each of them.
 *
 * @param options Search options; `options.fields` overrides `fields`, and
 * `options.boost` maps field names to boost factors
 * @param fields Fields searched when `options.fields` is not set
 * @return `{ names, boosts }`: the field names and a field-to-boost map,
 * where a missing or falsy boost becomes `1`
 */
function fieldBoostsForQuery(options, fields) {
    const boosts = {};
    const searchFields = options.fields ? options.fields : fields;
    for (const field of searchFields) {
        const configured = getOwnProperty(options.boost, field);
        boosts[field] = configured ? configured : 1;
    }
    const names = Object.keys(boosts);
    return { names, boosts };
}
86
/**
 * Append `term` to `target` unless it is already there.
 *
 * @param target Array of terms, modified in place
 * @param term Term to add
 */
const assignUniqueTerm = (target, term) => {
    if (target.includes(term)) {
        return;
    }
    target.push(term);
};
90
/**
 * Append every term of `source` that `target` does not contain yet,
 * keeping the order of `source`.
 *
 * @param target Array of terms, modified in place
 * @param source Iterable of terms to add
 */
const assignUniqueTerms = (target, source) => {
    for (const candidate of source) {
        if (target.includes(candidate)) {
            continue;
        }
        target.push(candidate);
    }
};
96
/** Sort comparator ordering results by descending `score`. */
const byScore = (left, right) => {
    const { score: leftScore } = left;
    const { score: rightScore } = right;
    return rightScore - leftScore;
};
97
/**
 * Resolve a derived term, which is either a plain string or an object whose
 * `resolve()` method produces the string on demand.
 *
 * @param derivedTerm The term string, or a lazy term exposing `resolve()`
 * @param cache Object whose `value` property memoizes the resolved lazy term
 * @return The derived term
 */
function getDerivedTerm(derivedTerm, cache) {
    const isPlainTerm = typeof derivedTerm === 'string';
    if (isPlainTerm) {
        return derivedTerm;
    }
    if (cache.value !== undefined) {
        return cache.value;
    }
    // First request for this lazy term: resolve once and remember the result.
    cache.value = derivedTerm.resolve();
    return cache.value;
}
104
/**
 * Score one posting (a document containing a derived term in a field) and
 * merge it into `results`.
 *
 * The document boost comes from `boostDocumentFn` when given, otherwise `1`.
 * A falsy boost drops the document without touching `results`. The BM25 score
 * is multiplied by the term weight, term boost, field boost and document boost.
 * It is then added to the document's existing entry, or stored as a new
 * `{ score, terms, match }` entry.
 *
 * @param sourceTerm Query term recorded in the entry's `terms`
 * @param derivedTerm Matched index term (string or lazy term), used as key in `match`
 * @param field Field name recorded in `match`
 * @param fieldId Field id used to read the field length
 * @param docId Internal document id
 * @param termFreq Term frequency of the posting
 * @param termWeight Weight of the derived term
 * @param termBoost Boost of the query term
 * @param fieldBoost Boost of the field
 * @param matchingFields Matching-document count passed to the BM25 formula
 * @param context Index accessors (`getExternalId`, `getStoredFields`, `getFieldLength`, `documentCount`)
 * @param boostDocumentFn Optional function returning a per-document boost
 * @param bm25 Field constants produced by `bm25FieldConstants`
 * @param results Map from document id to raw result, modified in place
 * @param derivedTermCache Memo object passed to `getDerivedTerm`
 */
function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache) {
    const matchedTerm = getDerivedTerm(derivedTerm, derivedTermCache);
    let docBoost = 1;
    if (boostDocumentFn) {
        docBoost = boostDocumentFn(context.getExternalId(docId), matchedTerm, context.getStoredFields(docId));
    }
    if (!docBoost) {
        return;
    }
    const fieldLength = context.getFieldLength(docId, fieldId);
    const rawScore = calcBM25ScoreWithConstants(termFreq, matchingFields, context.documentCount, fieldLength, bm25);
    const weightedScore = termWeight * termBoost * fieldBoost * docBoost * rawScore;
    const existing = results.get(docId);
    if (!existing) {
        results.set(docId, {
            score: weightedScore,
            terms: [sourceTerm],
            match: { [matchedTerm]: [field] },
        });
        return;
    }
    existing.score += weightedScore;
    assignUniqueTerm(existing.terms, sourceTerm);
    const matchedFields = getOwnProperty(existing.match, matchedTerm);
    if (matchedFields) {
        matchedFields.push(field);
    }
    else {
        existing.match[matchedTerm] = [field];
    }
}
134
/**
 * Score every posting of a {@link SegmentPostingList} for one term and field,
 * reading the shared buffers directly.
 *
 * Inactive documents (per `context.isDocActive`) are reported to
 * `context.onInactiveDoc` when that hook exists, lower the matching count
 * used for the postings that follow, and are not scored. Documents missing
 * from `allowedDocs` are skipped without changing the count.
 *
 * @param sourceTerm Query term
 * @param derivedTerm Matched index term (string or lazy term)
 * @param termWeight Weight of the derived term
 * @param termBoost Boost of the query term
 * @param field Field name
 * @param fieldId Field id
 * @param fieldBoost Boost of the field
 * @param list Segment view over the posting buffers
 * @param context Index accessors and state
 * @param boostDocumentFn Optional per-document boost function
 * @param bm25params BM25+ parameters
 * @param results Map from document id to raw result, modified in place
 * @param allowedDocs Optional set restricting which documents are scored
 * @return The matching count left after discounting inactive documents
 */
function aggregateSegmentPostingList(sourceTerm, derivedTerm, termWeight, termBoost, field, fieldId, fieldBoost, list, context, boostDocumentFn, bm25params, results, allowedDocs) {
    let matchingFields = list.length;
    const bm25 = bm25FieldConstants(bm25params, context.avgFieldLength[fieldId]);
    const { docIds, freqs, offset, length } = list;
    const derivedTermCache = {};
    const end = offset + length;
    for (let pos = offset; pos < end; pos++) {
        const docId = readDocId(docIds, pos);
        const termFreq = freqs[pos];
        const isInactive = context.isDocActive != null && !context.isDocActive(docId);
        if (isInactive) {
            const onInactiveDoc = context.onInactiveDoc;
            if (onInactiveDoc !== null && onInactiveDoc !== undefined) {
                onInactiveDoc.call(context, docId, fieldId, getDerivedTerm(derivedTerm, derivedTermCache));
            }
            matchingFields -= 1;
            continue;
        }
        const isExcluded = allowedDocs != null && !allowedDocs.has(docId);
        if (isExcluded) {
            continue;
        }
        scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache);
    }
    return matchingFields;
}
154
/**
 * Score all postings of one derived term across the searched fields and
 * accumulate them into `results`.
 *
 * For each searched field the posting list is fetched from `fieldTermData` by
 * field id. {@link SegmentPostingList} instances go through
 * `aggregateSegmentPostingList`; any other list is walked with its
 * `forEachDoc` method, applying the same inactive-document and `allowedDocs`
 * rules.
 *
 * @param sourceTerm Query term
 * @param derivedTerm Matched index term (string or lazy term)
 * @param termWeight Weight of the derived term
 * @param termBoost Boost of the query term
 * @param fieldTermData Map-like from field id to posting list; nullish means no match
 * @param fieldBoosts `{ names, boosts }` as built by `fieldBoostsForQuery`
 * @param context Index accessors and state
 * @param boostDocumentFn Optional per-document boost function
 * @param bm25params BM25+ parameters
 * @param results Map from document id to raw result (created when omitted)
 * @param termOptions Optional `{ allowedDocs }` restricting the scored documents
 * @return The `results` map
 */
function aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTermData, fieldBoosts, context, boostDocumentFn, bm25params, results = new Map(), termOptions) {
    if (fieldTermData == null) {
        return results;
    }
    const allowedDocs = termOptions !== null && termOptions !== undefined
        ? termOptions.allowedDocs
        : undefined;
    for (const field of fieldBoosts.names) {
        const fieldBoost = fieldBoosts.boosts[field];
        const fieldId = context.fieldIds[field];
        const postingList = fieldTermData.get(fieldId);
        if (postingList == null) {
            continue;
        }
        if (postingList instanceof SegmentPostingList) {
            aggregateSegmentPostingList(sourceTerm, derivedTerm, termWeight, termBoost, field, fieldId, fieldBoost, postingList, context, boostDocumentFn, bm25params, results, allowedDocs);
            continue;
        }
        let matchingFields = postingList.size;
        const bm25 = bm25FieldConstants(bm25params, context.avgFieldLength[fieldId]);
        const derivedTermCache = {};
        const visitPosting = (docId, termFreq) => {
            const isInactive = context.isDocActive != null && !context.isDocActive(docId);
            if (isInactive) {
                const onInactiveDoc = context.onInactiveDoc;
                if (onInactiveDoc !== null && onInactiveDoc !== undefined) {
                    onInactiveDoc.call(context, docId, fieldId, getDerivedTerm(derivedTerm, derivedTermCache));
                }
                // Later postings of this list are scored with the reduced count.
                matchingFields -= 1;
                return;
            }
            const isExcluded = allowedDocs != null && !allowedDocs.has(docId);
            if (isExcluded) {
                return;
            }
            scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache);
        };
        postingList.forEachDoc(visitPosting);
    }
    return results;
}
185
/**
 * Binary merge functions for raw result maps, keyed by combination operator.
 * Each one modifies and returns its first argument `a`.
 */
const combinators = {
    // Union: entries only in `b` are moved over; shared entries add their
    // scores and merge their matches and terms.
    [OR]: (a, b) => {
        for (const [docId, incoming] of b) {
            const existing = a.get(docId);
            if (existing == null) {
                a.set(docId, incoming);
                continue;
            }
            existing.score = existing.score + incoming.score;
            existing.match = Object.assign(existing.match, incoming.match);
            assignUniqueTerms(existing.terms, incoming.terms);
        }
        return a;
    },
    // Intersection: entries of `a` absent from `b` are removed; the others
    // add their scores and merge their terms and matches.
    [AND]: (a, b) => {
        for (const [docId, existing] of a) {
            const incoming = b.get(docId);
            if (incoming == null) {
                a.delete(docId);
                continue;
            }
            existing.score += incoming.score;
            assignUniqueTerms(existing.terms, incoming.terms);
            Object.assign(existing.match, incoming.match);
        }
        return a;
    },
    // Difference: every document present in `b` is removed from `a`.
    [AND_NOT]: (a, b) => {
        for (const docId of b.keys()) {
            a.delete(docId);
        }
        return a;
    },
};
222
/**
 * Combines per-term raw results. Mutates `results[0]` in place (OR/AND/AND_NOT); do not reuse
 * other entries in `results` after this call.
 *
 * @param results Array of raw result maps, one per query term
 * @param combineWith Combination operator name, case-insensitive (`or`, `and`, `and_not`)
 * @return The combined map (`results[0]` after merging), or a new empty map
 * when `results` is empty
 * @throws Error when `combineWith` does not name a known operator
 */
function combineResults(results, combineWith = OR) {
    if (results.length === 0) {
        return new Map();
    }
    const operator = combineWith.toLowerCase();
    // Own-property lookup only: a plain `combinators[operator]` would also
    // resolve inherited names such as `constructor` or `__proto__`.
    const combinator = getOwnProperty(combinators, operator);
    if (typeof combinator !== 'function') {
        throw new Error(`Invalid combination operator: ${combineWith}`);
    }
    // Pass exactly the two maps; `reduce` also supplies an index and the array.
    return results.reduce((combined, next) => combinator(combined, next));
}
236
/**
 * Merge the global and per-query search options, decide whether sorting can
 * be skipped, then delegate to {@link finalizeSearchResults}.
 *
 * Sorting is skipped only for the wildcard query when no `boostDocument`
 * option is set.
 *
 * @param rawResults Map from document id to raw result
 * @param query The query that produced `rawResults`
 * @param searchOptions Per-query options (take precedence)
 * @param globalSearchOptions Index-wide default options
 * @param getExternalId Maps an internal document id to its external id
 * @param getStoredFields Returns the stored fields of a document
 * @return The finalized result list
 */
function finalizeRawSearchResults(rawResults, query, searchOptions, globalSearchOptions, getExternalId, getStoredFields) {
    const effectiveOptions = { ...globalSearchOptions, ...searchOptions };
    const skipSort = isWildcardQuery(query) && effectiveOptions.boostDocument == null;
    const filter = effectiveOptions.filter;
    return finalizeSearchResults({ rawResults, getExternalId, getStoredFields, filter, skipSort });
}
251
/**
 * Turn raw per-document results into the public result objects.
 *
 * Each result carries the external `id`, the raw score multiplied by the
 * number of matched query terms (at least 1), the matched index terms
 * (`terms`), the query terms (`queryTerms`) and the `match` info. The
 * document's stored fields are then copied onto it. Results rejected by
 * `filter` are dropped. The list is sorted by descending score unless
 * `skipSort` is set.
 *
 * @param params `{ rawResults, getExternalId, getStoredFields, filter, skipSort }`
 * @return Array of result objects
 */
function finalizeSearchResults(params) {
    const { rawResults, getExternalId, getStoredFields, filter, skipSort } = params;
    const accepted = [];
    for (const [docId, raw] of rawResults) {
        const { score, terms, match } = raw;
        const quality = terms.length || 1;
        const candidate = {
            id: getExternalId(docId),
            score: score * quality,
            terms: Object.keys(match),
            queryTerms: terms,
            match,
        };
        Object.assign(candidate, getStoredFields(docId));
        const isKept = filter == null || filter(candidate);
        if (isKept) {
            accepted.push(candidate);
        }
    }
    if (skipSort) {
        return accepted;
    }
    accepted.sort(byScore);
    return accepted;
}
273
/**
 * Build a mapper turning a query term into its query spec.
 *
 * `fuzzy`, `prefix` and `boostTerm` may be functions of `(term, index, terms)`.
 * Otherwise `fuzzy` is used as given (falsy becomes `false`), `prefix` is
 * enabled only when it is exactly `true`, and the term boost is `1`.
 *
 * @param options Search options carrying `fuzzy`, `prefix` and `boostTerm`
 * @return A function `(term, i, terms) => { term, fuzzy, prefix, termBoost }`
 */
const termToQuerySpec = (options) => (term, i, terms) => {
    let fuzzy;
    if (typeof options.fuzzy === 'function') {
        fuzzy = options.fuzzy(term, i, terms);
    }
    else {
        fuzzy = options.fuzzy || false;
    }
    let prefix;
    if (typeof options.prefix === 'function') {
        prefix = options.prefix(term, i, terms);
    }
    else {
        prefix = options.prefix === true;
    }
    let termBoost = 1;
    if (typeof options.boostTerm === 'function') {
        termBoost = options.boostTerm(term, i, terms);
    }
    return { term, fuzzy, prefix, termBoost };
};
285
+
286
/**
 * Scratch marker meaning "this node carries no leaf", used only while a tree
 * is being assembled from a mutable `RadixTree` or a decoded binary section.
 * It never appears in the packed arrays: there a missing leaf is encoded as
 * `nodeLeafOrder === 0` (see {@link PackedRadixTreeData.nodeLeafOrder}), so
 * `nodeValue` needs no sentinel and both columns can use the narrowest typed
 * array.
 */
const PACKED_NO_VALUE = 0xffffffff;
/** Maximum length, in UTF-16 code units, of a single edge label. */
const MAX_PACKED_EDGE_LABEL_LENGTH = 0xffff;
297
+
298
/**
 * Allocate the narrowest unsigned typed array able to hold every value in
 * `[0, maxValue]`, to keep small and medium indexes compact.
 *
 * @param length Number of cells to allocate
 * @param maxValue Largest value the array has to hold
 * @return A zero-filled `Uint8Array`, `Uint16Array` or `Uint32Array`
 */
function packedIndexArray(length, maxValue) {
    let ArrayType = Uint32Array;
    if (maxValue <= 0xff) {
        ArrayType = Uint8Array;
    }
    else if (maxValue <= 0xffff) {
        ArrayType = Uint16Array;
    }
    return new ArrayType(length);
}
311
/**
 * Decode a stored `nodeLeafOrder` cell into a sibling slot.
 * The stored value is `slot + 1`, with `0` meaning "no leaf", so subtracting
 * one yields `-1` for a leafless node and the leaf's slot otherwise.
 *
 * @param storedLeafOrder Raw cell value
 * @return The leaf slot, or `-1` when the node has no leaf
 */
function decodeLeafSlot(storedLeafOrder) {
    const leafSlot = storedLeafOrder - 1;
    return leafSlot;
}
319
/**
 * Count the logical children of a node: its radix edges plus one slot when
 * the node carries a leaf.
 *
 * @param edgeCount Number of outgoing edges
 * @param hasLeaf Whether the node carries a leaf
 * @return Number of logical child slots
 */
function packedNodeChildCount(edgeCount, hasLeaf) {
    const leafSlots = hasLeaf ? 1 : 0;
    return edgeCount + leafSlots;
}
325
/**
 * Translate a logical child slot (which may be the leaf slot) into an offset
 * among the node's edges.
 *
 * @param slot Logical child slot
 * @param leafSlot Decoded leaf slot (`-1` when the node has no leaf)
 * @return `-1` when `slot` is the leaf slot; otherwise the edge offset, which
 * is `slot` reduced by one when the leaf sits before it
 */
function edgeOffsetAtSlot(slot, leafSlot) {
    if (slot === leafSlot) {
        return -1;
    }
    const leafPrecedesSlot = leafSlot >= 0 && leafSlot < slot;
    const shift = leafPrecedesSlot ? 1 : 0;
    return slot - shift;
}
336
+
337
/**
 * Check the structural invariants of a packed radix tree used as a frozen
 * term index, throwing on the first violation.
 *
 * The checks run in this order:
 * 1. column lengths agree with `nodeCount` / `edgeCount`;
 * 2. a root node exists;
 * 3. edge offsets start at 0 and end at `edgeCount`;
 * 4. per node: offsets are monotonic, a leafless node has value 0, the leaf
 *    slot is in range, and the leaf value is a unique integer in `[0, termCount)`;
 * 5. per edge: the label range is non-empty and inside `labelHeap`, and the
 *    child index is below `nodeCount`;
 * 6. leaf count and `tree.size` both equal `termCount`.
 *
 * @param tree Packed tree data
 * @param termCount Number of terms the index is expected to hold
 * @throws Error describing the first violated invariant
 */
function validateFrozenTermIndexLeaves(tree, termCount) {
    const columnLengthsAgree = tree.nodeEdgeOffset.length === tree.nodeCount + 1
        && tree.nodeValue.length === tree.nodeCount
        && tree.nodeLeafOrder.length === tree.nodeCount
        && tree.edgeLabelStart.length === tree.edgeCount
        && tree.edgeLabelLength.length === tree.edgeCount
        && tree.edgeChild.length === tree.edgeCount;
    if (!columnLengthsAgree) {
        throw new Error('FrozenTermIndex: array length mismatch');
    }
    if (tree.nodeCount === 0) {
        throw new Error('FrozenTermIndex: missing root node');
    }
    const offsetsBounded = tree.nodeEdgeOffset[0] === 0
        && tree.nodeEdgeOffset[tree.nodeCount] === tree.edgeCount;
    if (!offsetsBounded) {
        throw new Error('FrozenTermIndex: edge offsets not bounded by [0, edgeCount]');
    }
    // One flag per term index, to detect two leaves pointing at the same term.
    const seenLeaves = new Uint8Array(termCount);
    let leafCount = 0;
    for (let node = 0; node < tree.nodeCount; node++) {
        const firstEdge = tree.nodeEdgeOffset[node];
        const edgeSpan = tree.nodeEdgeOffset[node + 1] - firstEdge;
        if (edgeSpan < 0) {
            throw new Error(`FrozenTermIndex: node ${node} edge offsets not monotonic`);
        }
        const leafSlot = decodeLeafSlot(tree.nodeLeafOrder[node]);
        const v = tree.nodeValue[node];
        if (leafSlot < 0) {
            if (v !== 0) {
                throw new Error(`FrozenTermIndex: node ${node} has value without leaf`);
            }
            continue;
        }
        if (leafSlot >= edgeSpan + 1) {
            throw new Error(`FrozenTermIndex: node ${node} leaf order out of bounds`);
        }
        leafCount += 1;
        const isValidTermIndex = Number.isInteger(v) && v >= 0 && v < termCount;
        if (!isValidTermIndex) {
            throw new Error(`FrozenTermIndex: leaf index out of range: ${v}`);
        }
        if (seenLeaves[v] !== 0) {
            throw new Error(`FrozenTermIndex: duplicate leaf index: ${v}`);
        }
        seenLeaves[v] = 1;
    }
    for (let edge = 0; edge < tree.edgeCount; edge++) {
        const labelStart = tree.edgeLabelStart[edge];
        const labelLength = tree.edgeLabelLength[edge];
        const labelOutOfBounds = labelLength === 0
            || labelLength > MAX_PACKED_EDGE_LABEL_LENGTH
            || labelStart + labelLength > tree.labelHeap.length;
        if (labelOutOfBounds) {
            throw new Error(`FrozenTermIndex: edge ${edge} label range out of bounds`);
        }
        if (tree.edgeChild[edge] >= tree.nodeCount) {
            throw new Error(`FrozenTermIndex: edge ${edge} child out of bounds`);
        }
    }
    if (leafCount !== termCount) {
        throw new Error(`FrozenTermIndex: leaf count ${leafCount} !== termCount ${termCount}`);
    }
    if (tree.size !== termCount) {
        throw new Error(`FrozenTermIndex: size ${tree.size} !== termCount ${termCount}`);
    }
}
398
+
399
/**
 * Iteration mode yielding `[key, value]` pairs.
 * @ignore
 */
const ENTRIES = 'ENTRIES';
/**
 * Iteration mode yielding keys only.
 * @ignore
 */
const KEYS = 'KEYS';
/**
 * Iteration mode yielding values only.
 * @ignore
 */
const VALUES = 'VALUES';
/**
 * Key under which a radix tree node stores the value of the term ending there.
 * @ignore
 */
const LEAF = '';
407
/**
 * Depth-first iterator over the entries of a radix tree.
 *
 * `_path` is a stack of `{ node, keys }` frames, where `keys` lists the child
 * keys of `node` still to be visited. The last key of each frame is the one
 * currently being explored.
 *
 * @private
 */
class TreeIterator {
    /**
     * @param set Map-like object exposing `_tree` (root node) and `_prefix`
     * @param type One of `ENTRIES`, `KEYS` or `VALUES`
     */
    constructor(set, type) {
        const root = set._tree;
        const rootKeys = Array.from(root.keys());
        this.set = set;
        this._type = type;
        this._path = rootKeys.length > 0 ? [{ node: root, keys: rootKeys }] : [];
    }
    /** Return the next iteration step, then move past it. */
    next() {
        const step = this.dive();
        this.backtrack();
        return step;
    }
    /**
     * Descend along the last pending key of each frame until a leaf is
     * reached, and return it as an iteration step. Returns a `done` step
     * when no frame is left.
     */
    dive() {
        while (this._path.length > 0) {
            const { node, keys } = last$1(this._path);
            const pendingKey = last$1(keys);
            if (pendingKey === LEAF) {
                return { done: false, value: this.result() };
            }
            const child = node.get(pendingKey);
            this._path.push({ node: child, keys: Array.from(child.keys()) });
        }
        return { done: true, value: undefined };
    }
    /**
     * Drop the key just visited, popping every frame that has no pending
     * key left.
     */
    backtrack() {
        while (this._path.length > 0) {
            const pendingKeys = last$1(this._path).keys;
            pendingKeys.pop();
            if (pendingKeys.length > 0) {
                return;
            }
            this._path.pop();
        }
    }
    /** Full key of the current entry: the set prefix plus the edge labels on the path. */
    key() {
        const labels = [];
        for (const frame of this._path) {
            const label = last$1(frame.keys);
            if (label !== LEAF) {
                labels.push(label);
            }
        }
        return this.set._prefix + labels.join('');
    }
    /** Value stored at the node on top of the path. */
    value() {
        const top = last$1(this._path);
        return top.node.get(LEAF);
    }
    /** Current entry shaped according to the iteration type. */
    result() {
        if (this._type === VALUES) {
            return this.value();
        }
        if (this._type === KEYS) {
            return this.key();
        }
        return [this.key(), this.value()];
    }
    [Symbol.iterator]() {
        return this;
    }
}
467
/** Last element of `array`, or `undefined` when it is empty. */
const last$1 = (array) => array[array.length - 1];
470
+
471
+ /**
472
+ * Length bounds for fuzzy radix traversal (B1).
473
+ * Only prunes when the dictionary prefix cannot be a prefix of any match.
474
+ */
475
/**
 * Tell whether a dictionary prefix is already longer than any term that can
 * lie within the edit distance of the query.
 *
 * @param dictPrefixLen Length of the dictionary prefix
 * @param queryLen Length of the query
 * @param maxDistance Maximum edit distance
 * @return `true` when `dictPrefixLen` exceeds `queryLen + maxDistance`
 */
function isDictPrefixTooLong(dictPrefixLen, queryLen, maxDistance) {
    const longestMatchLen = queryLen + maxDistance;
    return dictPrefixLen > longestMatchLen;
}
479
/**
 * Decide whether an edge can be skipped during fuzzy traversal because
 * following it makes the dictionary prefix too long for any match.
 *
 * @param prefixLen Dictionary characters on the path before this edge
 * @param edgeLen Compressed edge label length
 * @param queryLen Length of the query
 * @param maxDistance Maximum edit distance
 * @return `true` when the edge should be pruned
 */
function shouldPruneFuzzyEdge(prefixLen, edgeLen, queryLen, maxDistance) {
    const prefixLenAfterEdge = prefixLen + edgeLen;
    return isDictPrefixTooLong(prefixLenAfterEdge, queryLen, maxDistance);
}
486
+
487
+ /* eslint-disable no-labels */
488
/**
 * Collect every key of a radix tree within `maxDistance` edits of `query`.
 *
 * @param node Root node of the radix tree
 * @param query Search key; `undefined` yields an empty result
 * @param maxDistance Maximum edit distance
 * @return Map from matching key to `[value, distance]`
 * @ignore
 */
const fuzzySearch = (node, query, maxDistance) => {
    const results = new Map();
    if (query === undefined) {
        return results;
    }
    // Columns of the Levenshtein matrix: one per query character, plus one.
    const n = query.length + 1;
    // Rows: matching terms can never be longer than N + maxDistance.
    const m = n + maxDistance;
    // Every cell starts above the threshold; the first row and the first
    // column are then numbered 0 1 2 3 ...
    const matrix = new Uint8Array(m * n).fill(maxDistance + 1);
    for (let col = 0; col < n; ++col) {
        matrix[col] = col;
    }
    for (let row = 1, rowStart = n; row < m; ++row, rowStart += n) {
        matrix[rowStart] = row;
    }
    recurse$1(node, query, maxDistance, results, matrix, 1, n, '');
    return results;
};
508
// Modified version of http://stevehanov.ca/blog/?id=114
//
// Walks the radix tree while filling one Levenshtein matrix row per
// dictionary character. `m` is the index of the next row to fill, `n` the
// row width and `prefix` the dictionary key accumulated so far. Matches are
// recorded in `results` as `prefix => [value, distance]`.
const recurse$1 = (node, query, maxDistance, results, matrix, m, n, prefix) => {
    // Last cell of the row filled for the final character of `prefix`.
    const leafCell = m * n - 1;
    nextEdge: for (const edge of node.keys()) {
        if (edge === LEAF) {
            const distance = matrix[leafCell];
            if (distance <= maxDistance) {
                results.set(prefix, [node.get(edge), distance]);
            }
            continue;
        }
        // m = next matrix row index; dictionary prefix on path has length m - 1
        if (shouldPruneFuzzyEdge(m - 1, edge.length, query.length, maxDistance)) {
            continue;
        }
        let row = m;
        for (let pos = 0; pos < edge.length; ++pos, ++row) {
            const char = edge[pos];
            const rowBase = n * row;
            const prevRowBase = rowBase - n;
            let rowMin = matrix[rowBase];
            // Only the band of columns within maxDistance of the diagonal is computed.
            const firstCol = Math.max(0, row - maxDistance - 1);
            const lastCol = Math.min(n - 1, row + maxDistance);
            for (let col = firstCol; col < lastCol; ++col) {
                const replaceCost = matrix[prevRowBase + col] + (char !== query[col] ? 1 : 0);
                const deleteCost = matrix[prevRowBase + col + 1] + 1;
                const insertCost = matrix[rowBase + col] + 1;
                const best = Math.min(replaceCost, deleteCost, insertCost);
                matrix[rowBase + col + 1] = best;
                if (best < rowMin) {
                    rowMin = best;
                }
            }
            // Every cell of the row exceeds the threshold: abandon this edge.
            if (rowMin > maxDistance) {
                continue nextEdge;
            }
        }
        recurse$1(node.get(edge), query, maxDistance, results, matrix, row, n, prefix + edge);
    }
};
548
+
549
+ /* eslint-disable no-labels */
550
/**
 * A class implementing the same interface as a standard JavaScript
 * [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map)
 * with string keys, extended with efficient prefix and fuzzy lookups. It is
 * used internally by {@link MiniSearch} as the inverted index data structure
 * and is implemented as a radix tree (compressed prefix tree).
 *
 * The class is internal to `@yoch/frozenminisearch`; it is not a separate
 * public entry point.
 *
 * @typeParam T The type of the values stored in the map.
 */
class SearchableMap {
    /**
     * Normally called without arguments to create an empty map. To build a
     * {@link SearchableMap} from an iterable or from an object, see
     * {@link SearchableMap.from} and {@link SearchableMap.fromObject}.
     *
     * The arguments are for internal use, when creating derived mutable
     * views of a map at a prefix.
     *
     * @param tree Root node of the backing radix tree
     * @param prefix Key prefix this view is rooted at
     */
    constructor(tree = new Map(), prefix = '') {
        // Cached entry count; `undefined` means "recount on next `size` read".
        this._size = undefined;
        this._tree = tree;
        this._prefix = prefix;
    }
    /**
     * Root radix tree backing this map. Used when cloning or serializing the
     * full index so that {@link Map} key insertion order (prefix / fuzzy /
     * autoSuggest) is preserved.
     */
    get radixTree() {
        return this._tree;
    }
    /**
     * Creates and returns a mutable view of this {@link SearchableMap},
     * containing only entries that share the given prefix.
     *
     * ### Usage:
     *
     * ```javascript
     * let map = new SearchableMap()
     * map.set("unicorn", 1)
     * map.set("universe", 2)
     * map.set("university", 3)
     * map.set("unique", 4)
     * map.set("hello", 5)
     *
     * let uni = map.atPrefix("uni")
     * uni.get("unique") // => 4
     * uni.get("unicorn") // => 1
     * uni.get("hello") // => undefined
     *
     * let univer = map.atPrefix("univer")
     * univer.get("unique") // => undefined
     * univer.get("universe") // => 2
     * univer.get("university") // => 3
     * ```
     *
     * @param prefix The prefix
     * @return A {@link SearchableMap} representing a mutable view of the
     * original Map at the given prefix
     */
    atPrefix(prefix) {
        if (!prefix.startsWith(this._prefix)) {
            throw new Error('Mismatched prefix');
        }
        const relativePrefix = prefix.slice(this._prefix.length);
        const [node, path] = trackDown(this._tree, relativePrefix);
        if (node !== undefined) {
            return new SearchableMap(node, prefix);
        }
        // The prefix ends in the middle of an edge label: expose the rest of
        // that edge through a synthetic one-edge node.
        const [parentNode, remainder] = last(path);
        for (const edge of parentNode.keys()) {
            if (edge === LEAF || !edge.startsWith(remainder)) {
                continue;
            }
            const partial = new Map();
            partial.set(edge.slice(remainder.length), parentNode.get(edge));
            return new SearchableMap(partial, prefix);
        }
        // Nothing matches the prefix: the view is backed by a fresh empty tree.
        return new SearchableMap(undefined, prefix);
    }
    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/clear
     */
    clear() {
        this._size = undefined;
        this._tree.clear();
    }
    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/delete
     * @param key Key to delete
     */
    delete(key) {
        this._size = undefined;
        return remove(this._tree, key);
    }
    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/entries
     * @return An iterator iterating through `[key, value]` entries.
     */
    entries() {
        return new TreeIterator(this, ENTRIES);
    }
    /**
     * Calls `fn(key, value, map)` for every entry. Note that the key comes
     * first, unlike the callback of the standard `Map.prototype.forEach`.
     *
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/forEach
     * @param fn Iteration function
     */
    forEach(fn) {
        for (const entry of this) {
            const [key, value] = entry;
            fn(key, value, this);
        }
    }
    /**
     * Returns a Map of all the entries whose key is within the given edit
     * distance from the search key. The keys of the returned Map are the
     * matching keys; each value is a two-element array holding the value
     * associated to the key and the edit distance of the key to the search
     * key.
     *
     * ### Usage:
     *
     * ```javascript
     * let map = new SearchableMap()
     * map.set('hello', 'world')
     * map.set('hell', 'yeah')
     * map.set('ciao', 'mondo')
     *
     * // Get all entries that match the key 'hallo' with a maximum edit distance of 2
     * map.fuzzyGet('hallo', 2)
     * // => Map(2) { 'hello' => ['world', 1], 'hell' => ['yeah', 2] }
     *
     * // "hello" has value "world" and edit distance 1 (change "e" to "a");
     * // "hell" has value "yeah" and edit distance 2 (change "e" to "a", delete "o")
     * ```
     *
     * @param key The search key
     * @param maxEditDistance The maximum edit distance (Levenshtein)
     * @return A Map of the matching keys to their value and edit distance
     */
    fuzzyGet(key, maxEditDistance) {
        return fuzzySearch(this._tree, key, maxEditDistance);
    }
    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/get
     * @param key Key to get
     * @return Value associated to the key, or `undefined` if the key is not
     * found.
     */
    get(key) {
        const node = lookup(this._tree, key);
        if (node === undefined) {
            return undefined;
        }
        return node.get(LEAF);
    }
    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/has
     * @param key Key
     * @return True if the key is in the map, false otherwise
     */
    has(key) {
        const node = lookup(this._tree, key);
        if (node === undefined) {
            return false;
        }
        return node.has(LEAF);
    }
    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/keys
     * @return An `Iterable` iterating through keys
     */
    keys() {
        return new TreeIterator(this, KEYS);
    }
    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/set
     * @param key Key to set
     * @param value Value to associate to the key
     * @return The {@link SearchableMap} itself, to allow chaining
     */
    set(key, value) {
        if (typeof key !== 'string') {
            throw new Error('key must be a string');
        }
        this._size = undefined;
        createPath(this._tree, key).set(LEAF, value);
        return this;
    }
    /**
     * Number of entries. The count is obtained by walking all entries and is
     * cached until the next modifying call.
     *
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/size
     */
    get size() {
        if (this._size !== undefined) {
            return this._size;
        }
        /** @ignore */
        this._size = 0;
        const iter = this.entries();
        for (let step = iter.next(); !step.done; step = iter.next()) {
            this._size += 1;
        }
        return this._size;
    }
    /**
     * Updates the value at the given key using the provided function. The
     * function is called with the current value at the key, and its return
     * value is stored as the new value.
     *
     * ### Example:
     *
     * ```javascript
     * // Increment the current value by one
     * searchableMap.update('somekey', (currentValue) => currentValue == null ? 0 : currentValue + 1)
     * ```
     *
     * If the value at the given key is or will be an object, it might not
     * require re-assignment. In that case it is better to use `fetch()`,
     * because it is faster.
     *
     * @param key The key to update
     * @param fn The function used to compute the new value from the current one
     * @return The {@link SearchableMap} itself, to allow chaining
     */
    update(key, fn) {
        if (typeof key !== 'string') {
            throw new Error('key must be a string');
        }
        this._size = undefined;
        const node = createPath(this._tree, key);
        const updated = fn(node.get(LEAF));
        node.set(LEAF, updated);
        return this;
    }
    /**
     * Fetches the value of the given key. If the value does not exist, calls
     * the given function to create a new value, which is inserted at the
     * given key and then returned.
     *
     * ### Example:
     *
     * ```javascript
     * const map = searchableMap.fetch('somekey', () => new Map())
     * map.set('foo', 'bar')
     * ```
     *
     * @param key The key to update
     * @param initial A function that creates a new value if the key does not exist
     * @return The existing or new value at the given key
     */
    fetch(key, initial) {
        if (typeof key !== 'string') {
            throw new Error('key must be a string');
        }
        this._size = undefined;
        const node = createPath(this._tree, key);
        const existing = node.get(LEAF);
        if (existing !== undefined) {
            return existing;
        }
        const created = initial();
        node.set(LEAF, created);
        return created;
    }
    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/values
     * @return An `Iterable` iterating through values.
     */
    values() {
        return new TreeIterator(this, VALUES);
    }
    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/@@iterator
     */
    [Symbol.iterator]() {
        return this.entries();
    }
    /**
     * Creates a {@link SearchableMap} from an `Iterable` of entries
     *
     * @param entries Entries to be inserted in the {@link SearchableMap}
     * @return A new {@link SearchableMap} with the given entries
     */
    static from(entries) {
        const map = new SearchableMap();
        for (const [key, value] of entries) {
            map.set(key, value);
        }
        return map;
    }
    /**
     * Creates a {@link SearchableMap} from the iterable properties of a JavaScript object
     *
     * @param object Object of entries for the {@link SearchableMap}
     * @return A new {@link SearchableMap} with the given entries
     */
    static fromObject(object) {
        const entries = Object.entries(object);
        return SearchableMap.from(entries);
    }
}
841
/**
 * Follow `key` down the radix tree, recording the path taken.
 *
 * Each path element is a `[node, edge]` pair: the node visited and the edge
 * label followed from it. When no edge matches, the last pair holds the
 * unmatched remainder of the key instead, and the returned node is `undefined`.
 *
 * @param tree Node to start from
 * @param key Key to follow
 * @param path Array the visited pairs are appended to (updated in place)
 * @return `[node, path]` where `node` is the node reached, or `undefined`
 */
const trackDown = (tree, key, path = []) => {
    let node = tree;
    let rest = key;
    while (rest.length !== 0 && node != null) {
        let followed = false;
        for (const k of node.keys()) {
            if (k !== LEAF && rest.startsWith(k)) {
                path.push([node, k]); // performance: update in place
                node = node.get(k);
                rest = rest.slice(k.length);
                followed = true;
                break;
            }
        }
        if (!followed) {
            path.push([node, rest]); // performance: update in place
            return [undefined, path];
        }
    }
    return [node, path];
};
854
/**
 * Find the radix tree node reached by following `key` exactly.
 *
 * @param tree Node to start from
 * @param key Key to follow
 * @return The node reached once the whole key is consumed (`tree` itself for
 * an empty key or a nullish tree), or `undefined` when no edge matches
 */
const lookup = (tree, key) => {
    let node = tree;
    let rest = key;
    while (rest.length !== 0 && node != null) {
        let followed = false;
        for (const k of node.keys()) {
            if (k !== LEAF && rest.startsWith(k)) {
                node = node.get(k);
                rest = rest.slice(k.length);
                followed = true;
                break;
            }
        }
        if (!followed) {
            return undefined;
        }
    }
    return node;
};
864
// Create a path in the radix tree for the given key, and returns the deepest
// node. This function is in the hot path for indexing. It avoids unnecessary
// string operations and recursion for performance.
//
// `node` is the Map to start from and `key` the string to insert. Existing
// edges are reused when they fully match, split when they only partially
// match, and a new edge is appended for whatever suffix is left over.
// The tree is mutated in place.
const createPath = (node, key) => {
    const keyLength = key.length;
    // `pos` is only advanced inside the loop body, after an edge has been matched.
    outer: for (let pos = 0; node && pos < keyLength;) {
        for (const k of node.keys()) {
            // Check whether this key is a candidate: the first characters must match.
            if (k !== LEAF && key[pos] === k[0]) {
                const len = Math.min(keyLength - pos, k.length);
                // Advance offset to the point where key and k no longer match.
                let offset = 1;
                while (offset < len && key[pos + offset] === k[offset])
                    ++offset;
                const child = node.get(k);
                if (offset === k.length) {
                    // The existing key is shorter than the key we need to create.
                    node = child;
                }
                else {
                    // Partial match: we need to insert an intermediate node to contain
                    // both the existing subtree and the new node.
                    const intermediate = new Map();
                    // The old subtree hangs off the unmatched tail of the old edge...
                    intermediate.set(k.slice(offset), child);
                    // ...and the shared prefix becomes the new edge from `node`.
                    node.set(key.slice(pos, pos + offset), intermediate);
                    node.delete(k);
                    node = intermediate;
                }
                pos += offset;
                // Restart the edge scan from the node we just descended into.
                continue outer;
            }
        }
        // Create a final child node to contain the final suffix of the key.
        const child = new Map();
        node.set(key.slice(pos), child);
        return child;
    }
    // The key was fully consumed by existing (or freshly split) edges.
    return node;
};
903
/**
 * Delete the leaf stored under `key` and tidy up the branch that led to it.
 *
 * @param tree Root node of the radix tree
 * @param key Key whose leaf should be removed; nothing happens when the walk
 *   down the tree reports a miss
 */
const remove = (tree, key) => {
    const walk = trackDown(tree, key);
    const node = walk[0];
    const path = walk[1];
    if (node === undefined) {
        return;
    }
    node.delete(LEAF);
    switch (node.size) {
        case 0:
            // Nothing left below this node: drop the edge(s) pointing at it.
            cleanup(path);
            break;
        case 1: {
            // A single remaining child can be folded into the parent edge.
            const [onlyKey, onlyValue] = node.entries().next().value;
            merge(path, onlyKey, onlyValue);
            break;
        }
        default:
            break;
    }
};
917
/**
 * Remove the edge recorded in the last step of `path`, then keep pruning
 * upwards for as long as the parent nodes become empty.
 *
 * @param path Steps (`[node, edgeKey]`) from the root down to the removed node
 */
const cleanup = (path) => {
    for (let depth = path.length; depth > 0; depth--) {
        const [node, key] = path[depth - 1];
        node.delete(key);
        if (node.size === 0) {
            // This node is now empty as well: prune it from its own parent.
            continue;
        }
        if (node.size === 1) {
            const [onlyKey, onlyValue] = node.entries().next().value;
            // A lone non-leaf child is folded into the edge above this node.
            if (onlyKey !== LEAF) {
                merge(path.slice(0, depth - 1), onlyKey, onlyValue);
            }
        }
        return;
    }
};
933
/**
 * Fold a child edge into the edge recorded by the last step of `path`:
 * the parent gains an edge labelled `parentEdge + key` pointing at `value`,
 * and the old parent edge is removed.
 *
 * @param path Steps (`[node, edgeKey]`); an empty path is a no-op
 * @param key Label of the child edge being absorbed
 * @param value Subtree the combined edge should point to
 */
const merge = (path, key, value) => {
    const depth = path.length;
    if (depth > 0) {
        const step = path[depth - 1];
        const parent = step[0];
        const parentEdge = step[1];
        // Insert the combined edge first, then drop the old one.
        parent.set(parentEdge + key, value);
        parent.delete(parentEdge);
    }
};
941
/** Return the final element of `array` (`undefined` when it is empty). */
const last = (array) => {
    const tailIndex = array.length - 1;
    return array[tailIndex];
};
944
+
945
/** Extract the `len` units starting at `start` from the label heap. */
function labelSlice(heap, start, len) {
    const stop = start + len;
    return heap.slice(start, stop);
}
948
+
949
+ /* eslint-disable no-labels */
950
/**
 * Lazily iterate the fuzzy matches of `query` in a packed radix tree.
 *
 * Results come from a generator, so matches are produced on demand rather than
 * collected into an array first. This mirrors the Iterable contract of
 * `prefixRefs`; it is slightly slower than an eager array on micro-benchmarks
 * but keeps the ref-first API uniform and leaves room for future query
 * push-down (term-level gate skip, early abort). Reverting to an eager array
 * remains easy should that never pay off.
 */
function packedRadixFuzzyRefs(tree, query, maxDistance) {
    const matches = runFuzzy(tree, query, maxDistance);
    return matches;
}
960
/**
 * @deprecated Internal benchmark/compat wrapper. Prefer `packedRadixFuzzyRefs`.
 *
 * Eagerly collects every fuzzy match as a `[term, termIndex, distance]` tuple,
 * materializing each term string through `tree.termByIndex`.
 */
function packedRadixFuzzyEntries(tree, query, maxDistance) {
    const refs = packedRadixFuzzyRefs(tree, query, maxDistance);
    return Array.from(refs, (ref) => {
        const { termIndex, distance } = ref;
        return [tree.termByIndex(termIndex), termIndex, distance];
    });
}
968
/**
 * Set up the shared edit-distance matrix for `query` and start the fuzzy walk
 * at the root of the packed tree.
 *
 * The matrix has `query.length + 1` columns and `query.length + 1 + maxDistance`
 * rows, stored row-major in one typed array. Row 0 and column 0 are seeded with
 * 0, 1, 2, ...; every other cell starts at `maxDistance + 1`.
 *
 * The cell width is chosen from the largest value the matrix may have to hold.
 * A fixed `Uint8Array` silently wraps values modulo 256, which corrupts the
 * seeded row/column (and the `maxDistance + 1` filler) for queries of 256 or
 * more code units or for very large `maxDistance`.
 *
 * @param tree Packed radix tree to search
 * @param query Query string
 * @param maxDistance Maximum edit distance; a negative value yields nothing
 * @return Generator of the match records produced by `recurse`
 */
function* runFuzzy(tree, query, maxDistance) {
    if (maxDistance < 0)
        return;
    const queryLen = query.length;
    const n = queryLen + 1;
    const m = n + maxDistance;
    // Generous upper bound on any cell value (seeds, filler and computed cells).
    const maxCell = n + m + maxDistance;
    let CellArray = Uint8Array;
    if (maxCell > 0xffff) {
        CellArray = Uint32Array;
    }
    else if (maxCell > 0xff) {
        CellArray = Uint16Array;
    }
    const matrix = new CellArray(m * n).fill(maxDistance + 1);
    for (let j = 0; j < n; ++j)
        matrix[j] = j;
    for (let i = 1; i < m; ++i)
        matrix[i * n] = i;
    // UTF-16 code units of the query, so the inner loop compares numbers only.
    const queryCodes = new Uint16Array(n);
    for (let j = 0; j < queryLen; j++)
        queryCodes[j] = query.charCodeAt(j);
    yield* recurse(tree, queryLen, queryCodes, maxDistance, matrix, 1, 0, 0);
}
984
/**
 * Depth-first fuzzy walk over the packed tree, yielding
 * `{ termIndex, distance, length }` for every term within `maxDistance` of the query.
 *
 * `matrix` is a row-major edit-distance table with `queryLen + 1` columns that is
 * shared by the whole traversal: each character of an edge label fills one row,
 * starting at row `rowStart`, and child calls continue from the next free row.
 *
 * @param tree Packed radix tree being searched
 * @param queryLen Number of code units in the query
 * @param queryCodes Code units of the query
 * @param maxDistance Maximum accepted edit distance
 * @param matrix Shared edit-distance table (mutated in place)
 * @param rowStart First matrix row to fill for the edges of `node`
 * @param node Index of the node whose children are visited
 * @param termLength Total label length accumulated from the root to `node`
 */
function* recurse(tree, queryLen, queryCodes, maxDistance, matrix, rowStart, node, termLength) {
    const heap = tree.labelHeap;
    const n = queryLen + 1;
    const offset = rowStart * n;
    const first = tree.nodeEdgeOffset[node];
    const edgeCount = tree.nodeEdgeOffset[node + 1] - first;
    const leafSlot = decodeLeafSlot(tree.nodeLeafOrder[node]);
    const totalCount = packedNodeChildCount(edgeCount, leafSlot >= 0);
    edge: for (let slot = 0; slot < totalCount; slot++) {
        const edgeOffset = edgeOffsetAtSlot(slot, leafSlot);
        // A negative edge offset is treated as "this slot is the node's own term".
        if (edgeOffset < 0) {
            // Last cell of the previous row: distance between the query and the term ending here.
            const distance = matrix[offset - 1];
            if (distance <= maxDistance) {
                const termIndex = tree.nodeValue[node];
                yield { termIndex, distance, length: termLength };
            }
            continue;
        }
        const ei = first + edgeOffset;
        const labelStart = tree.edgeLabelStart[ei];
        const labelLen = tree.edgeLabelLength[ei];
        // Cheap pre-check (helper defined elsewhere) before doing any matrix work.
        if (shouldPruneFuzzyEdge(rowStart - 1, labelLen, queryLen, maxDistance)) {
            continue edge;
        }
        let i = rowStart;
        let thisRowOffset = rowStart * n;
        // One matrix row per label character.
        for (let pos = 0; pos < labelLen; ++pos, ++i, thisRowOffset += n) {
            const char = heap.charCodeAt(labelStart + pos);
            const prevRowOffset = thisRowOffset - n;
            let minDistance = matrix[thisRowOffset];
            // Only the columns within `maxDistance` of the diagonal are recomputed.
            const jmin = Math.max(0, i - maxDistance - 1);
            const jmax = Math.min(queryLen, i + maxDistance);
            for (let j = jmin; j < jmax; ++j) {
                const different = char === queryCodes[j] ? 0 : 1;
                // Candidates: replace/match (diagonal), delete (above), insert (left).
                const rpl = matrix[prevRowOffset + j] + different;
                const del = matrix[prevRowOffset + j + 1] + 1;
                const ins = matrix[thisRowOffset + j] + 1;
                let dist = rpl;
                if (del < dist)
                    dist = del;
                if (ins < dist)
                    dist = ins;
                matrix[thisRowOffset + j + 1] = dist;
                if (dist < minDistance)
                    minDistance = dist;
            }
            // No cell in this row is within budget: abandon the whole edge.
            if (minDistance > maxDistance) {
                continue edge;
            }
        }
        // `i` is now the first row after this edge's rows; the child continues from there.
        yield* recurse(tree, queryLen, queryCodes, maxDistance, matrix, i, tree.edgeChild[ei], termLength + labelLen);
    }
}
1037
+
1038
/**
 * Build the reverse lookup tables used to rebuild a term from its index:
 * the node holding each term, and every node's parent node and parent edge.
 *
 * @param tree Packed radix tree
 * @return `{ leafNodeByTermIndex, parentNode, parentEdge }` (all empty when the
 *   tree has no terms or no nodes)
 * @throws Error when the number of leaf nodes found differs from `tree.size`
 */
function buildLazyTermMetadata(tree) {
    const { size: termCount, nodeCount, edgeCount } = tree;
    if (termCount === 0 || nodeCount === 0) {
        return {
            leafNodeByTermIndex: new Uint8Array(0),
            parentNode: new Uint8Array(0),
            parentEdge: new Uint8Array(0),
        };
    }
    const lastNode = nodeCount - 1;
    const leafNodeByTermIndex = packedIndexArray(termCount, lastNode);
    const parentNode = packedIndexArray(nodeCount, lastNode);
    const parentEdge = packedIndexArray(nodeCount, Math.max(edgeCount - 1, 0));
    let leavesSeen = 0;
    for (let node = 0; node < nodeCount; node++) {
        const hasLeaf = decodeLeafSlot(tree.nodeLeafOrder[node]) >= 0;
        if (hasLeaf) {
            leafNodeByTermIndex[tree.nodeValue[node]] = node;
            leavesSeen += 1;
        }
        // Every outgoing edge tells its child where it came from.
        const stop = tree.nodeEdgeOffset[node + 1];
        let ei = tree.nodeEdgeOffset[node];
        while (ei < stop) {
            const child = tree.edgeChild[ei];
            parentNode[child] = node;
            parentEdge[child] = ei;
            ei++;
        }
    }
    if (leavesSeen !== termCount) {
        throw new Error(`PackedRadixTree: lazy metadata leaf count ${leavesSeen} !== term count ${termCount}`);
    }
    return { leafNodeByTermIndex, parentNode, parentEdge };
}
1070
/**
 * Guard for term-index based accessors.
 *
 * @throws RangeError when `termIndex` is negative or not below `tree.size`
 */
function assertTermIndex(tree, termIndex) {
    const outOfRange = termIndex < 0 || termIndex >= tree.size;
    if (outOfRange) {
        const message = `PackedRadixTree: term index out of range: ${termIndex}`;
        throw new RangeError(message);
    }
}
1075
/**
 * Rebuild one term string from its index by walking parent edges from the
 * term's leaf node up to the root (node 0) and joining the edge labels in
 * root-to-leaf order.
 *
 * @throws RangeError (via `assertTermIndex`) when `termIndex` is out of range
 */
function reconstructTermFromIndex(tree, metadata, termIndex) {
    assertTermIndex(tree, termIndex);
    const heap = tree.labelHeap;
    const { edgeLabelStart, edgeLabelLength } = tree;
    // Labels are collected leaf-first, then reversed before joining.
    const labels = [];
    for (let node = metadata.leafNodeByTermIndex[termIndex]; node !== 0; node = metadata.parentNode[node]) {
        const ei = metadata.parentEdge[node];
        labels.push(labelSlice(heap, edgeLabelStart[ei], edgeLabelLength[ei]));
    }
    return labels.reverse().join('');
}
1089
/**
 * Length of a term in UTF-16 code units, computed by summing the edge label
 * lengths between the term's leaf node and the root; no string is built.
 *
 * @throws RangeError (via `assertTermIndex`) when `termIndex` is out of range
 */
function termLengthFromIndex(tree, metadata, termIndex) {
    assertTermIndex(tree, termIndex);
    let total = 0;
    for (let node = metadata.leafNodeByTermIndex[termIndex]; node !== 0; node = metadata.parentNode[node]) {
        const ei = metadata.parentEdge[node];
        total += tree.edgeLabelLength[ei];
    }
    return total;
}
1100
+
1101
/**
 * Compare `len` code units of `heap` starting at `start` with `key` starting
 * at `keyOff`.
 *
 * @return `true` when every compared unit is equal (trivially so for `len <= 0`)
 */
function labelsMatch(heap, start, len, key, keyOff) {
    for (let k = 0; k < len; k++) {
        const same = heap.charCodeAt(start + k) === key.charCodeAt(keyOff + k);
        if (!same) {
            return false;
        }
    }
    return true;
}
1108
/**
 * Push a traversal frame for `node` onto `frames`. The frame starts at the
 * node's last child slot and carries the term prefix accumulated so far.
 */
function pushEmitFrame(frames, tree, node, prefix) {
    const first = tree.nodeEdgeOffset[node];
    const outgoing = tree.nodeEdgeOffset[node + 1] - first;
    const leafSlot = decodeLeafSlot(tree.nodeLeafOrder[node]);
    const hasLeaf = leafSlot >= 0;
    const lastSlot = packedNodeChildCount(outgoing, hasLeaf) - 1;
    frames.push({ node, slot: lastSlot, first, leafSlot, prefix });
}
1115
/**
 * Push a traversal frame for `node` onto `frames`. Same as `pushEmitFrame`,
 * except that the frame carries the accumulated term length instead of the
 * prefix string.
 */
function pushEmitRefFrame(frames, tree, node, length) {
    const first = tree.nodeEdgeOffset[node];
    const outgoing = tree.nodeEdgeOffset[node + 1] - first;
    const leafSlot = decodeLeafSlot(tree.nodeLeafOrder[node]);
    const hasLeaf = leafSlot >= 0;
    const lastSlot = packedNodeChildCount(outgoing, hasLeaf) - 1;
    frames.push({ node, slot: lastSlot, first, leafSlot, length });
}
1122
/**
 * Read-only radix tree stored in flat columns.
 *
 * Nodes and edges are identified by integer indices. The outgoing edges of node `i` are the
 * edge indices `nodeEdgeOffset[i] .. nodeEdgeOffset[i + 1] - 1`; each edge has a label
 * (`edgeLabelStart` / `edgeLabelLength` into the `labelHeap` string) and a target node
 * (`edgeChild`). Node 0 is the root. A node with a non-zero `nodeLeafOrder` entry holds a
 * term whose value is stored in `nodeValue`.
 */
class PackedRadixTree {
    /**
     * @param data Object carrying the counters (`size`, `nodeCount`, `edgeCount`), the
     *   `labelHeap` string and the six index columns; they are stored as-is, not copied.
     */
    constructor(data) {
        this.size = data.size;
        this.nodeCount = data.nodeCount;
        this.edgeCount = data.edgeCount;
        this.labelHeap = data.labelHeap;
        this.nodeEdgeOffset = data.nodeEdgeOffset;
        this.nodeValue = data.nodeValue;
        this.nodeLeafOrder = data.nodeLeafOrder;
        this.edgeLabelStart = data.edgeLabelStart;
        this.edgeLabelLength = data.edgeLabelLength;
        this.edgeChild = data.edgeChild;
    }
    /** Factory equivalent to `new PackedRadixTree(data)`. */
    static fromData(data) {
        return new PackedRadixTree(data);
    }
    /**
     * Find the outgoing edge of `node` whose label starts with the code unit `firstChar`.
     *
     * @return The edge index, or -1 when no edge starts with that code unit.
     */
    findEdge(node, firstChar) {
        const end = this.nodeEdgeOffset[node + 1];
        const heap = this.labelHeap;
        for (let ei = this.nodeEdgeOffset[node]; ei < end; ei++) {
            if (heap.charCodeAt(this.edgeLabelStart[ei]) === firstChar)
                return ei;
        }
        return -1;
    }
    /**
     * Exact lookup.
     *
     * @return The value stored for `term`, or `undefined` when the term is absent
     *   (no matching path, the key ends in the middle of an edge, or the node has no leaf).
     */
    get(term) {
        const walk = this.walkKey(term, false);
        if (walk == null || !walk.keyFullyConsumed)
            return undefined;
        // A zero entry in nodeLeafOrder means the node holds no term.
        if (this.nodeLeafOrder[walk.node] === 0)
            return undefined;
        return this.nodeValue[walk.node];
    }
    /** Iterate every `[term, value]` pair, starting from the root. */
    *entries() {
        yield* this.emitSubtree(0, '');
    }
    /** @deprecated Internal benchmark/compat wrapper. Prefer `prefixRefs` + `termByIndex`. */
    *prefixEntries(prefix) {
        const start = this.resolvePrefixWalk(prefix);
        if (start == null)
            return;
        yield* this.emitSubtree(start.node, start.prefix);
    }
    /**
     * Iterate `{ termIndex, length }` records for every term below the node reached by
     * `prefix`; yields nothing when no path matches the prefix.
     */
    *prefixRefs(prefix) {
        const start = this.resolvePrefixWalkRef(prefix);
        if (start == null)
            return;
        yield* this.emitSubtreeRefs(start.node, start.prefixLength);
    }
    /**
     * Walk `prefix` to the subtree root; returns accumulated heap label prefix string.
     * `null` when no terms share the prefix.
     */
    resolvePrefixWalk(prefix) {
        if (prefix.length === 0) {
            return { node: 0, prefix: '' };
        }
        const walk = this.walkKey(prefix, true);
        if (walk == null)
            return null;
        return { node: walk.node, prefix: walk.prefix };
    }
    /**
     * Same walk as `resolvePrefixWalk`, but returns the accumulated label length
     * (`prefixLength`) instead of building the prefix string.
     */
    resolvePrefixWalkRef(prefix) {
        if (prefix.length === 0) {
            return { node: 0, prefixLength: 0 };
        }
        const walk = this.walkKey(prefix, false);
        if (walk == null)
            return null;
        return { node: walk.node, prefixLength: walk.prefixLength };
    }
    /**
     * Follow `key` from the root. Shared by exact lookup and prefix iteration.
     * Mid-edge stop uses the full edge label in `prefix` (SearchableMap parity).
     *
     * @param key Key to follow
     * @param accumulatePrefix When true, the labels of the traversed edges are concatenated
     *   into the returned `prefix`; otherwise `prefix` stays empty
     * @return `null` when the key leaves the tree, else
     *   `{ node, prefix, prefixLength, keyFullyConsumed }`. `keyFullyConsumed` is false when
     *   the key ended part-way through an edge label.
     */
    walkKey(key, accumulatePrefix) {
        let node = 0;
        let prefixStr = '';
        let prefixLength = 0;
        let pos = 0;
        const heap = this.labelHeap;
        const n = key.length;
        while (pos < n) {
            const ei = this.findEdge(node, key.charCodeAt(pos));
            if (ei < 0)
                return null;
            const start = this.edgeLabelStart[ei];
            const len = this.edgeLabelLength[ei];
            const remaining = n - pos;
            if (remaining < len) {
                // The key ends inside this edge: only its remaining part has to match.
                if (!labelsMatch(heap, start, remaining, key, pos))
                    return null;
                if (accumulatePrefix)
                    prefixStr += labelSlice(heap, start, len);
                prefixLength += len;
                return { node: this.edgeChild[ei], prefix: prefixStr, prefixLength, keyFullyConsumed: false };
            }
            if (!labelsMatch(heap, start, len, key, pos))
                return null;
            if (accumulatePrefix)
                prefixStr += labelSlice(heap, start, len);
            prefixLength += len;
            pos += len;
            node = this.edgeChild[ei];
        }
        return { node, prefix: prefixStr, prefixLength, keyFullyConsumed: true };
    }
    /**
     * Depth-first traversal matching {@link SearchableMap}'s `TreeIterator`, which
     * visits siblings in reverse Map-insertion order (last key first). The leaf, if
     * any, sits at `nodeLeafOrder` among the original sibling slots; everything else
     * is an edge. Exact order matters for prefix iteration and autoSuggest parity.
     */
    *emitSubtree(startNode, startPrefix) {
        const heap = this.labelHeap;
        // Explicit stack of frames instead of recursion.
        const frames = [];
        pushEmitFrame(frames, this, startNode, startPrefix);
        while (frames.length) {
            const frame = frames[frames.length - 1];
            if (frame.slot < 0) {
                // All slots of this node have been visited.
                frames.pop();
                continue;
            }
            const slot = frame.slot--;
            const edgeOffset = edgeOffsetAtSlot(slot, frame.leafSlot);
            // A negative edge offset is treated as the node's own term.
            if (edgeOffset < 0) {
                yield [frame.prefix, this.nodeValue[frame.node]];
                continue;
            }
            const ei = frame.first + edgeOffset;
            const start = this.edgeLabelStart[ei];
            const len = this.edgeLabelLength[ei];
            const childPrefix = frame.prefix + labelSlice(heap, start, len);
            pushEmitFrame(frames, this, this.edgeChild[ei], childPrefix);
        }
    }
    /**
     * Same traversal as `emitSubtree`, yielding `{ termIndex, length }` records and
     * tracking only the accumulated term length (no strings are built).
     */
    *emitSubtreeRefs(startNode, startLength) {
        const frames = [];
        pushEmitRefFrame(frames, this, startNode, startLength);
        while (frames.length) {
            const frame = frames[frames.length - 1];
            if (frame.slot < 0) {
                frames.pop();
                continue;
            }
            const slot = frame.slot--;
            const edgeOffset = edgeOffsetAtSlot(slot, frame.leafSlot);
            if (edgeOffset < 0) {
                yield { termIndex: this.nodeValue[frame.node], length: frame.length };
                continue;
            }
            const ei = frame.first + edgeOffset;
            const len = this.edgeLabelLength[ei];
            pushEmitRefFrame(frames, this, this.edgeChild[ei], frame.length + len);
        }
    }
    /** @deprecated Internal benchmark/compat wrapper. Prefer `fuzzyRefs` + `termByIndex`. */
    fuzzyEntries(term, maxDistance) {
        return packedRadixFuzzyEntries(this, term, maxDistance);
    }
    /** Lazily iterate the fuzzy matches of `term` within `maxDistance` edits. */
    fuzzyRefs(term, maxDistance) {
        return packedRadixFuzzyRefs(this, term, maxDistance);
    }
    /** Reverse lookup tables for term reconstruction; built on first use and then cached. */
    lazyTermMetadata() {
        if (this._lazyTermMetadata == null) {
            this._lazyTermMetadata = buildLazyTermMetadata(this);
        }
        return this._lazyTermMetadata;
    }
    /** Length of the term with the given index, without building its string. */
    termLengthByIndex(termIndex) {
        return termLengthFromIndex(this, this.lazyTermMetadata(), termIndex);
    }
    /** Term string for the given term index. */
    termByIndex(termIndex) {
        return reconstructTermFromIndex(this, this.lazyTermMetadata(), termIndex);
    }
    /**
     * Sum of the byte lengths of the six index columns plus two bytes per code unit
     * of the label heap.
     */
    packedByteLength() {
        return (this.nodeEdgeOffset.byteLength
            + this.nodeValue.byteLength
            + this.nodeLeafOrder.byteLength
            + this.edgeLabelStart.byteLength
            + this.edgeLabelLength.byteLength
            + this.edgeChild.byteLength
            + this.labelHeap.length * 2);
    }
    /** Number of nodes in the packed tree. */
    packedNodeCount() {
        return this.nodeCount;
    }
    /** Number of edges in the packed tree. */
    packedEdgeCount() {
        return this.edgeCount;
    }
}
1313
+
1314
/**
 * Pack a Map-based radix tree.
 *
 * @param tree Root node of the radix tree
 * @param termCountOrOptions Either nothing (the term count is inferred from the
 *   leaves and leaf values are kept as-is), a term count, or an options object
 *   `{ termCount, mapLeaf, inferTermCountFromLeaves = false }`
 * @return The result of `packRadixTreeFromRadix`
 */
function fromRadixTree(tree, termCountOrOptions) {
    const keepLeaf = leaf => leaf;
    if (termCountOrOptions == null) {
        return packRadixTreeFromRadix(tree, 0, keepLeaf, true);
    }
    if (typeof termCountOrOptions !== 'object') {
        return packRadixTreeFromRadix(tree, termCountOrOptions, keepLeaf, false);
    }
    const { termCount, mapLeaf, inferTermCountFromLeaves = false } = termCountOrOptions;
    return packRadixTreeFromRadix(tree, termCount, mapLeaf, inferTermCountFromLeaves);
}
1325
/**
 * Flatten a Map-based radix tree into the column layout of `PackedRadixTree`.
 *
 * Nodes are numbered in depth-first pre-order (the root is node 0). For every
 * node the position of its leaf among the original Map entries is remembered,
 * and the remaining entries become edges in Map iteration order.
 *
 * @param tree Root node of the radix tree
 * @param termCount Term count to record when it is not inferred
 * @param mapLeaf Converts a stored leaf value into the packed node value
 * @param inferTermCountFromLeaves When truthy, `size` is the number of leaves found
 * @return The packed tree built by `PackedRadixTree.fromData`
 * @throws Error when an edge label is longer than `MAX_PACKED_EDGE_LABEL_LENGTH`
 */
function packRadixTreeFromRadix(tree, termCount, mapLeaf, inferTermCountFromLeaves) {
    const packedNodes = [];
    let leavesSeen = 0;
    // Pass 1: number the nodes and collect their leaf / edge information.
    const visit = (radixNode) => {
        const id = packedNodes.length;
        const record = { value: PACKED_NO_VALUE, leafOrder: PACKED_NO_VALUE, edges: [] };
        packedNodes.push(record);
        let slot = 0;
        for (const [label, payload] of radixNode) {
            if (label !== LEAF) {
                const child = visit(payload);
                record.edges.push({ label, child });
            }
            else {
                record.value = mapLeaf(payload);
                record.leafOrder = slot;
                leavesSeen += 1;
            }
            slot += 1;
        }
        return id;
    };
    visit(tree);
    const size = inferTermCountFromLeaves ? leavesSeen : termCount;
    const nodeCount = packedNodes.length;
    // Pass 2: gather the maxima that decide the width of each column.
    let edgeCount = 0;
    let totalLabelLength = 0;
    let maxLabelLength = 0;
    let maxNodeValue = 0;
    let maxLeafOrderEncoded = 0;
    for (let i = 0; i < nodeCount; i++) {
        const record = packedNodes[i];
        edgeCount += record.edges.length;
        for (const { label } of record.edges) {
            const labelLength = label.length;
            totalLabelLength += labelLength;
            if (labelLength > maxLabelLength)
                maxLabelLength = labelLength;
        }
        if (record.value === PACKED_NO_VALUE)
            continue;
        if (record.value > maxNodeValue)
            maxNodeValue = record.value;
        const encodedLeafOrder = record.leafOrder + 1;
        if (encodedLeafOrder > maxLeafOrderEncoded)
            maxLeafOrderEncoded = encodedLeafOrder;
    }
    const nodeEdgeOffset = packedIndexArray(nodeCount + 1, edgeCount);
    const nodeValue = packedIndexArray(nodeCount, maxNodeValue);
    const nodeLeafOrder = packedIndexArray(nodeCount, maxLeafOrderEncoded);
    const edgeLabelStart = packedIndexArray(edgeCount, totalLabelLength);
    const edgeLabelLength = packedIndexArray(edgeCount, maxLabelLength);
    const edgeChild = packedIndexArray(edgeCount, Math.max(nodeCount - 1, 0));
    // Pass 3: fill the columns and build the label heap.
    let labelHeap = '';
    let nextEdge = 0;
    packedNodes.forEach((record, nodeId) => {
        // nodeLeafOrder: 0 = no leaf, slot + 1 otherwise; nodeValue unused when no leaf.
        if (record.value !== PACKED_NO_VALUE) {
            nodeValue[nodeId] = record.value;
            nodeLeafOrder[nodeId] = record.leafOrder + 1;
        }
        nodeEdgeOffset[nodeId] = nextEdge;
        for (const { label, child } of record.edges) {
            if (label.length > MAX_PACKED_EDGE_LABEL_LENGTH) {
                throw new Error('PackedRadixTree: edge label too long');
            }
            edgeLabelStart[nextEdge] = labelHeap.length;
            edgeLabelLength[nextEdge] = label.length;
            edgeChild[nextEdge] = child;
            labelHeap += label;
            nextEdge += 1;
        }
    });
    nodeEdgeOffset[nodeCount] = nextEdge;
    return PackedRadixTree.fromData({
        size,
        nodeCount,
        edgeCount,
        labelHeap,
        nodeEdgeOffset,
        nodeValue,
        nodeLeafOrder,
        edgeLabelStart,
        edgeLabelLength,
        edgeChild,
    });
}
1413
+
1414
/**
 * Tell whether the first `nextId` external ids are exactly `0, 1, 2, ...`,
 * i.e. every external id equals its own position.
 *
 * @return `true` when all of them match (including when `nextId` is 0)
 */
function detectIdentityNumericIds(externalIds, nextId) {
    let slot = 0;
    while (slot < nextId) {
        if (externalIds[slot] !== slot) {
            return false;
        }
        slot += 1;
    }
    return true;
}
1423
/**
 * Build the external-id to short-id Map for the first `nextId` slots.
 * Slots whose external id is `undefined` are left out.
 */
function buildLazyMap(externalIds, nextId) {
    const shortIdByExternalId = new Map();
    for (let shortId = 0; shortId < nextId; shortId++) {
        const externalId = externalIds[shortId];
        if (externalId === undefined) {
            continue;
        }
        shortIdByExternalId.set(externalId, shortId);
    }
    return shortIdByExternalId;
}
1432
/**
 * Create the lookup that translates an external document id into its short id.
 *
 * Two strategies exist:
 * - `'identity'`: the external ids are exactly `0 .. nextId - 1`, so an id is
 *   its own short id and no Map is needed;
 * - `'lazy-map'`: a Map is built from `externalIds` on the first `has`/`get`
 *   call; `mapEntryCount` reports its size (0 until it has been built).
 *
 * @return An object exposing `mode`, `mapEntryCount`, `has(id)` and `get(id)`
 */
function createIdToShortIdLookup(externalIds, nextId) {
    if (!detectIdentityNumericIds(externalIds, nextId)) {
        let cached;
        const lazyMap = () => {
            if (cached == null) {
                cached = buildLazyMap(externalIds, nextId);
            }
            return cached;
        };
        return {
            mode: 'lazy-map',
            get mapEntryCount() {
                return cached == null ? 0 : cached.size;
            },
            has(id) {
                return lazyMap().has(id);
            },
            get(id) {
                return lazyMap().get(id);
            },
        };
    }
    // Identity mode: a valid id is an integer in [0, nextId).
    const isShortId = (id) => typeof id === 'number' && Number.isInteger(id) && id >= 0 && id < nextId;
    return {
        mode: 'identity',
        mapEntryCount: 0,
        has(id) {
            return isShortId(id);
        },
        get(id) {
            return isShortId(id) ? id : undefined;
        },
    };
}
1468
+
1469
// One-byte tags that prefix every serialized external document id
// (see `writeExternalId` / `readExternalId`).
/** No id (`undefined`); the tag byte is the whole record. */
const ID_TAG_EMPTY = 0;
/** Finite number, stored as an 8-byte little-endian double after the tag. */
const ID_TAG_NUMBER = 1;
/** String, stored as a length-prefixed UTF-8 string after the tag. */
const ID_TAG_STRING = 2;
/** Any other value, stored as length-prefixed UTF-8 JSON text after the tag. */
const ID_TAG_JSON = 3;
1473
+
1474
// Default import (not `{ crc32 }`): `zlib.crc32` landed in Node 22.2.0 / 20.15.0. A named ESM
// import would throw at module load on older runtimes; property access is safe and lets
// `crc32Update` fall back to the pure-JS table below.
// `zlibCrc32` is the native function when the runtime provides it, `undefined` otherwise.
const zlibCrc32 = typeof zlib.crc32 === 'function' ? zlib.crc32 : undefined;
1478
/**
 * Build (but do not throw) the error used for every malformed-snapshot condition.
 *
 * @param detail Short description appended to the common message prefix
 * @return An `Error` whose message is `Invalid frozen index: <detail>`
 */
function invalidFrozenIndex(detail) {
    const message = `Invalid frozen index: ${detail}`;
    return new Error(message);
}
1481
/**
 * Ensure `buf` holds at least `min` bytes.
 *
 * @throws Error (`Invalid frozen index: buffer too short ...`) when it does not
 */
function assertBufferLength(buf, min) {
    const available = buf.length;
    if (available < min) {
        const detail = `buffer too short (${available} < ${min})`;
        throw invalidFrozenIndex(detail);
    }
}
1486
// Lookup table for the pure-JS CRC-32 fallback: entry `b` is the result of
// pushing the single byte `b` through eight shift/xor steps with the reflected
// polynomial 0xedb88320.
const CRC_TABLE = new Uint32Array(256);
for (let byte = 0; byte < 256; byte++) {
    let remainder = byte;
    for (let bit = 8; bit > 0; bit--) {
        const shifted = remainder >>> 1;
        remainder = (remainder & 1) !== 0 ? (0xedb88320 ^ shifted) : shifted;
    }
    CRC_TABLE[byte] = remainder;
}
1494
/**
 * Table-driven CRC-32 over `buf[start .. end)`.
 *
 * @param buf Byte source
 * @param start First index to include
 * @param end Index after the last byte to include
 * @param seed CRC of the data processed so far (0 to start a new checksum)
 * @return The updated CRC as an unsigned 32-bit integer
 */
function crc32BufferFallback(buf, start, end, seed = 0) {
    // The running state is the bitwise complement of the public CRC value.
    let state = ~seed >>> 0;
    for (let pos = start; pos < end; pos++) {
        const tableIndex = (state ^ buf[pos]) & 0xff;
        state = CRC_TABLE[tableIndex] ^ (state >>> 8);
    }
    return ~state >>> 0;
}
1501
/**
 * Incremental CRC-32 IEEE update; pass the previous return value as `seed`.
 *
 * Uses the native `zlib.crc32` when the runtime has it, otherwise the
 * table-driven JS fallback.
 *
 * @param seed CRC of the data processed so far
 * @param buf Byte source
 * @param start First index to include (default 0)
 * @param end Index after the last byte to include (default `buf.length`)
 * @return The updated CRC as an unsigned 32-bit integer
 */
function crc32Update(seed, buf, start = 0, end = buf.length) {
    if (typeof zlibCrc32 !== 'function') {
        return crc32BufferFallback(buf, start, end, seed);
    }
    // Only create a sub-view when the requested range is not the whole buffer.
    const coversWholeBuffer = start === 0 && end === buf.length;
    const range = coversWholeBuffer ? buf : buf.subarray(start, end);
    return zlibCrc32(range, seed) >>> 0;
}
1509
/**
 * CRC-32 IEEE (zlib polynomial) of `buf[start .. end)`, starting from a zero
 * seed; uses `zlib.crc32` when available.
 */
function crc32Buffer(buf, start = 0, end = buf.length) {
    const initialSeed = 0;
    return crc32Update(initialSeed, buf, start, end);
}
1513
/**
 * Read `byteLength` bytes at `offset` of `buf` as little-endian unsigned 32-bit integers.
 *
 * When the section starts on a 4-byte boundary of the underlying `ArrayBuffer`
 * the result is a zero-copy view; otherwise the values are copied. Alignment is
 * judged on the absolute position (`buf.byteOffset + offset`), because that is
 * what the typed-array constructor requires: a Buffer that itself starts at an
 * unaligned `byteOffset` would otherwise make the constructor throw `RangeError`.
 *
 * @param buf Source Buffer
 * @param offset Start of the section, relative to `buf`
 * @param byteLength Section length in bytes (must be a multiple of 4)
 * @return A `Uint32Array` with `byteLength / 4` elements
 * @throws Error (`Invalid frozen index: ...`) when the length is not a multiple
 *   of 4 or the section extends past the end of `buf`
 */
function readUint32Array(buf, offset, byteLength) {
    if (byteLength === 0)
        return new Uint32Array(0);
    if (byteLength % 4 !== 0) {
        throw invalidFrozenIndex('uint32 section length not aligned');
    }
    if (offset + byteLength > buf.length) {
        throw invalidFrozenIndex('uint32 section read past buffer end');
    }
    const absoluteOffset = buf.byteOffset + offset;
    if (absoluteOffset % 4 === 0) {
        return new Uint32Array(buf.buffer, absoluteOffset, byteLength / 4);
    }
    // Unaligned section: copy element by element.
    const out = new Uint32Array(byteLength / 4);
    for (let i = 0; i < out.length; i++)
        out[i] = buf.readUInt32LE(offset + i * 4);
    return out;
}
1530
/**
 * Read `byteLength` bytes at `offset` of `buf` as little-endian unsigned 16-bit integers.
 *
 * When the section starts on a 2-byte boundary of the underlying `ArrayBuffer`
 * the result is a zero-copy view. An odd absolute position cannot be viewed
 * (the typed-array constructor throws `RangeError`), so in that case the values
 * are copied instead.
 *
 * @param buf Source Buffer
 * @param offset Start of the section, relative to `buf`
 * @param byteLength Section length in bytes (must be a multiple of 2)
 * @return A `Uint16Array` with `byteLength / 2` elements
 * @throws Error (`Invalid frozen index: ...`) when the length is odd or the
 *   section extends past the end of `buf`
 */
function readUint16Array(buf, offset, byteLength) {
    if (byteLength === 0)
        return new Uint16Array(0);
    if (byteLength % 2 !== 0) {
        throw invalidFrozenIndex('uint16 section length not aligned');
    }
    if (offset + byteLength > buf.length) {
        throw invalidFrozenIndex('uint16 section read past buffer end');
    }
    const absoluteOffset = buf.byteOffset + offset;
    if (absoluteOffset % 2 === 0) {
        return new Uint16Array(buf.buffer, absoluteOffset, byteLength / 2);
    }
    // Unaligned section: copy element by element.
    const out = new Uint16Array(byteLength / 2);
    for (let i = 0; i < out.length; i++)
        out[i] = buf.readUInt16LE(offset + i * 2);
    return out;
}
1543
/**
 * Return a zero-copy `Uint8Array` view over `byteLength` bytes of `buf`
 * starting at `offset`.
 *
 * @throws Error (`Invalid frozen index: ...`) when the section extends past the end of `buf`
 */
function readUint8Array(buf, offset, byteLength) {
    if (byteLength === 0) {
        return new Uint8Array(0);
    }
    const sectionEnd = offset + byteLength;
    if (sectionEnd > buf.length) {
        throw invalidFrozenIndex('uint8 section read past buffer end');
    }
    const absoluteStart = buf.byteOffset + offset;
    return new Uint8Array(buf.buffer, absoluteStart, byteLength);
}
1551
/**
 * Read `byteLength` bytes at `offset` of `buf` as little-endian 32-bit floats.
 *
 * When the section starts on a 4-byte boundary of the underlying `ArrayBuffer`
 * the result is a zero-copy view. An unaligned absolute position cannot be
 * viewed (the typed-array constructor throws `RangeError`), so in that case the
 * values are copied instead.
 *
 * @param buf Source Buffer
 * @param offset Start of the section, relative to `buf`
 * @param byteLength Section length in bytes (must be a multiple of 4)
 * @return A `Float32Array` with `byteLength / 4` elements
 * @throws Error (`Invalid frozen index: ...`) when the length is not a multiple
 *   of 4 or the section extends past the end of `buf`
 */
function readFloat32Array(buf, offset, byteLength) {
    if (byteLength === 0)
        return new Float32Array(0);
    if (byteLength % 4 !== 0) {
        throw invalidFrozenIndex('float32 section length not aligned');
    }
    if (offset + byteLength > buf.length) {
        throw invalidFrozenIndex('float32 section read past buffer end');
    }
    const absoluteOffset = buf.byteOffset + offset;
    if (absoluteOffset % 4 === 0) {
        return new Float32Array(buf.buffer, absoluteOffset, byteLength / 4);
    }
    // Unaligned section: copy element by element.
    const out = new Float32Array(byteLength / 4);
    for (let i = 0; i < out.length; i++)
        out[i] = buf.readFloatLE(offset + i * 4);
    return out;
}
1564
/**
 * Wrap the bytes of a typed-array view in a Buffer that shares the same
 * underlying `ArrayBuffer` (no copy is made).
 */
function bufferFromView(view) {
    const { buffer, byteOffset, byteLength } = view;
    return Buffer.from(buffer, byteOffset, byteLength);
}
1567
/**
 * Read a field-id section: one byte per id when `width` is 8, two bytes per id
 * for any other width.
 */
function readFieldIdArray(buf, offset, byteLength, width) {
    const readSection = width === 8 ? readUint8Array : readUint16Array;
    return readSection(buf, offset, byteLength);
}
1572
/**
 * Append `str` to `chunks` as two Buffers: a 4-byte little-endian length
 * (number of UTF-8 bytes) followed by the UTF-8 bytes themselves.
 */
function writeLengthPrefixedUtf8(chunks, str) {
    const payload = Buffer.from(str, 'utf8');
    const lengthPrefix = Buffer.alloc(4);
    lengthPrefix.writeUInt32LE(payload.length, 0);
    chunks.push(lengthPrefix);
    chunks.push(payload);
}
1578
/**
 * Decode a string written as a 4-byte little-endian byte length followed by
 * that many UTF-8 bytes.
 *
 * @param buf Source Buffer
 * @param offset Position of the length header
 * @return `{ value, next }`: the decoded string and the offset just past it
 * @throws Error (`Invalid frozen index: ...`) when the header or the body does
 *   not fit in `buf`
 */
function readLengthPrefixedUtf8(buf, offset) {
    const bodyStart = offset + 4;
    if (bodyStart > buf.length) {
        throw invalidFrozenIndex('length-prefixed string header truncated');
    }
    const bodyEnd = bodyStart + buf.readUInt32LE(offset);
    if (bodyEnd > buf.length) {
        throw invalidFrozenIndex('length-prefixed string body out of bounds');
    }
    const value = buf.toString('utf8', bodyStart, bodyEnd);
    return { value, next: bodyEnd };
}
1590
/**
 * Append one external document id to `chunks` as a tagged record:
 * - `undefined`: the empty tag only;
 * - finite number: number tag + 8-byte little-endian double (one 9-byte Buffer);
 * - string: string tag, then the length-prefixed UTF-8 string;
 * - anything else: JSON tag, then the length-prefixed `JSON.stringify` output.
 */
function writeExternalId(chunks, id) {
    if (id === undefined) {
        chunks.push(Buffer.from([ID_TAG_EMPTY]));
        return;
    }
    const kind = typeof id;
    if (kind === 'number' && Number.isFinite(id)) {
        const record = Buffer.alloc(1 + 8);
        record.writeUInt8(ID_TAG_NUMBER, 0);
        record.writeDoubleLE(id, 1);
        chunks.push(record);
        return;
    }
    // Strings are stored verbatim, everything else as JSON text. The text is
    // produced before the tag is pushed.
    const isString = kind === 'string';
    const text = isString ? id : JSON.stringify(id);
    chunks.push(Buffer.from([isString ? ID_TAG_STRING : ID_TAG_JSON]));
    writeLengthPrefixedUtf8(chunks, text);
}
1613
/**
 * Decode one tagged external document id starting at `offset`.
 *
 * @param buf Source Buffer
 * @param offset Position of the tag byte
 * @return `{ value, next }`: the decoded id (`undefined` for the empty tag) and
 *   the offset just past the record
 * @throws Error (`Invalid frozen index: ...`) when the record is truncated or
 *   the tag is unknown
 */
function readExternalId(buf, offset) {
    if (offset >= buf.length) {
        throw invalidFrozenIndex('external id tag truncated');
    }
    const tag = buf.readUInt8(offset);
    const bodyOffset = offset + 1;
    switch (tag) {
        case ID_TAG_EMPTY:
            return { value: undefined, next: bodyOffset };
        case ID_TAG_NUMBER: {
            const recordEnd = offset + 9;
            if (recordEnd > buf.length) {
                throw invalidFrozenIndex('external id number truncated');
            }
            return { value: buf.readDoubleLE(bodyOffset), next: recordEnd };
        }
        case ID_TAG_STRING: {
            const text = readLengthPrefixedUtf8(buf, bodyOffset);
            return { value: text.value, next: text.next };
        }
        case ID_TAG_JSON: {
            const text = readLengthPrefixedUtf8(buf, bodyOffset);
            return { value: JSON.parse(text.value), next: text.next };
        }
        default:
            throw invalidFrozenIndex(`unknown external id tag ${tag}`);
    }
}
1637
+
1638
/** MSv5 unified frozen snapshot (columnar tree, single payload zstd stream). */
/** Postings / field-length flags (low 16 bits of global flags at offset 6). */
// NOTE(review): meanings of the DOC_ID / SPARSE / FIELD_ID / FREQ flags are inferred from
// their names only; confirm against the encoder and decoder.
/** Presumably: document ids are stored as 16-bit values. */
const FLAG_DOC_ID_16 = 1;
/** Presumably: the sparse postings layout is in use. */
const FLAG_SPARSE_LAYOUT = 2;
/** Presumably: field ids are stored as 16-bit values. */
const FLAG_FIELD_ID_16 = 4;
/** Field-length matrix cells are one byte wide (see `readFieldLengthMatrixSection`). */
const FLAG_FL_U8 = 8;
/** Field-length matrix cells are two bytes wide; with neither FL flag set they are four bytes wide. */
const FLAG_FL_U16 = 16;
/** Presumably: term frequencies are stored as 16-bit values. */
const FLAG_FREQ_U16 = 32;
/** Payload stored uncompressed. */
const CODEC_RAW = 0;
/** Zstandard (`node:zlib`) on the whole payload. */
const CODEC_ZSTD = 3;
/** Single concatenated payload, one zstd stream (or raw). */
const MSV5_FORMAT_REV_PAYLOAD = 1;
/** Do not compress payloads smaller than this (bytes). */
const MSV5_MIN_COMPRESS_BYTES = 64;
/** Fixed zstd compression level for the whole payload. */
const MSV5_ZSTD_LEVEL = 9;
/** Number of entries in the section directory. */
const MSV5_SECTION_COUNT = 12;
/** Per-section catalogue entry: fileOffset(4) + uncompressedLength(4) + crc32(4) + reserved(8). */
const MSV5_SECTION_ENTRY_BYTES = 20;
/**
 * Size of the fixed header prefix. Going by the offset constants below, it holds the
 * magic/version/flags fields, the payload codec (offset 8), the zstd level (offset 9),
 * the format revision (offset 10), the section count (offset 12) and four u32 payload
 * descriptor fields at offsets 16, 20, 24 and 28.
 */
const MSV5_HEADER_PREFIX_SIZE = 32;
/** Full header size: fixed prefix followed by the section directory. */
const MSV5_HEADER_SIZE = MSV5_HEADER_PREFIX_SIZE + MSV5_SECTION_COUNT * MSV5_SECTION_ENTRY_BYTES;
/** Header offset of the payload codec (`CODEC_RAW` / `CODEC_ZSTD`). */
const MSV5_PAYLOAD_CODEC_OFFSET = 8;
/** Header offset of the zstd level. */
const MSV5_ZSTD_LEVEL_OFFSET = 9;
/** Header offset of the format revision. */
const MSV5_FORMAT_REV_OFFSET = 10;
/** Header offset of the section count. */
const MSV5_SECTION_COUNT_OFFSET = 12;
/** compressedOffset, compressedLength, uncompressedLength, payloadCrc32 (each u32). */
const MSV5_PAYLOAD_COMPRESSED_OFFSET = 16;
const MSV5_PAYLOAD_COMPRESSED_LENGTH_OFFSET = 20;
const MSV5_PAYLOAD_UNCOMPRESSED_LENGTH_OFFSET = 24;
const MSV5_PAYLOAD_CRC_OFFSET = 28;
/** The section directory starts right after the fixed prefix. */
const MSV5_SECTION_DIR_OFFSET = MSV5_HEADER_PREFIX_SIZE;
/** Tree column order for columnWidthFlags bit pairs (2 bits each). */
const MSV5_TREE_COLUMN_COUNT = 6;
1673
+
1674
/**
 * Largest value among the first `length` elements of `data` (all of them when
 * `length` is omitted). Missing (`null`/`undefined`) elements count as 0, and
 * the result is never below 0.
 */
function maxInArrayLike(data, length) {
    const count = length == null ? data.length : length;
    let largest = 0;
    for (let index = 0; index < count; index++) {
        const raw = data[index];
        const value = raw == null ? 0 : raw;
        if (value > largest) {
            largest = value;
        }
    }
    return largest;
}
1685
/**
 * Copy the first `length` cells of `data` (all of them when `length` is
 * omitted) into an array obtained from `packedIndexArray`, sized by the largest
 * cell value. Missing cells become 0.
 */
function materializeFieldLengthMatrix(data, length) {
    const cellCount = length == null ? data.length : length;
    const largest = maxInArrayLike(data, cellCount);
    const matrix = packedIndexArray(cellCount, largest);
    for (let cell = 0; cell < cellCount; cell++) {
        const raw = data[cell];
        matrix[cell] = raw == null ? 0 : raw;
    }
    return matrix;
}
1694
/**
 * Widen an in-memory field-length matrix to `Uint32Array` for the encoder.
 *
 * A matrix that already is a `Uint32Array` is returned as-is; a narrower one
 * (u8/u16) is copied into a new `Uint32Array` of the same length. On the wire
 * the width is adaptive ({@link fieldLengthMatrixWireFlags}) and
 * {@link readFieldLengthMatrixSection} restores u8/u16/u32.
 */
function fieldLengthMatrixForWire(matrix) {
    if (matrix instanceof Uint32Array) {
        return matrix;
    }
    const widened = new Uint32Array(matrix.length);
    widened.set(matrix);
    return widened;
}
1707
/**
 * Global wire flags for {@link FieldLengthArray} width: `FLAG_FL_U8` for a
 * `Uint8Array`, `FLAG_FL_U16` for a `Uint16Array`, and 0 for anything else.
 */
function fieldLengthMatrixWireFlags(matrix) {
    let flags = 0;
    if (matrix instanceof Uint8Array) {
        flags = FLAG_FL_U8;
    }
    else if (matrix instanceof Uint16Array) {
        flags = FLAG_FL_U16;
    }
    return flags;
}
1715
/**
 * Wrap the matrix bytes in a Buffer without copying: the Buffer is a view over the
 * matrix's own ArrayBuffer range.
 */
function buildFieldLengthMatrixSection(matrix) {
    const { buffer, byteOffset, byteLength } = matrix;
    return Buffer.from(buffer, byteOffset, byteLength);
}
1718
/**
 * Decode a field-length matrix section into a typed array of `cellCount` cells.
 *
 * The cell width comes from `flags`: FLAG_FL_U8 selects Uint8Array, otherwise FLAG_FL_U16
 * selects Uint16Array, otherwise Uint32Array.
 *
 * When `buf` starts on a multiple of the cell width the result is a zero-copy view over
 * `buf`'s memory. Typed-array views require that alignment, so a misaligned `buf` is copied
 * into a fresh ArrayBuffer instead of failing with a RangeError.
 *
 * @param buf Buffer (or Uint8Array) holding exactly `cellCount` cells.
 * @param flags Global wire flags carrying the width bits.
 * @param cellCount Expected number of cells.
 * @returns Uint8Array, Uint16Array or Uint32Array of length `cellCount`.
 * @throws the error built by `invalidFrozenIndex` when `buf.length` does not match.
 */
function readFieldLengthMatrixSection(buf, flags, cellCount) {
    let View = Uint32Array;
    let label = 'u32';
    if ((flags & FLAG_FL_U8) !== 0) {
        View = Uint8Array;
        label = 'u8';
    }
    else if ((flags & FLAG_FL_U16) !== 0) {
        View = Uint16Array;
        label = 'u16';
    }
    const cellBytes = View.BYTES_PER_ELEMENT;
    if (buf.length !== cellCount * cellBytes) {
        throw invalidFrozenIndex(`fieldLengthMatrix ${label} size mismatch`);
    }
    if (cellCount === 0) {
        return new View(0);
    }
    if (buf.byteOffset % cellBytes === 0) {
        return new View(buf.buffer, buf.byteOffset, cellCount);
    }
    // Misaligned start: a direct view would throw, so realign through a private copy.
    const aligned = buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.length);
    return new View(aligned);
}
1742
+
1743
/** Sentinel doc id: a remapped posting that resolves to this value is dropped by the materializers. */
const DISCARDED_DOC_ID = 0xffffffff;
/** Frequency to store for a posting: passed through `clampFreq` when `clampFrequencies` is truthy, else unchanged. */
function postingFreqValue(freq, clampFrequencies) {
    if (!clampFrequencies) {
        return freq;
    }
    return clampFreq(freq);
}
1747
/**
 * Build dense flat posting buffers: one (offset, length) slot per (term, field) pair plus
 * global docId / frequency arrays.
 *
 * `params.forEachPosting(termIndex, fieldId, emit)` is invoked twice per slot: a first pass
 * counts the surviving postings and tracks the largest stored frequency, a second pass writes
 * them. Postings whose (optionally remapped) doc id equals DISCARDED_DOC_ID are skipped.
 * Doc ids are stored as Uint16 when `params.nextId` is provided and <= 65535, else as Uint32.
 *
 * @returns `{ postingsOffsets, postingsLengths, allDocIds, allFreqs }`
 */
function materializeFlatPostings(params) {
    const { fieldCount, termCount, forEachPosting, remapDocId, clampFrequencies } = params;
    const resolveDocId = rawDocId => (remapDocId != null ? remapDocId(rawDocId) : rawDocId);
    const slots = termCount * fieldCount;
    const postingsOffsets = new Uint32Array(slots);
    const postingsLengths = new Uint32Array(slots);
    // Pass 1: size the global buffers.
    let keptPostings = 0;
    let peakFreq = 0;
    const tally = (rawDocId, freq) => {
        if (resolveDocId(rawDocId) === DISCARDED_DOC_ID) {
            return;
        }
        keptPostings += 1;
        const stored = postingFreqValue(freq, clampFrequencies);
        if (stored > peakFreq) {
            peakFreq = stored;
        }
    };
    for (let term = 0; term < termCount; term++) {
        for (let field = 0; field < fieldCount; field++) {
            forEachPosting(term, field, tally);
        }
    }
    const narrowDocIds = params.nextId != null && params.nextId <= 65535;
    const allDocIds = narrowDocIds ? new Uint16Array(keptPostings) : new Uint32Array(keptPostings);
    const allFreqs = allocateFreqs(keptPostings, peakFreq);
    // Pass 2: slots are filled in ascending field id per term; sparse layouts depend on
    // that ordering to keep each term's field ids sorted.
    let cursor = 0;
    const store = (rawDocId, freq) => {
        const docId = resolveDocId(rawDocId);
        if (docId === DISCARDED_DOC_ID) {
            return;
        }
        allDocIds[cursor] = docId;
        allFreqs[cursor] = postingFreqValue(freq, clampFrequencies);
        cursor += 1;
    };
    for (let term = 0; term < termCount; term++) {
        const rowStart = term * fieldCount;
        for (let field = 0; field < fieldCount; field++) {
            const runStart = cursor;
            forEachPosting(term, field, store);
            postingsOffsets[rowStart + field] = runStart;
            postingsLengths[rowStart + field] = cursor - runStart;
        }
    }
    return { postingsOffsets, postingsLengths, allDocIds, allFreqs };
}
1805
+
1806
/** Read the field id stored at position `index` of `fieldIds`. */
function readFieldId(fieldIds, index) {
    const fieldId = fieldIds[index];
    return fieldId;
}
1809
/** Postings layout for an index: 'dense' when there is exactly one field, 'sparse' otherwise. */
function choosePostingsLayout(fieldCount) {
    if (fieldCount === 1) {
        return 'dense';
    }
    return 'sparse';
}
1812
/** Bit width used to store sparse field ids: 16 when there are more than 255 fields, 8 otherwise. */
function chooseSparseFieldIdWidth(fieldCount) {
    if (fieldCount > 255) {
        return 16;
    }
    return 8;
}
1815
/**
 * Materialize frozen postings in the layout chosen by {@link choosePostingsLayout}.
 *
 * Dense layout delegates to {@link materializeFlatPostings}. Sparse layout records only the
 * non-empty (term, field) slots: `sparseTermStarts[t]..sparseTermStarts[t + 1]` delimits term
 * `t`'s slots, and each slot carries its field id, offset and length into the global
 * docId / frequency buffers. `params.forEachPosting` is invoked twice per visited slot
 * (count, then write). Postings whose (optionally remapped) doc id equals DISCARDED_DOC_ID
 * are skipped.
 *
 * @returns a layout record; the fields of the unused layout are `null`.
 */
function materializeFrozenPostings(params) {
    const { fieldCount, termCount, nextId } = params;
    const layout = choosePostingsLayout(fieldCount);
    const docIdWidth = nextId <= 65535 ? 16 : 32;
    if (layout === 'dense') {
        const flat = materializeFlatPostings({ ...params, nextId });
        return {
            fieldCount,
            termCount,
            nextId,
            layout,
            docIdWidth,
            sparseFieldIdWidth: null,
            allDocIds: flat.allDocIds,
            allFreqs: flat.allFreqs,
            denseOffsets: flat.postingsOffsets,
            denseLengths: flat.postingsLengths,
            sparseTermStarts: null,
            sparseFieldIds: null,
            sparseOffsets: null,
            sparseLengths: null,
        };
    }
    const sparseFieldIdWidth = chooseSparseFieldIdWidth(fieldCount);
    const { forEachPosting, remapDocId, clampFrequencies } = params;
    const resolveDocId = rawDocId => (remapDocId != null ? remapDocId(rawDocId) : rawDocId);
    const slotFieldIds = [];
    const slotOffsets = [];
    const slotLengths = [];
    const termStarts = new Array(termCount + 1).fill(0);
    // Pass 1: discover the non-empty slots. Fields are visited in ascending id, so the
    // field ids recorded for each term end up sorted.
    let keptPostings = 0;
    let peakFreq = 0;
    for (let term = 0; term < termCount; term++) {
        termStarts[term] = slotFieldIds.length;
        for (let field = 0; field < fieldCount; field++) {
            let runLength = 0;
            forEachPosting(term, field, (rawDocId, freq) => {
                if (resolveDocId(rawDocId) === DISCARDED_DOC_ID) {
                    return;
                }
                runLength += 1;
                const stored = postingFreqValue(freq, clampFrequencies);
                if (stored > peakFreq) {
                    peakFreq = stored;
                }
            });
            if (runLength !== 0) {
                slotFieldIds.push(field);
                slotOffsets.push(keptPostings);
                slotLengths.push(runLength);
                keptPostings += runLength;
            }
        }
        termStarts[term + 1] = slotFieldIds.length;
    }
    const allDocIds = docIdWidth === 16 ? new Uint16Array(keptPostings) : new Uint32Array(keptPostings);
    const allFreqs = allocateFreqs(keptPostings, peakFreq);
    const sparseFieldIds = sparseFieldIdWidth === 16
        ? new Uint16Array(slotFieldIds)
        : new Uint8Array(slotFieldIds);
    // Pass 2: write the postings of every recorded slot, in slot order.
    let cursor = 0;
    const store = (rawDocId, freq) => {
        const docId = resolveDocId(rawDocId);
        if (docId === DISCARDED_DOC_ID) {
            return;
        }
        allDocIds[cursor] = docId;
        allFreqs[cursor] = postingFreqValue(freq, clampFrequencies);
        cursor += 1;
    };
    for (let term = 0; term < termCount; term++) {
        const lastSlot = termStarts[term + 1];
        for (let slot = termStarts[term]; slot < lastSlot; slot++) {
            forEachPosting(term, sparseFieldIds[slot], store);
        }
    }
    return {
        fieldCount,
        termCount,
        nextId,
        layout,
        docIdWidth,
        sparseFieldIdWidth,
        allDocIds,
        allFreqs,
        denseOffsets: null,
        denseLengths: null,
        sparseTermStarts: new Uint32Array(termStarts),
        sparseFieldIds,
        sparseOffsets: new Uint32Array(slotOffsets),
        sparseLengths: new Uint32Array(slotLengths),
    };
}
1914
/**
 * Single-pass materialization from builder scratch state, where the total posting count and
 * the largest frequency are already known.
 *
 * `state.postingsDocIds[slot]` / `state.postingsFreqs[slot]` hold the per-slot runs, with
 * `slot = termIndex * fieldCount + fieldId`; a missing or empty run means no postings.
 * Doc ids and frequencies are copied as-is (no remapping, no clamping).
 *
 * @returns a layout record; the fields of the unused layout are `null`.
 */
function materializeFrozenPostingsFromBuilder(state, nextId) {
    const { fieldCount, termCount, postingsDocIds, postingsFreqs, totalPostings, maxFreq } = state;
    const layout = choosePostingsLayout(fieldCount);
    const docIdWidth = nextId <= 65535 ? 16 : 32;
    const allDocIds = docIdWidth === 16 ? new Uint16Array(totalPostings) : new Uint32Array(totalPostings);
    const allFreqs = allocateFreqs(totalPostings, maxFreq);
    let cursor = 0;
    // Append one run to the global buffers at the current write cursor.
    const appendRun = (runDocIds, runFreqs, runLength) => {
        for (let i = 0; i < runLength; i++) {
            allDocIds[cursor] = runDocIds[i];
            allFreqs[cursor] = runFreqs[i];
            cursor += 1;
        }
    };
    if (layout === 'dense') {
        const slotCount = termCount * fieldCount;
        const denseOffsets = new Uint32Array(slotCount);
        const denseLengths = new Uint32Array(slotCount);
        for (let term = 0; term < termCount; term++) {
            for (let field = 0; field < fieldCount; field++) {
                const slot = term * fieldCount + field;
                const runDocIds = postingsDocIds[slot];
                const runLength = runDocIds != null && runDocIds.length != null ? runDocIds.length : 0;
                denseOffsets[slot] = cursor;
                denseLengths[slot] = runLength;
                appendRun(runDocIds, postingsFreqs[slot], runLength);
            }
        }
        return {
            fieldCount,
            termCount,
            nextId,
            layout,
            docIdWidth,
            sparseFieldIdWidth: null,
            allDocIds,
            allFreqs,
            denseOffsets,
            denseLengths,
            sparseTermStarts: null,
            sparseFieldIds: null,
            sparseOffsets: null,
            sparseLengths: null,
        };
    }
    const sparseFieldIdWidth = chooseSparseFieldIdWidth(fieldCount);
    const slotFieldIds = [];
    const slotOffsets = [];
    const slotLengths = [];
    const termStarts = new Array(termCount + 1).fill(0);
    for (let term = 0; term < termCount; term++) {
        termStarts[term] = slotFieldIds.length;
        for (let field = 0; field < fieldCount; field++) {
            const slot = term * fieldCount + field;
            const runDocIds = postingsDocIds[slot];
            if (runDocIds != null && runDocIds.length !== 0) {
                slotFieldIds.push(field);
                slotOffsets.push(cursor);
                slotLengths.push(runDocIds.length);
                appendRun(runDocIds, postingsFreqs[slot], runDocIds.length);
            }
        }
        termStarts[term + 1] = slotFieldIds.length;
    }
    const sparseFieldIds = sparseFieldIdWidth === 16
        ? new Uint16Array(slotFieldIds)
        : new Uint8Array(slotFieldIds);
    return {
        fieldCount,
        termCount,
        nextId,
        layout,
        docIdWidth,
        sparseFieldIdWidth,
        allDocIds,
        allFreqs,
        denseOffsets: null,
        denseLengths: null,
        sparseTermStarts: new Uint32Array(termStarts),
        sparseFieldIds,
        sparseOffsets: new Uint32Array(slotOffsets),
        sparseLengths: new Uint32Array(slotLengths),
    };
}
2019
/**
 * Byte accounting for the typed arrays of a postings layout.
 *
 * Dense: offsets / lengths are the dense arrays and `slotCount` is termCount * fieldCount.
 * Sparse: `offsetsBytes` adds the term-start array to the sparse offsets, `lengthsBytes` adds
 * the field-id array to the sparse lengths, and `slotCount` is the number of recorded slots.
 */
function postingsTypedBytes(layout) {
    const allDocIdsBytes = layout.allDocIds.byteLength;
    const allFreqsBytes = layout.allFreqs.byteLength;
    let offsetsBytes;
    let lengthsBytes;
    let slotCount;
    if (layout.layout === 'dense') {
        offsetsBytes = layout.denseOffsets.byteLength;
        lengthsBytes = layout.denseLengths.byteLength;
        slotCount = layout.termCount * layout.fieldCount;
    }
    else {
        offsetsBytes = layout.sparseOffsets.byteLength + layout.sparseTermStarts.byteLength;
        lengthsBytes = layout.sparseLengths.byteLength + layout.sparseFieldIds.byteLength;
        slotCount = layout.sparseFieldIds.length;
    }
    const totalTypedBytes = allDocIdsBytes + allFreqsBytes + offsetsBytes + lengthsBytes;
    return { allDocIdsBytes, allFreqsBytes, offsetsBytes, lengthsBytes, totalTypedBytes, slotCount };
}
2046
/**
 * Structural validation of a frozen postings layout.
 *
 * Checks the scalar fields, that docId / frequency buffers have equal length, that every
 * posting run stays inside the global buffers and that every stored doc id is below
 * `nextId`. For the sparse layout it also checks the field-id width, that every field id is
 * below `fieldCount`, and that `sparseTermStarts` is non-decreasing and never points past
 * the sparse slot arrays (lookups use these values directly as loop bounds).
 *
 * @param layout Layout record as produced by the materialize functions.
 * @param documentCount Number of live documents; must lie in `0..nextId`.
 * @param nextId Exclusive upper bound for doc ids.
 * @param fail Called with a detail message on the first violation; the default throws an Error.
 */
function validateFrozenPostingsLayout(layout, documentCount, nextId, fail = detail => { throw new Error(detail); }) {
    if (layout.fieldCount <= 0) {
        fail('fieldCount must be positive');
    }
    if (layout.nextId !== nextId) {
        fail('nextId mismatch');
    }
    if (layout.termCount < 0) {
        fail('termCount out of range');
    }
    if (layout.allDocIds.length !== layout.allFreqs.length) {
        fail('allDocIds and allFreqs length mismatch');
    }
    // Every doc id of one posting run must be below nextId.
    const checkRunDocIds = (off, len) => {
        for (let i = 0; i < len; i++) {
            const docId = readDocId(layout.allDocIds, off + i);
            if (docId >= nextId) {
                fail(`posting docId ${docId} >= nextId ${nextId}`);
            }
        }
    };
    if (layout.layout === 'dense') {
        if (layout.sparseFieldIdWidth != null) {
            fail('dense layout must not have sparseFieldIdWidth');
        }
        const slotCount = layout.termCount * layout.fieldCount;
        if (layout.denseOffsets.length !== slotCount || layout.denseLengths.length !== slotCount) {
            fail('dense postings slot count mismatch');
        }
        for (let slot = 0; slot < slotCount; slot++) {
            const off = layout.denseOffsets[slot];
            const len = layout.denseLengths[slot];
            if (off + len > layout.allDocIds.length) {
                fail(`posting slot ${slot} exceeds allDocIds bounds`);
            }
            checkRunDocIds(off, len);
        }
    }
    else {
        const expectedFieldIdWidth = chooseSparseFieldIdWidth(layout.fieldCount);
        if (layout.sparseFieldIdWidth !== expectedFieldIdWidth) {
            fail('sparseFieldIdWidth mismatch with fieldCount');
        }
        const starts = layout.sparseTermStarts;
        if (starts.length !== layout.termCount + 1) {
            fail('sparseTermStarts length mismatch');
        }
        const slotCount = layout.sparseFieldIds.length;
        if (layout.sparseOffsets.length !== slotCount || layout.sparseLengths.length !== slotCount) {
            fail('sparse slot count mismatch');
        }
        // Term ranges index straight into the slot arrays, so they must be ordered and in bounds.
        let previousStart = 0;
        for (let ti = 0; ti < starts.length; ti++) {
            const start = starts[ti];
            if (start < previousStart) {
                fail(`sparseTermStarts[${ti}] is lower than the preceding start`);
            }
            if (start > slotCount) {
                fail(`sparseTermStarts[${ti}] exceeds sparse slot count ${slotCount}`);
            }
            previousStart = start;
        }
        for (let slot = 0; slot < slotCount; slot++) {
            const fieldId = readFieldId(layout.sparseFieldIds, slot);
            if (fieldId >= layout.fieldCount) {
                fail(`sparse fieldId ${fieldId} >= fieldCount ${layout.fieldCount}`);
            }
            const off = layout.sparseOffsets[slot];
            const len = layout.sparseLengths[slot];
            if (off + len > layout.allDocIds.length) {
                fail(`sparse slot ${slot} exceeds allDocIds bounds`);
            }
            checkRunDocIds(off, len);
        }
    }
    if (documentCount < 0 || documentCount > nextId) {
        fail('documentCount inconsistent with nextId');
    }
}
2110
/**
 * Find the slot index holding `fieldId` inside `fieldIds[start..end)`, or -1 when absent.
 *
 * The range is scanned linearly and the scan stops as soon as a stored id exceeds `fieldId`,
 * which relies on each term's field ids being stored in ascending order
 * (see materializeFrozenPostings). Ranges are short — at most `fieldCount` entries — so a
 * plain sequential scan is used rather than a binary search.
 */
function findSparseSlotByFieldId(fieldIds, start, end, fieldId) {
    let cursor = start;
    while (cursor < end) {
        const candidate = readFieldId(fieldIds, cursor);
        if (candidate === fieldId) {
            return cursor;
        }
        if (candidate > fieldId) {
            return -1;
        }
        cursor += 1;
    }
    return -1;
}
2128
/**
 * Resolve the posting run of one (termIndex, fieldId) pair inside the flat buffers.
 * Shared by the flyweight accessor and by doc-id collection.
 *
 * In the dense layout the slot is computed as `termIndex * fieldCount + fieldId`, so a field
 * id outside `0..fieldCount - 1` would land on another term's slot; such ids (including
 * `undefined`, e.g. from an unknown field name) are rejected up front, matching the sparse
 * path where an unknown field id is simply not found.
 *
 * @returns `{ offset, length }` for a non-empty run, or `undefined` when the pair has no postings.
 */
function frozenPostingSlice(layout, termIndex, fieldId) {
    if (layout.layout === 'dense') {
        // Written as a positive range test so undefined / NaN field ids fail it too.
        if (!(fieldId >= 0 && fieldId < layout.fieldCount)) {
            return undefined;
        }
        const slot = termIndex * layout.fieldCount + fieldId;
        const runLength = layout.denseLengths[slot];
        // `!(x > 0)` also covers a slot index outside the array (undefined length).
        if (!(runLength > 0)) {
            return undefined;
        }
        return { offset: layout.denseOffsets[slot], length: runLength };
    }
    const start = layout.sparseTermStarts[termIndex];
    const end = layout.sparseTermStarts[termIndex + 1];
    const slot = findSparseSlotByFieldId(layout.sparseFieldIds, start, end, fieldId);
    if (slot < 0) {
        return undefined;
    }
    const runLength = layout.sparseLengths[slot];
    if (!(runLength > 0)) {
        return undefined;
    }
    return { offset: layout.sparseOffsets[slot], length: runLength };
}
2147
/**
 * Create the single reusable accessor for a frozen index.
 *
 * `bind(termIndex)` selects the term and returns the accessor itself; `get(fieldId)` then
 * returns that term's postings for the field, or `undefined` when there are none. Every
 * successful `get` hands back the same SegmentPostingList instance, rebound to the requested
 * run — so a result is only valid until the next `get` / `bind`.
 */
function createFrozenFieldTermFlyweight(layout) {
    const sharedSegment = new SegmentPostingList(layout.allDocIds, layout.allFreqs, 0, 0);
    let boundTerm = -1;
    const flyweight = {
        bind(ti) {
            boundTerm = ti;
            return flyweight;
        },
        get(fieldId) {
            const run = frozenPostingSlice(layout, boundTerm, fieldId);
            return run == null ? undefined : sharedSegment.rebind(run.offset, run.length);
        },
    };
    return flyweight;
}
2169
/**
 * Add the doc ids of one posting run (`length` entries starting at `offset`) to `docIds`.
 *
 * A doc id is skipped when `context.isDocActive` is set and reports it inactive, or when
 * `allowedDocs` is set and does not contain it.
 */
function collectDocIdsFromFrozenSegment(allDocIds, offset, length, context, docIds, allowedDocs) {
    for (let step = 0; step < length; step += 1) {
        const docId = readDocId(allDocIds, offset + step);
        const isActive = context.isDocActive == null || context.isDocActive(docId);
        if (isActive && (allowedDocs == null || allowedDocs.has(docId))) {
            docIds.add(docId);
        }
    }
}
2179
/**
 * Collect into `docIds` the documents matching one term across the fields named in
 * `fieldBoosts.names`, reading the flat postings directly. Field names are translated to
 * field ids through `context.fieldIds`; fields without postings for the term are skipped.
 */
function collectDocIdsFromFrozenLayout(layout, termIndex, fieldBoosts, context, docIds, allowedDocs) {
    const idByField = context.fieldIds;
    for (const fieldName of fieldBoosts.names) {
        const run = frozenPostingSlice(layout, termIndex, idByField[fieldName]);
        if (run != null) {
            collectDocIdsFromFrozenSegment(layout.allDocIds, run.offset, run.length, context, docIds, allowedDocs);
        }
    }
}
2189
+
2190
/** Separator for the default tokenizer: one or more newlines, Unicode separators (\p{Z}) or punctuation (\p{P}). */
const SPACE_OR_PUNCTUATION = /[\n\r\p{Z}\p{P}]+/u;
/** Baseline search options; `searchOptions` passed by the caller are spread over these. */
const defaultSearchOptions = {
    combineWith: OR,
    prefix: false,
    fuzzy: false,
    maxFuzzy: 6,
    boost: {},
    weights: { fuzzy: 0.45, prefix: 0.375 },
    bm25: defaultBM25params,
};
/** Baseline auto-suggest options: terms are combined with AND and only the last term is prefix-matched. */
const defaultAutoSuggestOptions = {
    combineWith: AND,
    prefix: (_term, position, allTerms) => position === allTerms.length - 1,
};
/** Indexing / load option defaults; caller-supplied options are spread over these. */
const defaultFrozenLoadOptions = {
    idField: 'id',
    extractField: (doc, name) => doc[name],
    stringifyField: (value) => value.toString(),
    tokenize: (input) => input.split(SPACE_OR_PUNCTUATION),
    processTerm: (token) => token.toLowerCase(),
    storeFields: [],
    logger: () => { },
    autoVacuum: false,
};
2216
+
2217
/**
 * Merge caller options over the load defaults.
 *
 * `searchOptions` and `autoSuggestOptions` are merged one level deep with their own defaults.
 *
 * @throws Error when `options` is missing or has no `fields`.
 */
function resolveIndexingOptions(options) {
    const fieldsMissing = options == null || options.fields == null;
    if (fieldsMissing) {
        throw new Error('MiniSearch: option "fields" must be provided');
    }
    const resolved = { ...defaultFrozenLoadOptions, ...options };
    resolved.searchOptions = { ...defaultSearchOptions, ...(options.searchOptions || {}) };
    resolved.autoSuggestOptions = { ...defaultAutoSuggestOptions, ...(options.autoSuggestOptions || {}) };
    return resolved;
}
2228
/** Map each field name to its position in `fields` (a later duplicate name overwrites an earlier one). */
function buildFieldIds(fields) {
    const idByName = {};
    let position = 0;
    while (position < fields.length) {
        idByName[fields[position]] = position;
        position += 1;
    }
    return idByName;
}
2235
/**
 * Count how often each processed term occurs among one field's tokens.
 *
 * `processTerm(token, fieldName)` may return an array (every element is counted) or a single
 * value (counted only when truthy, so tokens mapped to a falsy value are dropped).
 *
 * @returns Map from processed term to occurrence count.
 */
function collectFieldTermFreqs(tokens, fieldName, processTerm) {
    const counts = new Map();
    const bump = (key) => {
        counts.set(key, (counts.get(key) || 0) + 1);
    };
    for (const token of tokens) {
        const processed = processTerm(token, fieldName);
        if (Array.isArray(processed)) {
            for (const expanded of processed) {
                bump(expanded);
            }
            continue;
        }
        if (processed) {
            bump(processed);
        }
    }
    return counts;
}
2251
/**
 * Fold one more field length into the running average stored at `avgFieldLength[fieldId]`.
 * `count` is the number of values already averaged; a missing average is treated as 0.
 */
function updateAvgFieldLength(avgFieldLength, fieldId, count, length) {
    const previousAverage = avgFieldLength[fieldId] || 0;
    const lengthSum = (previousAverage * count) + length;
    avgFieldLength[fieldId] = lengthSum / (count + 1);
}
2257
/**
 * Extract the stored fields of one document.
 *
 * @returns `undefined` when `storeFields` is empty; otherwise an object holding every stored
 * field for which `extractField` returned something other than `undefined`.
 */
function saveStoredFieldsForDocument(storeFields, extractField, document) {
    if (storeFields.length === 0) {
        return undefined;
    }
    const kept = {};
    for (const name of storeFields) {
        const value = extractField(document, name);
        if (value === undefined) {
            continue;
        }
        kept[name] = value;
    }
    return kept;
}
2268
+
2269
/** MiniSearch JSON serialization versions this loader accepts. */
const SUPPORTED_SERIALIZATION_VERSIONS = new Set([1, 2]);
/**
 * Normalize one per-field index entry: serialization version 1 may wrap the posting map in
 * an object under a `ds` key, which is unwrapped here; every other entry is returned unchanged.
 */
function parseIndexEntry(entry, serializationVersion) {
    const isWrapped = serializationVersion === 1
        && entry != null
        && typeof entry === 'object'
        && 'ds' in entry;
    return isWrapped ? entry.ds : entry;
}
2276
/**
 * Ensure the caller's `fields` option names exactly the fields recorded in the snapshot
 * (order-insensitive comparison of the two name lists).
 *
 * @throws Error listing the expected field names when the two sets differ.
 */
function assertFieldsMatchSnapshot$1(optionsFields, snapFieldIds) {
    const expected = Object.keys(snapFieldIds).sort();
    const provided = [...optionsFields].sort();
    const identical = expected.length === provided.length
        && expected.every((name, position) => name === provided[position]);
    if (!identical) {
        throw new Error(`FrozenMiniSearch: option "fields" must match the indexed fields exactly (expected: ${expected.join(', ')})`);
    }
}
2283
/**
 * Rebuild the term index of a MiniSearch JSON snapshot as a SearchableMap whose values are
 * `Map<fieldId, Map<docId, frequency>>`. Field ids and doc ids, which are object keys in the
 * snapshot, are parsed back to base-10 integers.
 */
function buildSearchableMapFromSnapshot(snapshot) {
    const termIndex = new SearchableMap();
    const { index: termEntries, serializationVersion } = snapshot;
    for (const [term, perFieldData] of termEntries) {
        const postingsByField = new Map();
        for (const fieldKey of Object.keys(perFieldData)) {
            const docFreqs = parseIndexEntry(perFieldData[fieldKey], serializationVersion);
            const freqByDoc = new Map();
            for (const [docKey, frequency] of Object.entries(docFreqs)) {
                freqByDoc.set(parseInt(docKey, 10), frequency);
            }
            postingsByField.set(parseInt(fieldKey, 10), freqByDoc);
        }
        termIndex.set(term, postingsByField);
    }
    return termIndex;
}
2301
/**
 * Pack a SearchableMap into a term index plus frozen postings.
 *
 * Each radix-tree leaf is given a term index in the order `fromRadixTree` visits leaves; the
 * leaf's per-field posting maps are then replayed into {@link materializeFrozenPostings}
 * with frequency clamping enabled. When `shortIdRemap` is provided, doc ids are translated
 * through it.
 *
 * @returns `{ termCount, index, postings }`
 */
function buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, nextId, shortIdRemap) {
    const leavesByTermIndex = [];
    const packedIndex = fromRadixTree(searchableMap.radixTree, {
        termCount: 0,
        // push() returns the new length, so the leaf just stored sits at length - 1.
        mapLeaf: (leaf) => leavesByTermIndex.push(leaf) - 1,
        inferTermCountFromLeaves: true,
    });
    const termCount = packedIndex.size;
    let remapDocId;
    if (shortIdRemap != null) {
        remapDocId = (docId) => shortIdRemap[docId];
    }
    const postings = materializeFrozenPostings({
        fieldCount,
        termCount,
        nextId,
        clampFrequencies: true,
        remapDocId,
        forEachPosting(ti, f, emit) {
            const leaf = leavesByTermIndex[ti];
            const freqByDoc = leaf == null ? undefined : leaf.get(f);
            if (freqByDoc == null) {
                return;
            }
            for (const [shortId, freq] of freqByDoc) {
                emit(shortId, freq);
            }
        },
    });
    return { termCount, index: packedIndex, postings };
}
2334
/**
 * Convert a MiniSearch JSON snapshot into the parameter bundle used to assemble a frozen index.
 *
 * - `options.fields`, when non-empty, must name exactly the snapshot's fields; otherwise the
 *   snapshot's field names are used.
 * - When the snapshot has fewer documents than `nextId` (gaps in the short-id space), short
 *   ids are renumbered densely in ascending order and `nextId` becomes `documentCount`;
 *   postings and field lengths of ids without a document are dropped.
 * - Field lengths are laid out row-major (`row * fieldCount + fieldId`), missing cells as 0.
 *
 * @throws Error on an unsupported `serializationVersion` or mismatching `fields`.
 */
function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
    if (!SUPPORTED_SERIALIZATION_VERSIONS.has(snapshot.serializationVersion)) {
        throw new Error(`FrozenMiniSearch: unsupported MiniSearch serializationVersion ${snapshot.serializationVersion}`);
    }
    const callerFields = options.fields;
    const callerNamedFields = callerFields != null && Boolean(callerFields.length);
    const fields = callerNamedFields ? callerFields : Object.keys(snapshot.fieldIds);
    if (callerNamedFields) {
        assertFieldsMatchSnapshot$1(fields, snapshot.fieldIds);
    }
    const opts = resolveIndexingOptions({ ...options, fields });
    const fieldCount = opts.fields.length;
    const { documentCount, nextId } = snapshot;
    // Gaps exist in the short-id space whenever fewer documents than ids were handed out.
    const compactIds = documentCount < nextId;
    const resolvedNextId = compactIds ? documentCount : nextId;
    const externalIds = new Array(resolvedNextId);
    const storedFields = new Array(externalIds.length);
    let shortIdRemap = null;
    if (compactIds) {
        shortIdRemap = new Uint32Array(nextId);
        shortIdRemap.fill(DISCARDED_DOC_ID);
        const ascendingShortIds = Object.keys(snapshot.documentIds)
            .map(key => parseInt(key, 10))
            .sort((a, b) => a - b);
        ascendingShortIds.forEach((shortId, denseId) => {
            const key = String(shortId);
            shortIdRemap[shortId] = denseId;
            externalIds[denseId] = snapshot.documentIds[key];
            storedFields[denseId] = snapshot.storedFields[key];
        });
    }
    else {
        for (const [key, externalId] of Object.entries(snapshot.documentIds)) {
            const shortId = parseInt(key, 10);
            externalIds[shortId] = externalId;
            storedFields[shortId] = snapshot.storedFields[key];
        }
    }
    const idLookup = createIdToShortIdLookup(externalIds, resolvedNextId);
    // One matrix row per resolved doc id.
    const fieldLengthScratch = new Array(resolvedNextId * fieldCount).fill(0);
    for (const [key, lengths] of Object.entries(snapshot.fieldLength)) {
        const shortId = parseInt(key, 10);
        const row = shortIdRemap != null ? shortIdRemap[shortId] : shortId;
        if (row === DISCARDED_DOC_ID) {
            continue;
        }
        const rowStart = row * fieldCount;
        for (let f = 0; f < fieldCount; f++) {
            const cell = lengths[f];
            fieldLengthScratch[rowStart + f] = cell != null ? cell : 0;
        }
    }
    const fieldLengthMatrix = materializeFieldLengthMatrix(fieldLengthScratch);
    const avgFieldLength = new Float32Array(snapshot.averageFieldLength.length);
    avgFieldLength.forEach((_unused, i) => {
        avgFieldLength[i] = snapshot.averageFieldLength[i];
    });
    const searchableMap = buildSearchableMapFromSnapshot(snapshot);
    const flat = buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, resolvedNextId, shortIdRemap);
    return {
        options: opts,
        documentCount,
        nextId: resolvedNextId,
        fieldIds: snapshot.fieldIds,
        fieldCount,
        externalIds,
        idLookup,
        storedFields,
        fieldLengthMatrix,
        avgFieldLength,
        index: flat.index,
        termCount: flat.termCount,
        postings: flat.postings,
    };
}
2411
+
2412
/**
 * Upper bound (1 GiB) on the uncompressed MSv5 payload — the guard against decompression bombs.
 *
 * {@link readPayloadMeta} refuses headers that declare more than this. A hostile header can
 * still declare anything up to the cap, because the declared `uncompressedLength` comes from
 * the very header being validated and so cannot be trusted to set a tighter limit.
 */
const MSV5_MAX_UNCOMPRESSED_BYTES = 2 ** 30;
2419
/**
 * Whether this runtime's node:zlib exposes the zstd APIs.
 *
 * The zstd family (zstdCompress[Sync], zstdDecompressSync, createZstdDecompress) was added to
 * node:zlib as a unit in Node 22.15.0 / 23.8.0, so the presence of one member stands for all.
 * The probe runs on every call rather than once at module load so tests can mock it.
 * Without zstd, saves fall back to a raw payload and reading a zstd payload fails with a
 * descriptive error.
 */
function zstdAvailable() {
    const probe = zlib.zstdCompressSync;
    return typeof probe === 'function';
}
2427
/** Build (without throwing) the error reported when a zstd-compressed snapshot is read on a runtime lacking zstd support. */
function zstdUnavailableReadError() {
    const message = [
        'MSv5 snapshot is zstd-compressed, but this Node.js runtime lacks node:zlib zstd support ',
        '(added in Node 22.15.0). Upgrade Node.js to read this snapshot, or re-save it from a ',
        'newer runtime to embed a raw (uncompressed) payload.',
    ].join('');
    return new Error(message);
}
2432
/** Set once the raw-payload fallback warning has been emitted. */
let warnedZstdSaveFallback = false;
/** Emit the "zstd unavailable, writing a raw payload" process warning, at most once per module instance. */
function warnZstdSaveFallbackOnce() {
    if (!warnedZstdSaveFallback) {
        warnedZstdSaveFallback = true;
        const message = 'node:zlib zstd APIs are unavailable (Node.js < 22.15.0); MSv5 snapshots are written with a '
            + 'raw (uncompressed) payload. Upgrade to Node 22.15.0+ for compressed snapshots.';
        process.emitWarning(message, { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
    }
}
2440
/**
 * Check the header's little-endian u16 format revision against the supported payload revision.
 *
 * @throws Error naming the revision found when it is not the supported one.
 */
function assertPayloadFormatRev(buf) {
    const foundRev = buf.readUInt16LE(MSV5_FORMAT_REV_OFFSET);
    if (foundRev === MSV5_FORMAT_REV_PAYLOAD) {
        return;
    }
    throw new Error(`MSv5 unsupported format revision ${foundRev}`);
}
2446
/**
 * Read the payload descriptor from an MSv5 header: four little-endian u32 fields
 * (offset, compressed length, uncompressed length, CRC-32) plus the one-byte codec id.
 *
 * @throws Error when the declared uncompressed length exceeds MSV5_MAX_UNCOMPRESSED_BYTES.
 */
function readPayloadMeta(fileBuf) {
    const u32At = (offset) => fileBuf.readUInt32LE(offset);
    const meta = {
        payloadOffset: u32At(MSV5_PAYLOAD_COMPRESSED_OFFSET),
        compressedLength: u32At(MSV5_PAYLOAD_COMPRESSED_LENGTH_OFFSET),
        uncompressedLength: u32At(MSV5_PAYLOAD_UNCOMPRESSED_LENGTH_OFFSET),
        payloadCrc32: u32At(MSV5_PAYLOAD_CRC_OFFSET),
        payloadCodec: fileBuf.readUInt8(MSV5_PAYLOAD_CODEC_OFFSET),
    };
    // The cap is enforced on the header value alone, before any payload work.
    if (meta.uncompressedLength > MSV5_MAX_UNCOMPRESSED_BYTES) {
        throw new Error('MSv5 payload exceeds 1 GiB limit');
    }
    return meta;
}
2457
/**
 * Lay the raw sections out back to back, each starting on a 4-byte boundary, and copy them
 * into one zero-filled buffer.
 *
 * @returns `{ uncompressed, entries }` where `entries[i]` records section i's start offset
 * inside `uncompressed` (`fileOffset`), its byte length and its CRC-32.
 */
function concatRawSections(rawSections) {
    const entries = [];
    let cursor = 0;
    for (const section of rawSections) {
        // Round the start up to the next multiple of 4.
        const fileOffset = (cursor + 3) & -4;
        entries.push({
            fileOffset,
            uncompressedLength: section.length,
            sectionCrc32: crc32Buffer(section),
        });
        cursor = fileOffset + section.length;
    }
    const uncompressed = Buffer.alloc(cursor);
    for (let i = 0; i < rawSections.length; i++) {
        rawSections[i].copy(uncompressed, entries[i].fileOffset);
    }
    return { uncompressed, entries };
}
2475
/**
 * zstd encoder options shared by the sync and async save paths.
 *
 * - `pledgedSrcSize` declares the exact input size, which is known up front.
 * - The frame checksum is switched off: MSv5 already stores a payload CRC-32 and per-section
 *   CRC-32s, so a frame checksum would only cost CPU.
 */
function msv5ZstdCompressOptions(uncompressed) {
    const { ZSTD_c_compressionLevel, ZSTD_c_checksumFlag } = zlib.constants;
    const params = {};
    params[ZSTD_c_compressionLevel] = MSV5_ZSTD_LEVEL;
    params[ZSTD_c_checksumFlag] = 0;
    return { pledgedSrcSize: uncompressed.length, params };
}
2488
/**
 * Choose between the compressed and the raw payload: zstd wins only when its output is
 * strictly smaller than the input; otherwise the raw bytes are stored with zstd level 0.
 * (The minimum-size threshold for attempting compression is applied by the callers.)
 */
function pickPayloadCodec(uncompressed, compressed) {
    const zstdIsSmaller = compressed.length < uncompressed.length;
    if (!zstdIsSmaller) {
        return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
    }
    return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
}
2495
/**
 * Synchronously decide how the payload is stored.
 *
 * Inputs shorter than MSV5_MIN_COMPRESS_BYTES are stored raw. Without zstd support the
 * payload is stored raw as well, after a one-time warning. Otherwise the input is compressed
 * and {@link pickPayloadCodec} keeps whichever form is smaller.
 */
function choosePayloadCodecSync(uncompressed) {
    const storeRaw = () => ({ payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 });
    if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
        return storeRaw();
    }
    if (zstdAvailable()) {
        const compressed = zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed));
        return pickPayloadCodec(uncompressed, compressed);
    }
    warnZstdSaveFallbackOnce();
    return storeRaw();
}
2506
/**
 * Promise wrapper around the callback-based {@link zstdCompress}
 * (not {@link zstdCompressSync}).
 * The same level and input decompress to the same payload as the sync path, so the
 * catalogue CRCs match, but the compressed bytes are not guaranteed bit-identical.
 *
 * @param {Buffer} uncompressed Raw payload.
 * @returns {Promise<Buffer>} Resolves with the compressed bytes; rejects with the zlib error.
 */
function zstdCompressAsync(uncompressed) {
    return new Promise((resolve, reject) => {
        const settle = (err, compressed) => {
            if (err != null) {
                reject(err);
            }
            else {
                resolve(compressed);
            }
        };
        zlib.zstdCompress(uncompressed, msv5ZstdCompressOptions(uncompressed), settle);
    });
}
2523
/**
 * Async counterpart of the sync codec chooser: same decisions, but compression goes
 * through {@link zstdCompressAsync}.
 *
 * @param {Buffer} uncompressed Raw payload.
 * @returns {Promise<{payload: Buffer, codec: number, zstdLevel: number}>}
 */
async function choosePayloadCodecAsync(uncompressed) {
    const tooSmall = uncompressed.length < MSV5_MIN_COMPRESS_BYTES;
    if (!tooSmall && zstdAvailable()) {
        const candidate = await zstdCompressAsync(uncompressed);
        return pickPayloadCodec(uncompressed, candidate);
    }
    if (!tooSmall) {
        // Large enough to compress, but this runtime has no zstd support.
        warnZstdSaveFallbackOnce();
    }
    return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
}
2534
/**
 * Concatenates the raw sections into one payload and checks the MSv5 limits.
 *
 * @param {Buffer[]} rawSections Exactly MSV5_SECTION_COUNT section buffers.
 * @returns {{uncompressed: Buffer, entries: Object[], payloadCrc32: number}}
 *   The payload, its section catalogue and the CRC-32 of the whole payload.
 * @throws {Error} On a wrong section count or a payload above the 1 GiB cap.
 */
function concatAndValidateSections(rawSections) {
    const sectionTotal = rawSections.length;
    if (sectionTotal !== MSV5_SECTION_COUNT) {
        throw new Error(`MSv5 expects ${MSV5_SECTION_COUNT} sections, got ${sectionTotal}`);
    }
    const packed = concatRawSections(rawSections);
    const payloadBytes = packed.uncompressed;
    if (payloadBytes.length > MSV5_MAX_UNCOMPRESSED_BYTES) {
        throw new Error('MSv5 payload exceeds 1 GiB limit');
    }
    return {
        uncompressed: payloadBytes,
        entries: packed.entries,
        payloadCrc32: crc32Buffer(payloadBytes),
    };
}
2544
/**
 * Writes the MSv5 header and section catalogue, then appends the payload blob.
 *
 * @param {number} globalFlags Wire flags; only the low 16 bits are written to the header.
 * @param {Object[]} entries Catalogue entries (`fileOffset`, `uncompressedLength`, `sectionCrc32`).
 * @param {number} uncompressedLength Size of the payload before compression.
 * @param {number} payloadCrc32 CRC-32 of the uncompressed payload.
 * @param {Buffer} payload Bytes stored after the header (raw or zstd).
 * @param {number} codec Payload codec id.
 * @param {number} zstdLevel zstd level recorded in the header (0 for raw).
 * @returns {{buffer: Buffer, globalFlags: number, compression: Object}}
 *   The assembled file plus a description of what was written.
 */
function buildMsv5AssembledFile(globalFlags, entries, uncompressedLength, payloadCrc32, payload, codec, zstdLevel) {
    const compressedLength = payload.length;
    const file = Buffer.alloc(MSV5_HEADER_SIZE + compressedLength);
    // Fixed header fields.
    file.write('MSv5', 0, 4, 'ascii');
    file.writeUInt16LE(5, 4);
    file.writeUInt16LE(globalFlags & 0xffff, 6);
    file.writeUInt8(codec, MSV5_PAYLOAD_CODEC_OFFSET);
    file.writeUInt8(zstdLevel, MSV5_ZSTD_LEVEL_OFFSET);
    file.writeUInt16LE(MSV5_FORMAT_REV_PAYLOAD, MSV5_FORMAT_REV_OFFSET);
    file.writeUInt32LE(MSV5_SECTION_COUNT, MSV5_SECTION_COUNT_OFFSET);
    file.writeUInt32LE(MSV5_HEADER_SIZE, MSV5_PAYLOAD_COMPRESSED_OFFSET);
    file.writeUInt32LE(compressedLength, MSV5_PAYLOAD_COMPRESSED_LENGTH_OFFSET);
    file.writeUInt32LE(uncompressedLength, MSV5_PAYLOAD_UNCOMPRESSED_LENGTH_OFFSET);
    file.writeUInt32LE(payloadCrc32, MSV5_PAYLOAD_CRC_OFFSET);
    // Section catalogue: one fixed-size record per entry.
    for (let i = 0; i < entries.length; i++) {
        const record = entries[i];
        const base = MSV5_SECTION_DIR_OFFSET + i * MSV5_SECTION_ENTRY_BYTES;
        file.writeUInt32LE(record.fileOffset, base);
        file.writeUInt32LE(record.uncompressedLength, base + 4);
        file.writeUInt32LE(record.sectionCrc32, base + 8);
    }
    payload.copy(file, MSV5_HEADER_SIZE);
    const sections = entries.map((record, sectionId) => ({
        sectionId,
        uncompressedOffset: record.fileOffset,
        uncompressedLength: record.uncompressedLength,
        sectionCrc32: record.sectionCrc32,
    }));
    return {
        buffer: file,
        globalFlags,
        compression: {
            formatRev: MSV5_FORMAT_REV_PAYLOAD,
            payloadCodec: codec,
            zstdLevel,
            uncompressedLength,
            compressedLength,
            payloadCrc32,
            sections,
        },
    };
}
2585
/**
 * Assembles an MSv5 file synchronously: header + catalogue (uncompressed offsets)
 * followed by **one** payload blob, which is either the raw concatenation of the
 * sections or a single zstd stream over it.
 *
 * @param {number} globalFlags Wire flags for the header.
 * @param {Buffer[]} rawSections Section buffers in catalogue order.
 * @returns {{buffer: Buffer, globalFlags: number, compression: Object}}
 */
function assembleMsv5File(globalFlags, rawSections) {
    const staged = concatAndValidateSections(rawSections);
    const chosen = choosePayloadCodecSync(staged.uncompressed);
    return buildMsv5AssembledFile(globalFlags, staged.entries, staged.uncompressed.length, staged.payloadCrc32, chosen.payload, chosen.codec, chosen.zstdLevel);
}
2594
/**
 * Async variant of the MSv5 assembler: identical layout, with the payload codec
 * chosen through the async compression path.
 *
 * @param {number} globalFlags Wire flags for the header.
 * @param {Buffer[]} rawSections Section buffers in catalogue order.
 * @returns {Promise<{buffer: Buffer, globalFlags: number, compression: Object}>}
 */
async function assembleMsv5FileAsync(globalFlags, rawSections) {
    const staged = concatAndValidateSections(rawSections);
    const chosen = await choosePayloadCodecAsync(staged.uncompressed);
    return buildMsv5AssembledFile(globalFlags, staged.entries, staged.uncompressed.length, staged.payloadCrc32, chosen.payload, chosen.codec, chosen.zstdLevel);
}
2599
/**
 * Reads the section catalogue from an MSv5 header.
 *
 * @param {Buffer} buf File buffer starting at the MSv5 header.
 * @returns {{fileOffset: number, uncompressedLength: number, sectionCrc32: number}[]}
 * @throws {Error} When the buffer is shorter than the header or the stored section
 *   count differs from MSV5_SECTION_COUNT (the format revision is asserted as well).
 */
function readMsv5SectionDirectory(buf) {
    if (buf.length < MSV5_HEADER_SIZE) {
        throw new Error('MSv5 buffer too short for header');
    }
    const declared = buf.readUInt32LE(MSV5_SECTION_COUNT_OFFSET);
    if (declared !== MSV5_SECTION_COUNT) {
        throw new Error(`MSv5 section count mismatch: ${declared}`);
    }
    assertPayloadFormatRev(buf);
    return Array.from({ length: declared }, (_unused, index) => {
        const base = MSV5_SECTION_DIR_OFFSET + index * MSV5_SECTION_ENTRY_BYTES;
        return {
            fileOffset: buf.readUInt32LE(base),
            uncompressedLength: buf.readUInt32LE(base + 4),
            sectionCrc32: buf.readUInt32LE(base + 8),
        };
    });
}
2620
/**
 * Checks one section against the CRC-32 recorded in the catalogue.
 *
 * @param {Buffer} section Section bytes.
 * @param {number} expected CRC-32 from the catalogue.
 * @throws {Error} When the computed CRC differs.
 */
function verifySectionCrc(section, expected) {
    const actual = crc32Buffer(section);
    if (actual === expected) {
        return;
    }
    throw new Error('MSv5 section CRC mismatch');
}
2625
/**
 * Cuts every section out of a fully materialized payload.
 * A section whose absolute byte offset is a multiple of 4 is returned as a zero-copy
 * view; any other section is copied into a freshly allocated buffer.
 *
 * @param {Buffer} payload Uncompressed payload.
 * @param {Object[]} directory Section catalogue.
 * @param {number} payloadCrc32 Expected CRC-32 of the whole payload.
 * @returns {Buffer[]} One buffer per catalogue entry, in order.
 * @throws {Error} On a payload or section CRC mismatch.
 */
function sectionsFromPayload(payload, directory, payloadCrc32) {
    if (crc32Buffer(payload) !== payloadCrc32) {
        throw new Error('MSv5 payload CRC mismatch');
    }
    const sections = [];
    for (const { fileOffset, uncompressedLength, sectionCrc32 } of directory) {
        const view = payload.subarray(fileOffset, fileOffset + uncompressedLength);
        verifySectionCrc(view, sectionCrc32);
        const aligned = (payload.byteOffset + fileOffset) % 4 === 0;
        if (aligned) {
            sections.push(view);
            continue;
        }
        const copy = Buffer.alloc(uncompressedLength);
        view.copy(copy, 0);
        sections.push(copy);
    }
    return sections;
}
2640
/**
 * Incremental section collector for the streaming zstd reader.
 * Decompressed chunks are routed straight into one buffer per section, so the payload
 * is never materialized as a single contiguous buffer (finished sections are kept in
 * `sections`).
 * No `maxOutputLength` is needed on {@link createZstdDecompress}: `consume` rejects any
 * chunk that would push the running offset past the header's `uncompressedLength`,
 * which is itself checked against the 1 GiB cap up front.
 *
 * @param {Object[]} directory Section catalogue.
 * @param {number} uncompressedLength Declared size of the decompressed payload.
 * @param {number} payloadCrc32 Expected CRC-32 of the decompressed payload.
 * @returns {{sections: Buffer[], consume: function(Buffer): void, finish: function(): void}}
 * @throws {Error} When the declared length exceeds the 1 GiB cap.
 */
function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32) {
    if (uncompressedLength > MSV5_MAX_UNCOMPRESSED_BYTES) {
        throw new Error('MSv5 payload exceeds 1 GiB limit');
    }
    const sectionTotal = directory.length;
    const sections = new Array(sectionTotal);
    let nextSection = 0; // index of the section being filled or awaited
    let position = 0; // decompressed bytes consumed so far
    let pending = null; // partially filled buffer for `nextSection`
    let runningCrc = 0;
    // Zero-length sections never receive bytes; emit them once the stream reaches their offset.
    const flushEmptySections = () => {
        for (; nextSection < sectionTotal; nextSection++) {
            const entry = directory[nextSection];
            if (entry.uncompressedLength !== 0 || entry.fileOffset !== position) {
                break;
            }
            verifySectionCrc(Buffer.alloc(0), entry.sectionCrc32);
            sections[nextSection] = Buffer.alloc(0);
        }
    };
    const consume = (chunk) => {
        const chunkLength = chunk.length;
        if (position + chunkLength > uncompressedLength) {
            throw new Error('MSv5 zstd payload exceeds declared length');
        }
        runningCrc = crc32Update(runningCrc, chunk);
        let cursor = 0;
        while (cursor < chunkLength) {
            flushEmptySections();
            const remaining = chunkLength - cursor;
            if (nextSection >= sectionTotal) {
                // Bytes after the last section: count them so finish() can compare lengths.
                position += remaining;
                return;
            }
            const entry = directory[nextSection];
            const gap = entry.fileOffset - position;
            if (gap > 0) {
                // Bytes between sections are skipped.
                const skipped = Math.min(gap, remaining);
                position += skipped;
                cursor += skipped;
                continue;
            }
            if (pending == null) {
                pending = Buffer.allocUnsafe(entry.uncompressedLength);
            }
            const filled = position - entry.fileOffset;
            const take = Math.min(entry.uncompressedLength - filled, remaining);
            chunk.copy(pending, filled, cursor, cursor + take);
            position += take;
            cursor += take;
            if (filled + take === entry.uncompressedLength) {
                verifySectionCrc(pending, entry.sectionCrc32);
                sections[nextSection] = pending;
                pending = null;
                nextSection++;
            }
        }
    };
    const finish = () => {
        flushEmptySections();
        if (position !== uncompressedLength || nextSection !== sectionTotal) {
            throw new Error('MSv5 zstd decompressed length mismatch');
        }
        if (runningCrc !== payloadCrc32) {
            throw new Error('MSv5 payload CRC mismatch');
        }
    };
    return { sections, consume, finish };
}
2708
/**
 * Decompresses a zstd payload through a stream and splits it into sections on the fly.
 *
 * @param {Buffer} compressed zstd-compressed payload.
 * @param {Object[]} directory Section catalogue.
 * @param {number} uncompressedLength Declared decompressed size.
 * @param {number} payloadCrc32 Expected CRC-32 of the decompressed payload.
 * @returns {Promise<Buffer[]>} Resolves with one buffer per section; rejects on any
 *   stream, length or CRC error.
 */
function loadMsv5SectionsFromZstdStream(compressed, directory, uncompressedLength, payloadCrc32) {
    return new Promise((resolve, reject) => {
        const collector = collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32);
        const decoder = zlib.createZstdDecompress();
        const onData = (chunk) => {
            try {
                collector.consume(chunk);
            }
            catch (err) {
                // Destroying with the error routes it through the 'error' handler below.
                decoder.destroy(err);
            }
        };
        const onEnd = () => {
            try {
                collector.finish();
            }
            catch (err) {
                reject(err);
                return;
            }
            resolve(collector.sections);
        };
        decoder.on('data', onData);
        decoder.on('error', reject);
        decoder.on('end', onEnd);
        decoder.end(compressed);
    });
}
2733
/**
 * Checks that the section catalogue describes a well-formed payload: every offset is
 * 4-byte aligned, sections do not overlap or go backwards, none reaches past the
 * payload, and the last section ends exactly at `uncompressedLength`.
 *
 * @param {{fileOffset: number, uncompressedLength: number}[]} directory Section catalogue.
 * @param {number} uncompressedLength Declared payload size.
 * @throws {Error} On the first violated rule.
 */
function validatePayloadDirectory(directory, uncompressedLength) {
    let cursor = 0;
    for (const { fileOffset, uncompressedLength: sectionLength } of directory) {
        if ((fileOffset & 3) !== 0) {
            throw new Error('MSv5 section offset not aligned');
        }
        if (fileOffset < cursor) {
            throw new Error('MSv5 section offsets not monotonic');
        }
        const sectionEnd = fileOffset + sectionLength;
        if (sectionEnd > uncompressedLength) {
            throw new Error('MSv5 section out of uncompressed bounds');
        }
        cursor = sectionEnd;
    }
    if (cursor !== uncompressedLength) {
        throw new Error('MSv5 uncompressed payload length mismatch');
    }
}
2751
/**
 * Validation and bounds checks shared by the sync and async load paths.
 *
 * @param {Buffer} fileBuf Whole MSv5 file.
 * @param {Object[]} directory Section catalogue read from the header.
 * @returns {{payloadCodec: number, slice: Buffer, uncompressedLength: number, payloadCrc32: number}}
 *   Codec id, a view over the stored payload bytes and the declared size / CRC.
 * @throws {Error} When the payload is not directly after the header, runs past the
 *   file, or a raw payload's stored and uncompressed lengths differ.
 */
function preparePayload(fileBuf, directory) {
    assertPayloadFormatRev(fileBuf);
    const meta = readPayloadMeta(fileBuf);
    const { payloadOffset, compressedLength, uncompressedLength, payloadCrc32, payloadCodec } = meta;
    validatePayloadDirectory(directory, uncompressedLength);
    const payloadEnd = payloadOffset + compressedLength;
    const misplaced = payloadOffset !== MSV5_HEADER_SIZE;
    const overruns = payloadEnd > fileBuf.length;
    if (misplaced || overruns) {
        throw new Error('MSv5 payload out of bounds');
    }
    const isRaw = payloadCodec === CODEC_RAW;
    if (isRaw && compressedLength !== uncompressedLength) {
        throw new Error('MSv5 raw payload length mismatch');
    }
    const slice = fileBuf.subarray(payloadOffset, payloadEnd);
    return { payloadCodec, slice, uncompressedLength, payloadCrc32 };
}
2769
/**
 * Synchronous section loader. A zstd payload is decompressed in one call, so peak
 * memory is roughly the full uncompressed payload (the async loader streams instead).
 *
 * @param {Buffer} fileBuf Whole MSv5 file.
 * @param {Object[]} directory Section catalogue.
 * @returns {Buffer[]} One buffer per section.
 * @throws {Error} On validation failures, missing zstd support or an unknown codec.
 */
function loadMsv5Sections(fileBuf, directory) {
    const prepared = preparePayload(fileBuf, directory);
    switch (prepared.payloadCodec) {
        case CODEC_RAW:
            return sectionsFromPayload(prepared.slice, directory, prepared.payloadCrc32);
        case CODEC_ZSTD: {
            if (!zstdAvailable()) {
                throw zstdUnavailableReadError();
            }
            // The native output cap is the format-wide 1 GiB limit (MSV5_MAX_UNCOMPRESSED_BYTES)
            // rather than the header's `uncompressedLength`: an attacker who controls the zstd
            // stream can inflate the header as well, so the worst case is the same.
            const decoded = zlib.zstdDecompressSync(prepared.slice, {
                maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
            });
            if (decoded.length !== prepared.uncompressedLength) {
                throw new Error('MSv5 zstd decompressed length mismatch');
            }
            return sectionsFromPayload(decoded, directory, prepared.payloadCrc32);
        }
        default:
            throw new Error(`MSv5 unknown payload codec ${prepared.payloadCodec}`);
    }
}
2792
/**
 * Asynchronous section loader. Raw payloads are sliced directly; zstd payloads are
 * decoded through the streaming reader instead of being decompressed in one call.
 *
 * @param {Buffer} fileBuf Whole MSv5 file.
 * @param {Object[]} directory Section catalogue.
 * @returns {Promise<Buffer[]>} One buffer per section.
 */
async function loadMsv5SectionsAsync(fileBuf, directory) {
    const prepared = preparePayload(fileBuf, directory);
    switch (prepared.payloadCodec) {
        case CODEC_RAW:
            return sectionsFromPayload(prepared.slice, directory, prepared.payloadCrc32);
        case CODEC_ZSTD:
            if (!zstdAvailable()) {
                throw zstdUnavailableReadError();
            }
            return loadMsv5SectionsFromZstdStream(prepared.slice, directory, prepared.uncompressedLength, prepared.payloadCrc32);
        default:
            throw new Error(`MSv5 unknown payload codec ${prepared.payloadCodec}`);
    }
}
2806
/**
 * Tells whether a buffer starts with the 4-byte MSv5 magic (`M`, `S`, `v`, `5`).
 *
 * The bytes are compared directly. Decoding with the `'ascii'` encoding clears the
 * high bit of every byte first, so a buffer starting with 0xCD 0xD3 0xF6 0xB5 would
 * also decode to "MSv5" and be accepted by mistake.
 *
 * @param {Uint8Array} buf Candidate file buffer.
 * @returns {boolean} True only for an exact magic match.
 */
function isMsv5Buffer(buf) {
    return buf.length >= 4
        && buf[0] === 0x4d // 'M'
        && buf[1] === 0x53 // 'S'
        && buf[2] === 0x76 // 'v'
        && buf[3] === 0x35; // '5'
}
2809
/**
 * Reads the 16-bit little-endian global flags word stored at byte 6 of the header.
 *
 * @param {Buffer} buf MSv5 file buffer.
 * @returns {number} Flags value.
 */
function readMsv5GlobalFlags(buf) {
    const flagsOffset = 6;
    return buf.readUInt16LE(flagsOffset);
}
2812
+
2813
/**
 * Recursively validates a serialized term tree: every node is an array of
 * `[key, value]` pairs, a LEAF pair carries an integer term index in
 * `[0, termCount)`, and any other pair carries a child node.
 *
 * @param {Array} shape Serialized tree node.
 * @param {number} termCount Number of terms; leaf indexes must stay below it.
 * @throws Error from `invalidFrozenIndex` on the first malformed node or leaf.
 */
function validateTreeShape(shape, termCount) {
    if (!Array.isArray(shape)) {
        throw invalidFrozenIndex('treeShape node must be an array');
    }
    for (let i = 0; i < shape.length; i++) {
        const pair = shape[i];
        if (!Array.isArray(pair) || pair.length !== 2) {
            throw invalidFrozenIndex('treeShape entry must be a [key, value] pair');
        }
        const key = pair[0];
        const value = pair[1];
        if (key !== LEAF) {
            validateTreeShape(value, termCount);
            continue;
        }
        const outOfRange = !Number.isInteger(value) || value < 0 || value >= termCount;
        if (outOfRange) {
            throw invalidFrozenIndex(`treeShape leaf term index out of range: ${value}`);
        }
    }
}
2833
/**
 * Returns the number of terms recorded in the snapshot's postings.
 *
 * @param {{postings: {termCount: number}}} snap Frozen snapshot.
 * @returns {number}
 */
function termCountOf(snap) {
    const { postings } = snap;
    return postings.termCount;
}
2836
/**
 * Numeric and structural invariants shared by the decode path (untrusted binary)
 * and the build path (trusted internal code): counts are in range, the per-field
 * arrays have the expected sizes, the postings layout is valid, and `fieldIds`
 * assigns every id in `[0, fieldCount)`.
 *
 * @param {Object} snap Frozen snapshot.
 * @throws Error from `invalidFrozenIndex` on the first violated invariant.
 */
function validateFrozenSnapshotNumeric(snap) {
    const fail = (detail) => {
        throw invalidFrozenIndex(detail);
    };
    if (snap.fieldCount <= 0) {
        fail('fieldCount must be positive');
    }
    if (snap.nextId < 0 || snap.nextId >= 0xffffffff) {
        fail('nextId out of range');
    }
    if (snap.documentCount < 0 || snap.documentCount > snap.nextId) {
        fail('documentCount inconsistent with nextId');
    }
    if (snap.fieldLengthMatrix.length !== snap.nextId * snap.fieldCount) {
        fail('fieldLengthMatrix size mismatch');
    }
    if (snap.avgFieldLength.length !== snap.fieldCount) {
        fail('avgFieldLength size mismatch');
    }
    validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, fail);
    const fieldNames = Object.keys(snap.fieldIds);
    if (fieldNames.length !== snap.fieldCount) {
        fail('fieldIds count mismatch');
    }
    // Same number of names as ids, and every id present: the mapping covers 0..fieldCount-1.
    const assignedIds = new Set(fieldNames.map((name) => snap.fieldIds[name]));
    for (let id = 0; id < snap.fieldCount; id++) {
        if (!assignedIds.has(id)) {
            fail(`missing field id ${id}`);
        }
    }
}
2870
/**
 * Decodes `fieldCount` length-prefixed UTF-8 names starting at `fieldNamesOff`.
 *
 * @param {Buffer} buf Buffer holding the section.
 * @param {number} fieldNamesOff Offset of the first name.
 * @param {number} fieldCount Number of names to read.
 * @param {number} externalIdsOff Offset where decoding must end.
 * @returns {string[]} Names in stored order.
 * @throws Error from `invalidFrozenIndex` when decoding does not end at `externalIdsOff`.
 */
function readFieldNamesSection(buf, fieldNamesOff, fieldCount, externalIdsOff) {
    const names = [];
    let cursor = fieldNamesOff;
    while (names.length < fieldCount) {
        const decoded = readLengthPrefixedUtf8(buf, cursor);
        names.push(decoded.value);
        cursor = decoded.next;
    }
    if (cursor === externalIdsOff) {
        return names;
    }
    throw invalidFrozenIndex('field names section size mismatch');
}
2883
/**
 * Decodes `nextId` external ids starting at `externalIdsOff`.
 *
 * @param {Buffer} buf Buffer holding the section.
 * @param {number} externalIdsOff Offset of the first id.
 * @param {number} nextId Number of ids to read.
 * @param {number} storedOff Offset where decoding must end.
 * @returns {Array} Decoded ids in stored order.
 * @throws Error from `invalidFrozenIndex` when decoding does not end at `storedOff`.
 */
function readExternalIdsSection(buf, externalIdsOff, nextId, storedOff) {
    const ids = new Array(nextId);
    let cursor = externalIdsOff;
    for (let docId = 0; docId < nextId; docId += 1) {
        const decoded = readExternalId(buf, cursor);
        ids[docId] = decoded.value;
        cursor = decoded.next;
    }
    if (cursor === storedOff) {
        return ids;
    }
    throw invalidFrozenIndex('external ids section size mismatch');
}
2896
/**
 * Decodes the stored-fields section: a table of `nextId` u32 entries followed by a
 * heap of length-prefixed JSON documents.
 * A table entry of 0 means "no stored fields"; any other value is the 1-based offset
 * of the document's heap entry, relative to the end of the table.
 *
 * @param {Buffer} buf Buffer holding the section.
 * @param {number} storedOff Offset of the table.
 * @param {number} nextId Number of table entries.
 * @param {number} sectionEnd Offset one past the last byte of the section.
 * @returns {Array} Parsed stored fields per document (`undefined` where absent).
 * @throws Error from `invalidFrozenIndex` when the table, an entry or its JSON lies
 *   outside the section, or when the JSON text cannot be parsed.
 */
function readStoredFieldsSection(buf, storedOff, nextId, sectionEnd) {
    const storedFields = new Array(nextId);
    const tableEnd = storedOff + nextId * 4;
    if (tableEnd > sectionEnd) {
        throw invalidFrozenIndex('stored fields table out of bounds');
    }
    for (let i = 0; i < nextId; i++) {
        const rel = buf.readUInt32LE(storedOff + i * 4);
        if (rel === 0) {
            storedFields[i] = undefined;
            continue;
        }
        // `rel` is 1-based so that 0 can mean "absent".
        const entryOff = tableEnd + rel - 1;
        if (entryOff + 4 > sectionEnd) {
            throw invalidFrozenIndex('stored fields entry offset out of bounds');
        }
        const jsonLen = buf.readUInt32LE(entryOff);
        const jsonStart = entryOff + 4;
        const jsonEnd = jsonStart + jsonLen;
        if (jsonEnd > sectionEnd) {
            throw invalidFrozenIndex('stored fields JSON out of bounds');
        }
        let parsed;
        try {
            parsed = JSON.parse(buf.toString('utf8', jsonStart, jsonEnd));
        }
        catch (err) {
            // Corrupt JSON is reported like every other corruption in this section,
            // not as a bare SyntaxError.
            throw invalidFrozenIndex(`stored fields JSON malformed (document ${i}): ${err.message}`);
        }
        storedFields[i] = parsed;
    }
    return storedFields;
}
2922
/**
 * Validates the structural invariants of a decoded or assembled frozen snapshot:
 * first the numeric invariants, then the leaves of whichever term-index form the
 * snapshot carries (packed index, then term tree, then serialized tree shape).
 *
 * @param {Object} snap Frozen snapshot.
 */
function validateFrozenSnapshot(snap) {
    validateFrozenSnapshotNumeric(snap);
    const termCount = termCountOf(snap);
    if (snap.packedTermIndex != null) {
        validateFrozenTermIndexLeaves(snap.packedTermIndex, termCount);
        return;
    }
    if (snap.termTree != null) {
        validateTermTreeLeaves(snap.termTree, termCount);
        return;
    }
    validateTreeShape(snap.treeShape, termCount);
}
2936
/**
 * Lists the field names of a name → id map, ordered by ascending id.
 *
 * @param {Object<string, number>} fieldIds Map from field name to numeric id.
 * @returns {string[]} Field names sorted by their id.
 */
function fieldNamesFromFieldIds(fieldIds) {
    const byId = (left, right) => fieldIds[left] - fieldIds[right];
    return Object.keys(fieldIds).sort(byId);
}
2941
/**
 * Builds the 16-byte core section: four little-endian u32 values in the order
 * documentCount, nextId, fieldCount, termCount. The term count is taken from the
 * snapshot's postings.
 *
 * @param {Object} snap Frozen snapshot.
 * @returns {Buffer} The encoded core section.
 */
function buildCoreSectionWithTermCount(snap) {
    const core = Buffer.alloc(16);
    let at = 0;
    at = core.writeUInt32LE(snap.documentCount, at);
    at = core.writeUInt32LE(snap.nextId, at);
    at = core.writeUInt32LE(snap.fieldCount, at);
    core.writeUInt32LE(snap.postings.termCount, at);
    return core;
}
2950
/**
 * Encodes field names as consecutive records of a u32 little-endian byte length
 * followed by the UTF-8 bytes of the name.
 *
 * @param {Iterable<string>} fieldNames Names to encode, in order.
 * @returns {Buffer} The encoded section.
 */
function buildFieldNamesSection(fieldNames) {
    const records = [];
    for (const fieldName of fieldNames) {
        const utf8 = Buffer.from(fieldName, 'utf8');
        const record = Buffer.alloc(4 + utf8.length);
        record.writeUInt32LE(utf8.length, 0);
        utf8.copy(record, 4);
        records.push(record);
    }
    return Buffer.concat(records);
}
2960
/**
 * Encodes the first `nextId` external ids, in order, via `writeExternalId`.
 *
 * @param {Array} externalIds External ids.
 * @param {number} nextId Number of ids to encode.
 * @returns {Buffer} The encoded section.
 */
function buildExternalIdsSection(externalIds, nextId) {
    const parts = [];
    let docId = 0;
    while (docId < nextId) {
        writeExternalId(parts, externalIds[docId]);
        docId += 1;
    }
    return Buffer.concat(parts);
}
2967
/**
 * Encodes stored fields as a table of `nextId` u32 entries followed by a heap of
 * JSON documents, each prefixed with its u32 byte length.
 * A table entry is 0 when the document has no stored fields (null / undefined),
 * otherwise the heap offset of its record plus one.
 *
 * @param {Array} storedFields Stored fields per document.
 * @param {number} nextId Number of documents.
 * @returns {Buffer} Table followed by the heap.
 */
function buildStoredFieldsSection(storedFields, nextId) {
    // Buffer.alloc zero-fills, so entries of absent documents already read as 0.
    const table = Buffer.alloc(nextId * 4);
    const parts = [table];
    let heapSize = 0;
    for (let docId = 0; docId < nextId; docId++) {
        const fields = storedFields[docId];
        if (fields == null) {
            continue;
        }
        table.writeUInt32LE(heapSize + 1, docId * 4);
        const json = Buffer.from(JSON.stringify(fields), 'utf8');
        const lengthPrefix = Buffer.alloc(4);
        lengthPrefix.writeUInt32LE(json.length, 0);
        parts.push(lengthPrefix, json);
        heapSize += 4 + json.length;
    }
    return Buffer.concat(parts);
}
2987
/**
 * Recursively checks that every LEAF entry of a term tree holds an integer term
 * index in `[0, termCount)`. Entries under any other key are treated as subtrees.
 *
 * @param {Iterable} tree Tree node yielding `[key, value]` pairs.
 * @param {number} termCount Number of terms.
 * @throws Error from `invalidFrozenIndex` on the first out-of-range leaf.
 */
function validateTermTreeLeaves(tree, termCount) {
    for (const [label, payload] of tree) {
        if (label !== LEAF) {
            validateTermTreeLeaves(payload, termCount);
            continue;
        }
        const outOfRange = !Number.isInteger(payload) || payload < 0 || payload >= termCount;
        if (outOfRange) {
            throw invalidFrozenIndex(`term tree leaf index out of range: ${payload}`);
        }
    }
}
3000
/**
 * Rebuilds a nested `Map` tree from its serialized array-of-pairs form.
 * LEAF entries keep their value as is; every other entry is deserialized recursively.
 *
 * @param {Iterable} shape Serialized node yielding `[key, value]` pairs.
 * @returns {Map} The reconstructed node.
 */
function deserializeTermIndexTree(shape) {
    const node = new Map();
    for (const [label, payload] of shape) {
        node.set(label, label === LEAF ? payload : deserializeTermIndexTree(payload));
    }
    return node;
}
3012
+
3013
/**
 * Global wire flag describing the width of a {@link FreqArray}: FLAG_FREQ_U16 for a
 * `Uint16Array`, otherwise 0.
 *
 * @param {Uint8Array|Uint16Array} freqs Frequency column.
 * @returns {number} Flag bits to OR into the global flags.
 */
function freqWireFlags(freqs) {
    return freqs instanceof Uint16Array ? FLAG_FREQ_U16 : 0;
}
3019
/**
 * Wraps the frequency section as a typed array without copying.
 * FLAG_FREQ_U16 selects `Uint16Array`, otherwise `Uint8Array`; the section must hold
 * exactly one element per posting.
 *
 * @param {Buffer} buf Frequency section bytes.
 * @param {number} globalFlags Global wire flags.
 * @param {number} postingCount Expected number of elements.
 * @returns {Uint8Array|Uint16Array} View over the section (a fresh empty array when there are no postings).
 * @throws Error from `invalidFrozenIndex` when the byte size does not match.
 */
function readFreqsSection(buf, globalFlags, postingCount) {
    const wide = (globalFlags & FLAG_FREQ_U16) !== 0;
    const ArrayType = wide ? Uint16Array : Uint8Array;
    if (buf.length !== postingCount * ArrayType.BYTES_PER_ELEMENT) {
        throw invalidFrozenIndex(wide ? 'allFreqs u16 size mismatch' : 'allFreqs u8 size mismatch');
    }
    if (postingCount === 0) {
        return new ArrayType(0);
    }
    return new ArrayType(buf.buffer, buf.byteOffset, postingCount);
}
3035
+
3036
/**
 * Derives the postings-related wire flags: sparse layout, 16-bit doc ids and 16-bit
 * sparse field ids.
 *
 * @param {Object} postings Frozen postings description.
 * @returns {number} OR of the applicable flag bits.
 */
function msv5PostingsFlags(postings) {
    const layoutBit = postings.layout === 'sparse' ? FLAG_SPARSE_LAYOUT : 0;
    const docIdBit = postings.docIdWidth === 16 ? FLAG_DOC_ID_16 : 0;
    const fieldIdBit = postings.sparseFieldIdWidth === 16 ? FLAG_FIELD_ID_16 : 0;
    return layoutBit | docIdBit | fieldIdBit;
}
3046
/**
 * Turns frozen postings into the five wire sections plus their flags.
 *
 * Dense layout: `meta` = offsets, `fields` = lengths, `optional` is empty.
 * Sparse layout: `meta` = term starts, `fields` = field ids, and `optional` holds a
 * u32 byte length of the offsets table, then the offsets table, then the lengths table.
 *
 * @param {Object} postings Frozen postings.
 * @returns {{flags: number, meta: Buffer, fields: Buffer, optional: Buffer, docIds: Buffer, freqs: Buffer}}
 * @throws Error from `invalidFrozenIndex` when a table required by the layout is missing.
 */
function buildMsv5PostingsSections(postings) {
    if (postings.layout === 'dense') {
        const missingDense = postings.denseOffsets == null || postings.denseLengths == null;
        if (missingDense) {
            throw invalidFrozenIndex('dense postings missing offset tables');
        }
        const flags = msv5PostingsFlags(postings);
        const meta = bufferFromView(postings.denseOffsets);
        const fields = bufferFromView(postings.denseLengths);
        const optional = Buffer.alloc(0);
        const docIds = bufferFromView(postings.allDocIds);
        const freqs = bufferFromView(postings.allFreqs);
        return { flags, meta, fields, optional, docIds, freqs };
    }
    const sparseTables = [
        postings.sparseTermStarts,
        postings.sparseFieldIds,
        postings.sparseOffsets,
        postings.sparseLengths,
    ];
    if (sparseTables.some((table) => table == null)) {
        throw invalidFrozenIndex('sparse postings missing tables');
    }
    const offsetBytes = bufferFromView(postings.sparseOffsets);
    const lengthBytes = bufferFromView(postings.sparseLengths);
    const optional = Buffer.alloc(4 + offsetBytes.length + lengthBytes.length);
    optional.writeUInt32LE(offsetBytes.length, 0);
    offsetBytes.copy(optional, 4);
    lengthBytes.copy(optional, 4 + offsetBytes.length);
    const flags = msv5PostingsFlags(postings);
    const meta = bufferFromView(postings.sparseTermStarts);
    const fields = bufferFromView(postings.sparseFieldIds);
    const docIds = bufferFromView(postings.allDocIds);
    const freqs = bufferFromView(postings.allFreqs);
    return { flags, meta, fields, optional, docIds, freqs };
}
3081
/**
 * Decodes the postings wire sections back into a frozen postings object.
 *
 * The flags select the layout (sparse or dense), the doc id width (16 or 32 bit) and,
 * for the sparse layout, the field id width (16 or 8 bit). In the sparse layout the
 * `optional` section starts with a u32 byte length of the offsets table, followed by
 * the offsets table and then the lengths table.
 *
 * @param {number} flags Global wire flags.
 * @param {number} fieldCount Number of indexed fields (copied into the result).
 * @param {number} termCount Number of terms (copied into the result).
 * @param {number} nextId Next internal document id (copied into the result).
 * @param {Buffer} meta Dense offsets or sparse term starts.
 * @param {Buffer} fields Dense lengths or sparse field ids.
 * @param {Buffer} optional Sparse offsets and lengths; not read for the dense layout.
 * @param {Buffer} docIds Doc id column.
 * @param {Buffer} freqs Frequency column.
 * @returns {Object} Postings with the tables of the unused layout set to null.
 * @throws Error from `invalidFrozenIndex` when the sparse optional section is too
 *   short for its length prefix or for the offsets table it declares.
 */
function decodeMsv5PostingsSections(flags, fieldCount, termCount, nextId, meta, fields, optional, docIds, freqs) {
    const sparse = (flags & FLAG_SPARSE_LAYOUT) !== 0;
    const docId16 = (flags & FLAG_DOC_ID_16) !== 0;
    const fieldId16 = (flags & FLAG_FIELD_ID_16) !== 0;
    const readDocIds = () => {
        if (docIds.length === 0)
            return docId16 ? new Uint16Array(0) : new Uint32Array(0);
        if (docId16)
            return readUint16Array(docIds, 0, docIds.length);
        return readUint32Array(docIds, 0, docIds.length);
    };
    const allDocIds = readDocIds();
    const allFreqs = readFreqsSection(freqs, flags, allDocIds.length);
    if (sparse) {
        const sparseFieldIdWidth = fieldId16 ? 16 : 8;
        // Check the length prefix exists before reading it, so a short section is
        // reported as a corrupt index rather than a raw out-of-range read.
        if (optional.length < 4) {
            throw invalidFrozenIndex('sparse optional section truncated');
        }
        const offLen = optional.readUInt32LE(0);
        if (4 + offLen > optional.length) {
            throw invalidFrozenIndex('sparse optional section truncated');
        }
        const sparseOffsets = readUint32Array(optional, 4, offLen);
        const sparseLengths = readUint32Array(optional, 4 + offLen, optional.length - 4 - offLen);
        const sparseTermStarts = readUint32Array(meta, 0, meta.length);
        const sparseFieldIds = readFieldIdArray(fields, 0, fields.length, sparseFieldIdWidth);
        return {
            fieldCount,
            termCount,
            nextId,
            layout: 'sparse',
            docIdWidth: docId16 ? 16 : 32,
            sparseFieldIdWidth,
            allDocIds,
            allFreqs,
            denseOffsets: null,
            denseLengths: null,
            sparseTermStarts,
            sparseFieldIds,
            sparseOffsets,
            sparseLengths,
        };
    }
    const denseOffsets = readUint32Array(meta, 0, meta.length);
    const denseLengths = readUint32Array(fields, 0, fields.length);
    return {
        fieldCount,
        termCount,
        nextId,
        layout: 'dense',
        docIdWidth: docId16 ? 16 : 32,
        sparseFieldIdWidth: null,
        allDocIds,
        allFreqs,
        denseOffsets,
        denseLengths,
        sparseTermStarts: null,
        sparseFieldIds: null,
        sparseOffsets: null,
        sparseLengths: null,
    };
}
3140
+
3141
+ const TREE_SECTION_HEADER_BYTES = 16; // term-tree section header: four little-endian u32 fields (size, nodeCount, edgeCount, column width flags)
3142
/**
 * Maps a column to its 2-bit width code: 0 for `Uint8Array`, 1 for `Uint16Array`,
 * 2 for anything else.
 *
 * @param {*} arr Column array.
 * @returns {number} Width code 0, 1 or 2.
 */
function columnWidthCode(arr) {
    if (arr instanceof Uint8Array) {
        return 0;
    }
    return arr instanceof Uint16Array ? 1 : 2;
}
3149
/**
 * Packs the width codes of the six tree columns into one flags word, two bits per
 * column in the order nodeEdgeOffset, nodeValue, nodeLeafOrder, edgeLabelStart,
 * edgeLabelLength, edgeChild.
 *
 * @param {Object} tree Packed tree exposing the six column arrays.
 * @returns {number} Packed width flags.
 * @throws {Error} When the column list does not match MSV5_TREE_COLUMN_COUNT.
 */
function columnWidthFlagsFromTree(tree) {
    const columns = [
        tree.nodeEdgeOffset,
        tree.nodeValue,
        tree.nodeLeafOrder,
        tree.edgeLabelStart,
        tree.edgeLabelLength,
        tree.edgeChild,
    ];
    if (columns.length !== MSV5_TREE_COLUMN_COUNT) {
        throw new Error('MSv5 tree column count mismatch');
    }
    return columns.reduce((packed, column, index) => packed | (columnWidthCode(column) << (index * 2)), 0);
}
3167
/**
 * Rounds a non-negative integer byte count up to the next multiple of 4.
 *
 * Plain arithmetic is used instead of `(n + 3) & -4` because bitwise operators
 * truncate to 32-bit signed integers, so counts of 2^31 - 3 and above would come back
 * negative or wrapped.
 *
 * @param {number} n Non-negative integer byte count.
 * @returns {number} Smallest multiple of 4 that is >= n.
 */
function pad4(n) {
    return Math.ceil(n / 4) * 4;
}
3170
/**
 * Appends a column's bytes to `chunks`, followed by zero padding up to the next
 * 4-byte boundary when needed. The column bytes are a view over the array's memory,
 * not a copy.
 *
 * @param {Buffer[]} chunks Output list, mutated in place.
 * @param {Uint8Array|Uint16Array|Uint32Array} arr Column to append.
 */
function appendColumn(chunks, arr) {
    const bytes = Buffer.from(arr.buffer, arr.byteOffset, arr.byteLength);
    const padding = pad4(bytes.length) - bytes.length;
    if (padding > 0) {
        chunks.push(bytes, Buffer.alloc(padding));
    }
    else {
        chunks.push(bytes);
    }
}
3177
/**
 * Serializes a packed term tree as: a header (size, nodeCount, edgeCount, column
 * width flags), the six columns each padded to a 4-byte boundary, then the label heap
 * as UTF-8.
 *
 * @param {Object} tree Packed tree.
 * @returns {Buffer} The encoded term-tree section.
 */
function buildTermTreeSectionColumnar(tree) {
    const header = Buffer.alloc(TREE_SECTION_HEADER_BYTES);
    header.writeUInt32LE(tree.size, 0);
    header.writeUInt32LE(tree.nodeCount, 4);
    header.writeUInt32LE(tree.edgeCount, 8);
    header.writeUInt32LE(columnWidthFlagsFromTree(tree), 12);
    const parts = [header];
    const columns = [
        tree.nodeEdgeOffset,
        tree.nodeValue,
        tree.nodeLeafOrder,
        tree.edgeLabelStart,
        tree.edgeLabelLength,
        tree.edgeChild,
    ];
    for (const column of columns) {
        appendColumn(parts, column);
    }
    parts.push(Buffer.from(tree.labelHeap, 'utf8'));
    return Buffer.concat(parts);
}
3194
/**
 * Extracts the element width in bytes of one tree column from the packed width flags
 * (two bits per column: 0 → 1 byte, 1 → 2 bytes, 2 → 4 bytes).
 *
 * @param {number} flags Packed width flags.
 * @param {number} columnIndex Zero-based column index.
 * @returns {number} 1, 2 or 4.
 * @throws Error from `invalidFrozenIndex` for the unused code 3.
 */
function widthFromFlags(flags, columnIndex) {
    const widthsByCode = [1, 2, 4];
    const code = (flags >> (columnIndex * 2)) & 3;
    const width = widthsByCode[code];
    if (width === undefined) {
        throw invalidFrozenIndex(`invalid tree column width code ${code}`);
    }
    return width;
}
3204
/**
 * Wraps one term-tree column as a typed array view over the section buffer.
 *
 * @param {Buffer} buf Term-tree section.
 * @param {number} offset Byte offset of the column inside `buf`.
 * @param {number} elementCount Number of elements in the column.
 * @param {number} width Element width in bytes (1, 2, or anything else for 4).
 * @returns {{arr: Uint8Array|Uint16Array|Uint32Array, next: number}} The view and the
 *   offset just past the column including its padding to a 4-byte boundary.
 * @throws Error from `invalidFrozenIndex` when the column does not fit in the section
 *   or its offset is not aligned for the element width.
 */
function readColumn(buf, offset, elementCount, width) {
    const byteLength = elementCount * width;
    // Check the unpadded size with plain arithmetic first, so a huge element count
    // from an untrusted header cannot slip past the bounds check through 32-bit
    // wraparound in the padding computation.
    if (byteLength > buf.length - offset) {
        throw invalidFrozenIndex('term tree column truncated');
    }
    const padded = pad4(byteLength);
    if (offset + padded > buf.length) {
        throw invalidFrozenIndex('term tree column truncated');
    }
    let arr;
    if (width === 1) {
        arr = elementCount === 0
            ? new Uint8Array(0)
            : new Uint8Array(buf.buffer, buf.byteOffset + offset, elementCount);
    }
    else if (width === 2) {
        // Columns are pad4-aligned at encode time; section buffers are 4-aligned in MSv5 payloads.
        if (offset % 2 !== 0) {
            throw invalidFrozenIndex('term tree Uint16 column misaligned');
        }
        arr = elementCount === 0
            ? new Uint16Array(0)
            : new Uint16Array(buf.buffer, buf.byteOffset + offset, elementCount);
    }
    else {
        if (offset % 4 !== 0) {
            throw invalidFrozenIndex('term tree Uint32 column misaligned');
        }
        arr = elementCount === 0
            ? new Uint32Array(0)
            : new Uint32Array(buf.buffer, buf.byteOffset + offset, elementCount);
    }
    return { arr, next: offset + padded };
}
3235
/**
 * Decodes the columnar term-tree section into a packed radix tree.
 * Layout: a header (size, nodeCount, edgeCount, width flags), six padded columns,
 * then the label heap as UTF-8 up to the end of the section.
 *
 * @param {Buffer} buf Term-tree section.
 * @param {number} termCount Expected term count; must equal the stored size.
 * @returns {Object} Tree built by `PackedRadixTree.fromData`, with its leaves validated.
 * @throws Error from `invalidFrozenIndex` when the section is too short, the term
 *   count differs, or a column is malformed.
 */
function readPackedTermTreeSectionColumnar(buf, termCount) {
    if (buf.length < TREE_SECTION_HEADER_BYTES) {
        throw invalidFrozenIndex('term tree section too short');
    }
    const size = buf.readUInt32LE(0);
    const nodeCount = buf.readUInt32LE(4);
    const edgeCount = buf.readUInt32LE(8);
    const widthFlags = buf.readUInt32LE(12);
    if (size !== termCount) {
        throw invalidFrozenIndex('term tree termCount mismatch');
    }
    // Element counts per column, in wire order; nodeEdgeOffset has one extra slot.
    const elementCounts = [nodeCount + 1, nodeCount, nodeCount, edgeCount, edgeCount, edgeCount];
    let cursor = TREE_SECTION_HEADER_BYTES;
    const columns = elementCounts.map((count, columnIndex) => {
        const column = readColumn(buf, cursor, count, widthFromFlags(widthFlags, columnIndex));
        cursor = column.next;
        return column.arr;
    });
    if (cursor > buf.length) {
        throw invalidFrozenIndex('term tree label heap out of bounds');
    }
    const labelHeap = cursor === buf.length ? '' : buf.toString('utf8', cursor, buf.length);
    const [nodeEdgeOffset, nodeValue, nodeLeafOrder, edgeLabelStart, edgeLabelLength, edgeChild] = columns;
    const tree = PackedRadixTree.fromData({
        size,
        nodeCount,
        edgeCount,
        labelHeap,
        nodeEdgeOffset,
        nodeValue,
        nodeLeafOrder,
        edgeLabelStart,
        edgeLabelLength,
        edgeChild,
    });
    validateFrozenTermIndexLeaves(tree, termCount);
    return tree;
}
3280
+
3281
/**
 * Produces the packed term index to serialize.
 * An explicitly passed `packedTermIndex` wins, then `snap.packedTermIndex`; both are
 * validated and returned as is. Without either, the tree is rebuilt from
 * `snap.treeShape`, validated and packed with `fromRadixTree`.
 *
 * NOTE(review): `termTree` is accepted but never read here — confirm whether it was
 * meant to be used as a fallback before `snap.treeShape`.
 *
 * @param {Object} snap Frozen snapshot.
 * @param {*} termTree Unused.
 * @param {Object} [packedTermIndex] Packed index to use instead of the snapshot's.
 * @returns {Object} Packed term index.
 */
function resolvePackedTree(snap, termTree, packedTermIndex) {
    const termCount = termCountOf(snap);
    const packed = packedTermIndex != null ? packedTermIndex : snap.packedTermIndex;
    if (packed == null) {
        const radix = deserializeTermIndexTree(snap.treeShape);
        validateTermTreeLeaves(radix, termCount);
        return fromRadixTree(radix, termCount);
    }
    validateFrozenTermIndexLeaves(packed, termCount);
    return packed;
}
3292
/**
 * Encodes a frozen snapshot as an MSv5 file, synchronously.
 * Validates the numeric invariants, resolves field names and the packed term index,
 * builds the twelve raw sections in wire order and hands them to the assembler.
 *
 * @param {Object} snap Frozen snapshot.
 * @param {*} termTree Forwarded to `resolvePackedTree`.
 * @param {Object} [packedTermIndex] Forwarded to `resolvePackedTree`.
 * @returns {Buffer} The complete MSv5 file.
 * @throws Error from `invalidFrozenIndex` when the snapshot is inconsistent.
 */
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
    validateFrozenSnapshotNumeric(snap);
    const explicitNames = snap.fieldNames;
    const fieldNames = explicitNames != null ? explicitNames : fieldNamesFromFieldIds(snap.fieldIds);
    if (fieldNames.length !== snap.fieldCount) {
        throw invalidFrozenIndex('fieldNames length mismatch');
    }
    const packed = resolvePackedTree(snap, termTree, packedTermIndex);
    const wire = buildMsv5PostingsSections(snap.postings);
    const matrixFlags = fieldLengthMatrixWireFlags(snap.fieldLengthMatrix);
    const freqFlags = freqWireFlags(snap.postings.allFreqs);
    const globalFlags = wire.flags | matrixFlags | freqFlags;
    // Section order is part of the wire format.
    const rawSections = [];
    rawSections.push(buildCoreSectionWithTermCount(snap));
    rawSections.push(buildFieldNamesSection(fieldNames));
    rawSections.push(buildExternalIdsSection(snap.externalIds, snap.nextId));
    rawSections.push(buildStoredFieldsSection(snap.storedFields, snap.nextId));
    rawSections.push(buildTermTreeSectionColumnar(packed));
    rawSections.push(bufferFromView(snap.avgFieldLength));
    rawSections.push(buildFieldLengthMatrixSection(snap.fieldLengthMatrix));
    rawSections.push(wire.meta, wire.fields, wire.optional, wire.docIds, wire.freqs);
    const assembled = assembleMsv5File(globalFlags, rawSections);
    return assembled.buffer;
}
3320
+ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
3321
+ var _a;
3322
+ validateFrozenSnapshotNumeric(snap);
3323
+ const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
3324
+ if (fieldNames.length !== snap.fieldCount) {
3325
+ throw invalidFrozenIndex('fieldNames length mismatch');
3326
+ }
3327
+ const packed = resolvePackedTree(snap, termTree, packedTermIndex);
3328
+ const postingsWire = buildMsv5PostingsSections(snap.postings);
3329
+ const flFlags = fieldLengthMatrixWireFlags(snap.fieldLengthMatrix);
3330
+ const freqFlags = freqWireFlags(snap.postings.allFreqs);
3331
+ const globalFlags = postingsWire.flags | flFlags | freqFlags;
3332
+ const rawSections = [
3333
+ buildCoreSectionWithTermCount(snap),
3334
+ buildFieldNamesSection(fieldNames),
3335
+ buildExternalIdsSection(snap.externalIds, snap.nextId),
3336
+ buildStoredFieldsSection(snap.storedFields, snap.nextId),
3337
+ buildTermTreeSectionColumnar(packed),
3338
+ bufferFromView(snap.avgFieldLength),
3339
+ buildFieldLengthMatrixSection(snap.fieldLengthMatrix),
3340
+ postingsWire.meta,
3341
+ postingsWire.fields,
3342
+ postingsWire.optional,
3343
+ postingsWire.docIds,
3344
+ postingsWire.freqs,
3345
+ ];
3346
+ return (await assembleMsv5FileAsync(globalFlags, rawSections)).buffer;
3347
+ }
3348
+
3349
/**
 * Check the MSv5 container header and return what is needed to load its sections.
 *
 * @param buf Buffer holding the whole snapshot file
 * @return `{ globalFlags, directory }` read from the header
 * @throws When the magic, version or payload bounds are not acceptable
 */
function validateMsv5Container(buf) {
    if (!isMsv5Buffer(buf)) {
        throw invalidFrozenIndex('not a frozen binary snapshot');
    }
    const version = buf.readUInt16LE(4);
    if (version !== 5) {
        throw invalidFrozenIndex(`unsupported frozen snapshot version=${version}`);
    }
    const container = {
        globalFlags: readMsv5GlobalFlags(buf),
        directory: readMsv5SectionDirectory(buf),
    };
    const payloadStart = buf.readUInt32LE(MSV5_PAYLOAD_COMPRESSED_OFFSET);
    const payloadEnd = payloadStart + buf.readUInt32LE(MSV5_PAYLOAD_COMPRESSED_LENGTH_OFFSET);
    // The payload must begin right after the header and end inside the buffer.
    const inBounds = payloadStart === MSV5_HEADER_SIZE && payloadEnd <= buf.length;
    if (!inBounds) {
        throw invalidFrozenIndex('frozen snapshot payload out of bounds');
    }
    return container;
}
3366
/**
 * Turn the raw MSv5 sections into a validated frozen snapshot object.
 *
 * Sections are addressed by their numeric id (0 = core ... 11 = frequencies). The core
 * section supplies the four counters that size every other section.
 *
 * @param globalFlags Flags read from the container header
 * @param sections Section buffers, indexed by section id
 * @return The decoded snapshot (`treeShape` is left empty; the packed index is used instead)
 * @throws When the core section is not 16 bytes or the postings disagree on the term count
 */
function decodeMsv5Sections(globalFlags, sections) {
    const coreSection = sections[0 /* Msv5SectionId.Core */];
    if (coreSection.length !== 16) {
        throw invalidFrozenIndex('core section size mismatch');
    }
    const [documentCount, nextId, fieldCount, termCount] = [0, 4, 8, 12]
        .map(byteOffset => coreSection.readUInt32LE(byteOffset));
    const namesSection = sections[1 /* Msv5SectionId.FieldNames */];
    const fieldNames = readFieldNamesSection(namesSection, 0, fieldCount, namesSection.length);
    // Field ids are simply positions in the names list.
    const fieldIds = {};
    for (let position = 0; position < fieldNames.length; position += 1) {
        fieldIds[fieldNames[position]] = position;
    }
    const idsSection = sections[2 /* Msv5SectionId.ExternalIds */];
    const externalIds = readExternalIdsSection(idsSection, 0, nextId, idsSection.length);
    const storedSection = sections[3 /* Msv5SectionId.StoredFields */];
    const storedFields = readStoredFieldsSection(storedSection, 0, nextId, storedSection.length);
    const packedTermIndex = readPackedTermTreeSectionColumnar(sections[4 /* Msv5SectionId.TermTree */], termCount);
    const avgSection = sections[5 /* Msv5SectionId.AvgFieldLength */];
    const avgFieldLength = readFloat32Array(avgSection, 0, avgSection.length);
    const fieldLengthMatrix = readFieldLengthMatrixSection(sections[6 /* Msv5SectionId.FieldLengthMatrix */], globalFlags, nextId * fieldCount);
    const postings = decodeMsv5PostingsSections(
        globalFlags,
        fieldCount,
        termCount,
        nextId,
        sections[7 /* Msv5SectionId.PostMeta */],
        sections[8 /* Msv5SectionId.PostFields */],
        sections[9 /* Msv5SectionId.PostOptional */],
        sections[10 /* Msv5SectionId.AllDocIds */],
        sections[11 /* Msv5SectionId.AllFreqs */]);
    if (postings.termCount !== termCount) {
        throw invalidFrozenIndex('core termCount mismatch with postings');
    }
    const decoded = {
        documentCount,
        nextId,
        fieldIds,
        fieldCount,
        fieldNames,
        avgFieldLength,
        externalIds,
        storedFields,
        fieldLengthMatrix,
        treeShape: [],
        packedTermIndex,
        postings,
    };
    validateFrozenSnapshot(decoded);
    return decoded;
}
3407
/** Validate an MSv5 buffer, load its sections synchronously and decode them. */
function decodeFrozenSnapshotMsv5(buf) {
    const container = validateMsv5Container(buf);
    const sections = loadMsv5Sections(buf, container.directory);
    return decodeMsv5Sections(container.globalFlags, sections);
}
3411
/** Validate an MSv5 buffer, load its sections asynchronously and decode them. */
async function decodeFrozenSnapshotMsv5Async(buf) {
    const container = validateMsv5Container(buf);
    const sections = await loadMsv5SectionsAsync(buf, container.directory);
    return decodeMsv5Sections(container.globalFlags, sections);
}
3415
+
3416
/**
 * Encode a frozen snapshot as a binary buffer.
 * Delegates to the MSv5 encoder with the arguments unchanged.
 */
function encodeFrozenSnapshot(snap, termTree, packedTermIndex) {
    const encoded = encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex);
    return encoded;
}
3420
/**
 * Async encoder; uses non-blocking zstd compression for large payloads.
 * Delegates to the async MSv5 encoder and returns its promise as is.
 */
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
    const pending = encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex);
    return pending;
}
3424
+
3425
/** Magic strings that get the more specific "re-build" error message. */
const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
/**
 * Decode a frozen binary snapshot buffer.
 *
 * Only MSv5 containers with version 5 are decoded; everything else is rejected, with a
 * more specific message when the magic is one of the legacy ones.
 *
 * @param buf Buffer holding the snapshot (at least 8 bytes)
 * @return The decoded snapshot
 */
function decodeFrozenSnapshot(buf) {
    assertBufferLength(buf, 8);
    if (isMsv5Buffer(buf) && buf.readUInt16LE(4) === 5) {
        return decodeFrozenSnapshotMsv5(buf);
    }
    const magic = buf.toString('ascii', 0, 4);
    const reason = LEGACY_MAGICS.has(magic)
        ? 'Unsupported frozen binary snapshot; re-build with saveBinarySync() or from lucaong JSON'
        : 'Unsupported frozen binary snapshot';
    throw invalidFrozenIndex(reason);
}
3439
/**
 * Async frozen snapshot decode (streaming zstd).
 *
 * MSv5 version 5 buffers go through the async decoder; anything else is handed to the
 * synchronous `decodeFrozenSnapshot`.
 */
async function decodeFrozenSnapshotAsync(buf) {
    assertBufferLength(buf, 8);
    const isCurrentFormat = isMsv5Buffer(buf) && buf.readUInt16LE(4) === 5;
    return isCurrentFormat
        ? decodeFrozenSnapshotMsv5Async(buf)
        : decodeFrozenSnapshot(buf);
}
3448
+
3449
/**
 * Look up the numeric index of `term`, registering it when it is new.
 *
 * A new term is appended to `state.terms` and its position is stored in `index`.
 *
 * @param state Builder state holding the `terms` list
 * @param index Map-like term -> index lookup (`get` / `set`)
 * @param term Term to resolve
 * @return The term's index
 */
function getOrCreateTermIndex(state, index, term) {
    let termIndex = index.get(term);
    if (termIndex == null) {
        termIndex = state.terms.length;
        state.terms.push(term);
        index.set(term, termIndex);
    }
    return termIndex;
}
3458
/**
 * Record one (term, field, document) posting in the builder state.
 *
 * Postings are bucketed by `termIndex * fieldCount + fieldId`. The raw frequency is
 * stored, while `state.maxFreq` tracks the largest clamped frequency seen so far.
 */
function appendPosting(state, termIndex, fieldId, docId, freq) {
    const slot = termIndex * state.fieldCount + fieldId;
    if (state.postingsDocIds[slot] == null) {
        // First posting for this term/field pair: create both parallel lists.
        state.postingsDocIds[slot] = [];
        state.postingsFreqs[slot] = [];
    }
    state.postingsDocIds[slot].push(docId);
    state.postingsFreqs[slot].push(freq);
    const clamped = clampFreq(freq);
    if (state.maxFreq < clamped) {
        state.maxFreq = clamped;
    }
    state.totalPostings += 1;
}
3473
/**
 * Hand the accumulated builder postings to `materializeFrozenPostingsFromBuilder`.
 * The term count is taken from the length of `state.terms`.
 */
function finalizeFlatPostings(state, nextId) {
    const { fieldCount, terms, postingsDocIds, postingsFreqs, totalPostings, maxFreq } = state;
    const builderInput = {
        fieldCount,
        termCount: terms.length,
        postingsDocIds,
        postingsFreqs,
        totalPostings,
        maxFreq,
    };
    return materializeFrozenPostingsFromBuilder(builderInput, nextId);
}
3483
/** Incremental builder for {@link FrozenMiniSearch} without materializing a full `documents[]` array. */
class FrozenIndexBuilder {
    /**
     * @param options Indexing options; normalized through `resolveIndexingOptions`
     * @param hints Optional hints. `estimatedDocumentCount` pre-sizes the per-document
     *   arrays. It is only a hint: values that cannot be used as an array length
     *   (fractional, non-finite, non-numeric, too large) are ignored.
     */
    constructor(options, hints) {
        this._options = resolveIndexingOptions(options);
        this._fieldIds = buildFieldIds(this._options.fields);
        this._fieldCount = this._options.fields.length;
        this._index = new SearchableMap();
        this._terms = [];
        this._postingsDocIds = [];
        this._postingsFreqs = [];
        this._avgFieldLength = [];
        this._seenIds = new Set();
        this._nextId = 0;
        this._frozen = false;
        const estimated = hints == null ? undefined : hints.estimatedDocumentCount;
        // `new Array(n)` throws RangeError unless `n` is an integer in [0, 2^32 - 1].
        // A bad hint must not make construction fail, so fall back to growable arrays.
        const maxArrayLength = 0xffffffff;
        const canPreallocate = Number.isSafeInteger(estimated)
            && estimated > 0
            && estimated <= maxArrayLength
            && estimated * this._fieldCount <= maxArrayLength;
        if (canPreallocate) {
            this._externalIds = new Array(estimated);
            this._storedFields = new Array(estimated);
            this._fieldLengthData = new Array(estimated * this._fieldCount).fill(0);
        }
        else {
            this._externalIds = [];
            this._storedFields = [];
            this._fieldLengthData = [];
        }
        // Shared view handed to the posting helpers; it aliases the arrays above.
        this._postingsState = {
            fieldCount: this._fieldCount,
            terms: this._terms,
            postingsDocIds: this._postingsDocIds,
            postingsFreqs: this._postingsFreqs,
            totalPostings: 0,
            maxFreq: 0,
        };
    }
    /** Number of documents indexed so far (not yet frozen). */
    get documentCount() {
        return this._nextId;
    }
    /**
     * Index a single document.
     *
     * @param document The document to index
     * @throws When the builder is already frozen, the document has no ID, or the ID was seen before
     */
    add(document) {
        if (this._frozen) {
            throw new Error('FrozenIndexBuilder: cannot add after freezeParams()');
        }
        const { extractField, stringifyField, tokenize, processTerm, fields, idField, storeFields } = this._options;
        const id = extractField(document, idField);
        if (id == null) {
            throw new Error(`MiniSearch: document does not have ID field "${idField}"`);
        }
        if (this._seenIds.has(id)) {
            throw new Error(`MiniSearch: duplicate ID ${id}`);
        }
        this._seenIds.add(id);
        // Documents get consecutive short ids in insertion order.
        const shortId = this._nextId++;
        this._externalIds[shortId] = id;
        this._storedFields[shortId] = saveStoredFieldsForDocument(storeFields, extractField, document);
        const documentCount = shortId + 1;
        for (const field of fields) {
            const fieldValue = extractField(document, field);
            if (fieldValue == null)
                continue;
            const tokens = tokenize(stringifyField(fieldValue, field), field);
            const fieldId = this._fieldIds[field];
            // Field length is the number of distinct raw tokens (before processTerm).
            const uniqueTerms = new Set(tokens).size;
            const localFreqs = collectFieldTermFreqs(tokens, field, processTerm);
            this._fieldLengthData[shortId * this._fieldCount + fieldId] = uniqueTerms;
            updateAvgFieldLength(this._avgFieldLength, fieldId, documentCount - 1, uniqueTerms);
            for (const [term, freq] of localFreqs) {
                const ti = getOrCreateTermIndex(this._postingsState, this._index, term);
                appendPosting(this._postingsState, ti, fieldId, shortId, freq);
            }
        }
    }
    /**
     * Adds all the given documents to the index.
     *
     * @param documents An array of documents to be indexed
     */
    addAll(documents) {
        for (const document of documents)
            this.add(document);
    }
    /**
     * Adds all the given documents to the index asynchronously.
     *
     * Returns a promise that resolves (to `undefined`) when the indexing is done.
     * This method is useful when indexing many documents, to avoid blocking the main
     * thread. The indexing is performed asynchronously and in chunks. Finalize with
     * {@link freezeFrozenIndexBuilder} when done.
     *
     * @param documents An array of documents to be indexed
     * @param options Configuration options
     * @return A promise resolving to `undefined` when the indexing is done
     */
    addAllAsync(documents, options = {}) {
        const { chunkSize = 10 } = options;
        if (!Number.isInteger(chunkSize) || chunkSize < 1) {
            throw new Error('MiniSearch: chunkSize must be a positive integer');
        }
        const acc = { chunk: [], promise: Promise.resolve() };
        const { chunk, promise } = documents.reduce(({ chunk, promise }, document, i) => {
            chunk.push(document);
            if ((i + 1) % chunkSize === 0) {
                // A full chunk: yield to the event loop once, then index it.
                return {
                    chunk: [],
                    promise: promise
                        .then(() => new Promise(resolve => setTimeout(resolve, 0)))
                        .then(() => this.addAll(chunk)),
                };
            }
            else {
                return { chunk, promise };
            }
        }, acc);
        // Index whatever is left in the last, partially filled chunk.
        return promise.then(() => this.addAll(chunk));
    }
    /**
     * Finalize this builder into assembly params. Call {@link assembleFrozen} or
     * {@link freezeFrozenIndexBuilder} to obtain a {@link FrozenMiniSearch} instance.
     *
     * @throws When called more than once
     */
    freezeParams() {
        if (this._frozen) {
            throw new Error('FrozenIndexBuilder: freezeParams() already called');
        }
        this._frozen = true;
        const documentCount = this._nextId;
        const postings = finalizeFlatPostings(this._postingsState, documentCount);
        const avgFieldLength = new Float32Array(this._fieldCount);
        for (let f = 0; f < this._fieldCount; f++) {
            const avg = this._avgFieldLength[f];
            avgFieldLength[f] = avg != null ? avg : 0;
        }
        // Trim (or extend) the per-document arrays to the real document count.
        this._fieldLengthData.length = documentCount * this._fieldCount;
        const externalIds = this._externalIds.length > documentCount
            ? this._externalIds.slice(0, documentCount)
            : this._externalIds;
        const storedFields = this._storedFields.length > documentCount
            ? this._storedFields.slice(0, documentCount)
            : this._storedFields;
        const idLookup = createIdToShortIdLookup(externalIds, documentCount);
        // Incremental builder: numeric radix leaves + build-time terms[] for postings.
        // freezeFromMiniSearch packs Map leaves in one radix pass (no resident terms[]).
        return {
            options: this._options,
            documentCount,
            nextId: documentCount,
            fieldIds: this._fieldIds,
            fieldCount: this._fieldCount,
            externalIds,
            idLookup,
            storedFields,
            fieldLengthMatrix: materializeFieldLengthMatrix(this._fieldLengthData, documentCount * this._fieldCount),
            avgFieldLength,
            index: fromRadixTree(this._index.radixTree, this._terms.length),
            termCount: this._terms.length,
            postings,
        };
    }
}
3640
/**
 * Create an incremental builder for {@link FrozenMiniSearch}.
 *
 * @param options Indexing options forwarded to the builder
 * @param hints Optional sizing hints forwarded to the builder
 */
function createFrozenIndexBuilder(options, hints) {
    const builder = new FrozenIndexBuilder(options, hints);
    return builder;
}
3644
/**
 * Index a whole array of documents in one go and return the frozen assembly params.
 * The array length is passed to the builder as its document-count hint.
 */
function buildFrozenParamsFromDocuments(documents, options) {
    const hints = { estimatedDocumentCount: documents.length };
    const builder = createFrozenIndexBuilder(options, hints);
    builder.addAll(documents);
    return builder.freezeParams();
}
3651
+
3652
/**
 * Internal AND / AND_NOT gate thresholds (not exported from the public package entry).
 *
 * `maxAbsolute` caps the gate size outright; `maxFraction` scales it with the
 * number of documents.
 */
const DEFAULT_AND_GATE_LIMITS = {
    maxAbsolute: 5000,
    maxFraction: 0.1,
};
3659
/**
 * Largest gate size still considered selective for an index of `documentCount` documents:
 * the configured fraction of the documents, at least 100 and at most `limits.maxAbsolute`.
 */
function resolveGateMaxSize(documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
    const proportional = Math.floor(documentCount * limits.maxFraction);
    const atLeastHundred = Math.max(100, proportional);
    return Math.min(limits.maxAbsolute, atLeastHundred);
}
3662
/**
 * Whether a gate of `gateSize` document ids is small enough to be worth applying.
 * An empty gate always qualifies.
 */
function gateIsSelectiveEnough(gateSize, documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
    return gateSize === 0 || gateSize <= resolveGateMaxSize(documentCount, limits);
}
3667
+
3668
/**
 * Decide whether a combination should be evaluated with docId gating.
 * `run` is accepted but not consulted; the decision is delegated to
 * `shouldUseGatedEvaluation`.
 */
function useGatedEvaluation(run, branchCount, operator, hasWildcard) {
    const gated = shouldUseGatedEvaluation(branchCount, operator, hasWildcard);
    return gated;
}
3671
/** Collect the keys of a result map into a fresh Set of document ids. */
function docIdsFromResult(result) {
    const docIds = new Set();
    for (const docId of result.keys()) {
        docIds.add(docId);
    }
    return docIds;
}
3674
/** True when `query` is a non-null object carrying an array under `queries`. */
function isQueryCombination(query) {
    if (query == null || typeof query !== 'object') {
        return false;
    }
    return 'queries' in query && Array.isArray(query.queries);
}
3680
/**
 * Whether a query combination contains a wildcard query at any nesting depth.
 *
 * Nested branches are recognized with `isQueryCombination`, the same test the query
 * executor uses. A malformed branch (an object whose `queries` is not an array) is
 * therefore not recursed into, instead of crashing here with a TypeError on `.some`.
 *
 * @param query A query combination (`query.queries` must be an array)
 * @return `true` when a wildcard is found
 */
function combinationHasWildcard(query) {
    return query.queries.some(branch => isWildcardQuery(branch)
        || (isQueryCombination(branch) && combinationHasWildcard(branch)));
}
3683
/** True for the operators evaluated with a docId gate: `and` and `and_not` (case-insensitive). */
function isGatedCombinationOperator(operator) {
    return ['and', 'and_not'].includes(operator.toLowerCase());
}
3687
/**
 * Gated evaluation applies only when there is no wildcard, more than one branch,
 * and the operator is one of the gated ones.
 */
function shouldUseGatedEvaluation(branchCount, operator, hasWildcard) {
    const nothingToGate = hasWildcard || branchCount <= 1;
    return nothingToGate ? false : isGatedCombinationOperator(operator);
}
3694
/**
 * Maximum edit distance to use for a query spec.
 *
 * `fuzzy === true` means a ratio of 0.2. A ratio below 1 is scaled by the term length,
 * rounded, and capped at `maxFuzzy`; a value of 1 or more is used as the distance itself.
 *
 * @return 0 when fuzzy matching is off
 */
function maxFuzzyDistance(query, maxFuzzy) {
    if (!query.fuzzy) {
        return 0;
    }
    const setting = query.fuzzy === true ? 0.2 : query.fuzzy;
    if (setting < 1) {
        const scaled = Math.round(query.term.length * setting);
        return Math.min(maxFuzzy, scaled);
    }
    return setting;
}
3702
/**
 * Tokenize and process a string query into per-term query specs.
 *
 * Options are layered: the index tokenizer/processor, then the global search options,
 * then the per-call `searchOptions`. Falsy processed terms are dropped.
 *
 * @return `{ options, specs, operator }` where `operator` is `options.combineWith`
 */
function normalizeStringQuery(query, searchOptions, params) {
    const { tokenize, processTerm, globalSearchOptions } = params;
    const options = { tokenize, processTerm, ...globalSearchOptions, ...searchOptions };
    const processed = options.tokenize(query).flatMap(token => options.processTerm(token));
    const terms = processed.filter(Boolean);
    const toSpec = termToQuerySpec(options);
    return {
        options,
        specs: terms.map(toSpec),
        operator: options.combineWith,
    };
}
3718
/** Wrap a term index as a `lazy` term whose string is only resolved when `resolve()` is called. */
function lazyIndexedTerm(indexView, termIndex) {
    const resolve = () => indexView.resolveTermByIndex(termIndex);
    return { kind: 'lazy', resolve };
}
3721
/**
 * Walk every term that matches a query spec and report it to `visit`.
 *
 * `visit(data, term, weight)` is called once for the exact term (with `undefined` data
 * when the term is not indexed), then for each strict prefix match, then for each fuzzy
 * match that was not already reported as a prefix match. Derived terms are passed lazily.
 */
function visitQuerySpecForScoring(query, options, params, visit) {
    const view = params.indexView;
    const mergedWeights = { ...defaultSearchOptions.weights, ...options.weights };
    const fuzzyWeight = mergedWeights.fuzzy;
    const prefixWeight = mergedWeights.prefix;
    const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
    const exactIndex = view.resolveTermIndex(query.term);
    const exactData = exactIndex == null ? undefined : view.fieldTermData(exactIndex);
    visit(exactData, query.term, 1);
    const prefixMatched = new Set();
    if (query.prefix) {
        for (const match of view.getPrefixMatchesByIndex(query.term)) {
            const { termIndex, length } = match;
            const extra = length - query.term.length;
            // Zero extra characters is the exact term, already visited above.
            if (!extra) {
                continue;
            }
            prefixMatched.add(termIndex);
            const weight = prefixWeight * length / (length + 0.3 * extra);
            visit(view.fieldTermData(termIndex), lazyIndexedTerm(view, termIndex), weight);
        }
    }
    if (maxDistance) {
        for (const match of view.getFuzzyMatchesByIndex(query.term, maxDistance)) {
            const { termIndex, length, distance } = match;
            if (!distance || prefixMatched.has(termIndex)) {
                continue;
            }
            const weight = fuzzyWeight * length / (length + distance);
            visit(view.fieldTermData(termIndex), lazyIndexedTerm(view, termIndex), weight);
        }
    }
}
3746
/**
 * Score a single query spec and return the per-document results map.
 * When `allowedDocs` is given it is forwarded to the aggregation as `{ allowedDocs }`.
 */
function executeQuerySpecInternal(query, searchOptions, params, allowedDocs) {
    const options = { ...params.globalSearchOptions, ...searchOptions };
    const fieldBoosts = fieldBoostsForQuery(options, params.fields);
    const termOptions = allowedDocs == null ? undefined : { allowedDocs };
    const scored = new Map();
    const accumulate = (data, derivedTerm, termWeight) => {
        aggregateTerm(query.term, derivedTerm, termWeight, query.termBoost, data, fieldBoosts, params.aggregateContext, options.boostDocument, options.bm25, scored, termOptions);
    };
    visitQuerySpecForScoring(query, options, params, accumulate);
    return scored;
}
3756
/**
 * Collect the ids of all documents matching a query spec, without scoring them.
 *
 * Visits the exact term, then strict prefix matches, then fuzzy matches that were not
 * already covered as prefix matches, adding their documents to one Set.
 */
function collectDocIdsForQuerySpec(query, searchOptions, params, allowedDocs) {
    const options = { ...params.globalSearchOptions, ...searchOptions };
    const fieldBoosts = fieldBoostsForQuery(options, params.fields);
    const matched = new Set();
    const { indexView, aggregateContext } = params;
    const collect = (termIndex) => {
        indexView.collectDocIds(termIndex, fieldBoosts, aggregateContext, matched, allowedDocs);
    };
    const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
    const exactIndex = indexView.resolveTermIndex(query.term);
    if (exactIndex != null) {
        collect(exactIndex);
    }
    const prefixMatched = new Set();
    if (query.prefix) {
        for (const match of indexView.getPrefixMatchesByIndex(query.term)) {
            const extra = match.length - query.term.length;
            // Zero extra characters is the exact term, handled above.
            if (!extra) {
                continue;
            }
            prefixMatched.add(match.termIndex);
            collect(match.termIndex);
        }
    }
    if (!maxDistance) {
        return matched;
    }
    for (const match of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
        if (!match.distance || prefixMatched.has(match.termIndex)) {
            continue;
        }
        collect(match.termIndex);
    }
    return matched;
}
3785
/** Remove from `docIds` every id that is not also in `branchDocIds` (mutates `docIds`). */
function intersectDocIdsInPlace(docIds, branchDocIds) {
    docIds.forEach((docId) => {
        if (!branchDocIds.has(docId)) {
            docIds.delete(docId);
        }
    });
}
3791
/** Remove every id in `excludedDocIds` from the `docIds` set (mutates `docIds`). */
function subtractDocIdsInPlace(docIds, excludedDocIds) {
    excludedDocIds.forEach((docId) => {
        docIds.delete(docId);
    });
}
3795
/** Drop the entries of `result` whose key is in `excludedDocIds` (mutates `result`). */
function subtractDocIdsFromResult(result, excludedDocIds) {
    excludedDocIds.forEach((docId) => {
        result.delete(docId);
    });
}
3799
/**
 * Combine the document-id sets of several branches without scoring.
 *
 * `or` unions all branches. `and` / `and_not` start from the first branch and narrow it
 * with each later branch, passing the current survivors to `collectBranch` as the
 * allowed set.
 *
 * @param branches Branch queries or specs
 * @param operator `or`, `and` or `and_not` (case-insensitive)
 * @param collectBranch `(branch, allowedDocs) => Set` collector for one branch
 * @param allowedDocs Optional outer gate
 * @throws For any other operator (after the first branch has been collected)
 */
function collectCombinedDocIds(branches, operator, collectBranch, allowedDocs) {
    if (branches.length === 0) {
        return new Set();
    }
    const op = operator.toLowerCase();
    if (op === 'or') {
        const union = new Set();
        branches.forEach((branch) => {
            for (const docId of collectBranch(branch, allowedDocs)) {
                union.add(docId);
            }
        });
        return union;
    }
    const survivors = collectBranch(branches[0], allowedDocs);
    let narrow = null;
    if (op === 'and') {
        narrow = intersectDocIdsInPlace;
    }
    else if (op === 'and_not') {
        narrow = subtractDocIdsInPlace;
    }
    if (narrow === null) {
        throw new Error(`Invalid combination operator: ${operator}`);
    }
    for (const branch of branches.slice(1)) {
        narrow(survivors, collectBranch(branch, survivors));
    }
    return survivors;
}
3827
/**
 * Evaluate a combination of branches with docId gating.
 *
 * OR: every branch is scored with the outer gate and the results are combined.
 * AND: every branch is scored; later branches are restricted to the ids that survived so
 * far whenever that set is selective enough, then the scores are intersected.
 * AND_NOT: only the first branch is scored; later branches are collected as docId sets
 * and removed from the result without scoring.
 *
 * `run` is accepted but not used here.
 *
 * @throws For any operator other than `or`, `and`, `and_not` (after the first branch ran)
 */
function executeCombinedBranches(branches, operator, params, executeBranch, collectBranch, allowedDocs, run) {
    if (branches.length === 0) {
        return new Map();
    }
    const op = operator.toLowerCase();
    if (op === 'or') {
        const partials = branches.map(branch => executeBranch(branch, allowedDocs));
        return combineResults(partials, operator);
    }
    let result = executeBranch(branches[0], allowedDocs);
    let gate = docIdsFromResult(result);
    switch (op) {
        case 'and': {
            const { documentCount } = params.aggregateContext;
            for (const branch of branches.slice(1)) {
                // No custom limits: gateIsSelectiveEnough falls back to its defaults.
                const branchAllowed = gateIsSelectiveEnough(gate.size, documentCount, undefined)
                    ? gate
                    : allowedDocs;
                result = combineResults([result, executeBranch(branch, branchAllowed)], AND);
                gate = docIdsFromResult(result);
            }
            return result;
        }
        case 'and_not': {
            for (const branch of branches.slice(1)) {
                subtractDocIdsFromResult(result, collectBranch(branch, gate));
                gate = docIdsFromResult(result);
            }
            return result;
        }
        default:
            throw new Error(`Invalid combination operator: ${operator}`);
    }
}
3861
/**
 * Query adapter for packed frozen term indexes.
 *
 * Builds the index-view object the query executor works against, backed by the packed
 * term `index`, the postings `layout`, a reusable `flyweight` term-data view and the
 * supplied `forEachActiveDoc` iterator.
 */
function createFrozenQueryIndexView(index, layout, flyweight, forEachActiveDoc) {
    const lookupTermIndex = (term) => {
        const ti = index.get(term);
        return ti == null ? undefined : ti;
    };
    function* prefixMatches(term) {
        yield* index.prefixRefs(term);
    }
    function* fuzzyMatches(term, maxDistance) {
        yield* index.fuzzyRefs(term, maxDistance);
    }
    return {
        resolveTermIndex: lookupTermIndex,
        fieldTermData: termIndex => flyweight.bind(termIndex),
        collectDocIds: (termIndex, fieldBoosts, context, docIds, allowedDocs) => {
            collectDocIdsFromFrozenLayout(layout, termIndex, fieldBoosts, context, docIds, allowedDocs);
        },
        getTermData: (term) => {
            const ti = index.get(term);
            return ti == null ? undefined : flyweight.bind(ti);
        },
        getPrefixMatchesByIndex: prefixMatches,
        getFuzzyMatchesByIndex: fuzzyMatches,
        resolveTermByIndex: termIndex => index.termByIndex(termIndex),
        forEachActiveDoc,
    };
}
3890
/**
 * Collect the ids of the documents matching any supported query, without scoring.
 *
 * Handles the wildcard query (all active documents, filtered by `allowedDocs`), nested
 * query combinations, and plain string queries.
 *
 * @throws When `query` is none of these
 */
function collectDocIdsForQueryInternal(query, searchOptions, params, allowedDocs) {
    if (isWildcardQuery(query)) {
        const everyDoc = new Set();
        params.indexView.forEachActiveDoc((docId) => {
            if (allowedDocs == null || allowedDocs.has(docId)) {
                everyDoc.add(docId);
            }
        });
        return everyDoc;
    }
    if (isQueryCombination(query)) {
        // Child branches inherit the combination's own options; `queries` is not an option.
        const inherited = { ...searchOptions, ...query, queries: undefined };
        let operator = query.combineWith;
        if (operator == null) {
            operator = inherited.combineWith;
        }
        if (operator == null) {
            operator = params.globalSearchOptions.combineWith;
        }
        const collectNested = (branch, branchAllowed) => collectDocIdsForQueryInternal(branch, inherited, params, branchAllowed);
        return collectCombinedDocIds(query.queries, operator, collectNested, allowedDocs);
    }
    if (typeof query !== 'string') {
        throw new Error('FrozenMiniSearch: invalid query');
    }
    const normalized = normalizeStringQuery(query, searchOptions, params);
    const { options, specs } = normalized;
    const combineWith = normalized.operator != null
        ? normalized.operator
        : params.globalSearchOptions.combineWith;
    const collectSpec = (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, options, params, branchAllowed);
    if (specs.length === 0) {
        return new Set();
    }
    if (specs.length === 1) {
        return collectSpec(specs[0], allowedDocs);
    }
    return collectCombinedDocIds(specs, combineWith, collectSpec, allowedDocs);
}
3918
/**
 * Produce a result entry for every active document.
 * The score is `boostDocument(id, '', storedFields)` when a boost function is configured,
 * otherwise 1; `terms` and `match` are empty.
 */
function executeWildcardQuery(searchOptions, params) {
    const { boostDocument } = { ...params.globalSearchOptions, ...searchOptions };
    const hits = new Map();
    params.indexView.forEachActiveDoc((shortId, id, storedFields) => {
        hits.set(shortId, {
            score: boostDocument ? boostDocument(id, '', storedFields) : 1,
            terms: [],
            match: {},
        });
    });
    return hits;
}
3928
/**
 * Execute any supported query and return the scored results map.
 *
 * Wildcards match every active document. Query combinations and multi-term string
 * queries use gated evaluation when `useGatedEvaluation` allows it; otherwise every
 * branch is scored with the same `allowedDocs` and the results are combined.
 *
 * @throws When `query` is not a wildcard, a combination or a string
 */
function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
    if (isWildcardQuery(query)) {
        return executeWildcardQuery(searchOptions, params);
    }
    if (isQueryCombination(query)) {
        // Spread inherits parent combineWith into child branches (lucaong 7.2 behavior).
        const inherited = { ...searchOptions, ...query, queries: undefined };
        let operator = query.combineWith;
        if (operator == null) {
            operator = inherited.combineWith;
        }
        if (operator == null) {
            operator = params.globalSearchOptions.combineWith;
        }
        const scoreBranch = (branch, branchAllowed) => executeQueryInternal(branch, inherited, params, branchAllowed, run);
        if (!useGatedEvaluation(run, query.queries.length, operator, combinationHasWildcard(query))) {
            const partials = query.queries.map(subquery => scoreBranch(subquery, allowedDocs));
            return combineResults(partials, operator);
        }
        const collectBranch = (branch, branchAllowed) => collectDocIdsForQueryInternal(branch, inherited, params, branchAllowed);
        return executeCombinedBranches(query.queries, operator, params, scoreBranch, collectBranch, allowedDocs);
    }
    if (typeof query !== 'string') {
        throw new Error('FrozenMiniSearch: invalid query');
    }
    const normalized = normalizeStringQuery(query, searchOptions, params);
    const { options, specs } = normalized;
    const combineWith = normalized.operator != null
        ? normalized.operator
        : params.globalSearchOptions.combineWith;
    const scoreSpec = (spec, branchAllowed) => executeQuerySpecInternal(spec, options, params, branchAllowed);
    if (!useGatedEvaluation(run, specs.length, combineWith, false)) {
        return combineResults(specs.map(spec => scoreSpec(spec, allowedDocs)), combineWith);
    }
    const collectSpec = (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, options, params, branchAllowed);
    return executeCombinedBranches(specs, combineWith, params, scoreSpec, collectSpec, allowedDocs);
}
3954
/** Entry point for query execution: runs the query with no outer docId gate. */
function executeQuery(query, searchOptions, params) {
    const results = executeQueryInternal(query, searchOptions, params);
    return results;
}
3957
+
3958
/**
 * Aggregate search hits into ranked phrase suggestions.
 *
 * Hits are grouped by their matched terms joined with a space. Each suggestion gets the
 * mean score of its group and the `terms` of the first hit in it; the list is sorted
 * with `byScore`.
 */
function suggestFromSearchResults(hits) {
    const byPhrase = new Map();
    for (const hit of hits) {
        const { score, terms } = hit;
        const phrase = terms.join(' ');
        const group = byPhrase.get(phrase);
        if (group == null) {
            byPhrase.set(phrase, { score, terms, count: 1 });
            continue;
        }
        group.score += score;
        group.count += 1;
    }
    const ranked = Array.from(byPhrase, ([suggestion, group]) => ({
        suggestion,
        terms: group.terms,
        score: group.score / group.count,
    }));
    ranked.sort(byScore);
    return ranked;
}
3979
/**
 * Run a search and turn hits into suggestions (shared by mutable and frozen indexes).
 *
 * @param search Search function called as `search(queryString, options)`
 * @param queryString Query to suggest for
 * @param options Search options (defaults to `{}`)
 */
function autoSuggestFromSearch(search, queryString, options = {}) {
    const hits = search(queryString, options);
    return suggestFromSearchResults(hits);
}
3983
+
3984
/**
 * Copy an index array into a freshly allocated typed array.
 * `Uint8Array` and `Uint16Array` inputs keep their type; anything else becomes a `Uint32Array`.
 */
function ownedIndexArray(arr) {
    for (const NarrowArray of [Uint8Array, Uint16Array]) {
        if (arr instanceof NarrowArray) {
            return new NarrowArray(arr);
        }
    }
    return new Uint32Array(arr);
}
3991
/** Copy the field-length matrix into an owned typed array (same rules as `ownedIndexArray`). */
function ownedFieldLengthMatrix(matrix) {
    const copy = ownedIndexArray(matrix);
    return copy;
}
3994
/**
 * Rebuild a packed radix tree whose index columns are fresh copies.
 * The scalar counters and `labelHeap` are passed through unchanged.
 */
function ownedPackedRadixTree(index) {
    const { size, nodeCount, edgeCount, labelHeap } = index;
    const data = { size, nodeCount, edgeCount, labelHeap };
    const copiedColumns = [
        'nodeEdgeOffset',
        'nodeValue',
        'nodeLeafOrder',
        'edgeLabelStart',
        'edgeLabelLength',
        'edgeChild',
    ];
    for (const column of copiedColumns) {
        data[column] = ownedIndexArray(index[column]);
    }
    return PackedRadixTree.fromData(data);
}
4008
/**
 * Copy every typed array of a postings layout so the result owns its memory.
 *
 * The doc-id array width follows `docIdWidth` (16 -> Uint16Array, otherwise Uint32Array)
 * and the frequency array keeps being a Uint8Array when it was one (otherwise Uint16Array).
 * The `dense` layout copies its offset/length columns; any other layout is treated as
 * sparse. All other properties are carried over as is.
 */
function ownedPostingsLayout(postings) {
    const DocIdArray = postings.docIdWidth === 16 ? Uint16Array : Uint32Array;
    const FreqArray = postings.allFreqs instanceof Uint8Array ? Uint8Array : Uint16Array;
    const shared = {
        ...postings,
        allDocIds: new DocIdArray(postings.allDocIds),
        allFreqs: new FreqArray(postings.allFreqs),
    };
    if (postings.layout === 'dense') {
        shared.denseOffsets = new Uint32Array(postings.denseOffsets);
        shared.denseLengths = new Uint32Array(postings.denseLengths);
        return shared;
    }
    const SparseFieldIdArray = postings.sparseFieldIdWidth === 16 ? Uint16Array : Uint8Array;
    const sparseFieldIds = new SparseFieldIdArray(postings.sparseFieldIds);
    shared.sparseTermStarts = new Uint32Array(postings.sparseTermStarts);
    shared.sparseFieldIds = sparseFieldIds;
    shared.sparseOffsets = new Uint32Array(postings.sparseOffsets);
    shared.sparseLengths = new Uint32Array(postings.sparseLengths);
    return shared;
}
4037
/**
 * Copy an options object one level deep: the object itself, its `fields` list and its
 * `searchOptions` / `autoSuggestOptions` objects are new; everything inside them is shared.
 */
function shallowCopyOptions(options) {
    const copy = { ...options };
    copy.fields = Array.from(options.fields);
    copy.searchOptions = Object.assign({}, options.searchOptions);
    copy.autoSuggestOptions = Object.assign({}, options.autoSuggestOptions);
    return copy;
}
4045
/**
 * Produce shallow copies of the plain-JS parts of a snapshot.
 *
 * @param params Snapshot params providing `fieldIds`, `options` and
 *   `storedFields`.
 * @returns An object holding only `fieldIds`, `options` and `storedFields`,
 *   each a new container; the values inside them are shared with `params`.
 */
function shallowCopyJsSnapshotFields(params) {
    const { fieldIds, options, storedFields } = params;
    return {
        fieldIds: { ...fieldIds },
        options: shallowCopyOptions(options),
        // slice() rather than spread: it keeps holes in a sparse array as holes.
        storedFields: storedFields.slice(),
    };
}
4052
/**
 * Ensure {@link FrozenMiniSearch} owns its snapshot data (no aliases on source MiniSearch,
 * no TypedArray views on wire buffers after binary load).
 *
 * - `'trusted-build'`: `params` is returned untouched (same reference).
 * - `'minisearch-json'`: `fieldIds`, `options` and `storedFields` are replaced
 *   by shallow copies; every other key is carried over as-is.
 * - any other mode (the binary-load path): `index`, `postings`,
 *   `fieldLengthMatrix` and `avgFieldLength` are replaced by owned copies.
 *
 * @param params Assembled snapshot parts.
 * @param mode Ownership mode selecting which parts must be copied.
 * @returns `params` itself or a new object with the copied parts swapped in.
 */
function materializeOwnedSnapshot(params, mode) {
    switch (mode) {
        case 'trusted-build':
            return params;
        case 'minisearch-json':
            return { ...params, ...shallowCopyJsSnapshotFields(params) };
        default: {
            const index = ownedPackedRadixTree(params.index);
            const postings = ownedPostingsLayout(params.postings);
            const fieldLengthMatrix = ownedFieldLengthMatrix(params.fieldLengthMatrix);
            const avgFieldLength = new Float32Array(params.avgFieldLength);
            return { ...params, index, postings, fieldLengthMatrix, avgFieldLength };
        }
    }
}
4071
+
4072
/**
 * Function-style accessor for an index's memory report.
 *
 * @param frozen Object exposing a `memoryBreakdown()` method.
 * @returns Whatever `frozen.memoryBreakdown()` returns.
 */
function frozenMemoryBreakdown(frozen) {
    const breakdown = frozen.memoryBreakdown();
    return breakdown;
}
4075
/**
 * Verify that the caller-supplied field list names exactly the fields that
 * were indexed, ignoring order.
 *
 * Both name lists are sorted on copies, so neither argument is mutated.
 *
 * @param optionsFields Iterable of field names taken from the load options.
 * @param snapFieldIds Object whose keys are the indexed field names.
 * @throws {Error} When the two sets of names differ; the message lists the
 *   expected (indexed) names.
 */
function assertFieldsMatchSnapshot(optionsFields, snapFieldIds) {
    const expected = Object.keys(snapFieldIds).sort();
    const requested = [...optionsFields].sort();
    const identical = expected.length === requested.length
        && expected.every((name, position) => requested[position] === name);
    if (identical) {
        return;
    }
    throw new Error(`FrozenMiniSearch: option "fields" must match the indexed fields exactly (expected: ${expected.join(', ')})`);
}
4082
/**
 * Shared assembly path: take ownership of the snapshot parts, sanity-check
 * their sizes, optionally run the full structural validation, and construct
 * the index.
 *
 * The two size checks always run. The postings-layout and term-index-leaf
 * validators run only when `trustedSource` is falsy.
 *
 * @param params Assembled snapshot parts.
 * @param trustedSource When truthy, the structural validators are skipped.
 * @param ownershipMode Forwarded to `materializeOwnedSnapshot`.
 * @returns A new {@link FrozenMiniSearch} built from the owned parts.
 * @throws {Error} When `fieldLengthMatrix` does not hold
 *   `nextId * fieldCount` entries or `avgFieldLength` does not hold
 *   `fieldCount` entries.
 */
function assembleFrozenInternal(params, trustedSource, ownershipMode) {
    const owned = materializeOwnedSnapshot(params, ownershipMode);
    const { termCount, nextId, fieldCount } = owned;
    const expectedMatrixLength = nextId * fieldCount;
    if (owned.fieldLengthMatrix.length !== expectedMatrixLength) {
        throw new Error('FrozenMiniSearch: fieldLengthMatrix size mismatch');
    }
    if (owned.avgFieldLength.length !== fieldCount) {
        throw new Error('FrozenMiniSearch: avgFieldLength size mismatch');
    }
    if (trustedSource) {
        return new FrozenMiniSearch(owned);
    }
    validateFrozenPostingsLayout(owned.postings, owned.documentCount, nextId);
    validateFrozenTermIndexLeaves(owned.index, termCount);
    return new FrozenMiniSearch(owned);
}
4097
/**
 * Assemble an index from parts produced by this package's own build paths.
 *
 * Calls `assembleFrozenInternal` with the trusted flag set, so the structural
 * validators behind that flag are skipped.
 *
 * @param params Assembled snapshot parts.
 * @param ownershipMode Ownership mode; defaults to `'trusted-build'`.
 * @returns The index built by `assembleFrozenInternal`.
 */
function assembleFrozenTrusted(params, ownershipMode = 'trusted-build') {
    const trustedSource = true;
    return assembleFrozenInternal(params, trustedSource, ownershipMode);
}
4101
/**
 * Instantiate {@link FrozenMiniSearch} from pre-built flat index parts with
 * full validation.
 *
 * Calls `assembleFrozenInternal` as an untrusted source in `'binary-load'`
 * ownership mode.
 *
 * @param params Assembled snapshot parts.
 * @returns The index built by `assembleFrozenInternal`.
 */
function assembleFrozen(params) {
    const trustedSource = false;
    const ownershipMode = 'binary-load';
    return assembleFrozenInternal(params, trustedSource, ownershipMode);
}
4105
/**
 * Build a frozen index directly from a collection of documents.
 *
 * The documents are turned into assembly params by
 * `buildFrozenParamsFromDocuments`, which are then assembled through the
 * trusted path with its default ownership mode.
 *
 * @param documents Documents to index.
 * @param options Index options forwarded to the params builder.
 * @returns The index returned by `assembleFrozenTrusted`.
 */
function buildFrozenFromDocuments(documents, options) {
    const params = buildFrozenParamsFromDocuments(documents, options);
    return assembleFrozenTrusted(params);
}
4108
/**
 * Finalize a {@link FrozenIndexBuilder} into a read-only index.
 *
 * Asks the builder for its assembly params via `freezeParams()` and assembles
 * them through the trusted path with its default ownership mode.
 *
 * @param builder Builder exposing `freezeParams()`.
 * @returns The index returned by `assembleFrozenTrusted`.
 */
function freezeFrozenIndexBuilder(builder) {
    const params = builder.freezeParams();
    return assembleFrozenTrusted(params);
}
4112
/**
 * Read-only search index over flat posting buffers and a packed term index.
 *
 * Instances are normally created through the static factories
 * (`fromDocuments`, `fromAsyncIterable`, `fromMiniSearch`,
 * `fromMiniSearchSnapshot`, `fromMiniSearchJson`, `loadBinarySync`,
 * `loadBinaryAsync`) rather than by calling the constructor directly.
 */
class FrozenMiniSearch {
    /**
     * Store the assembled parts by reference and pre-build the context
     * objects handed to the query engine on every search.
     *
     * @param params Assembled index parts: `options`, `documentCount`,
     *   `nextId`, `externalIds`, `idLookup`, `fieldIds`, `fieldCount`,
     *   `fieldLengthMatrix`, `avgFieldLength`, `storedFields`, `index`,
     *   `termCount` and `postings`.
     */
    constructor(params) {
        this._options = params.options;
        this._documentCount = params.documentCount;
        this._nextId = params.nextId;
        this._externalIds = params.externalIds;
        this._idLookup = params.idLookup;
        this._fieldIds = params.fieldIds;
        this._fieldCount = params.fieldCount;
        this._fieldLengthMatrix = params.fieldLengthMatrix;
        this._avgFieldLength = params.avgFieldLength;
        this._storedFields = params.storedFields;
        this._index = params.index;
        this._termCount = params.termCount;
        this._postings = params.postings;
        this._fieldTermFlyweight = createFrozenFieldTermFlyweight(this._postings);
        this._aggregateContext = {
            documentCount: this._documentCount,
            avgFieldLength: this._avgFieldLength,
            fieldIds: this._fieldIds,
            getFieldLength: (docId, fieldId) => this.getFieldLength(docId, fieldId),
            getExternalId: docId => this._externalIds[docId],
            getStoredFields: docId => this._storedFields[docId],
        };
        this._queryEngineParams = {
            fields: this._options.fields,
            globalSearchOptions: this._options.searchOptions,
            tokenize: this._options.tokenize,
            processTerm: this._options.processTerm,
            indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight, (callback) => {
                // Visit every short id below nextId; slots without an external id are skipped.
                for (let shortId = 0; shortId < this._nextId; shortId++) {
                    const id = this._externalIds[shortId];
                    if (id === undefined) {
                        continue;
                    }
                    callback(shortId, id, this._storedFields[shortId]);
                }
            }),
            aggregateContext: this._aggregateContext,
        };
    }
    /** Document count recorded in the snapshot. */
    get documentCount() { return this._documentCount; }
    /** Term count recorded in the snapshot. */
    get termCount() { return this._termCount; }
    /**
     * Report the sizes of the index's main structures.
     *
     * `storedFieldsJsonBytes` is the summed `JSON.stringify(row).length` of
     * the non-null stored-field rows, i.e. a string length rather than an
     * encoded byte count. The id map contributes a flat 32 per entry, and
     * only when the lookup mode is `'lazy-map'`.
     *
     * @returns An object with `termCount`, `documentCount`, `nextId`,
     *   `postings`, `radixTree`, `documents` and `estimatedStructuredBytes`.
     */
    memoryBreakdown() {
        const termCount = this.termCount;
        const postingsStats = postingsTypedBytes(this._postings);
        let storedJson = 0;
        for (const row of this._storedFields) {
            if (row != null) {
                storedJson += JSON.stringify(row).length;
            }
        }
        const radixEst = this._index.packedByteLength();
        const idMapBytes = this._idLookup.mode === 'lazy-map' ? this._idLookup.mapEntryCount * 32 : 0;
        const estimatedStructuredBytes = postingsStats.totalTypedBytes
            + this._fieldLengthMatrix.byteLength
            + this._avgFieldLength.byteLength
            + radixEst
            + storedJson
            + idMapBytes;
        return {
            termCount,
            documentCount: this._documentCount,
            nextId: this._nextId,
            postings: {
                slotCount: postingsStats.slotCount,
                layout: this._postings.layout,
                docIdWidth: this._postings.docIdWidth,
                allDocIdsBytes: postingsStats.allDocIdsBytes,
                allFreqsBytes: postingsStats.allFreqsBytes,
                offsetsBytes: postingsStats.offsetsBytes,
                lengthsBytes: postingsStats.lengthsBytes,
                totalTypedBytes: postingsStats.totalTypedBytes,
            },
            radixTree: {
                nodeCount: this._index.packedNodeCount(),
                edgeCount: this._index.packedEdgeCount(),
                estimatedBytes: radixEst,
            },
            documents: {
                externalIdsSlots: this._externalIds.length,
                storedFieldsSlots: this._storedFields.length,
                idLookupMode: this._idLookup.mode,
                idToShortIdEntries: this._idLookup.mapEntryCount,
                fieldLengthMatrixBytes: this._fieldLengthMatrix.byteLength,
                avgFieldLengthBytes: this._avgFieldLength.byteLength,
                storedFieldsJsonBytes: storedJson,
            },
            estimatedStructuredBytes,
        };
    }
    /**
     * @param id External document id.
     * @returns The result of the id lookup's `has(id)`.
     */
    has(id) {
        return this._idLookup.has(id);
    }
    /**
     * @param id External document id.
     * @returns The stored-fields row for `id`, or `undefined` when the id
     *   lookup yields no short id.
     */
    getStoredFields(id) {
        const shortId = this._idLookup.get(id);
        return shortId == null ? undefined : this._storedFields[shortId];
    }
    /**
     * Run a query and turn the raw engine output into final results.
     *
     * @param query Query passed to the query engine.
     * @param searchOptions Per-call search options (default `{}`).
     * @returns The output of `finalizeRawSearchResults`.
     */
    search(query, searchOptions = {}) {
        return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], docId => this._storedFields[docId]);
    }
    /**
     * Produce suggestions for a query string.
     *
     * @param queryString Text to suggest for.
     * @param options Per-call overrides applied on top of the index's
     *   `autoSuggestOptions`.
     * @returns The output of `autoSuggestFromSearch`.
     */
    autoSuggest(queryString, options = {}) {
        const merged = { ...this._options.autoSuggestOptions, ...options };
        return autoSuggestFromSearch((q, o) => this.search(q, o), queryString, merged);
    }
    /**
     * Gather the snapshot object passed to the binary encoders. Shared by
     * {@link saveBinarySync} and {@link saveBinaryAsync} so both always
     * serialize the same set of fields.
     *
     * @returns A fresh snapshot object referencing this index's data.
     */
    _snapshotForWire() {
        return {
            documentCount: this._documentCount,
            nextId: this._nextId,
            fieldIds: this._fieldIds,
            fieldCount: this._fieldCount,
            fieldNames: fieldNamesFromFieldIds(this._fieldIds),
            avgFieldLength: this._avgFieldLength,
            externalIds: this._externalIds,
            storedFields: this._storedFields,
            fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
            treeShape: [],
            postings: this._postings,
        };
    }
    /** Serialize this index as a frozen binary snapshot (synchronous). */
    saveBinarySync() {
        return encodeFrozenSnapshot(this._snapshotForWire(), undefined, this._index);
    }
    /** Non-blocking zstd compression; same output as {@link saveBinarySync}. */
    async saveBinaryAsync() {
        return encodeFrozenSnapshotAsync(this._snapshotForWire(), undefined, this._index);
    }
    /**
     * Load a frozen binary snapshot.
     *
     * @param buffer Encoded snapshot.
     * @param options Load options (default `{}`).
     */
    static loadBinarySync(buffer, options = {}) {
        const snap = decodeFrozenSnapshot(buffer);
        return FrozenMiniSearch.fromBinarySnapshot(snap, options);
    }
    /**
     * Load a frozen binary snapshot with streaming zstd decompression (bounded memory).
     *
     * @param buffer Encoded snapshot.
     * @param options Load options (default `{}`).
     */
    static async loadBinaryAsync(buffer, options = {}) {
        const snap = await decodeFrozenSnapshotAsync(buffer);
        return FrozenMiniSearch.fromBinarySnapshot(snap, options);
    }
    /**
     * Build an index from an already decoded binary snapshot.
     *
     * When `options.fields` is given it must name exactly the indexed fields;
     * otherwise the snapshot's own field names are used. Search and
     * auto-suggest options are layered over the package defaults.
     *
     * @param snap Decoded snapshot.
     * @param options Load options (default `{}`, matching the other loaders).
     * @throws {Error} When the snapshot has no packed term index, or when
     *   `options.fields` does not match the indexed fields.
     */
    static fromBinarySnapshot(snap, options = {}) {
        const snapshotFields = snap.fieldNames != null
            ? snap.fieldNames
            : fieldNamesFromFieldIds(snap.fieldIds);
        if (options.fields != null) {
            assertFieldsMatchSnapshot(options.fields, snap.fieldIds);
        }
        const opts = {
            ...defaultFrozenLoadOptions,
            ...options,
            fields: options.fields != null ? options.fields : snapshotFields,
            searchOptions: {
                ...defaultSearchOptions,
                ...(options.searchOptions || {}),
            },
            autoSuggestOptions: { ...defaultAutoSuggestOptions, ...(options.autoSuggestOptions || {}) },
        };
        const index = snap.packedTermIndex;
        if (index == null) {
            throw new Error('FrozenMiniSearch: binary snapshot missing packed term index');
        }
        const idLookup = createIdToShortIdLookup(snap.externalIds, snap.nextId);
        return assembleFrozen({
            options: opts,
            documentCount: snap.documentCount,
            nextId: snap.nextId,
            fieldIds: snap.fieldIds,
            fieldCount: snap.fieldCount,
            externalIds: snap.externalIds,
            idLookup,
            storedFields: snap.storedFields,
            fieldLengthMatrix: snap.fieldLengthMatrix,
            avgFieldLength: snap.avgFieldLength,
            index,
            termCount: snap.postings.termCount,
            postings: snap.postings,
        });
    }
    /** Build a read-only index in one pass from documents. */
    static fromDocuments(documents, options) {
        return buildFrozenFromDocuments(documents, options);
    }
    /**
     * Convert a lucaong MiniSearch JSON snapshot (`toJSON` / `loadJSON` wire format) into a
     * frozen index. No runtime dependency on the `minisearch` package.
     */
    static fromMiniSearchJson(json, options = {}) {
        return FrozenMiniSearch.fromMiniSearchSnapshot(JSON.parse(json), options);
    }
    /**
     * Same as {@link fromMiniSearchJson} with a pre-parsed snapshot object.
     * `storedFields` are shallow-copied; callers must not mutate nested values
     * after load if they intend to keep the index immutable.
     */
    static fromMiniSearchSnapshot(snapshot, options = {}) {
        return assembleFrozenTrusted(buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options), 'minisearch-json');
    }
    /** Accepts any object exposing `toJSON()` in lucaong MiniSearch snapshot shape. */
    static fromMiniSearch(source, options = {}) {
        return FrozenMiniSearch.fromMiniSearchSnapshot(source.toJSON(), options);
    }
    /**
     * Build a read-only index from an async stream of documents (e.g. CSV parser).
     * For sync iterables, use {@link createFrozenIndexBuilder} with `for...of` instead.
     *
     * @param hints Optional builder hints; `estimatedDocumentCount` pre-allocates
     * per-document arrays when the final document count is known upfront.
     */
    static async fromAsyncIterable(iterable, options, hints) {
        const builder = createFrozenIndexBuilder(options, hints);
        for await (const document of iterable) {
            builder.add(document);
        }
        return freezeFrozenIndexBuilder(builder);
    }
    /**
     * Read one cell of the row-major field-length matrix.
     *
     * @param docId Short document id (row).
     * @param fieldId Field id (column).
     * @returns The stored length, or 0 when the cell is out of range.
     */
    getFieldLength(docId, fieldId) {
        const length = this._fieldLengthMatrix[docId * this._fieldCount + fieldId];
        return length == null ? 0 : length;
    }
    /**
     * Run a query through the shared query engine without finalizing results.
     *
     * @param query Query passed to the engine.
     * @param searchOptions Per-call search options (default `{}`).
     * @returns The raw output of the module-level `executeQuery`.
     */
    executeQuery(query, searchOptions = {}) {
        return executeQuery(query, searchOptions, this._queryEngineParams);
    }
}
4339
+ FrozenMiniSearch.wildcard = WILDCARD_QUERY;
4340
+
4341
+ exports.AND = AND;
4342
+ exports.AND_NOT = AND_NOT;
4343
+ exports.FrozenIndexBuilder = FrozenIndexBuilder;
4344
+ exports.OR = OR;
4345
+ exports.assembleFrozen = assembleFrozen;
4346
+ exports.buildFrozenFromDocuments = buildFrozenFromDocuments;
4347
+ exports.createFrozenIndexBuilder = createFrozenIndexBuilder;
4348
+ exports.default = FrozenMiniSearch;
4349
+ exports.freezeFrozenIndexBuilder = freezeFrozenIndexBuilder;
4350
+ exports.frozenMemoryBreakdown = frozenMemoryBreakdown;