@yoch/frozenminisearch 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1911,111 +1911,6 @@ function materializeFrozenPostings(params) {
1911
1911
  sparseLengths: new Uint32Array(sparseLengths),
1912
1912
  };
1913
1913
  }
1914
- /** One-pass materialize from {@link FrozenIndexBuilder} scratch (counts known upfront). */
1915
- function materializeFrozenPostingsFromBuilder(state, nextId) {
1916
- var _a;
1917
- const { fieldCount, termCount, postingsDocIds, postingsFreqs, totalPostings, maxFreq } = state;
1918
- const layout = choosePostingsLayout(fieldCount);
1919
- const docIdWidth = nextId <= 65535 ? 16 : 32;
1920
- const allDocIds = docIdWidth === 16
1921
- ? new Uint16Array(totalPostings)
1922
- : new Uint32Array(totalPostings);
1923
- const allFreqs = allocateFreqs(totalPostings, maxFreq);
1924
- if (layout === 'dense') {
1925
- const slotCount = termCount * fieldCount;
1926
- const denseOffsets = new Uint32Array(slotCount);
1927
- const denseLengths = new Uint32Array(slotCount);
1928
- let write = 0;
1929
- for (let ti = 0; ti < termCount; ti++) {
1930
- const base = ti * fieldCount;
1931
- for (let f = 0; f < fieldCount; f++) {
1932
- const slot = base + f;
1933
- const docIds = postingsDocIds[slot];
1934
- const freqs = postingsFreqs[slot];
1935
- const len = (_a = docIds === null || docIds === void 0 ? void 0 : docIds.length) !== null && _a !== void 0 ? _a : 0;
1936
- denseOffsets[slot] = write;
1937
- denseLengths[slot] = len;
1938
- for (let i = 0; i < len; i++) {
1939
- const docId = docIds[i];
1940
- if (docIdWidth === 16) {
1941
- allDocIds[write] = docId;
1942
- }
1943
- else {
1944
- allDocIds[write] = docId;
1945
- }
1946
- allFreqs[write] = freqs[i];
1947
- write++;
1948
- }
1949
- }
1950
- }
1951
- return {
1952
- fieldCount,
1953
- termCount,
1954
- nextId,
1955
- layout,
1956
- docIdWidth,
1957
- sparseFieldIdWidth: null,
1958
- allDocIds,
1959
- allFreqs,
1960
- denseOffsets,
1961
- denseLengths,
1962
- sparseTermStarts: null,
1963
- sparseFieldIds: null,
1964
- sparseOffsets: null,
1965
- sparseLengths: null,
1966
- };
1967
- }
1968
- const sparseFieldIdWidth = chooseSparseFieldIdWidth(fieldCount);
1969
- const sparseFieldIdsScratch = [];
1970
- const sparseOffsets = [];
1971
- const sparseLengths = [];
1972
- const termStarts = new Array(termCount + 1).fill(0);
1973
- let write = 0;
1974
- for (let ti = 0; ti < termCount; ti++) {
1975
- termStarts[ti] = sparseFieldIdsScratch.length;
1976
- for (let f = 0; f < fieldCount; f++) {
1977
- const slot = ti * fieldCount + f;
1978
- const docIds = postingsDocIds[slot];
1979
- if (docIds == null || docIds.length === 0)
1980
- continue;
1981
- const freqs = postingsFreqs[slot];
1982
- sparseFieldIdsScratch.push(f);
1983
- sparseOffsets.push(write);
1984
- sparseLengths.push(docIds.length);
1985
- for (let i = 0; i < docIds.length; i++) {
1986
- const docId = docIds[i];
1987
- if (docIdWidth === 16) {
1988
- allDocIds[write] = docId;
1989
- }
1990
- else {
1991
- allDocIds[write] = docId;
1992
- }
1993
- allFreqs[write] = freqs[i];
1994
- write++;
1995
- }
1996
- }
1997
- termStarts[ti + 1] = sparseFieldIdsScratch.length;
1998
- }
1999
- const sparseFieldIds = sparseFieldIdWidth === 16
2000
- ? new Uint16Array(sparseFieldIdsScratch)
2001
- : new Uint8Array(sparseFieldIdsScratch);
2002
- return {
2003
- fieldCount,
2004
- termCount,
2005
- nextId,
2006
- layout,
2007
- docIdWidth,
2008
- sparseFieldIdWidth,
2009
- allDocIds,
2010
- allFreqs,
2011
- denseOffsets: null,
2012
- denseLengths: null,
2013
- sparseTermStarts: new Uint32Array(termStarts),
2014
- sparseFieldIds,
2015
- sparseOffsets: new Uint32Array(sparseOffsets),
2016
- sparseLengths: new Uint32Array(sparseLengths),
2017
- };
2018
- }
2019
1914
  function postingsTypedBytes(layout) {
2020
1915
  const allDocIdsBytes = layout.allDocIds.byteLength;
2021
1916
  const allFreqsBytes = layout.allFreqs.byteLength;
@@ -2232,21 +2127,110 @@ function buildFieldIds(fields) {
2232
2127
  }
2233
2128
  return fieldIds;
2234
2129
  }
2235
- /** Token frequencies for one document field (after processTerm). */
2236
- function collectFieldTermFreqs(tokens, fieldName, processTerm) {
2237
- const localFreqs = new Map();
2130
+ function accumulateProcessedTerm(localFreqs, processedTerm) {
2131
+ if (Array.isArray(processedTerm)) {
2132
+ for (const t of processedTerm) {
2133
+ localFreqs.set(t, (localFreqs.get(t) || 0) + 1);
2134
+ }
2135
+ }
2136
+ else if (processedTerm) {
2137
+ localFreqs.set(processedTerm, (localFreqs.get(processedTerm) || 0) + 1);
2138
+ }
2139
+ }
2140
+ /**
2141
+ * Accumulate token frequencies for one document field into `localFreqs` (cleared first).
2142
+ * Returns the number of distinct processed terms (replaces a separate `Set(tokens)` pass).
2143
+ */
2144
+ function collectFieldTermFreqsInto(localFreqs, tokens, fieldName, processTerm) {
2145
+ localFreqs.clear();
2238
2146
  for (const term of tokens) {
2239
- const processedTerm = processTerm(term, fieldName);
2240
- if (Array.isArray(processedTerm)) {
2241
- for (const t of processedTerm) {
2242
- localFreqs.set(t, (localFreqs.get(t) || 0) + 1);
2243
- }
2147
+ accumulateProcessedTerm(localFreqs, processTerm(term, fieldName));
2148
+ }
2149
+ return localFreqs.size;
2150
+ }
2151
+ /** Global delimiter pattern for incremental `exec` (must not reuse {@link SPACE_OR_PUNCTUATION} — no `g` flag). */
2152
+ const DEFAULT_TOKENIZE_DELIMITERS = /[\n\r\p{Z}\p{P}]+/gu;
2153
+ const defaultTokenizeProbe = 'a b';
2154
+ const defaultTokenizeProbeField = 'f';
2155
+ const tokenizeBehaviorCache = new WeakMap();
2156
+ /**
2157
+ * True when `tokenize` matches the library default (reference equality or split-equivalent
2158
+ * on a fixed probe). Custom tokenizers that pass the probe but diverge on other inputs
2159
+ * (e.g. leading delimiters) still take the fast path — use the default reference in prod.
2160
+ */
2161
+ function isDefaultTokenize(tokenize) {
2162
+ if (tokenize === defaultFrozenLoadOptions.tokenize)
2163
+ return true;
2164
+ const cached = tokenizeBehaviorCache.get(tokenize);
2165
+ if (cached != null)
2166
+ return cached;
2167
+ const splitTokens = defaultTokenizeProbe.split(SPACE_OR_PUNCTUATION);
2168
+ const customTokens = tokenize(defaultTokenizeProbe, defaultTokenizeProbeField);
2169
+ const ok = splitTokens.length === customTokens.length
2170
+ && splitTokens.every((t, i) => t === customTokens[i]);
2171
+ tokenizeBehaviorCache.set(tokenize, ok);
2172
+ return ok;
2173
+ }
2174
+ function forEachDefaultToken(text, onToken) {
2175
+ if (text.length === 0) {
2176
+ onToken('');
2177
+ return;
2178
+ }
2179
+ let start = 0;
2180
+ const re = DEFAULT_TOKENIZE_DELIMITERS;
2181
+ re.lastIndex = 0;
2182
+ let match;
2183
+ while ((match = re.exec(text)) !== null) {
2184
+ if (match.index > start) {
2185
+ onToken(text.slice(start, match.index));
2244
2186
  }
2245
- else if (processedTerm) {
2246
- localFreqs.set(processedTerm, (localFreqs.get(processedTerm) || 0) + 1);
2187
+ else if (match.index === start) {
2188
+ onToken('');
2247
2189
  }
2190
+ start = match.index + match[0].length;
2191
+ }
2192
+ if (start < text.length) {
2193
+ onToken(text.slice(start));
2194
+ }
2195
+ else if (start === 0) {
2196
+ onToken(text);
2197
+ }
2198
+ else if (start === text.length) {
2199
+ onToken('');
2248
2200
  }
2249
- return localFreqs;
2201
+ }
2202
+ /** Default tokenizer into a reusable buffer (avoids `text.split()` array allocation). */
2203
+ function tokenizeDefaultInto(out, text) {
2204
+ out.length = 0;
2205
+ forEachDefaultToken(text, (token) => out.push(token));
2206
+ }
2207
+ /** Tokenize field text into `out` (reused). Fast path when `tokenize` is the library default. */
2208
+ function tokenizeFieldInto(out, tokenize, text, fieldName) {
2209
+ if (isDefaultTokenize(tokenize)) {
2210
+ tokenizeDefaultInto(out, text);
2211
+ return;
2212
+ }
2213
+ const tokens = tokenize(text, fieldName);
2214
+ out.length = 0;
2215
+ out.push(...tokens);
2216
+ }
2217
+ function collectDefaultFieldTermFreqsInto(localFreqs, text, fieldName, processTerm) {
2218
+ localFreqs.clear();
2219
+ forEachDefaultToken(text, (token) => {
2220
+ accumulateProcessedTerm(localFreqs, processTerm(token, fieldName));
2221
+ });
2222
+ return localFreqs.size;
2223
+ }
2224
+ /**
2225
+ * Tokenize + accumulate field term frequencies in one pass when the default tokenizer is used.
2226
+ * `tokenScratch` is only used for custom tokenizers (two-phase fallback).
2227
+ */
2228
+ function collectFieldTermFreqsFromFieldInto(localFreqs, tokenScratch, tokenize, text, fieldName, processTerm) {
2229
+ if (isDefaultTokenize(tokenize)) {
2230
+ return collectDefaultFieldTermFreqsInto(localFreqs, text, fieldName, processTerm);
2231
+ }
2232
+ tokenizeFieldInto(tokenScratch, tokenize, text, fieldName);
2233
+ return collectFieldTermFreqsInto(localFreqs, tokenScratch, fieldName, processTerm);
2250
2234
  }
2251
2235
  /** Same running average as {@link MiniSearch} private addFieldLength. */
2252
2236
  function updateAvgFieldLength(avgFieldLength, fieldId, count, length) {
@@ -3446,50 +3430,260 @@ async function decodeFrozenSnapshotAsync(buf) {
3446
3430
  return decodeFrozenSnapshot(buf);
3447
3431
  }
3448
3432
 
3449
- function getOrCreateTermIndex(state, index, term) {
3433
+ const DEFAULT_CAPACITY = 16;
3434
+ /** Growable unsigned 32-bit column (build scratch; narrowed to u16 at finalize when possible). */
3435
+ class GrowableUint32Column {
3436
+ constructor(initialCapacity = DEFAULT_CAPACITY) {
3437
+ this._len = 0;
3438
+ this._buf = new Uint32Array(Math.max(1, initialCapacity));
3439
+ }
3440
+ get length() {
3441
+ return this._len;
3442
+ }
3443
+ push(value) {
3444
+ if (this._len >= this._buf.length) {
3445
+ const grown = new Uint32Array(Math.max(1, this._buf.length * 2));
3446
+ grown.set(this._buf);
3447
+ this._buf = grown;
3448
+ }
3449
+ this._buf[this._len++] = value;
3450
+ }
3451
+ copyRangeInto(sourceOffset, length, target, targetOffset, docIdWidth) {
3452
+ if (docIdWidth === 16) {
3453
+ const out = target;
3454
+ for (let i = 0; i < length; i++)
3455
+ out[targetOffset + i] = this._buf[sourceOffset + i];
3456
+ }
3457
+ else {
3458
+ const out = target;
3459
+ for (let i = 0; i < length; i++)
3460
+ out[targetOffset + i] = this._buf[sourceOffset + i];
3461
+ }
3462
+ }
3463
+ truncate(length) {
3464
+ this._len = length;
3465
+ if (length > 0 && length < this._buf.length) {
3466
+ this._buf = this._buf.slice(0, length);
3467
+ }
3468
+ }
3469
+ }
3470
+ /** Growable frequency column (u16 cells; matches frozen clamp range). */
3471
+ class GrowableFreqColumn {
3472
+ constructor(initialCapacity = DEFAULT_CAPACITY) {
3473
+ this._len = 0;
3474
+ this._buf = new Uint16Array(Math.max(1, initialCapacity));
3475
+ }
3476
+ get length() {
3477
+ return this._len;
3478
+ }
3479
+ push(freq) {
3480
+ if (this._len >= this._buf.length) {
3481
+ const grown = new Uint16Array(Math.max(1, this._buf.length * 2));
3482
+ grown.set(this._buf);
3483
+ this._buf = grown;
3484
+ }
3485
+ this._buf[this._len++] = clampFreq(freq);
3486
+ }
3487
+ copyRangeInto(sourceOffset, length, target, targetOffset) {
3488
+ for (let i = 0; i < length; i++) {
3489
+ target[targetOffset + i] = this._buf[sourceOffset + i];
3490
+ }
3491
+ }
3492
+ truncate(length) {
3493
+ this._len = length;
3494
+ if (length > 0 && length < this._buf.length) {
3495
+ this._buf = this._buf.slice(0, length);
3496
+ }
3497
+ }
3498
+ }
3499
+ /**
3500
+ * Single-pass postings accumulator for {@link FrozenIndexBuilder}.
3501
+ * One global TypedArray stream per docIds/freqs; per-slot range metadata only.
3502
+ */
3503
+ class IncrementalPostingsAccumulator {
3504
+ constructor(fieldCount, hints) {
3505
+ var _a;
3506
+ this._slots = new Map();
3507
+ this._totalPostings = 0;
3508
+ this._maxFreq = 0;
3509
+ this._fieldCount = fieldCount;
3510
+ const cap = Math.max(DEFAULT_CAPACITY, (_a = hints === null || hints === void 0 ? void 0 : hints.estimatedTotalPostings) !== null && _a !== void 0 ? _a : 0);
3511
+ this._docIds = new GrowableUint32Column(cap);
3512
+ this._freqs = new GrowableFreqColumn(cap);
3513
+ }
3514
+ get totalPostings() {
3515
+ return this._totalPostings;
3516
+ }
3517
+ get maxFreq() {
3518
+ return this._maxFreq;
3519
+ }
3520
+ append(termIndex, fieldId, docId, freq) {
3521
+ const slot = termIndex * this._fieldCount + fieldId;
3522
+ const writeIdx = this._docIds.length;
3523
+ this._docIds.push(docId);
3524
+ const v = clampFreq(freq);
3525
+ this._freqs.push(v);
3526
+ if (v > this._maxFreq)
3527
+ this._maxFreq = v;
3528
+ this._totalPostings++;
3529
+ let ranges = this._slots.get(slot);
3530
+ if (ranges == null) {
3531
+ ranges = { starts: [writeIdx], lengths: [1] };
3532
+ this._slots.set(slot, ranges);
3533
+ return;
3534
+ }
3535
+ const last = ranges.starts.length - 1;
3536
+ const end = ranges.starts[last] + ranges.lengths[last];
3537
+ if (end === writeIdx) {
3538
+ ranges.lengths[last]++;
3539
+ }
3540
+ else {
3541
+ ranges.starts.push(writeIdx);
3542
+ ranges.lengths.push(1);
3543
+ }
3544
+ }
3545
+ clear() {
3546
+ this._slots.clear();
3547
+ // Drop global scratch backing so finalize does not retain duplicate posting bytes.
3548
+ this._docIds.truncate(0);
3549
+ this._freqs.truncate(0);
3550
+ }
3551
+ copySlot(ranges, allDocIds, allFreqs, write, docIdWidth) {
3552
+ for (let r = 0; r < ranges.starts.length; r++) {
3553
+ const start = ranges.starts[r];
3554
+ const len = ranges.lengths[r];
3555
+ this._docIds.copyRangeInto(start, len, allDocIds, write, docIdWidth);
3556
+ this._freqs.copyRangeInto(start, len, allFreqs, write);
3557
+ write += len;
3558
+ }
3559
+ return write;
3560
+ }
3561
+ slotLength(ranges) {
3562
+ let n = 0;
3563
+ for (let i = 0; i < ranges.lengths.length; i++)
3564
+ n += ranges.lengths[i];
3565
+ return n;
3566
+ }
3567
+ finalize(termCount, nextId) {
3568
+ const fieldCount = this._fieldCount;
3569
+ const totalPostings = this._totalPostings;
3570
+ const maxFreq = this._maxFreq;
3571
+ const slots = this._slots;
3572
+ const layout = choosePostingsLayout(fieldCount);
3573
+ const docIdWidth = nextId <= 65535 ? 16 : 32;
3574
+ const allDocIds = docIdWidth === 16
3575
+ ? new Uint16Array(totalPostings)
3576
+ : new Uint32Array(totalPostings);
3577
+ const allFreqs = allocateFreqs(totalPostings, maxFreq);
3578
+ if (layout === 'dense') {
3579
+ const slotCount = termCount * fieldCount;
3580
+ const denseOffsets = new Uint32Array(slotCount);
3581
+ const denseLengths = new Uint32Array(slotCount);
3582
+ let write = 0;
3583
+ for (let ti = 0; ti < termCount; ti++) {
3584
+ const base = ti * fieldCount;
3585
+ for (let f = 0; f < fieldCount; f++) {
3586
+ const slot = base + f;
3587
+ const ranges = slots.get(slot);
3588
+ const len = ranges == null ? 0 : this.slotLength(ranges);
3589
+ denseOffsets[slot] = write;
3590
+ denseLengths[slot] = len;
3591
+ if (len > 0) {
3592
+ write = this.copySlot(ranges, allDocIds, allFreqs, write, docIdWidth);
3593
+ slots.delete(slot);
3594
+ }
3595
+ }
3596
+ }
3597
+ slots.clear();
3598
+ this.clear();
3599
+ return {
3600
+ fieldCount,
3601
+ termCount,
3602
+ nextId,
3603
+ layout,
3604
+ docIdWidth,
3605
+ sparseFieldIdWidth: null,
3606
+ allDocIds,
3607
+ allFreqs,
3608
+ denseOffsets,
3609
+ denseLengths,
3610
+ sparseTermStarts: null,
3611
+ sparseFieldIds: null,
3612
+ sparseOffsets: null,
3613
+ sparseLengths: null,
3614
+ };
3615
+ }
3616
+ const sparseFieldIdWidth = chooseSparseFieldIdWidth(fieldCount);
3617
+ const sparseFieldIdsScratch = [];
3618
+ const sparseOffsets = [];
3619
+ const sparseLengths = [];
3620
+ const termStarts = new Array(termCount + 1).fill(0);
3621
+ let write = 0;
3622
+ for (let ti = 0; ti < termCount; ti++) {
3623
+ termStarts[ti] = sparseFieldIdsScratch.length;
3624
+ for (let f = 0; f < fieldCount; f++) {
3625
+ const slot = ti * fieldCount + f;
3626
+ const ranges = slots.get(slot);
3627
+ const len = ranges == null ? 0 : this.slotLength(ranges);
3628
+ if (len === 0)
3629
+ continue;
3630
+ sparseFieldIdsScratch.push(f);
3631
+ sparseOffsets.push(write);
3632
+ sparseLengths.push(len);
3633
+ write = this.copySlot(ranges, allDocIds, allFreqs, write, docIdWidth);
3634
+ slots.delete(slot);
3635
+ }
3636
+ termStarts[ti + 1] = sparseFieldIdsScratch.length;
3637
+ }
3638
+ slots.clear();
3639
+ this.clear();
3640
+ const sparseFieldIds = sparseFieldIdWidth === 16
3641
+ ? new Uint16Array(sparseFieldIdsScratch)
3642
+ : new Uint8Array(sparseFieldIdsScratch);
3643
+ return {
3644
+ fieldCount,
3645
+ termCount,
3646
+ nextId,
3647
+ layout,
3648
+ docIdWidth,
3649
+ sparseFieldIdWidth,
3650
+ allDocIds,
3651
+ allFreqs,
3652
+ denseOffsets: null,
3653
+ denseLengths: null,
3654
+ sparseTermStarts: new Uint32Array(termStarts),
3655
+ sparseFieldIds,
3656
+ sparseOffsets: new Uint32Array(sparseOffsets),
3657
+ sparseLengths: new Uint32Array(sparseLengths),
3658
+ };
3659
+ }
3660
+ }
3661
+
3662
+ function getOrCreateTermIndex(termCount, index, term) {
3450
3663
  const existing = index.get(term);
3451
3664
  if (existing != null)
3452
3665
  return existing;
3453
- const ti = state.terms.length;
3454
- state.terms.push(term);
3666
+ const ti = termCount.value;
3667
+ termCount.value++;
3455
3668
  index.set(term, ti);
3456
3669
  return ti;
3457
3670
  }
3458
- function appendPosting(state, termIndex, fieldId, docId, freq) {
3459
- const slot = termIndex * state.fieldCount + fieldId;
3460
- let docIds = state.postingsDocIds[slot];
3461
- if (docIds == null) {
3462
- docIds = [];
3463
- state.postingsDocIds[slot] = docIds;
3464
- state.postingsFreqs[slot] = [];
3465
- }
3466
- docIds.push(docId);
3467
- state.postingsFreqs[slot].push(freq);
3468
- const v = clampFreq(freq);
3469
- if (v > state.maxFreq)
3470
- state.maxFreq = v;
3471
- state.totalPostings++;
3472
- }
3473
- function finalizeFlatPostings(state, nextId) {
3474
- return materializeFrozenPostingsFromBuilder({
3475
- fieldCount: state.fieldCount,
3476
- termCount: state.terms.length,
3477
- postingsDocIds: state.postingsDocIds,
3478
- postingsFreqs: state.postingsFreqs,
3479
- totalPostings: state.totalPostings,
3480
- maxFreq: state.maxFreq,
3481
- }, nextId);
3482
- }
3483
3671
  /** Incremental builder for {@link FrozenMiniSearch} without materializing a full `documents[]` array. */
3484
3672
  class FrozenIndexBuilder {
3485
3673
  constructor(options, hints) {
3674
+ var _a, _b;
3675
+ this._termCount = { value: 0 };
3676
+ this._fieldTermFreqScratch = new Map();
3677
+ this._tokenScratch = [];
3486
3678
  this._options = resolveIndexingOptions(options);
3487
3679
  this._fieldIds = buildFieldIds(this._options.fields);
3488
3680
  this._fieldCount = this._options.fields.length;
3489
3681
  this._index = new SearchableMap();
3490
- this._terms = [];
3491
- this._postingsDocIds = [];
3492
- this._postingsFreqs = [];
3682
+ const estimatedDocs = (_a = hints === null || hints === void 0 ? void 0 : hints.estimatedDocumentCount) !== null && _a !== void 0 ? _a : 0;
3683
+ const perSlot = (_b = hints === null || hints === void 0 ? void 0 : hints.estimatedPostingsPerSlot) !== null && _b !== void 0 ? _b : 4;
3684
+ this._postings = new IncrementalPostingsAccumulator(this._fieldCount, {
3685
+ estimatedTotalPostings: estimatedDocs > 0 ? estimatedDocs * perSlot : undefined,
3686
+ });
3493
3687
  this._avgFieldLength = [];
3494
3688
  this._seenIds = new Set();
3495
3689
  this._nextId = 0;
@@ -3505,14 +3699,6 @@ class FrozenIndexBuilder {
3505
3699
  this._storedFields = [];
3506
3700
  this._fieldLengthData = [];
3507
3701
  }
3508
- this._postingsState = {
3509
- fieldCount: this._fieldCount,
3510
- terms: this._terms,
3511
- postingsDocIds: this._postingsDocIds,
3512
- postingsFreqs: this._postingsFreqs,
3513
- totalPostings: 0,
3514
- maxFreq: 0,
3515
- };
3516
3702
  }
3517
3703
  /** Number of documents indexed so far (not yet frozen). */
3518
3704
  get documentCount() {
@@ -3539,16 +3725,17 @@ class FrozenIndexBuilder {
3539
3725
  const fieldValue = extractField(document, field);
3540
3726
  if (fieldValue == null)
3541
3727
  continue;
3542
- const tokens = tokenize(stringifyField(fieldValue, field), field);
3728
+ const fieldText = typeof fieldValue === 'string'
3729
+ ? fieldValue
3730
+ : stringifyField(fieldValue, field);
3543
3731
  const fieldId = this._fieldIds[field];
3544
- const uniqueTerms = new Set(tokens).size;
3545
- const localFreqs = collectFieldTermFreqs(tokens, field, processTerm);
3732
+ const uniqueTerms = collectFieldTermFreqsFromFieldInto(this._fieldTermFreqScratch, this._tokenScratch, tokenize, fieldText, field, processTerm);
3546
3733
  this._fieldLengthData[shortId * this._fieldCount + fieldId] = uniqueTerms;
3547
3734
  updateAvgFieldLength(this._avgFieldLength, fieldId, documentCount - 1, uniqueTerms);
3548
- for (const [term, freq] of localFreqs) {
3549
- const ti = getOrCreateTermIndex(this._postingsState, this._index, term);
3550
- appendPosting(this._postingsState, ti, fieldId, shortId, freq);
3551
- }
3735
+ this._fieldTermFreqScratch.forEach((freq, term) => {
3736
+ const ti = getOrCreateTermIndex(this._termCount, this._index, term);
3737
+ this._postings.append(ti, fieldId, shortId, freq);
3738
+ });
3552
3739
  }
3553
3740
  }
3554
3741
  /**
@@ -3605,7 +3792,11 @@ class FrozenIndexBuilder {
3605
3792
  }
3606
3793
  this._frozen = true;
3607
3794
  const documentCount = this._nextId;
3608
- const postings = finalizeFlatPostings(this._postingsState, documentCount);
3795
+ const termCount = this._termCount.value;
3796
+ const postings = this._postings.finalize(termCount, documentCount);
3797
+ const radixTree = this._index.radixTree;
3798
+ this._index = null;
3799
+ const index = fromRadixTree(radixTree, termCount);
3609
3800
  const avgFieldLength = new Float32Array(this._fieldCount);
3610
3801
  for (let f = 0; f < this._fieldCount; f++) {
3611
3802
  avgFieldLength[f] = (_a = this._avgFieldLength[f]) !== null && _a !== void 0 ? _a : 0;
@@ -3618,8 +3809,6 @@ class FrozenIndexBuilder {
3618
3809
  ? this._storedFields.slice(0, documentCount)
3619
3810
  : this._storedFields;
3620
3811
  const idLookup = createIdToShortIdLookup(externalIds, documentCount);
3621
- // Incremental builder: numeric radix leaves + build-time terms[] for postings.
3622
- // freezeFromMiniSearch packs Map leaves in one radix pass (no resident terms[]).
3623
3812
  return {
3624
3813
  options: this._options,
3625
3814
  documentCount,
@@ -3631,8 +3820,8 @@ class FrozenIndexBuilder {
3631
3820
  storedFields,
3632
3821
  fieldLengthMatrix: materializeFieldLengthMatrix(this._fieldLengthData, documentCount * this._fieldCount),
3633
3822
  avgFieldLength,
3634
- index: fromRadixTree(this._index.radixTree, this._terms.length),
3635
- termCount: this._terms.length,
3823
+ index,
3824
+ termCount,
3636
3825
  postings,
3637
3826
  };
3638
3827
  }
@@ -4341,6 +4530,7 @@ FrozenMiniSearch.wildcard = WILDCARD_QUERY;
4341
4530
  exports.AND = AND;
4342
4531
  exports.AND_NOT = AND_NOT;
4343
4532
  exports.FrozenIndexBuilder = FrozenIndexBuilder;
4533
+ exports.FrozenMiniSearch = FrozenMiniSearch;
4344
4534
  exports.OR = OR;
4345
4535
  exports.assembleFrozen = assembleFrozen;
4346
4536
  exports.buildFrozenFromDocuments = buildFrozenFromDocuments;
@@ -5,6 +5,7 @@ const main = mod.default || mod
5
5
 
6
6
  module.exports = main
7
7
  module.exports.default = main
8
+ module.exports.FrozenMiniSearch = main
8
9
 
9
10
  for (const key of Object.keys(mod)) {
10
11
  if (key !== 'default') {
@@ -494,22 +494,24 @@ type MiniSearchSnapshot = {
494
494
  interface FrozenIndexBuilderHints {
495
495
  /** Pre-size per-document arrays when the final document count is known. */
496
496
  estimatedDocumentCount?: number;
497
+ /** Hint for initial growable posting column capacity per (term, field) slot. */
498
+ estimatedPostingsPerSlot?: number;
497
499
  }
498
500
  /** Incremental builder for {@link FrozenMiniSearch} without materializing a full `documents[]` array. */
499
501
  declare class FrozenIndexBuilder<T> {
500
502
  private readonly _options;
501
503
  private readonly _fieldIds;
502
504
  private readonly _fieldCount;
503
- private readonly _index;
504
- private readonly _terms;
505
- private readonly _postingsDocIds;
506
- private readonly _postingsFreqs;
505
+ private _index;
506
+ private readonly _postings;
507
+ private readonly _termCount;
507
508
  private readonly _externalIds;
508
509
  private readonly _storedFields;
509
510
  private readonly _fieldLengthData;
510
511
  private readonly _avgFieldLength;
511
- private readonly _postingsState;
512
512
  private readonly _seenIds;
513
+ private readonly _fieldTermFreqScratch;
514
+ private readonly _tokenScratch;
513
515
  private _nextId;
514
516
  private _frozen;
515
517
  constructor(options: Options<T>, hints?: FrozenIndexBuilderHints);
@@ -616,5 +618,5 @@ declare class FrozenMiniSearch<T = any> {
616
618
  private executeQuery;
617
619
  }
618
620
 
619
- export { AND, AND_NOT, FrozenIndexBuilder, OR, assembleFrozen, buildFrozenFromDocuments, createFrozenIndexBuilder, FrozenMiniSearch as default, freezeFrozenIndexBuilder, frozenMemoryBreakdown };
621
+ export { AND, AND_NOT, FrozenIndexBuilder, FrozenMiniSearch, OR, assembleFrozen, buildFrozenFromDocuments, createFrozenIndexBuilder, FrozenMiniSearch as default, freezeFrozenIndexBuilder, frozenMemoryBreakdown };
620
622
  export type { BM25Params, CombinationOperator, FrozenAssembleParams, FrozenIndexBuilderHints, FrozenMemoryBreakdown, LogLevel, LowercaseCombinationOperator, MatchInfo, MiniSearchSnapshot, Options, Query, QueryCombination, SearchOptions, SearchResult, SerializedIndexEntry, Suggestion, Wildcard };