@yoch/frozenminisearch 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,10 +4,67 @@ Object.defineProperty(exports, '__esModule', { value: true });
4
4
 
5
5
  var zlib = require('node:zlib');
6
6
 
7
+ /**
8
+ * Internal AND / AND_NOT gate thresholds (not exported from the public package entry).
9
+ */
10
+ const DEFAULT_POSTING_GATE_MIN_LENGTH = 2048;
11
+ const DEFAULT_POSTING_GATE_RATIO_SHIFT = 2;
12
+ const DEFAULT_POSTING_GATE_POLICY = {
13
+ minLength: DEFAULT_POSTING_GATE_MIN_LENGTH,
14
+ ratioShift: DEFAULT_POSTING_GATE_RATIO_SHIFT,
15
+ };
16
+ function passGateByPostingRatio(gateSize, postingListLength, policy = DEFAULT_POSTING_GATE_POLICY) {
17
+ if (postingListLength < policy.minLength)
18
+ return false;
19
+ return gateSize <= (postingListLength >>> policy.ratioShift);
20
+ }
21
+ const DEFAULT_AND_GATE_LIMITS = {
22
+ maxAbsolute: 5000,
23
+ maxFraction: 0.1,
24
+ };
25
+ function resolveGateMaxSize(documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
26
+ return Math.min(limits.maxAbsolute, Math.max(100, Math.floor(documentCount * limits.maxFraction)));
27
+ }
28
+ function gateIsSelectiveEnough(gateSize, documentCount, limits = DEFAULT_AND_GATE_LIMITS, postingListLength, postingGatePolicy = DEFAULT_POSTING_GATE_POLICY) {
29
+ if (gateSize === 0)
30
+ return true;
31
+ if (gateSize <= resolveGateMaxSize(documentCount, limits))
32
+ return true;
33
+ if (postingListLength != null
34
+ && postingListLength > 0
35
+ && passGateByPostingRatio(gateSize, postingListLength, postingGatePolicy)) {
36
+ return true;
37
+ }
38
+ return false;
39
+ }
40
+
7
41
  const MAX_FREQ = 65535;
8
42
  function readDocId(docIds, index) {
9
43
  return docIds[index];
10
44
  }
45
+ /** Binary search for docId in a sorted segment; returns global index or -1. */
46
+ function findDocIndexInSortedSegment(docIds, offset, length, docId) {
47
+ let lo = 0;
48
+ let hi = length - 1;
49
+ while (lo <= hi) {
50
+ const mid = (lo + hi) >>> 1;
51
+ const v = readDocId(docIds, offset + mid);
52
+ if (v < docId)
53
+ lo = mid + 1;
54
+ else if (v > docId)
55
+ hi = mid - 1;
56
+ else
57
+ return offset + mid;
58
+ }
59
+ return -1;
60
+ }
61
+ /**
62
+ * Scan vs binary search once `allowedDocs` is already in effect (scoring layer).
63
+ * Uses the same numeric policy as {@link passGateByPostingRatio} today; distinct decision point.
64
+ */
65
+ function shouldSeekAllowedDocs(gateSize, listLength) {
66
+ return passGateByPostingRatio(gateSize, listLength);
67
+ }
11
68
  function allocateFreqs(length, maxValue) {
12
69
  if (maxValue <= 0xff)
13
70
  return new Uint8Array(length);
@@ -69,10 +126,15 @@ function bm25FieldConstants(bm25params, avgFieldLength) {
69
126
  const { k, b, d } = bm25params;
70
127
  return { k, d, k1: k + 1, oneMinusB: 1 - b, bOverAvg: b / avgFieldLength };
71
128
  }
72
- function calcBM25ScoreWithConstants(termFreq, matchingCount, totalCount, fieldLength, constants) {
129
+ function bm25Idf(matchingCount, totalCount) {
130
+ return Math.log(1 + (totalCount - matchingCount + 0.5) / (matchingCount + 0.5));
131
+ }
132
+ function calcBm25TfWithConstants(termFreq, fieldLength, constants, idf) {
73
133
  const { k, d, k1, oneMinusB, bOverAvg } = constants;
74
- const invDocFreq = Math.log(1 + (totalCount - matchingCount + 0.5) / (matchingCount + 0.5));
75
- return invDocFreq * (d + termFreq * k1 / (termFreq + k * (oneMinusB + bOverAvg * fieldLength)));
134
+ return idf * (d + termFreq * k1 / (termFreq + k * (oneMinusB + bOverAvg * fieldLength)));
135
+ }
136
+ function calcBM25ScoreWithConstants(termFreq, matchingCount, totalCount, fieldLength, constants) {
137
+ return calcBm25TfWithConstants(termFreq, fieldLength, constants, bm25Idf(matchingCount, totalCount));
76
138
  }
77
139
  const getOwnProperty = (object, property) => Object.prototype.hasOwnProperty.call(object, property) ? object[property] : undefined;
78
140
  function fieldBoostsForQuery(options, fields) {
@@ -101,7 +163,7 @@ function getDerivedTerm(derivedTerm, cache) {
101
163
  cache.value = derivedTerm.resolve();
102
164
  return cache.value;
103
165
  }
104
- function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache) {
166
+ function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf) {
105
167
  const resolvedDerivedTerm = getDerivedTerm(derivedTerm, derivedTermCache);
106
168
  const docBoost = boostDocumentFn
107
169
  ? boostDocumentFn(context.getExternalId(docId), resolvedDerivedTerm, context.getStoredFields(docId))
@@ -109,7 +171,9 @@ function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFre
109
171
  if (!docBoost)
110
172
  return;
111
173
  const fieldLength = context.getFieldLength(docId, fieldId);
112
- const rawScore = calcBM25ScoreWithConstants(termFreq, matchingFields, context.documentCount, fieldLength, bm25);
174
+ const rawScore = hoistedIdf !== undefined
175
+ ? calcBm25TfWithConstants(termFreq, fieldLength, bm25, hoistedIdf)
176
+ : calcBM25ScoreWithConstants(termFreq, matchingFields, context.documentCount, fieldLength, bm25);
113
177
  const weightedScore = termWeight * termBoost * fieldBoost * docBoost * rawScore;
114
178
  const result = results.get(docId);
115
179
  if (result) {
@@ -132,22 +196,39 @@ function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFre
132
196
  }
133
197
  }
134
198
  function aggregateSegmentPostingList(sourceTerm, derivedTerm, termWeight, termBoost, field, fieldId, fieldBoost, list, context, boostDocumentFn, bm25params, results, allowedDocs) {
135
- var _a;
199
+ var _a, _b;
136
200
  let matchingFields = list.length;
137
201
  const bm25 = bm25FieldConstants(bm25params, context.avgFieldLength[fieldId]);
202
+ const hoistedIdf = context.isDocActive == null
203
+ ? bm25Idf(matchingFields, context.documentCount)
204
+ : undefined;
138
205
  const { docIds, freqs, offset, length } = list;
139
206
  const derivedTermCache = {};
207
+ if (allowedDocs != null && shouldSeekAllowedDocs(allowedDocs.size, length)) {
208
+ for (const docId of allowedDocs) {
209
+ if (context.isDocActive != null && !context.isDocActive(docId)) {
210
+ (_a = context.onInactiveDoc) === null || _a === void 0 ? void 0 : _a.call(context, docId, fieldId, getDerivedTerm(derivedTerm, derivedTermCache));
211
+ matchingFields -= 1;
212
+ continue;
213
+ }
214
+ const index = findDocIndexInSortedSegment(docIds, offset, length, docId);
215
+ if (index < 0)
216
+ continue;
217
+ scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, freqs[index], termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
218
+ }
219
+ return matchingFields;
220
+ }
140
221
  for (let i = 0; i < length; i++) {
141
222
  const docId = readDocId(docIds, offset + i);
142
223
  const termFreq = freqs[offset + i];
143
224
  if (context.isDocActive != null && !context.isDocActive(docId)) {
144
- (_a = context.onInactiveDoc) === null || _a === void 0 ? void 0 : _a.call(context, docId, fieldId, getDerivedTerm(derivedTerm, derivedTermCache));
225
+ (_b = context.onInactiveDoc) === null || _b === void 0 ? void 0 : _b.call(context, docId, fieldId, getDerivedTerm(derivedTerm, derivedTermCache));
145
226
  matchingFields -= 1;
146
227
  continue;
147
228
  }
148
229
  if (allowedDocs != null && !allowedDocs.has(docId))
149
230
  continue;
150
- scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache);
231
+ scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
151
232
  }
152
233
  return matchingFields;
153
234
  }
@@ -167,6 +248,9 @@ function aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTerm
167
248
  }
168
249
  let matchingFields = postingList.size;
169
250
  const bm25 = bm25FieldConstants(bm25params, context.avgFieldLength[fieldId]);
251
+ const hoistedIdf = context.isDocActive == null
252
+ ? bm25Idf(matchingFields, context.documentCount)
253
+ : undefined;
170
254
  const derivedTermCache = {};
171
255
  postingList.forEachDoc((docId, termFreq) => {
172
256
  var _a;
@@ -177,7 +261,7 @@ function aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTerm
177
261
  }
178
262
  if (allowedDocs != null && !allowedDocs.has(docId))
179
263
  return;
180
- scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache);
264
+ scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
181
265
  });
182
266
  }
183
267
  return results;
@@ -1746,82 +1830,47 @@ function readFieldLengthMatrixSection(buf, flags, cellCount) {
1746
1830
 
1747
1831
  const DISCARDED_DOC_ID = 0xffffffff;
1748
1832
  function postingFreqValue(freq, clampFrequencies) {
1749
- return clampFrequencies ? clampFreq(freq) : freq;
1750
- }
1751
- function materializeFlatPostings(params) {
1752
- const { fieldCount, termCount, forEachPosting, remapDocId, clampFrequencies } = params;
1753
- const slotCount = termCount * fieldCount;
1754
- const postingsOffsets = new Uint32Array(slotCount);
1755
- const postingsLengths = new Uint32Array(slotCount);
1756
- let totalPostings = 0;
1757
- let maxFreq = 0;
1758
- for (let ti = 0; ti < termCount; ti++) {
1759
- for (let f = 0; f < fieldCount; f++) {
1760
- forEachPosting(ti, f, (rawDocId, freq) => {
1761
- const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
1762
- if (docId === DISCARDED_DOC_ID)
1763
- return;
1764
- totalPostings++;
1765
- const v = postingFreqValue(freq, clampFrequencies);
1766
- if (v > maxFreq)
1767
- maxFreq = v;
1768
- });
1769
- }
1770
- }
1771
- const useUint16 = params.nextId != null && params.nextId <= 65535;
1772
- const allDocIds = useUint16
1773
- ? new Uint16Array(totalPostings)
1774
- : new Uint32Array(totalPostings);
1775
- const allFreqs = allocateFreqs(totalPostings, maxFreq);
1776
- // Slots are visited in ascending fieldId (0..fieldCount-1) per term. Sparse layouts
1777
- // rely on this ordering so field ids per term stay sorted for binary lookup.
1778
- let write = 0;
1779
- for (let ti = 0; ti < termCount; ti++) {
1780
- const base = ti * fieldCount;
1781
- for (let f = 0; f < fieldCount; f++) {
1782
- const offset = write;
1783
- let count = 0;
1784
- forEachPosting(ti, f, (rawDocId, freq) => {
1785
- const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
1786
- if (docId === DISCARDED_DOC_ID)
1787
- return;
1788
- if (useUint16) {
1789
- allDocIds[write] = docId;
1790
- }
1791
- else {
1792
- allDocIds[write] = docId;
1793
- }
1794
- allFreqs[write] = postingFreqValue(freq, clampFrequencies);
1795
- write++;
1796
- count++;
1797
- });
1798
- postingsOffsets[base + f] = offset;
1799
- postingsLengths[base + f] = count;
1800
- }
1801
- }
1802
- return {
1803
- postingsOffsets,
1804
- postingsLengths,
1805
- allDocIds,
1806
- allFreqs,
1807
- };
1833
+ return clampFreq(freq) ;
1808
1834
  }
1809
1835
 
1810
1836
  function readFieldId(fieldIds, index) {
1811
1837
  return fieldIds[index];
1812
1838
  }
1813
- function choosePostingsLayout(fieldCount) {
1814
- return fieldCount === 1 ? 'dense' : 'sparse';
1815
- }
1816
1839
  function chooseSparseFieldIdWidth(fieldCount) {
1817
1840
  return fieldCount > 255 ? 16 : 8;
1818
1841
  }
1819
- function materializeFrozenPostings(params) {
1820
- const { fieldCount, termCount, nextId } = params;
1821
- const layout = choosePostingsLayout(fieldCount);
1842
+ function choosePostingsLayout(fieldCount, termCount, nonEmptySlots) {
1843
+ const denseBytes = termCount * fieldCount * 8;
1844
+ const sparseFieldIdBytes = chooseSparseFieldIdWidth(fieldCount) === 16 ? 2 : 1;
1845
+ const sparseBytes = (termCount + 1) * 4 + nonEmptySlots * (sparseFieldIdBytes + 8);
1846
+ return denseBytes <= sparseBytes ? 'dense' : 'sparse';
1847
+ }
1848
+ /** Shared dense/sparse layout emission; callers supply per-slot length and copy. */
1849
+ function buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, source) {
1850
+ const layout = choosePostingsLayout(fieldCount, termCount, source.nonEmptySlots);
1822
1851
  const docIdWidth = nextId <= 65535 ? 16 : 32;
1852
+ const allDocIds = docIdWidth === 16
1853
+ ? new Uint16Array(totalPostings)
1854
+ : new Uint32Array(totalPostings);
1855
+ const allFreqs = allocateFreqs(totalPostings, maxFreq);
1856
+ const targets = { allDocIds, allFreqs, docIdWidth };
1823
1857
  if (layout === 'dense') {
1824
- const flat = materializeFlatPostings({ ...params, nextId });
1858
+ const slotCount = termCount * fieldCount;
1859
+ const denseOffsets = new Uint32Array(slotCount);
1860
+ const denseLengths = new Uint32Array(slotCount);
1861
+ let write = 0;
1862
+ for (let ti = 0; ti < termCount; ti++) {
1863
+ const base = ti * fieldCount;
1864
+ for (let f = 0; f < fieldCount; f++) {
1865
+ const slot = base + f;
1866
+ const len = source.slotLength(ti, f);
1867
+ denseOffsets[slot] = write;
1868
+ denseLengths[slot] = len;
1869
+ if (len > 0) {
1870
+ write = source.writeSlot(ti, f, write, targets);
1871
+ }
1872
+ }
1873
+ }
1825
1874
  return {
1826
1875
  fieldCount,
1827
1876
  termCount,
@@ -1829,10 +1878,10 @@ function materializeFrozenPostings(params) {
1829
1878
  layout,
1830
1879
  docIdWidth,
1831
1880
  sparseFieldIdWidth: null,
1832
- allDocIds: flat.allDocIds,
1833
- allFreqs: flat.allFreqs,
1834
- denseOffsets: flat.postingsOffsets,
1835
- denseLengths: flat.postingsLengths,
1881
+ allDocIds,
1882
+ allFreqs,
1883
+ denseOffsets,
1884
+ denseLengths,
1836
1885
  sparseTermStarts: null,
1837
1886
  sparseFieldIds: null,
1838
1887
  sparseOffsets: null,
@@ -1844,60 +1893,23 @@ function materializeFrozenPostings(params) {
1844
1893
  const sparseOffsets = [];
1845
1894
  const sparseLengths = [];
1846
1895
  const termStarts = new Array(termCount + 1).fill(0);
1847
- const { forEachPosting, remapDocId, clampFrequencies } = params;
1848
- // Non-empty slots per term are emitted with fieldId in ascending order (f loops 0..fieldCount-1).
1849
- let totalPostings = 0;
1850
- let maxFreq = 0;
1896
+ let write = 0;
1851
1897
  for (let ti = 0; ti < termCount; ti++) {
1852
1898
  termStarts[ti] = sparseFieldIdsScratch.length;
1853
1899
  for (let f = 0; f < fieldCount; f++) {
1854
- let count = 0;
1855
- forEachPosting(ti, f, (rawDocId, freq) => {
1856
- const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
1857
- if (docId === DISCARDED_DOC_ID)
1858
- return;
1859
- count++;
1860
- const v = postingFreqValue(freq, clampFrequencies);
1861
- if (v > maxFreq)
1862
- maxFreq = v;
1863
- });
1864
- if (count === 0)
1900
+ const len = source.slotLength(ti, f);
1901
+ if (len === 0)
1865
1902
  continue;
1866
1903
  sparseFieldIdsScratch.push(f);
1867
- sparseOffsets.push(totalPostings);
1868
- sparseLengths.push(count);
1869
- totalPostings += count;
1904
+ sparseOffsets.push(write);
1905
+ sparseLengths.push(len);
1906
+ write = source.writeSlot(ti, f, write, targets);
1870
1907
  }
1871
1908
  termStarts[ti + 1] = sparseFieldIdsScratch.length;
1872
1909
  }
1873
- const allDocIds = docIdWidth === 16
1874
- ? new Uint16Array(totalPostings)
1875
- : new Uint32Array(totalPostings);
1876
- const allFreqs = allocateFreqs(totalPostings, maxFreq);
1877
1910
  const sparseFieldIds = sparseFieldIdWidth === 16
1878
1911
  ? new Uint16Array(sparseFieldIdsScratch)
1879
1912
  : new Uint8Array(sparseFieldIdsScratch);
1880
- let write = 0;
1881
- for (let ti = 0; ti < termCount; ti++) {
1882
- const start = termStarts[ti];
1883
- const end = termStarts[ti + 1];
1884
- for (let s = start; s < end; s++) {
1885
- const f = readFieldId(sparseFieldIds, s);
1886
- forEachPosting(ti, f, (rawDocId, freq) => {
1887
- const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
1888
- if (docId === DISCARDED_DOC_ID)
1889
- return;
1890
- if (docIdWidth === 16) {
1891
- allDocIds[write] = docId;
1892
- }
1893
- else {
1894
- allDocIds[write] = docId;
1895
- }
1896
- allFreqs[write] = postingFreqValue(freq, clampFrequencies);
1897
- write++;
1898
- });
1899
- }
1900
- }
1901
1913
  return {
1902
1914
  fieldCount,
1903
1915
  termCount,
@@ -1915,6 +1927,58 @@ function materializeFrozenPostings(params) {
1915
1927
  sparseLengths: new Uint32Array(sparseLengths),
1916
1928
  };
1917
1929
  }
1930
+ function materializeFrozenPostings(params) {
1931
+ const { fieldCount, termCount, nextId } = params;
1932
+ const { forEachPosting, remapDocId} = params;
1933
+ const slotCount = termCount * fieldCount;
1934
+ const slotLengths = new Uint32Array(slotCount);
1935
+ let totalPostings = 0;
1936
+ let maxFreq = 0;
1937
+ let nonEmptySlots = 0;
1938
+ for (let ti = 0; ti < termCount; ti++) {
1939
+ const base = ti * fieldCount;
1940
+ for (let f = 0; f < fieldCount; f++) {
1941
+ let count = 0;
1942
+ forEachPosting(ti, f, (rawDocId, freq) => {
1943
+ const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
1944
+ if (docId === DISCARDED_DOC_ID)
1945
+ return;
1946
+ count++;
1947
+ const v = postingFreqValue(freq);
1948
+ if (v > maxFreq)
1949
+ maxFreq = v;
1950
+ });
1951
+ if (count === 0)
1952
+ continue;
1953
+ slotLengths[base + f] = count;
1954
+ totalPostings += count;
1955
+ nonEmptySlots++;
1956
+ }
1957
+ }
1958
+ return buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, {
1959
+ nonEmptySlots,
1960
+ slotLength(ti, f) {
1961
+ return slotLengths[ti * fieldCount + f];
1962
+ },
1963
+ writeSlot(ti, f, write, targets) {
1964
+ const { allDocIds: outDocIds, allFreqs: outFreqs, docIdWidth: width } = targets;
1965
+ forEachPosting(ti, f, (rawDocId, freq) => {
1966
+ const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
1967
+ if (docId === DISCARDED_DOC_ID)
1968
+ return;
1969
+ if (width === 16) {
1970
+ outDocIds[write] = docId;
1971
+ }
1972
+ else {
1973
+ outDocIds[write] = docId;
1974
+ }
1975
+ outFreqs[write] = postingFreqValue(freq);
1976
+ write++;
1977
+ });
1978
+ return write;
1979
+ },
1980
+ });
1981
+ }
1918
1982
  function postingsTypedBytes(layout) {
1919
1983
  const allDocIdsBytes = layout.allDocIds.byteLength;
1920
1984
  const allFreqsBytes = layout.allFreqs.byteLength;
@@ -2677,7 +2741,6 @@ function buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, nextId, s
2677
2741
  fieldCount,
2678
2742
  termCount,
2679
2743
  nextId,
2680
- clampFrequencies: true,
2681
2744
  remapDocId,
2682
2745
  forEachPosting(ti, f, emit) {
2683
2746
  var _a;
@@ -2921,8 +2984,8 @@ async function zlibPayloadChoiceAsync(uncompressed) {
2921
2984
  return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
2922
2985
  }
2923
2986
  const autoSyncCompressors = {
2924
- zstd: (uncompressed) => zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed)),
2925
- zlib: (uncompressed) => zlib.deflateSync(uncompressed),
2987
+ zstd: uncompressed => zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed)),
2988
+ zlib: uncompressed => zlib.deflateSync(uncompressed),
2926
2989
  };
2927
2990
  const autoAsyncCompressors = {
2928
2991
  zstd: zstdCompressAsync,
@@ -3865,93 +3928,23 @@ class IncrementalPostingsAccumulator {
3865
3928
  const totalPostings = this._totalPostings;
3866
3929
  const maxFreq = this._maxFreq;
3867
3930
  const slots = this._slots;
3868
- const layout = choosePostingsLayout(fieldCount);
3869
- const docIdWidth = nextId <= 65535 ? 16 : 32;
3870
- const allDocIds = docIdWidth === 16
3871
- ? new Uint16Array(totalPostings)
3872
- : new Uint32Array(totalPostings);
3873
- const allFreqs = allocateFreqs(totalPostings, maxFreq);
3874
- if (layout === 'dense') {
3875
- const slotCount = termCount * fieldCount;
3876
- const denseOffsets = new Uint32Array(slotCount);
3877
- const denseLengths = new Uint32Array(slotCount);
3878
- let write = 0;
3879
- for (let ti = 0; ti < termCount; ti++) {
3880
- const base = ti * fieldCount;
3881
- for (let f = 0; f < fieldCount; f++) {
3882
- const slot = base + f;
3883
- const ranges = slots.get(slot);
3884
- const len = ranges == null ? 0 : this.slotLength(ranges);
3885
- denseOffsets[slot] = write;
3886
- denseLengths[slot] = len;
3887
- if (len > 0) {
3888
- write = this.copySlot(ranges, allDocIds, allFreqs, write, docIdWidth);
3889
- slots.delete(slot);
3890
- }
3891
- }
3892
- }
3893
- slots.clear();
3894
- this.clear();
3895
- return {
3896
- fieldCount,
3897
- termCount,
3898
- nextId,
3899
- layout,
3900
- docIdWidth,
3901
- sparseFieldIdWidth: null,
3902
- allDocIds,
3903
- allFreqs,
3904
- denseOffsets,
3905
- denseLengths,
3906
- sparseTermStarts: null,
3907
- sparseFieldIds: null,
3908
- sparseOffsets: null,
3909
- sparseLengths: null,
3910
- };
3911
- }
3912
- const sparseFieldIdWidth = chooseSparseFieldIdWidth(fieldCount);
3913
- const sparseFieldIdsScratch = [];
3914
- const sparseOffsets = [];
3915
- const sparseLengths = [];
3916
- const termStarts = new Array(termCount + 1).fill(0);
3917
- let write = 0;
3918
- for (let ti = 0; ti < termCount; ti++) {
3919
- termStarts[ti] = sparseFieldIdsScratch.length;
3920
- for (let f = 0; f < fieldCount; f++) {
3931
+ const layout = buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, {
3932
+ nonEmptySlots: slots.size,
3933
+ slotLength: (ti, f) => {
3934
+ const ranges = slots.get(ti * fieldCount + f);
3935
+ return ranges == null ? 0 : this.slotLength(ranges);
3936
+ },
3937
+ writeSlot: (ti, f, write, targets) => {
3921
3938
  const slot = ti * fieldCount + f;
3922
3939
  const ranges = slots.get(slot);
3923
- const len = ranges == null ? 0 : this.slotLength(ranges);
3924
- if (len === 0)
3925
- continue;
3926
- sparseFieldIdsScratch.push(f);
3927
- sparseOffsets.push(write);
3928
- sparseLengths.push(len);
3929
- write = this.copySlot(ranges, allDocIds, allFreqs, write, docIdWidth);
3940
+ const next = this.copySlot(ranges, targets.allDocIds, targets.allFreqs, write, targets.docIdWidth);
3930
3941
  slots.delete(slot);
3931
- }
3932
- termStarts[ti + 1] = sparseFieldIdsScratch.length;
3933
- }
3942
+ return next;
3943
+ },
3944
+ });
3934
3945
  slots.clear();
3935
3946
  this.clear();
3936
- const sparseFieldIds = sparseFieldIdWidth === 16
3937
- ? new Uint16Array(sparseFieldIdsScratch)
3938
- : new Uint8Array(sparseFieldIdsScratch);
3939
- return {
3940
- fieldCount,
3941
- termCount,
3942
- nextId,
3943
- layout,
3944
- docIdWidth,
3945
- sparseFieldIdWidth,
3946
- allDocIds,
3947
- allFreqs,
3948
- denseOffsets: null,
3949
- denseLengths: null,
3950
- sparseTermStarts: new Uint32Array(termStarts),
3951
- sparseFieldIds,
3952
- sparseOffsets: new Uint32Array(sparseOffsets),
3953
- sparseLengths: new Uint32Array(sparseLengths),
3954
- };
3947
+ return layout;
3955
3948
  }
3956
3949
  }
3957
3950
 
@@ -4131,22 +4124,6 @@ function buildFrozenParamsFromDocuments(documents, options) {
4131
4124
  return builder.freezeParams();
4132
4125
  }
4133
4126
 
4134
- /**
4135
- * Internal AND / AND_NOT gate thresholds (not exported from the public package entry).
4136
- */
4137
- const DEFAULT_AND_GATE_LIMITS = {
4138
- maxAbsolute: 5000,
4139
- maxFraction: 0.1,
4140
- };
4141
- function resolveGateMaxSize(documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
4142
- return Math.min(limits.maxAbsolute, Math.max(100, Math.floor(documentCount * limits.maxFraction)));
4143
- }
4144
- function gateIsSelectiveEnough(gateSize, documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
4145
- if (gateSize === 0)
4146
- return true;
4147
- return gateSize <= resolveGateMaxSize(documentCount, limits);
4148
- }
4149
-
4150
4127
  function useGatedEvaluation(run, branchCount, operator, hasWildcard) {
4151
4128
  return shouldUseGatedEvaluation(branchCount, operator, hasWildcard);
4152
4129
  }
@@ -4223,12 +4200,11 @@ function normalizeStringQuery(query, searchOptions, params) {
4223
4200
  function lazyIndexedTerm(indexView, termIndex) {
4224
4201
  return { kind: 'lazy', resolve: () => indexView.resolveTermByIndex(termIndex) };
4225
4202
  }
4226
- function visitQuerySpecForScoring(query, normalized, params, visit) {
4203
+ function forEachQuerySpecTermRef(query, normalized, params, visit) {
4227
4204
  const { indexView } = params;
4228
- const { fuzzyWeight, options, prefixWeight } = normalized;
4205
+ const { options } = normalized;
4229
4206
  const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
4230
- const exactTi = indexView.resolveTermIndex(query.term);
4231
- visit(exactTi == null ? undefined : indexView.fieldTermData(exactTi), query.term, 1);
4207
+ visit({ kind: 'exact', termIndex: indexView.resolveTermIndex(query.term) });
4232
4208
  const seenPrefix = query.prefix && maxDistance ? new Set() : undefined;
4233
4209
  if (query.prefix) {
4234
4210
  for (const { termIndex, length } of indexView.getPrefixMatchesByIndex(query.term)) {
@@ -4236,7 +4212,7 @@ function visitQuerySpecForScoring(query, normalized, params, visit) {
4236
4212
  if (!distance)
4237
4213
  continue;
4238
4214
  seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.add(termIndex);
4239
- visit(indexView.fieldTermData(termIndex), lazyIndexedTerm(indexView, termIndex), prefixWeight * length / (length + 0.3 * distance));
4215
+ visit({ kind: 'prefix', termIndex, length, distance });
4240
4216
  }
4241
4217
  }
4242
4218
  if (!maxDistance)
@@ -4244,9 +4220,24 @@ function visitQuerySpecForScoring(query, normalized, params, visit) {
4244
4220
  for (const { termIndex, length, distance } of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
4245
4221
  if (!distance || (seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.has(termIndex)))
4246
4222
  continue;
4247
- visit(indexView.fieldTermData(termIndex), lazyIndexedTerm(indexView, termIndex), fuzzyWeight * length / (length + distance));
4223
+ visit({ kind: 'fuzzy', termIndex, length, distance });
4248
4224
  }
4249
4225
  }
4226
+ function visitQuerySpecForScoring(query, normalized, params, visit) {
4227
+ const { indexView } = params;
4228
+ const { fuzzyWeight, prefixWeight } = normalized;
4229
+ forEachQuerySpecTermRef(query, normalized, params, (ref) => {
4230
+ if (ref.kind === 'exact') {
4231
+ visit(ref.termIndex == null ? undefined : indexView.fieldTermData(ref.termIndex), query.term, 1);
4232
+ return;
4233
+ }
4234
+ if (ref.kind === 'prefix') {
4235
+ visit(indexView.fieldTermData(ref.termIndex), lazyIndexedTerm(indexView, ref.termIndex), prefixWeight * ref.length / (ref.length + 0.3 * ref.distance));
4236
+ return;
4237
+ }
4238
+ visit(indexView.fieldTermData(ref.termIndex), lazyIndexedTerm(indexView, ref.termIndex), fuzzyWeight * ref.length / (ref.length + ref.distance));
4239
+ });
4240
+ }
4250
4241
  function executeQuerySpecInternal(query, normalized, params, allowedDocs) {
4251
4242
  const { fieldBoosts, options } = normalized;
4252
4243
  const termOptions = allowedDocs == null ? undefined : { allowedDocs };
@@ -4256,32 +4247,73 @@ function executeQuerySpecInternal(query, normalized, params, allowedDocs) {
4256
4247
  });
4257
4248
  return results;
4258
4249
  }
4259
- function collectDocIdsForQuerySpec(query, normalized, params, allowedDocs) {
4260
- const { fieldBoosts, options } = normalized;
4261
- const docIds = new Set();
4262
- const { indexView, aggregateContext } = params;
4263
- const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
4264
- const exactTi = indexView.resolveTermIndex(query.term);
4265
- if (exactTi != null) {
4266
- indexView.collectDocIds(exactTi, fieldBoosts, aggregateContext, docIds, allowedDocs);
4250
+ function maxPostingLengthForFieldTermData(data, fieldBoosts, fieldIds) {
4251
+ if (data == null)
4252
+ return 0;
4253
+ let maxLen = 0;
4254
+ for (const field of fieldBoosts.names) {
4255
+ const fieldId = fieldIds[field];
4256
+ const postingList = data.get(fieldId);
4257
+ if (postingList == null)
4258
+ continue;
4259
+ const len = postingList instanceof SegmentPostingList ? postingList.length : postingList.size;
4260
+ if (len > maxLen)
4261
+ maxLen = len;
4267
4262
  }
4268
- const seenPrefix = query.prefix && maxDistance ? new Set() : undefined;
4269
- if (query.prefix) {
4270
- for (const { termIndex, length } of indexView.getPrefixMatchesByIndex(query.term)) {
4271
- const distance = length - query.term.length;
4272
- if (!distance)
4273
- continue;
4274
- seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.add(termIndex);
4275
- indexView.collectDocIds(termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
4263
+ return maxLen;
4264
+ }
4265
+ function estimateMaxPostingLengthForQuerySpec(query, normalized, params) {
4266
+ const { indexView, aggregateContext } = params;
4267
+ const { fieldBoosts } = normalized;
4268
+ const { fieldIds } = aggregateContext;
4269
+ let maxLen = 0;
4270
+ const consider = (data) => {
4271
+ maxLen = Math.max(maxLen, maxPostingLengthForFieldTermData(data, fieldBoosts, fieldIds));
4272
+ };
4273
+ forEachQuerySpecTermRef(query, normalized, params, (ref) => {
4274
+ if (ref.kind === 'exact') {
4275
+ if (ref.termIndex != null)
4276
+ consider(indexView.fieldTermData(ref.termIndex));
4277
+ return;
4276
4278
  }
4279
+ consider(indexView.fieldTermData(ref.termIndex));
4280
+ });
4281
+ return maxLen;
4282
+ }
4283
+ function estimateMaxPostingLengthForQuery(query, searchOptions, params) {
4284
+ if (isWildcardQuery(query)) {
4285
+ return params.aggregateContext.documentCount;
4277
4286
  }
4278
- if (maxDistance) {
4279
- for (const { termIndex, distance } of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
4280
- if (!distance || (seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.has(termIndex)))
4281
- continue;
4282
- indexView.collectDocIds(termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
4287
+ if (isQueryCombination(query)) {
4288
+ const options = { ...searchOptions, ...query, queries: undefined };
4289
+ let maxLen = 0;
4290
+ for (const branch of query.queries) {
4291
+ maxLen = Math.max(maxLen, estimateMaxPostingLengthForQuery(branch, options, params));
4283
4292
  }
4293
+ return maxLen;
4294
+ }
4295
+ if (typeof query !== 'string')
4296
+ return 0;
4297
+ const normalized = normalizeStringQuery(query, searchOptions, params);
4298
+ let maxLen = 0;
4299
+ for (const spec of normalized.specs) {
4300
+ maxLen = Math.max(maxLen, estimateMaxPostingLengthForQuerySpec(spec, normalized, params));
4284
4301
  }
4302
+ return maxLen;
4303
+ }
4304
+ function collectDocIdsForQuerySpec(query, normalized, params, allowedDocs) {
4305
+ const { fieldBoosts } = normalized;
4306
+ const docIds = new Set();
4307
+ const { indexView, aggregateContext } = params;
4308
+ forEachQuerySpecTermRef(query, normalized, params, (ref) => {
4309
+ if (ref.kind === 'exact') {
4310
+ if (ref.termIndex != null) {
4311
+ indexView.collectDocIds(ref.termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
4312
+ }
4313
+ return;
4314
+ }
4315
+ indexView.collectDocIds(ref.termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
4316
+ });
4285
4317
  return docIds;
4286
4318
  }
4287
4319
  function intersectDocIdsInPlace(docIds, branchDocIds) {
@@ -4331,7 +4363,8 @@ function collectCombinedDocIds(branches, operator, collectBranch, allowedDocs) {
4331
4363
  * AND_NOT: score the positive branch only; negated branches are collected as docId sets and
4332
4364
  * subtracted without scoring (avoids term materialization on excluded branches).
4333
4365
  */
4334
- function executeCombinedBranches(branches, operator, params, executeBranch, collectBranch, allowedDocs, run) {
4366
+ function executeCombinedBranches(branches, operator, params, executeBranch, collectBranch, allowedDocs, run, estimateBranchPostingLength) {
4367
+ var _a;
4335
4368
  if (branches.length === 0)
4336
4369
  return new Map();
4337
4370
  const op = operator.toLowerCase();
@@ -4343,8 +4376,16 @@ function executeCombinedBranches(branches, operator, params, executeBranch, coll
4343
4376
  if (op === 'and') {
4344
4377
  const limits = void 0 ;
4345
4378
  const documentCount = params.aggregateContext.documentCount;
4379
+ const postingGatePolicy = (_a = void 0 ) !== null && _a !== void 0 ? _a : DEFAULT_POSTING_GATE_POLICY;
4380
+ const maxGateSize = resolveGateMaxSize(documentCount, limits);
4346
4381
  for (let i = 1; i < branches.length; i++) {
4347
- const selective = gateIsSelectiveEnough(gate.size, documentCount, limits);
4382
+ if (gate.size === 0)
4383
+ return result;
4384
+ const ratioPath = gate.size > maxGateSize;
4385
+ const postingListLength = ratioPath
4386
+ ? estimateBranchPostingLength === null || estimateBranchPostingLength === void 0 ? void 0 : estimateBranchPostingLength(branches[i])
4387
+ : undefined;
4388
+ const selective = gateIsSelectiveEnough(gate.size, documentCount, limits, postingListLength, postingGatePolicy);
4348
4389
  const branchAllowed = selective ? gate : allowedDocs;
4349
4390
  result = combineResults([result, executeBranch(branches[i], branchAllowed)], AND);
4350
4391
  gate = docIdsFromResult(result);
@@ -4438,7 +4479,7 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
4438
4479
  const options = { ...searchOptions, ...query, queries: undefined };
4439
4480
  const operator = ((_b = (_a = query.combineWith) !== null && _a !== void 0 ? _a : options.combineWith) !== null && _b !== void 0 ? _b : params.globalSearchOptions.combineWith);
4440
4481
  if (useGatedEvaluation(run, query.queries.length, operator, combinationHasWildcard(query))) {
4441
- return executeCombinedBranches(query.queries, operator, params, (branch, branchAllowed) => executeQueryInternal(branch, options, params, branchAllowed, run), (branch, branchAllowed) => collectDocIdsForQueryInternal(branch, options, params, branchAllowed), allowedDocs);
4482
+ return executeCombinedBranches(query.queries, operator, params, (branch, branchAllowed) => executeQueryInternal(branch, options, params, branchAllowed, run), (branch, branchAllowed) => collectDocIdsForQueryInternal(branch, options, params, branchAllowed), allowedDocs, run, branch => estimateMaxPostingLengthForQuery(branch, options, params));
4442
4483
  }
4443
4484
  const results = query.queries.map(subquery => executeQueryInternal(subquery, options, params, allowedDocs, run));
4444
4485
  return combineResults(results, operator);
@@ -4450,7 +4491,7 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
4450
4491
  const { specs, operator } = normalized;
4451
4492
  const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
4452
4493
  if (useGatedEvaluation(run, specs.length, combineWith, false)) {
4453
- return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec, normalized, params, branchAllowed), (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs);
4494
+ return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec, normalized, params, branchAllowed), (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs, run, spec => estimateMaxPostingLengthForQuerySpec(spec, normalized, params));
4454
4495
  }
4455
4496
  const results = specs.map(spec => executeQuerySpecInternal(spec, normalized, params, allowedDocs));
4456
4497
  return combineResults(results, combineWith);