@yoch/frozenminisearch 1.2.1 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/README.md +43 -99
- package/dist/cjs/index.cjs +419 -263
- package/dist/es/index.js +419 -263
- package/package.json +2 -1
package/dist/es/index.js
CHANGED
|
@@ -1,9 +1,81 @@
|
|
|
1
1
|
import zlib from 'node:zlib';
|
|
2
2
|
|
|
3
|
+
/**
|
|
4
|
+
* Internal AND / AND_NOT gate thresholds (not exported from the public package entry).
|
|
5
|
+
*/
|
|
6
|
+
const DEFAULT_POSTING_GATE_MIN_LENGTH = 2048;
|
|
7
|
+
const DEFAULT_POSTING_GATE_RATIO_SHIFT = 2;
|
|
8
|
+
const DEFAULT_POSTING_GATE_POLICY = {
|
|
9
|
+
minLength: DEFAULT_POSTING_GATE_MIN_LENGTH,
|
|
10
|
+
ratioShift: DEFAULT_POSTING_GATE_RATIO_SHIFT,
|
|
11
|
+
};
|
|
12
|
+
function passGateByPostingRatio(gateSize, postingListLength, policy = DEFAULT_POSTING_GATE_POLICY) {
|
|
13
|
+
if (postingListLength < policy.minLength)
|
|
14
|
+
return false;
|
|
15
|
+
return gateSize <= (postingListLength >>> policy.ratioShift);
|
|
16
|
+
}
|
|
17
|
+
const DEFAULT_AND_GATE_LIMITS = {
|
|
18
|
+
maxAbsolute: 5000,
|
|
19
|
+
maxFraction: 0.1,
|
|
20
|
+
};
|
|
21
|
+
function resolveGateMaxSize(documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
|
|
22
|
+
return Math.min(limits.maxAbsolute, Math.max(100, Math.floor(documentCount * limits.maxFraction)));
|
|
23
|
+
}
|
|
24
|
+
function gateIsSelectiveEnough(gateSize, documentCount, limits = DEFAULT_AND_GATE_LIMITS, postingListLength, postingGatePolicy = DEFAULT_POSTING_GATE_POLICY) {
|
|
25
|
+
if (gateSize === 0)
|
|
26
|
+
return true;
|
|
27
|
+
if (gateSize <= resolveGateMaxSize(documentCount, limits))
|
|
28
|
+
return true;
|
|
29
|
+
if (postingListLength != null
|
|
30
|
+
&& postingListLength > 0
|
|
31
|
+
&& passGateByPostingRatio(gateSize, postingListLength, postingGatePolicy)) {
|
|
32
|
+
return true;
|
|
33
|
+
}
|
|
34
|
+
return false;
|
|
35
|
+
}
|
|
36
|
+
/** True when passing gate as allowedDocs can skip docs vs scanning the full branch posting. */
|
|
37
|
+
function gateFilterShrinksScan(gateSize, postingListLength) {
|
|
38
|
+
return postingListLength > gateSize;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Whether to pass the AND gate as allowedDocs to the next branch (perf only; scores unchanged if false).
|
|
42
|
+
* Distinct from gateIsSelectiveEnough: a selective gate may still be too large to filter a short posting.
|
|
43
|
+
*/
|
|
44
|
+
function shouldPassGateAsAllowedDocs(selective, gateSize, postingListLength) {
|
|
45
|
+
if (!selective || gateSize === 0)
|
|
46
|
+
return false;
|
|
47
|
+
if (postingListLength == null || postingListLength <= 0)
|
|
48
|
+
return false;
|
|
49
|
+
return gateFilterShrinksScan(gateSize, postingListLength);
|
|
50
|
+
}
|
|
51
|
+
|
|
3
52
|
const MAX_FREQ = 65535;
|
|
4
53
|
function readDocId(docIds, index) {
|
|
5
54
|
return docIds[index];
|
|
6
55
|
}
|
|
56
|
+
/** Binary search for docId in a sorted segment; returns global index or -1. */
|
|
57
|
+
function findDocIndexInSortedSegment(docIds, offset, length, docId) {
|
|
58
|
+
let lo = 0;
|
|
59
|
+
let hi = length - 1;
|
|
60
|
+
while (lo <= hi) {
|
|
61
|
+
const mid = (lo + hi) >>> 1;
|
|
62
|
+
const v = readDocId(docIds, offset + mid);
|
|
63
|
+
if (v < docId)
|
|
64
|
+
lo = mid + 1;
|
|
65
|
+
else if (v > docId)
|
|
66
|
+
hi = mid - 1;
|
|
67
|
+
else
|
|
68
|
+
return offset + mid;
|
|
69
|
+
}
|
|
70
|
+
return -1;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Scan vs binary search once `allowedDocs` is already in effect (scoring layer).
|
|
74
|
+
* Uses the same numeric policy as {@link passGateByPostingRatio} today; distinct decision point.
|
|
75
|
+
*/
|
|
76
|
+
function shouldSeekAllowedDocs(gateSize, listLength) {
|
|
77
|
+
return passGateByPostingRatio(gateSize, listLength);
|
|
78
|
+
}
|
|
7
79
|
function allocateFreqs(length, maxValue) {
|
|
8
80
|
if (maxValue <= 0xff)
|
|
9
81
|
return new Uint8Array(length);
|
|
@@ -65,10 +137,15 @@ function bm25FieldConstants(bm25params, avgFieldLength) {
|
|
|
65
137
|
const { k, b, d } = bm25params;
|
|
66
138
|
return { k, d, k1: k + 1, oneMinusB: 1 - b, bOverAvg: b / avgFieldLength };
|
|
67
139
|
}
|
|
68
|
-
function
|
|
140
|
+
function bm25Idf(matchingCount, totalCount) {
|
|
141
|
+
return Math.log(1 + (totalCount - matchingCount + 0.5) / (matchingCount + 0.5));
|
|
142
|
+
}
|
|
143
|
+
function calcBm25TfWithConstants(termFreq, fieldLength, constants, idf) {
|
|
69
144
|
const { k, d, k1, oneMinusB, bOverAvg } = constants;
|
|
70
|
-
|
|
71
|
-
|
|
145
|
+
return idf * (d + termFreq * k1 / (termFreq + k * (oneMinusB + bOverAvg * fieldLength)));
|
|
146
|
+
}
|
|
147
|
+
function calcBM25ScoreWithConstants(termFreq, matchingCount, totalCount, fieldLength, constants) {
|
|
148
|
+
return calcBm25TfWithConstants(termFreq, fieldLength, constants, bm25Idf(matchingCount, totalCount));
|
|
72
149
|
}
|
|
73
150
|
const getOwnProperty = (object, property) => Object.prototype.hasOwnProperty.call(object, property) ? object[property] : undefined;
|
|
74
151
|
function fieldBoostsForQuery(options, fields) {
|
|
@@ -97,7 +174,7 @@ function getDerivedTerm(derivedTerm, cache) {
|
|
|
97
174
|
cache.value = derivedTerm.resolve();
|
|
98
175
|
return cache.value;
|
|
99
176
|
}
|
|
100
|
-
function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache) {
|
|
177
|
+
function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf) {
|
|
101
178
|
const resolvedDerivedTerm = getDerivedTerm(derivedTerm, derivedTermCache);
|
|
102
179
|
const docBoost = boostDocumentFn
|
|
103
180
|
? boostDocumentFn(context.getExternalId(docId), resolvedDerivedTerm, context.getStoredFields(docId))
|
|
@@ -105,7 +182,9 @@ function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFre
|
|
|
105
182
|
if (!docBoost)
|
|
106
183
|
return;
|
|
107
184
|
const fieldLength = context.getFieldLength(docId, fieldId);
|
|
108
|
-
const rawScore =
|
|
185
|
+
const rawScore = hoistedIdf !== undefined
|
|
186
|
+
? calcBm25TfWithConstants(termFreq, fieldLength, bm25, hoistedIdf)
|
|
187
|
+
: calcBM25ScoreWithConstants(termFreq, matchingFields, context.documentCount, fieldLength, bm25);
|
|
109
188
|
const weightedScore = termWeight * termBoost * fieldBoost * docBoost * rawScore;
|
|
110
189
|
const result = results.get(docId);
|
|
111
190
|
if (result) {
|
|
@@ -128,22 +207,39 @@ function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFre
|
|
|
128
207
|
}
|
|
129
208
|
}
|
|
130
209
|
function aggregateSegmentPostingList(sourceTerm, derivedTerm, termWeight, termBoost, field, fieldId, fieldBoost, list, context, boostDocumentFn, bm25params, results, allowedDocs) {
|
|
131
|
-
var _a;
|
|
210
|
+
var _a, _b;
|
|
132
211
|
let matchingFields = list.length;
|
|
133
212
|
const bm25 = bm25FieldConstants(bm25params, context.avgFieldLength[fieldId]);
|
|
213
|
+
const hoistedIdf = context.isDocActive == null
|
|
214
|
+
? bm25Idf(matchingFields, context.documentCount)
|
|
215
|
+
: undefined;
|
|
134
216
|
const { docIds, freqs, offset, length } = list;
|
|
135
217
|
const derivedTermCache = {};
|
|
218
|
+
if (allowedDocs != null && shouldSeekAllowedDocs(allowedDocs.size, length)) {
|
|
219
|
+
for (const docId of allowedDocs) {
|
|
220
|
+
if (context.isDocActive != null && !context.isDocActive(docId)) {
|
|
221
|
+
(_a = context.onInactiveDoc) === null || _a === void 0 ? void 0 : _a.call(context, docId, fieldId, getDerivedTerm(derivedTerm, derivedTermCache));
|
|
222
|
+
matchingFields -= 1;
|
|
223
|
+
continue;
|
|
224
|
+
}
|
|
225
|
+
const index = findDocIndexInSortedSegment(docIds, offset, length, docId);
|
|
226
|
+
if (index < 0)
|
|
227
|
+
continue;
|
|
228
|
+
scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, freqs[index], termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
|
|
229
|
+
}
|
|
230
|
+
return matchingFields;
|
|
231
|
+
}
|
|
136
232
|
for (let i = 0; i < length; i++) {
|
|
137
233
|
const docId = readDocId(docIds, offset + i);
|
|
138
234
|
const termFreq = freqs[offset + i];
|
|
139
235
|
if (context.isDocActive != null && !context.isDocActive(docId)) {
|
|
140
|
-
(
|
|
236
|
+
(_b = context.onInactiveDoc) === null || _b === void 0 ? void 0 : _b.call(context, docId, fieldId, getDerivedTerm(derivedTerm, derivedTermCache));
|
|
141
237
|
matchingFields -= 1;
|
|
142
238
|
continue;
|
|
143
239
|
}
|
|
144
240
|
if (allowedDocs != null && !allowedDocs.has(docId))
|
|
145
241
|
continue;
|
|
146
|
-
scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache);
|
|
242
|
+
scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
|
|
147
243
|
}
|
|
148
244
|
return matchingFields;
|
|
149
245
|
}
|
|
@@ -163,6 +259,9 @@ function aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTerm
|
|
|
163
259
|
}
|
|
164
260
|
let matchingFields = postingList.size;
|
|
165
261
|
const bm25 = bm25FieldConstants(bm25params, context.avgFieldLength[fieldId]);
|
|
262
|
+
const hoistedIdf = context.isDocActive == null
|
|
263
|
+
? bm25Idf(matchingFields, context.documentCount)
|
|
264
|
+
: undefined;
|
|
166
265
|
const derivedTermCache = {};
|
|
167
266
|
postingList.forEachDoc((docId, termFreq) => {
|
|
168
267
|
var _a;
|
|
@@ -173,7 +272,7 @@ function aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTerm
|
|
|
173
272
|
}
|
|
174
273
|
if (allowedDocs != null && !allowedDocs.has(docId))
|
|
175
274
|
return;
|
|
176
|
-
scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache);
|
|
275
|
+
scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
|
|
177
276
|
});
|
|
178
277
|
}
|
|
179
278
|
return results;
|
|
@@ -1742,82 +1841,47 @@ function readFieldLengthMatrixSection(buf, flags, cellCount) {
|
|
|
1742
1841
|
|
|
1743
1842
|
const DISCARDED_DOC_ID = 0xffffffff;
|
|
1744
1843
|
function postingFreqValue(freq, clampFrequencies) {
|
|
1745
|
-
return
|
|
1746
|
-
}
|
|
1747
|
-
function materializeFlatPostings(params) {
|
|
1748
|
-
const { fieldCount, termCount, forEachPosting, remapDocId, clampFrequencies } = params;
|
|
1749
|
-
const slotCount = termCount * fieldCount;
|
|
1750
|
-
const postingsOffsets = new Uint32Array(slotCount);
|
|
1751
|
-
const postingsLengths = new Uint32Array(slotCount);
|
|
1752
|
-
let totalPostings = 0;
|
|
1753
|
-
let maxFreq = 0;
|
|
1754
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
1755
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
1756
|
-
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1757
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1758
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1759
|
-
return;
|
|
1760
|
-
totalPostings++;
|
|
1761
|
-
const v = postingFreqValue(freq, clampFrequencies);
|
|
1762
|
-
if (v > maxFreq)
|
|
1763
|
-
maxFreq = v;
|
|
1764
|
-
});
|
|
1765
|
-
}
|
|
1766
|
-
}
|
|
1767
|
-
const useUint16 = params.nextId != null && params.nextId <= 65535;
|
|
1768
|
-
const allDocIds = useUint16
|
|
1769
|
-
? new Uint16Array(totalPostings)
|
|
1770
|
-
: new Uint32Array(totalPostings);
|
|
1771
|
-
const allFreqs = allocateFreqs(totalPostings, maxFreq);
|
|
1772
|
-
// Slots are visited in ascending fieldId (0..fieldCount-1) per term. Sparse layouts
|
|
1773
|
-
// rely on this ordering so field ids per term stay sorted for binary lookup.
|
|
1774
|
-
let write = 0;
|
|
1775
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
1776
|
-
const base = ti * fieldCount;
|
|
1777
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
1778
|
-
const offset = write;
|
|
1779
|
-
let count = 0;
|
|
1780
|
-
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1781
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1782
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1783
|
-
return;
|
|
1784
|
-
if (useUint16) {
|
|
1785
|
-
allDocIds[write] = docId;
|
|
1786
|
-
}
|
|
1787
|
-
else {
|
|
1788
|
-
allDocIds[write] = docId;
|
|
1789
|
-
}
|
|
1790
|
-
allFreqs[write] = postingFreqValue(freq, clampFrequencies);
|
|
1791
|
-
write++;
|
|
1792
|
-
count++;
|
|
1793
|
-
});
|
|
1794
|
-
postingsOffsets[base + f] = offset;
|
|
1795
|
-
postingsLengths[base + f] = count;
|
|
1796
|
-
}
|
|
1797
|
-
}
|
|
1798
|
-
return {
|
|
1799
|
-
postingsOffsets,
|
|
1800
|
-
postingsLengths,
|
|
1801
|
-
allDocIds,
|
|
1802
|
-
allFreqs,
|
|
1803
|
-
};
|
|
1844
|
+
return clampFreq(freq) ;
|
|
1804
1845
|
}
|
|
1805
1846
|
|
|
1806
1847
|
function readFieldId(fieldIds, index) {
|
|
1807
1848
|
return fieldIds[index];
|
|
1808
1849
|
}
|
|
1809
|
-
function choosePostingsLayout(fieldCount) {
|
|
1810
|
-
return fieldCount === 1 ? 'dense' : 'sparse';
|
|
1811
|
-
}
|
|
1812
1850
|
function chooseSparseFieldIdWidth(fieldCount) {
|
|
1813
1851
|
return fieldCount > 255 ? 16 : 8;
|
|
1814
1852
|
}
|
|
1815
|
-
function
|
|
1816
|
-
const
|
|
1817
|
-
const
|
|
1853
|
+
function choosePostingsLayout(fieldCount, termCount, nonEmptySlots) {
|
|
1854
|
+
const denseBytes = termCount * fieldCount * 8;
|
|
1855
|
+
const sparseFieldIdBytes = chooseSparseFieldIdWidth(fieldCount) === 16 ? 2 : 1;
|
|
1856
|
+
const sparseBytes = (termCount + 1) * 4 + nonEmptySlots * (sparseFieldIdBytes + 8);
|
|
1857
|
+
return denseBytes <= sparseBytes ? 'dense' : 'sparse';
|
|
1858
|
+
}
|
|
1859
|
+
/** Shared dense/sparse layout emission; callers supply per-slot length and copy. */
|
|
1860
|
+
function buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, source) {
|
|
1861
|
+
const layout = choosePostingsLayout(fieldCount, termCount, source.nonEmptySlots);
|
|
1818
1862
|
const docIdWidth = nextId <= 65535 ? 16 : 32;
|
|
1863
|
+
const allDocIds = docIdWidth === 16
|
|
1864
|
+
? new Uint16Array(totalPostings)
|
|
1865
|
+
: new Uint32Array(totalPostings);
|
|
1866
|
+
const allFreqs = allocateFreqs(totalPostings, maxFreq);
|
|
1867
|
+
const targets = { allDocIds, allFreqs, docIdWidth };
|
|
1819
1868
|
if (layout === 'dense') {
|
|
1820
|
-
const
|
|
1869
|
+
const slotCount = termCount * fieldCount;
|
|
1870
|
+
const denseOffsets = new Uint32Array(slotCount);
|
|
1871
|
+
const denseLengths = new Uint32Array(slotCount);
|
|
1872
|
+
let write = 0;
|
|
1873
|
+
for (let ti = 0; ti < termCount; ti++) {
|
|
1874
|
+
const base = ti * fieldCount;
|
|
1875
|
+
for (let f = 0; f < fieldCount; f++) {
|
|
1876
|
+
const slot = base + f;
|
|
1877
|
+
const len = source.slotLength(ti, f);
|
|
1878
|
+
denseOffsets[slot] = write;
|
|
1879
|
+
denseLengths[slot] = len;
|
|
1880
|
+
if (len > 0) {
|
|
1881
|
+
write = source.writeSlot(ti, f, write, targets);
|
|
1882
|
+
}
|
|
1883
|
+
}
|
|
1884
|
+
}
|
|
1821
1885
|
return {
|
|
1822
1886
|
fieldCount,
|
|
1823
1887
|
termCount,
|
|
@@ -1825,10 +1889,10 @@ function materializeFrozenPostings(params) {
|
|
|
1825
1889
|
layout,
|
|
1826
1890
|
docIdWidth,
|
|
1827
1891
|
sparseFieldIdWidth: null,
|
|
1828
|
-
allDocIds
|
|
1829
|
-
allFreqs
|
|
1830
|
-
denseOffsets
|
|
1831
|
-
denseLengths
|
|
1892
|
+
allDocIds,
|
|
1893
|
+
allFreqs,
|
|
1894
|
+
denseOffsets,
|
|
1895
|
+
denseLengths,
|
|
1832
1896
|
sparseTermStarts: null,
|
|
1833
1897
|
sparseFieldIds: null,
|
|
1834
1898
|
sparseOffsets: null,
|
|
@@ -1840,60 +1904,23 @@ function materializeFrozenPostings(params) {
|
|
|
1840
1904
|
const sparseOffsets = [];
|
|
1841
1905
|
const sparseLengths = [];
|
|
1842
1906
|
const termStarts = new Array(termCount + 1).fill(0);
|
|
1843
|
-
|
|
1844
|
-
// Non-empty slots per term are emitted with fieldId in ascending order (f loops 0..fieldCount-1).
|
|
1845
|
-
let totalPostings = 0;
|
|
1846
|
-
let maxFreq = 0;
|
|
1907
|
+
let write = 0;
|
|
1847
1908
|
for (let ti = 0; ti < termCount; ti++) {
|
|
1848
1909
|
termStarts[ti] = sparseFieldIdsScratch.length;
|
|
1849
1910
|
for (let f = 0; f < fieldCount; f++) {
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1853
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1854
|
-
return;
|
|
1855
|
-
count++;
|
|
1856
|
-
const v = postingFreqValue(freq, clampFrequencies);
|
|
1857
|
-
if (v > maxFreq)
|
|
1858
|
-
maxFreq = v;
|
|
1859
|
-
});
|
|
1860
|
-
if (count === 0)
|
|
1911
|
+
const len = source.slotLength(ti, f);
|
|
1912
|
+
if (len === 0)
|
|
1861
1913
|
continue;
|
|
1862
1914
|
sparseFieldIdsScratch.push(f);
|
|
1863
|
-
sparseOffsets.push(
|
|
1864
|
-
sparseLengths.push(
|
|
1865
|
-
|
|
1915
|
+
sparseOffsets.push(write);
|
|
1916
|
+
sparseLengths.push(len);
|
|
1917
|
+
write = source.writeSlot(ti, f, write, targets);
|
|
1866
1918
|
}
|
|
1867
1919
|
termStarts[ti + 1] = sparseFieldIdsScratch.length;
|
|
1868
1920
|
}
|
|
1869
|
-
const allDocIds = docIdWidth === 16
|
|
1870
|
-
? new Uint16Array(totalPostings)
|
|
1871
|
-
: new Uint32Array(totalPostings);
|
|
1872
|
-
const allFreqs = allocateFreqs(totalPostings, maxFreq);
|
|
1873
1921
|
const sparseFieldIds = sparseFieldIdWidth === 16
|
|
1874
1922
|
? new Uint16Array(sparseFieldIdsScratch)
|
|
1875
1923
|
: new Uint8Array(sparseFieldIdsScratch);
|
|
1876
|
-
let write = 0;
|
|
1877
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
1878
|
-
const start = termStarts[ti];
|
|
1879
|
-
const end = termStarts[ti + 1];
|
|
1880
|
-
for (let s = start; s < end; s++) {
|
|
1881
|
-
const f = readFieldId(sparseFieldIds, s);
|
|
1882
|
-
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1883
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1884
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1885
|
-
return;
|
|
1886
|
-
if (docIdWidth === 16) {
|
|
1887
|
-
allDocIds[write] = docId;
|
|
1888
|
-
}
|
|
1889
|
-
else {
|
|
1890
|
-
allDocIds[write] = docId;
|
|
1891
|
-
}
|
|
1892
|
-
allFreqs[write] = postingFreqValue(freq, clampFrequencies);
|
|
1893
|
-
write++;
|
|
1894
|
-
});
|
|
1895
|
-
}
|
|
1896
|
-
}
|
|
1897
1924
|
return {
|
|
1898
1925
|
fieldCount,
|
|
1899
1926
|
termCount,
|
|
@@ -1911,6 +1938,58 @@ function materializeFrozenPostings(params) {
|
|
|
1911
1938
|
sparseLengths: new Uint32Array(sparseLengths),
|
|
1912
1939
|
};
|
|
1913
1940
|
}
|
|
1941
|
+
function materializeFrozenPostings(params) {
|
|
1942
|
+
const { fieldCount, termCount, nextId } = params;
|
|
1943
|
+
const { forEachPosting, remapDocId} = params;
|
|
1944
|
+
const slotCount = termCount * fieldCount;
|
|
1945
|
+
const slotLengths = new Uint32Array(slotCount);
|
|
1946
|
+
let totalPostings = 0;
|
|
1947
|
+
let maxFreq = 0;
|
|
1948
|
+
let nonEmptySlots = 0;
|
|
1949
|
+
for (let ti = 0; ti < termCount; ti++) {
|
|
1950
|
+
const base = ti * fieldCount;
|
|
1951
|
+
for (let f = 0; f < fieldCount; f++) {
|
|
1952
|
+
let count = 0;
|
|
1953
|
+
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1954
|
+
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1955
|
+
if (docId === DISCARDED_DOC_ID)
|
|
1956
|
+
return;
|
|
1957
|
+
count++;
|
|
1958
|
+
const v = postingFreqValue(freq);
|
|
1959
|
+
if (v > maxFreq)
|
|
1960
|
+
maxFreq = v;
|
|
1961
|
+
});
|
|
1962
|
+
if (count === 0)
|
|
1963
|
+
continue;
|
|
1964
|
+
slotLengths[base + f] = count;
|
|
1965
|
+
totalPostings += count;
|
|
1966
|
+
nonEmptySlots++;
|
|
1967
|
+
}
|
|
1968
|
+
}
|
|
1969
|
+
return buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, {
|
|
1970
|
+
nonEmptySlots,
|
|
1971
|
+
slotLength(ti, f) {
|
|
1972
|
+
return slotLengths[ti * fieldCount + f];
|
|
1973
|
+
},
|
|
1974
|
+
writeSlot(ti, f, write, targets) {
|
|
1975
|
+
const { allDocIds: outDocIds, allFreqs: outFreqs, docIdWidth: width } = targets;
|
|
1976
|
+
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1977
|
+
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1978
|
+
if (docId === DISCARDED_DOC_ID)
|
|
1979
|
+
return;
|
|
1980
|
+
if (width === 16) {
|
|
1981
|
+
outDocIds[write] = docId;
|
|
1982
|
+
}
|
|
1983
|
+
else {
|
|
1984
|
+
outDocIds[write] = docId;
|
|
1985
|
+
}
|
|
1986
|
+
outFreqs[write] = postingFreqValue(freq);
|
|
1987
|
+
write++;
|
|
1988
|
+
});
|
|
1989
|
+
return write;
|
|
1990
|
+
},
|
|
1991
|
+
});
|
|
1992
|
+
}
|
|
1914
1993
|
function postingsTypedBytes(layout) {
|
|
1915
1994
|
const allDocIdsBytes = layout.allDocIds.byteLength;
|
|
1916
1995
|
const allFreqsBytes = layout.allFreqs.byteLength;
|
|
@@ -2070,6 +2149,16 @@ function createFrozenFieldTermFlyweight(layout) {
|
|
|
2070
2149
|
return flyweight;
|
|
2071
2150
|
}
|
|
2072
2151
|
function collectDocIdsFromFrozenSegment(allDocIds, offset, length, context, docIds, allowedDocs) {
|
|
2152
|
+
if (allowedDocs != null && shouldSeekAllowedDocs(allowedDocs.size, length)) {
|
|
2153
|
+
for (const docId of allowedDocs) {
|
|
2154
|
+
if (context.isDocActive != null && !context.isDocActive(docId))
|
|
2155
|
+
continue;
|
|
2156
|
+
if (findDocIndexInSortedSegment(allDocIds, offset, length, docId) >= 0) {
|
|
2157
|
+
docIds.add(docId);
|
|
2158
|
+
}
|
|
2159
|
+
}
|
|
2160
|
+
return;
|
|
2161
|
+
}
|
|
2073
2162
|
for (let i = 0; i < length; i++) {
|
|
2074
2163
|
const docId = readDocId(allDocIds, offset + i);
|
|
2075
2164
|
if (context.isDocActive != null && !context.isDocActive(docId))
|
|
@@ -2673,7 +2762,6 @@ function buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, nextId, s
|
|
|
2673
2762
|
fieldCount,
|
|
2674
2763
|
termCount,
|
|
2675
2764
|
nextId,
|
|
2676
|
-
clampFrequencies: true,
|
|
2677
2765
|
remapDocId,
|
|
2678
2766
|
forEachPosting(ti, f, emit) {
|
|
2679
2767
|
var _a;
|
|
@@ -2917,8 +3005,8 @@ async function zlibPayloadChoiceAsync(uncompressed) {
|
|
|
2917
3005
|
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2918
3006
|
}
|
|
2919
3007
|
const autoSyncCompressors = {
|
|
2920
|
-
zstd:
|
|
2921
|
-
zlib:
|
|
3008
|
+
zstd: uncompressed => zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed)),
|
|
3009
|
+
zlib: uncompressed => zlib.deflateSync(uncompressed),
|
|
2922
3010
|
};
|
|
2923
3011
|
const autoAsyncCompressors = {
|
|
2924
3012
|
zstd: zstdCompressAsync,
|
|
@@ -3861,93 +3949,23 @@ class IncrementalPostingsAccumulator {
|
|
|
3861
3949
|
const totalPostings = this._totalPostings;
|
|
3862
3950
|
const maxFreq = this._maxFreq;
|
|
3863
3951
|
const slots = this._slots;
|
|
3864
|
-
const layout =
|
|
3865
|
-
|
|
3866
|
-
|
|
3867
|
-
|
|
3868
|
-
|
|
3869
|
-
|
|
3870
|
-
|
|
3871
|
-
const slotCount = termCount * fieldCount;
|
|
3872
|
-
const denseOffsets = new Uint32Array(slotCount);
|
|
3873
|
-
const denseLengths = new Uint32Array(slotCount);
|
|
3874
|
-
let write = 0;
|
|
3875
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
3876
|
-
const base = ti * fieldCount;
|
|
3877
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
3878
|
-
const slot = base + f;
|
|
3879
|
-
const ranges = slots.get(slot);
|
|
3880
|
-
const len = ranges == null ? 0 : this.slotLength(ranges);
|
|
3881
|
-
denseOffsets[slot] = write;
|
|
3882
|
-
denseLengths[slot] = len;
|
|
3883
|
-
if (len > 0) {
|
|
3884
|
-
write = this.copySlot(ranges, allDocIds, allFreqs, write, docIdWidth);
|
|
3885
|
-
slots.delete(slot);
|
|
3886
|
-
}
|
|
3887
|
-
}
|
|
3888
|
-
}
|
|
3889
|
-
slots.clear();
|
|
3890
|
-
this.clear();
|
|
3891
|
-
return {
|
|
3892
|
-
fieldCount,
|
|
3893
|
-
termCount,
|
|
3894
|
-
nextId,
|
|
3895
|
-
layout,
|
|
3896
|
-
docIdWidth,
|
|
3897
|
-
sparseFieldIdWidth: null,
|
|
3898
|
-
allDocIds,
|
|
3899
|
-
allFreqs,
|
|
3900
|
-
denseOffsets,
|
|
3901
|
-
denseLengths,
|
|
3902
|
-
sparseTermStarts: null,
|
|
3903
|
-
sparseFieldIds: null,
|
|
3904
|
-
sparseOffsets: null,
|
|
3905
|
-
sparseLengths: null,
|
|
3906
|
-
};
|
|
3907
|
-
}
|
|
3908
|
-
const sparseFieldIdWidth = chooseSparseFieldIdWidth(fieldCount);
|
|
3909
|
-
const sparseFieldIdsScratch = [];
|
|
3910
|
-
const sparseOffsets = [];
|
|
3911
|
-
const sparseLengths = [];
|
|
3912
|
-
const termStarts = new Array(termCount + 1).fill(0);
|
|
3913
|
-
let write = 0;
|
|
3914
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
3915
|
-
termStarts[ti] = sparseFieldIdsScratch.length;
|
|
3916
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
3952
|
+
const layout = buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, {
|
|
3953
|
+
nonEmptySlots: slots.size,
|
|
3954
|
+
slotLength: (ti, f) => {
|
|
3955
|
+
const ranges = slots.get(ti * fieldCount + f);
|
|
3956
|
+
return ranges == null ? 0 : this.slotLength(ranges);
|
|
3957
|
+
},
|
|
3958
|
+
writeSlot: (ti, f, write, targets) => {
|
|
3917
3959
|
const slot = ti * fieldCount + f;
|
|
3918
3960
|
const ranges = slots.get(slot);
|
|
3919
|
-
const
|
|
3920
|
-
if (len === 0)
|
|
3921
|
-
continue;
|
|
3922
|
-
sparseFieldIdsScratch.push(f);
|
|
3923
|
-
sparseOffsets.push(write);
|
|
3924
|
-
sparseLengths.push(len);
|
|
3925
|
-
write = this.copySlot(ranges, allDocIds, allFreqs, write, docIdWidth);
|
|
3961
|
+
const next = this.copySlot(ranges, targets.allDocIds, targets.allFreqs, write, targets.docIdWidth);
|
|
3926
3962
|
slots.delete(slot);
|
|
3927
|
-
|
|
3928
|
-
|
|
3929
|
-
}
|
|
3963
|
+
return next;
|
|
3964
|
+
},
|
|
3965
|
+
});
|
|
3930
3966
|
slots.clear();
|
|
3931
3967
|
this.clear();
|
|
3932
|
-
|
|
3933
|
-
? new Uint16Array(sparseFieldIdsScratch)
|
|
3934
|
-
: new Uint8Array(sparseFieldIdsScratch);
|
|
3935
|
-
return {
|
|
3936
|
-
fieldCount,
|
|
3937
|
-
termCount,
|
|
3938
|
-
nextId,
|
|
3939
|
-
layout,
|
|
3940
|
-
docIdWidth,
|
|
3941
|
-
sparseFieldIdWidth,
|
|
3942
|
-
allDocIds,
|
|
3943
|
-
allFreqs,
|
|
3944
|
-
denseOffsets: null,
|
|
3945
|
-
denseLengths: null,
|
|
3946
|
-
sparseTermStarts: new Uint32Array(termStarts),
|
|
3947
|
-
sparseFieldIds,
|
|
3948
|
-
sparseOffsets: new Uint32Array(sparseOffsets),
|
|
3949
|
-
sparseLengths: new Uint32Array(sparseLengths),
|
|
3950
|
-
};
|
|
3968
|
+
return layout;
|
|
3951
3969
|
}
|
|
3952
3970
|
}
|
|
3953
3971
|
|
|
@@ -4127,27 +4145,21 @@ function buildFrozenParamsFromDocuments(documents, options) {
|
|
|
4127
4145
|
return builder.freezeParams();
|
|
4128
4146
|
}
|
|
4129
4147
|
|
|
4130
|
-
/**
|
|
4131
|
-
* Internal AND / AND_NOT gate thresholds (not exported from the public package entry).
|
|
4132
|
-
*/
|
|
4133
|
-
const DEFAULT_AND_GATE_LIMITS = {
|
|
4134
|
-
maxAbsolute: 5000,
|
|
4135
|
-
maxFraction: 0.1,
|
|
4136
|
-
};
|
|
4137
|
-
function resolveGateMaxSize(documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
|
|
4138
|
-
return Math.min(limits.maxAbsolute, Math.max(100, Math.floor(documentCount * limits.maxFraction)));
|
|
4139
|
-
}
|
|
4140
|
-
function gateIsSelectiveEnough(gateSize, documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
|
|
4141
|
-
if (gateSize === 0)
|
|
4142
|
-
return true;
|
|
4143
|
-
return gateSize <= resolveGateMaxSize(documentCount, limits);
|
|
4144
|
-
}
|
|
4145
|
-
|
|
4146
4148
|
function useGatedEvaluation(run, branchCount, operator, hasWildcard) {
|
|
4147
4149
|
return shouldUseGatedEvaluation(branchCount, operator, hasWildcard);
|
|
4148
4150
|
}
|
|
4149
|
-
function
|
|
4150
|
-
return
|
|
4151
|
+
function gateFromResult(result) {
|
|
4152
|
+
return {
|
|
4153
|
+
get size() {
|
|
4154
|
+
return result.size;
|
|
4155
|
+
},
|
|
4156
|
+
has(docId) {
|
|
4157
|
+
return result.has(docId);
|
|
4158
|
+
},
|
|
4159
|
+
[Symbol.iterator]() {
|
|
4160
|
+
return result.keys();
|
|
4161
|
+
},
|
|
4162
|
+
};
|
|
4151
4163
|
}
|
|
4152
4164
|
function isQueryCombination(query) {
|
|
4153
4165
|
return typeof query === 'object'
|
|
@@ -4219,12 +4231,12 @@ function normalizeStringQuery(query, searchOptions, params) {
|
|
|
4219
4231
|
function lazyIndexedTerm(indexView, termIndex) {
|
|
4220
4232
|
return { kind: 'lazy', resolve: () => indexView.resolveTermByIndex(termIndex) };
|
|
4221
4233
|
}
|
|
4222
|
-
|
|
4234
|
+
const TWO_PHASE_AND_NOT_MIN_FRACTION = 0.5;
|
|
4235
|
+
function forEachQuerySpecTermRef(query, normalized, params, visit) {
|
|
4223
4236
|
const { indexView } = params;
|
|
4224
|
-
const {
|
|
4237
|
+
const { options } = normalized;
|
|
4225
4238
|
const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
|
|
4226
|
-
|
|
4227
|
-
visit(exactTi == null ? undefined : indexView.fieldTermData(exactTi), query.term, 1);
|
|
4239
|
+
visit({ kind: 'exact', termIndex: indexView.resolveTermIndex(query.term) });
|
|
4228
4240
|
const seenPrefix = query.prefix && maxDistance ? new Set() : undefined;
|
|
4229
4241
|
if (query.prefix) {
|
|
4230
4242
|
for (const { termIndex, length } of indexView.getPrefixMatchesByIndex(query.term)) {
|
|
@@ -4232,7 +4244,7 @@ function visitQuerySpecForScoring(query, normalized, params, visit) {
|
|
|
4232
4244
|
if (!distance)
|
|
4233
4245
|
continue;
|
|
4234
4246
|
seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.add(termIndex);
|
|
4235
|
-
visit(
|
|
4247
|
+
visit({ kind: 'prefix', termIndex, length, distance });
|
|
4236
4248
|
}
|
|
4237
4249
|
}
|
|
4238
4250
|
if (!maxDistance)
|
|
@@ -4240,9 +4252,24 @@ function visitQuerySpecForScoring(query, normalized, params, visit) {
|
|
|
4240
4252
|
for (const { termIndex, length, distance } of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
|
|
4241
4253
|
if (!distance || (seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.has(termIndex)))
|
|
4242
4254
|
continue;
|
|
4243
|
-
visit(
|
|
4255
|
+
visit({ kind: 'fuzzy', termIndex, length, distance });
|
|
4244
4256
|
}
|
|
4245
4257
|
}
|
|
4258
|
+
function visitQuerySpecForScoring(query, normalized, params, visit) {
|
|
4259
|
+
const { indexView } = params;
|
|
4260
|
+
const { fuzzyWeight, prefixWeight } = normalized;
|
|
4261
|
+
forEachQuerySpecTermRef(query, normalized, params, (ref) => {
|
|
4262
|
+
if (ref.kind === 'exact') {
|
|
4263
|
+
visit(ref.termIndex == null ? undefined : indexView.fieldTermData(ref.termIndex), query.term, 1);
|
|
4264
|
+
return;
|
|
4265
|
+
}
|
|
4266
|
+
if (ref.kind === 'prefix') {
|
|
4267
|
+
visit(indexView.fieldTermData(ref.termIndex), lazyIndexedTerm(indexView, ref.termIndex), prefixWeight * ref.length / (ref.length + 0.3 * ref.distance));
|
|
4268
|
+
return;
|
|
4269
|
+
}
|
|
4270
|
+
visit(indexView.fieldTermData(ref.termIndex), lazyIndexedTerm(indexView, ref.termIndex), fuzzyWeight * ref.length / (ref.length + ref.distance));
|
|
4271
|
+
});
|
|
4272
|
+
}
|
|
4246
4273
|
function executeQuerySpecInternal(query, normalized, params, allowedDocs) {
|
|
4247
4274
|
const { fieldBoosts, options } = normalized;
|
|
4248
4275
|
const termOptions = allowedDocs == null ? undefined : { allowedDocs };
|
|
@@ -4252,32 +4279,73 @@ function executeQuerySpecInternal(query, normalized, params, allowedDocs) {
|
|
|
4252
4279
|
});
|
|
4253
4280
|
return results;
|
|
4254
4281
|
}
|
|
4255
|
-
function
|
|
4256
|
-
|
|
4257
|
-
|
|
4258
|
-
|
|
4259
|
-
const
|
|
4260
|
-
|
|
4261
|
-
|
|
4262
|
-
|
|
4282
|
+
/**
 * Returns the size of the longest posting list found in `data` across the
 * fields listed in `fieldBoosts.names`.
 *
 * `data` is looked up with `data.get(fieldIds[field])`. A posting list's size
 * is its `length` when it is a `SegmentPostingList`, otherwise its `size`.
 * Returns 0 when `data` is null/undefined or no listed field has a posting list.
 */
function maxPostingLengthForFieldTermData(data, fieldBoosts, fieldIds) {
    if (data === undefined || data === null)
        return 0;
    let longest = 0;
    for (const fieldName of fieldBoosts.names) {
        const postings = data.get(fieldIds[fieldName]);
        if (postings != null) {
            const count = postings instanceof SegmentPostingList
                ? postings.length
                : postings.size;
            longest = count > longest ? count : longest;
        }
    }
    return longest;
}
|
|
4297
|
+
/**
 * Estimates how long the longest posting list touched by a query spec is.
 *
 * Every term ref produced by `forEachQuerySpecTermRef` is measured with
 * `maxPostingLengthForFieldTermData`; the maximum is returned (0 when nothing
 * is measured). Exact refs without a term index are skipped.
 */
function estimateMaxPostingLengthForQuerySpec(query, normalized, params) {
    const view = params.indexView;
    const context = params.aggregateContext;
    const boosts = normalized.fieldBoosts;
    const ids = context.fieldIds;
    let longest = 0;
    forEachQuerySpecTermRef(query, normalized, params, (ref) => {
        // An exact ref with no term index has nothing to measure.
        if (ref.kind === 'exact' && ref.termIndex == null)
            return;
        const termData = view.fieldTermData(ref.termIndex);
        longest = Math.max(longest, maxPostingLengthForFieldTermData(termData, boosts, ids));
    });
    return longest;
}
|
|
4315
|
+
/**
 * Estimates the longest posting list a query may touch.
 *
 * - Wildcard query: the document count from `params.aggregateContext`.
 * - Query combination: the maximum over its branches, each evaluated
 *   recursively with the combination's own options layered over
 *   `searchOptions` (and `queries` cleared).
 * - String query: the maximum over the specs returned by `normalizeStringQuery`.
 * - Anything else: 0.
 */
function estimateMaxPostingLengthForQuery(query, searchOptions, params) {
    if (isWildcardQuery(query))
        return params.aggregateContext.documentCount;
    // Largest measurement over an iterable, starting from 0.
    const longestOf = (items, measure) => {
        let longest = 0;
        for (const item of items)
            longest = Math.max(longest, measure(item));
        return longest;
    };
    if (isQueryCombination(query)) {
        const branchOptions = { ...searchOptions, ...query, queries: undefined };
        return longestOf(query.queries, (branch) => estimateMaxPostingLengthForQuery(branch, branchOptions, params));
    }
    if (typeof query === 'string') {
        const normalized = normalizeStringQuery(query, searchOptions, params);
        return longestOf(normalized.specs, (spec) => estimateMaxPostingLengthForQuerySpec(spec, normalized, params));
    }
    return 0;
}
|
|
4336
|
+
/**
 * Collects, without scoring, the document ids matched by a query spec.
 *
 * Each term ref from `forEachQuerySpecTermRef` is passed to
 * `indexView.collectDocIds`, which receives the shared result set and
 * `allowedDocs`. Exact refs without a term index are skipped.
 *
 * @returns {Set} a fresh set holding the collected document ids.
 */
function collectDocIdsForQuerySpec(query, normalized, params, allowedDocs) {
    const boosts = normalized.fieldBoosts;
    const matches = new Set();
    const view = params.indexView;
    const context = params.aggregateContext;
    forEachQuerySpecTermRef(query, normalized, params, (ref) => {
        const missingExact = ref.kind === 'exact' && ref.termIndex == null;
        if (!missingExact)
            view.collectDocIds(ref.termIndex, boosts, context, matches, allowedDocs);
    });
    return matches;
}
|
|
4283
4351
|
function intersectDocIdsInPlace(docIds, branchDocIds) {
|
|
@@ -4294,6 +4362,70 @@ function subtractDocIdsFromResult(result, excludedDocIds) {
|
|
|
4294
4362
|
for (const docId of excludedDocIds)
|
|
4295
4363
|
result.delete(docId);
|
|
4296
4364
|
}
|
|
4365
|
+
/**
 * Computes one estimated posting length per branch, in branch order.
 *
 * @returns {Array|undefined} the estimates, or `undefined` when two-phase
 *   evaluation is not allowed or no estimator was supplied.
 */
function twoPhasePostingLengths(branches, allowTwoPhase, estimateBranchPostingLength) {
    const enabled = allowTwoPhase && estimateBranchPostingLength != null;
    if (!enabled)
        return undefined;
    // The estimator is called with the branch only, never the index.
    return Array.from({ length: branches.length }, (_unused, position) => estimateBranchPostingLength(branches[position]));
}
|
|
4374
|
+
/**
 * Decides whether an AND combination should collect its final docId gate
 * before scoring.
 *
 * True only when there is more than one branch, the first branch's estimated
 * length (capped by `allowedDocs.size` when a gate is given) reaches
 * `DEFAULT_POSTING_GATE_MIN_LENGTH`, and some later branch has a non-zero
 * estimate no larger than that length shifted right by
 * `DEFAULT_POSTING_GATE_RATIO_SHIFT`.
 */
function shouldUseTwoPhaseAnd(branchPostingLengths, allowedDocs) {
    if (branchPostingLengths.length <= 1)
        return false;
    let leadLength = branchPostingLengths[0];
    if (allowedDocs != null)
        leadLength = Math.min(leadLength, allowedDocs.size);
    if (leadLength < DEFAULT_POSTING_GATE_MIN_LENGTH)
        return false;
    const ceiling = leadLength >>> DEFAULT_POSTING_GATE_RATIO_SHIFT;
    return branchPostingLengths
        .slice(1)
        .some((length) => length > 0 && length <= ceiling);
}
|
|
4391
|
+
/**
 * Decides whether an AND_NOT combination should collect the surviving docIds
 * before scoring the positive branch.
 *
 * The "large" threshold is the greater of `DEFAULT_POSTING_GATE_MIN_LENGTH`
 * and `floor(documentCount * TWO_PHASE_AND_NOT_MIN_FRACTION)`. True only when
 * there is more than one branch, the first branch's estimated length (capped
 * by `allowedDocs.size` when a gate is given) reaches that threshold, and at
 * least one later branch does too.
 */
function shouldUseTwoPhaseAndNot(branchPostingLengths, allowedDocs, documentCount) {
    if (branchPostingLengths.length <= 1)
        return false;
    let positiveLength = branchPostingLengths[0];
    if (allowedDocs != null)
        positiveLength = Math.min(positiveLength, allowedDocs.size);
    const fractionFloor = Math.floor(documentCount * TWO_PHASE_AND_NOT_MIN_FRACTION);
    const threshold = Math.max(DEFAULT_POSTING_GATE_MIN_LENGTH, fractionFloor);
    if (positiveLength < threshold)
        return false;
    return branchPostingLengths
        .slice(1)
        .some((length) => length >= threshold);
}
|
|
4407
|
+
/**
 * Scores the branches of an AND combination in their original order, passing
 * the already-collected `finalGate` to every branch.
 *
 * Returns an empty Map without executing anything when the gate is empty.
 * Otherwise each branch result is folded into the running result with
 * `combineResults(..., AND)`, stopping as soon as the running result is empty.
 */
function executeAndWithFinalGate(branches, finalGate, executeBranch) {
    if (finalGate.size === 0)
        return new Map();
    let scored = executeBranch(branches[0], finalGate);
    for (const branch of branches.slice(1)) {
        // Nothing left to intersect with; skip the remaining branches.
        if (scored.size === 0)
            break;
        scored = combineResults([scored, executeBranch(branch, finalGate)], AND);
    }
    return scored;
}
|
|
4418
|
+
/**
 * Collects the docId intersection of all AND branches, visiting branches from
 * the shortest estimated posting length to the longest (ties keep original
 * order).
 *
 * The first visited branch is collected under `allowedDocs`; every later
 * branch is collected under the running intersection and then intersected into
 * it in place. Stops early once the intersection is empty.
 *
 * @returns the set returned for the first visited branch, narrowed in place.
 */
function collectAndDocIdsByEstimatedLength(branches, branchPostingLengths, collectBranch, allowedDocs) {
    const ranked = branches
        .map((_branch, position) => position)
        .sort((left, right) => (branchPostingLengths[left] - branchPostingLengths[right]) || (left - right));
    const survivors = collectBranch(branches[ranked[0]], allowedDocs);
    for (const position of ranked.slice(1)) {
        if (survivors.size === 0)
            break;
        intersectDocIdsInPlace(survivors, collectBranch(branches[position], survivors));
    }
    return survivors;
}
|
|
4297
4429
|
function collectCombinedDocIds(branches, operator, collectBranch, allowedDocs) {
|
|
4298
4430
|
if (branches.length === 0)
|
|
4299
4431
|
return new Set();
|
|
@@ -4323,34 +4455,58 @@ function collectCombinedDocIds(branches, operator, collectBranch, allowedDocs) {
|
|
|
4323
4455
|
throw new Error(`Invalid combination operator: ${operator}`);
|
|
4324
4456
|
}
|
|
4325
4457
|
/**
|
|
4326
|
-
* AND: score
|
|
4458
|
+
* AND: normally score left-to-right with optional docId gates; for broad-first selective
|
|
4459
|
+
* exact queries, collect the final gate first, then score branches in original order.
|
|
4327
4460
|
* AND_NOT: score the positive branch only; negated branches are collected as docId sets and
|
|
4328
|
-
* subtracted without scoring
|
|
4461
|
+
* subtracted without scoring. Large exact exclusions may collect survivors before positive scoring.
|
|
4329
4462
|
*/
|
|
4330
|
-
function executeCombinedBranches(branches, operator, params, executeBranch, collectBranch, allowedDocs, run) {
|
|
4463
|
+
function executeCombinedBranches(branches, operator, params, executeBranch, collectBranch, allowedDocs, run, estimateBranchPostingLength, allowTwoPhase = false) {
|
|
4464
|
+
var _a;
|
|
4331
4465
|
if (branches.length === 0)
|
|
4332
4466
|
return new Map();
|
|
4333
4467
|
const op = operator.toLowerCase();
|
|
4334
4468
|
if (op === 'or') {
|
|
4335
4469
|
return combineResults(branches.map(branch => executeBranch(branch, allowedDocs)), operator);
|
|
4336
4470
|
}
|
|
4337
|
-
let result = executeBranch(branches[0], allowedDocs);
|
|
4338
|
-
let gate = docIdsFromResult(result);
|
|
4339
4471
|
if (op === 'and') {
|
|
4472
|
+
const branchPostingLengths = twoPhasePostingLengths(branches, allowTwoPhase, estimateBranchPostingLength);
|
|
4473
|
+
if (branchPostingLengths != null && shouldUseTwoPhaseAnd(branchPostingLengths, allowedDocs)) {
|
|
4474
|
+
const finalGate = collectAndDocIdsByEstimatedLength(branches, branchPostingLengths, collectBranch, allowedDocs);
|
|
4475
|
+
return executeAndWithFinalGate(branches, finalGate, executeBranch);
|
|
4476
|
+
}
|
|
4477
|
+
let result = executeBranch(branches[0], allowedDocs);
|
|
4478
|
+
let gate = gateFromResult(result);
|
|
4340
4479
|
const limits = void 0 ;
|
|
4341
4480
|
const documentCount = params.aggregateContext.documentCount;
|
|
4481
|
+
const postingGatePolicy = (_a = void 0 ) !== null && _a !== void 0 ? _a : DEFAULT_POSTING_GATE_POLICY;
|
|
4482
|
+
const maxGateSize = resolveGateMaxSize(documentCount, limits);
|
|
4342
4483
|
for (let i = 1; i < branches.length; i++) {
|
|
4343
|
-
|
|
4344
|
-
|
|
4484
|
+
if (gate.size === 0)
|
|
4485
|
+
return result;
|
|
4486
|
+
const absoluteSelective = gate.size <= maxGateSize;
|
|
4487
|
+
const postingListLength = absoluteSelective
|
|
4488
|
+
? undefined
|
|
4489
|
+
: estimateBranchPostingLength === null || estimateBranchPostingLength === void 0 ? void 0 : estimateBranchPostingLength(branches[i]);
|
|
4490
|
+
const selective = gateIsSelectiveEnough(gate.size, documentCount, limits, postingListLength, postingGatePolicy);
|
|
4491
|
+
const branchAllowed = absoluteSelective || shouldPassGateAsAllowedDocs(selective, gate.size, postingListLength)
|
|
4492
|
+
? gate
|
|
4493
|
+
: allowedDocs;
|
|
4345
4494
|
result = combineResults([result, executeBranch(branches[i], branchAllowed)], AND);
|
|
4346
|
-
gate =
|
|
4495
|
+
gate = gateFromResult(result);
|
|
4347
4496
|
}
|
|
4348
4497
|
return result;
|
|
4349
4498
|
}
|
|
4350
4499
|
if (op === 'and_not') {
|
|
4500
|
+
const branchPostingLengths = twoPhasePostingLengths(branches, allowTwoPhase, estimateBranchPostingLength);
|
|
4501
|
+
if (branchPostingLengths != null && shouldUseTwoPhaseAndNot(branchPostingLengths, allowedDocs, params.aggregateContext.documentCount)) {
|
|
4502
|
+
const finalGate = collectCombinedDocIds(branches, operator, collectBranch, allowedDocs);
|
|
4503
|
+
return finalGate.size === 0 ? new Map() : executeBranch(branches[0], finalGate);
|
|
4504
|
+
}
|
|
4505
|
+
const result = executeBranch(branches[0], allowedDocs);
|
|
4506
|
+
let gate = gateFromResult(result);
|
|
4351
4507
|
for (let i = 1; i < branches.length; i++) {
|
|
4352
4508
|
subtractDocIdsFromResult(result, collectBranch(branches[i], gate));
|
|
4353
|
-
gate =
|
|
4509
|
+
gate = gateFromResult(result);
|
|
4354
4510
|
}
|
|
4355
4511
|
return result;
|
|
4356
4512
|
}
|
|
@@ -4434,7 +4590,7 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
|
|
|
4434
4590
|
const options = { ...searchOptions, ...query, queries: undefined };
|
|
4435
4591
|
const operator = ((_b = (_a = query.combineWith) !== null && _a !== void 0 ? _a : options.combineWith) !== null && _b !== void 0 ? _b : params.globalSearchOptions.combineWith);
|
|
4436
4592
|
if (useGatedEvaluation(run, query.queries.length, operator, combinationHasWildcard(query))) {
|
|
4437
|
-
return executeCombinedBranches(query.queries, operator, params, (branch, branchAllowed) => executeQueryInternal(branch, options, params, branchAllowed, run), (branch, branchAllowed) => collectDocIdsForQueryInternal(branch, options, params, branchAllowed), allowedDocs);
|
|
4593
|
+
return executeCombinedBranches(query.queries, operator, params, (branch, branchAllowed) => executeQueryInternal(branch, options, params, branchAllowed, run), (branch, branchAllowed) => collectDocIdsForQueryInternal(branch, options, params, branchAllowed), allowedDocs, run, branch => estimateMaxPostingLengthForQuery(branch, options, params));
|
|
4438
4594
|
}
|
|
4439
4595
|
const results = query.queries.map(subquery => executeQueryInternal(subquery, options, params, allowedDocs, run));
|
|
4440
4596
|
return combineResults(results, operator);
|
|
@@ -4446,7 +4602,7 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
|
|
|
4446
4602
|
const { specs, operator } = normalized;
|
|
4447
4603
|
const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
|
|
4448
4604
|
if (useGatedEvaluation(run, specs.length, combineWith, false)) {
|
|
4449
|
-
return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec, normalized, params, branchAllowed), (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs);
|
|
4605
|
+
return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec, normalized, params, branchAllowed), (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs, run, spec => estimateMaxPostingLengthForQuerySpec(spec, normalized, params), specs.every(spec => !spec.prefix && !spec.fuzzy));
|
|
4450
4606
|
}
|
|
4451
4607
|
const results = specs.map(spec => executeQuerySpecInternal(spec, normalized, params, allowedDocs));
|
|
4452
4608
|
return combineResults(results, combineWith);
|