@yoch/frozenminisearch 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/README.md +12 -13
- package/dist/cjs/index.cjs +356 -281
- package/dist/es/index.d.ts +1 -0
- package/dist/es/index.js +356 -281
- package/package.json +2 -1
package/dist/es/index.js
CHANGED
|
@@ -1,9 +1,66 @@
|
|
|
1
1
|
import zlib from 'node:zlib';
|
|
2
2
|
|
|
3
|
+
/**
|
|
4
|
+
* Internal AND / AND_NOT gate thresholds (not exported from the public package entry).
|
|
5
|
+
*/
|
|
6
|
+
/** Posting lists shorter than this never pass the ratio-based gate. */
const DEFAULT_POSTING_GATE_MIN_LENGTH = 2048;
/** Right shift applied to the posting-list length; 2 caps the gate at a quarter of the list. */
const DEFAULT_POSTING_GATE_RATIO_SHIFT = 2;
const DEFAULT_POSTING_GATE_POLICY = {
    minLength: DEFAULT_POSTING_GATE_MIN_LENGTH,
    ratioShift: DEFAULT_POSTING_GATE_RATIO_SHIFT,
};
/**
 * Decides whether a gate is small relative to a posting list.
 *
 * @param {number} gateSize - Number of documents in the gate.
 * @param {number} postingListLength - Length of the posting list being compared.
 * @param {{minLength: number, ratioShift: number}} [policy] - Thresholds to apply.
 * @returns {boolean} `false` for lists shorter than `policy.minLength`; otherwise whether
 *   `gateSize` is at most `postingListLength >>> policy.ratioShift`.
 */
function passGateByPostingRatio(gateSize, postingListLength, policy = DEFAULT_POSTING_GATE_POLICY) {
    const { minLength, ratioShift } = policy;
    const tooShort = postingListLength < minLength;
    return tooShort ? false : gateSize <= (postingListLength >>> ratioShift);
}
|
|
17
|
+
const DEFAULT_AND_GATE_LIMITS = { maxAbsolute: 5000, maxFraction: 0.1 };
/**
 * Computes the largest gate size accepted for an index of `documentCount` documents.
 *
 * @param {number} documentCount - Number of documents in the index.
 * @param {{maxAbsolute: number, maxFraction: number}} [limits] - Absolute and proportional caps.
 * @returns {number} `floor(documentCount * maxFraction)`, raised to at least 100 and then
 *   capped at `maxAbsolute`.
 */
function resolveGateMaxSize(documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
    const proportional = Math.floor(documentCount * limits.maxFraction);
    const atLeastHundred = Math.max(100, proportional);
    return Math.min(limits.maxAbsolute, atLeastHundred);
}
|
|
24
|
+
/**
 * Decides whether a gate (a set of candidate documents) is selective enough to be used.
 *
 * A gate is accepted when it is empty, when it fits within `resolveGateMaxSize`, or when a
 * positive `postingListLength` is supplied and the gate passes `passGateByPostingRatio`.
 *
 * @param {number} gateSize - Number of documents in the gate.
 * @param {number} documentCount - Number of documents in the index.
 * @param {{maxAbsolute: number, maxFraction: number}} [limits] - Size caps.
 * @param {number} [postingListLength] - Optional posting-list length for the ratio check.
 * @param {{minLength: number, ratioShift: number}} [postingGatePolicy] - Ratio thresholds.
 * @returns {boolean} Whether the gate may be used.
 */
function gateIsSelectiveEnough(gateSize, documentCount, limits = DEFAULT_AND_GATE_LIMITS, postingListLength, postingGatePolicy = DEFAULT_POSTING_GATE_POLICY) {
    if (gateSize === 0 || gateSize <= resolveGateMaxSize(documentCount, limits)) {
        return true;
    }
    // The ratio fallback only applies when the caller supplied a non-empty posting list length.
    const hasPostingList = postingListLength != null && postingListLength > 0;
    return hasPostingList && passGateByPostingRatio(gateSize, postingListLength, postingGatePolicy);
}
|
|
36
|
+
|
|
3
37
|
const MAX_FREQ = 65535;
|
|
4
38
|
/** Returns the document id stored at position `index` of `docIds`. */
function readDocId(docIds, index) {
    const docId = docIds[index];
    return docId;
}
|
|
41
|
+
/**
 * Binary-searches the ascending segment `docIds[offset .. offset + length - 1]` for `docId`.
 *
 * @param {ArrayLike<number>} docIds - Backing array holding the segment.
 * @param {number} offset - Index in `docIds` where the segment starts.
 * @param {number} length - Number of entries in the segment.
 * @param {number} docId - Document id to look for.
 * @returns {number} Index into `docIds` (not segment-relative) of the match, or -1 if absent.
 */
function findDocIndexInSortedSegment(docIds, offset, length, docId) {
    let first = 0;
    let last = length - 1;
    while (first <= last) {
        const probe = (first + last) >>> 1;
        const position = offset + probe;
        const candidate = readDocId(docIds, position);
        if (candidate < docId) {
            first = probe + 1;
            continue;
        }
        if (candidate > docId) {
            last = probe - 1;
            continue;
        }
        return position;
    }
    return -1;
}
|
|
57
|
+
/**
 * Scoring-layer choice, made once `allowedDocs` is in effect: look each allowed document up
 * by binary search (true) or scan the whole posting list (false).
 * Currently delegates to {@link passGateByPostingRatio} with its default policy, but is kept
 * as a separate decision point.
 */
function shouldSeekAllowedDocs(gateSize, listLength) {
    const seek = passGateByPostingRatio(gateSize, listLength);
    return seek;
}
|
|
7
64
|
function allocateFreqs(length, maxValue) {
|
|
8
65
|
if (maxValue <= 0xff)
|
|
9
66
|
return new Uint8Array(length);
|
|
@@ -65,10 +122,15 @@ function bm25FieldConstants(bm25params, avgFieldLength) {
|
|
|
65
122
|
const { k, b, d } = bm25params;
|
|
66
123
|
return { k, d, k1: k + 1, oneMinusB: 1 - b, bOverAvg: b / avgFieldLength };
|
|
67
124
|
}
|
|
68
|
-
function
|
|
125
|
+
/**
 * Inverse-document-frequency factor of the BM25 score.
 *
 * @param {number} matchingCount - Number of documents containing the term.
 * @param {number} totalCount - Total number of documents.
 * @returns {number} `ln(1 + (totalCount - matchingCount + 0.5) / (matchingCount + 0.5))`.
 */
function bm25Idf(matchingCount, totalCount) {
    const withoutTerm = totalCount - matchingCount + 0.5;
    const withTerm = matchingCount + 0.5;
    return Math.log(1 + withoutTerm / withTerm);
}
|
|
128
|
+
/**
 * Combines a precomputed IDF with the BM25 term-frequency component.
 *
 * @param {number} termFreq - Occurrences of the term in the field.
 * @param {number} fieldLength - Length of the field in the scored document.
 * @param {{k: number, d: number, k1: number, oneMinusB: number, bOverAvg: number}} constants -
 *   Per-field constants.
 * @param {number} idf - Inverse-document-frequency factor.
 * @returns {number} `idf * (d + termFreq * k1 / (termFreq + k * (oneMinusB + bOverAvg * fieldLength)))`.
 */
function calcBm25TfWithConstants(termFreq, fieldLength, constants, idf) {
    const lengthNorm = constants.oneMinusB + constants.bOverAvg * fieldLength;
    const saturation = termFreq * constants.k1 / (termFreq + constants.k * lengthNorm);
    return idf * (constants.d + saturation);
}
|
|
132
|
+
/**
 * Full BM25 score for one term occurrence: derives the IDF from the match counts, then
 * applies the term-frequency component.
 *
 * @param {number} termFreq - Occurrences of the term in the field.
 * @param {number} matchingCount - Number of documents containing the term.
 * @param {number} totalCount - Total number of documents.
 * @param {number} fieldLength - Length of the field in the scored document.
 * @param {object} constants - Per-field constants passed through to `calcBm25TfWithConstants`.
 * @returns {number} The BM25 score.
 */
function calcBM25ScoreWithConstants(termFreq, matchingCount, totalCount, fieldLength, constants) {
    const idf = bm25Idf(matchingCount, totalCount);
    return calcBm25TfWithConstants(termFreq, fieldLength, constants, idf);
}
|
|
73
135
|
/** Returns `object[property]` only when it is an own property; inherited keys yield `undefined`. */
const getOwnProperty = (object, property) => {
    if (Object.prototype.hasOwnProperty.call(object, property)) {
        return object[property];
    }
    return undefined;
};
|
|
74
136
|
function fieldBoostsForQuery(options, fields) {
|
|
@@ -97,7 +159,7 @@ function getDerivedTerm(derivedTerm, cache) {
|
|
|
97
159
|
cache.value = derivedTerm.resolve();
|
|
98
160
|
return cache.value;
|
|
99
161
|
}
|
|
100
|
-
function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache) {
|
|
162
|
+
function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf) {
|
|
101
163
|
const resolvedDerivedTerm = getDerivedTerm(derivedTerm, derivedTermCache);
|
|
102
164
|
const docBoost = boostDocumentFn
|
|
103
165
|
? boostDocumentFn(context.getExternalId(docId), resolvedDerivedTerm, context.getStoredFields(docId))
|
|
@@ -105,7 +167,9 @@ function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFre
|
|
|
105
167
|
if (!docBoost)
|
|
106
168
|
return;
|
|
107
169
|
const fieldLength = context.getFieldLength(docId, fieldId);
|
|
108
|
-
const rawScore =
|
|
170
|
+
const rawScore = hoistedIdf !== undefined
|
|
171
|
+
? calcBm25TfWithConstants(termFreq, fieldLength, bm25, hoistedIdf)
|
|
172
|
+
: calcBM25ScoreWithConstants(termFreq, matchingFields, context.documentCount, fieldLength, bm25);
|
|
109
173
|
const weightedScore = termWeight * termBoost * fieldBoost * docBoost * rawScore;
|
|
110
174
|
const result = results.get(docId);
|
|
111
175
|
if (result) {
|
|
@@ -128,22 +192,39 @@ function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFre
|
|
|
128
192
|
}
|
|
129
193
|
}
|
|
130
194
|
/**
 * Scores the documents of one segment posting list (one term in one field) into `results`.
 *
 * Two traversal strategies are used:
 * - seek: when `allowedDocs` is given and `shouldSeekAllowedDocs` approves, each allowed
 *   document is looked up in the sorted segment by binary search;
 * - scan: otherwise every posting is visited and filtered against `allowedDocs`.
 *
 * In both strategies only documents that are actually present in the posting list can be
 * reported through `context.onInactiveDoc` or reduce the returned count. The seek path used
 * to run the inactive check before the lookup, so an inactive allowed document that did not
 * contain the term was still reported and counted.
 *
 * @param sourceTerm - Query term as written by the user (forwarded to `scorePostingDoc`).
 * @param derivedTerm - Indexed term being scored, possibly lazy (see `getDerivedTerm`).
 * @param {number} termWeight - Weight of this derived term.
 * @param {number} termBoost - Boost of the query term.
 * @param {string} field - Field name.
 * @param {number} fieldId - Numeric field id; indexes `context.avgFieldLength`.
 * @param {number} fieldBoost - Boost of the field.
 * @param list - Segment posting list exposing `docIds`, `freqs`, `offset` and `length`.
 * @param context - Aggregate context (`documentCount`, `avgFieldLength`, optional
 *   `isDocActive` and `onInactiveDoc`, plus whatever `scorePostingDoc` reads).
 * @param [boostDocumentFn] - Optional per-document boost callback.
 * @param bm25params - BM25 parameters `{ k, b, d }`.
 * @param {Map} results - Accumulator that `scorePostingDoc` writes into.
 * @param {Set<number>} [allowedDocs] - When given, only these documents are scored.
 * @returns {number} `list.length` minus the number of inactive documents encountered.
 */
function aggregateSegmentPostingList(sourceTerm, derivedTerm, termWeight, termBoost, field, fieldId, fieldBoost, list, context, boostDocumentFn, bm25params, results, allowedDocs) {
    let matchingFields = list.length;
    const bm25 = bm25FieldConstants(bm25params, context.avgFieldLength[fieldId]);
    // Without an activity filter `matchingFields` never changes, so the IDF is computed once.
    const hoistedIdf = context.isDocActive == null
        ? bm25Idf(matchingFields, context.documentCount)
        : undefined;
    const { docIds, freqs, offset, length } = list;
    const derivedTermCache = {};
    // Reports an inactive document to the context; returns true when it must be skipped.
    const skipIfInactive = (docId) => {
        if (context.isDocActive == null || context.isDocActive(docId)) {
            return false;
        }
        if (context.onInactiveDoc != null) {
            context.onInactiveDoc(docId, fieldId, getDerivedTerm(derivedTerm, derivedTermCache));
        }
        return true;
    };
    if (allowedDocs != null && shouldSeekAllowedDocs(allowedDocs.size, length)) {
        for (const docId of allowedDocs) {
            // Look the document up first: documents that are not in this posting list must
            // neither be reported as inactive nor change the matching count.
            const index = findDocIndexInSortedSegment(docIds, offset, length, docId);
            if (index < 0) {
                continue;
            }
            if (skipIfInactive(docId)) {
                matchingFields -= 1;
                continue;
            }
            scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, freqs[index], termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
        }
        return matchingFields;
    }
    for (let i = 0; i < length; i++) {
        const docId = readDocId(docIds, offset + i);
        const termFreq = freqs[offset + i];
        if (skipIfInactive(docId)) {
            matchingFields -= 1;
            continue;
        }
        if (allowedDocs != null && !allowedDocs.has(docId)) {
            continue;
        }
        scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
    }
    return matchingFields;
}
|
|
@@ -163,6 +244,9 @@ function aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTerm
|
|
|
163
244
|
}
|
|
164
245
|
let matchingFields = postingList.size;
|
|
165
246
|
const bm25 = bm25FieldConstants(bm25params, context.avgFieldLength[fieldId]);
|
|
247
|
+
const hoistedIdf = context.isDocActive == null
|
|
248
|
+
? bm25Idf(matchingFields, context.documentCount)
|
|
249
|
+
: undefined;
|
|
166
250
|
const derivedTermCache = {};
|
|
167
251
|
postingList.forEachDoc((docId, termFreq) => {
|
|
168
252
|
var _a;
|
|
@@ -173,7 +257,7 @@ function aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTerm
|
|
|
173
257
|
}
|
|
174
258
|
if (allowedDocs != null && !allowedDocs.has(docId))
|
|
175
259
|
return;
|
|
176
|
-
scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache);
|
|
260
|
+
scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
|
|
177
261
|
});
|
|
178
262
|
}
|
|
179
263
|
return results;
|
|
@@ -256,7 +340,9 @@ function finalizeSearchResults(params) {
|
|
|
256
340
|
queryTerms: terms,
|
|
257
341
|
match,
|
|
258
342
|
};
|
|
259
|
-
|
|
343
|
+
if (getStoredFields != null) {
|
|
344
|
+
Object.assign(result, getStoredFields(docId));
|
|
345
|
+
}
|
|
260
346
|
if (filter == null || filter(result)) {
|
|
261
347
|
results.push(result);
|
|
262
348
|
}
|
|
@@ -1740,82 +1826,47 @@ function readFieldLengthMatrixSection(buf, flags, cellCount) {
|
|
|
1740
1826
|
|
|
1741
1827
|
const DISCARDED_DOC_ID = 0xffffffff;
|
|
1742
1828
|
function postingFreqValue(freq, clampFrequencies) {
|
|
1743
|
-
return
|
|
1744
|
-
}
|
|
1745
|
-
function materializeFlatPostings(params) {
|
|
1746
|
-
const { fieldCount, termCount, forEachPosting, remapDocId, clampFrequencies } = params;
|
|
1747
|
-
const slotCount = termCount * fieldCount;
|
|
1748
|
-
const postingsOffsets = new Uint32Array(slotCount);
|
|
1749
|
-
const postingsLengths = new Uint32Array(slotCount);
|
|
1750
|
-
let totalPostings = 0;
|
|
1751
|
-
let maxFreq = 0;
|
|
1752
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
1753
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
1754
|
-
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1755
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1756
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1757
|
-
return;
|
|
1758
|
-
totalPostings++;
|
|
1759
|
-
const v = postingFreqValue(freq, clampFrequencies);
|
|
1760
|
-
if (v > maxFreq)
|
|
1761
|
-
maxFreq = v;
|
|
1762
|
-
});
|
|
1763
|
-
}
|
|
1764
|
-
}
|
|
1765
|
-
const useUint16 = params.nextId != null && params.nextId <= 65535;
|
|
1766
|
-
const allDocIds = useUint16
|
|
1767
|
-
? new Uint16Array(totalPostings)
|
|
1768
|
-
: new Uint32Array(totalPostings);
|
|
1769
|
-
const allFreqs = allocateFreqs(totalPostings, maxFreq);
|
|
1770
|
-
// Slots are visited in ascending fieldId (0..fieldCount-1) per term. Sparse layouts
|
|
1771
|
-
// rely on this ordering so field ids per term stay sorted for binary lookup.
|
|
1772
|
-
let write = 0;
|
|
1773
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
1774
|
-
const base = ti * fieldCount;
|
|
1775
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
1776
|
-
const offset = write;
|
|
1777
|
-
let count = 0;
|
|
1778
|
-
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1779
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1780
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1781
|
-
return;
|
|
1782
|
-
if (useUint16) {
|
|
1783
|
-
allDocIds[write] = docId;
|
|
1784
|
-
}
|
|
1785
|
-
else {
|
|
1786
|
-
allDocIds[write] = docId;
|
|
1787
|
-
}
|
|
1788
|
-
allFreqs[write] = postingFreqValue(freq, clampFrequencies);
|
|
1789
|
-
write++;
|
|
1790
|
-
count++;
|
|
1791
|
-
});
|
|
1792
|
-
postingsOffsets[base + f] = offset;
|
|
1793
|
-
postingsLengths[base + f] = count;
|
|
1794
|
-
}
|
|
1795
|
-
}
|
|
1796
|
-
return {
|
|
1797
|
-
postingsOffsets,
|
|
1798
|
-
postingsLengths,
|
|
1799
|
-
allDocIds,
|
|
1800
|
-
allFreqs,
|
|
1801
|
-
};
|
|
1829
|
+
return clampFreq(freq) ;
|
|
1802
1830
|
}
|
|
1803
1831
|
|
|
1804
1832
|
/** Returns the field id stored at position `index` of `fieldIds`. */
function readFieldId(fieldIds, index) {
    const fieldId = fieldIds[index];
    return fieldId;
}
|
|
1807
|
-
function choosePostingsLayout(fieldCount) {
|
|
1808
|
-
return fieldCount === 1 ? 'dense' : 'sparse';
|
|
1809
|
-
}
|
|
1810
1835
|
/**
 * Picks the bit width used to store field ids in the sparse layout.
 *
 * @param {number} fieldCount - Number of indexed fields.
 * @returns {8|16} 16 when there are more than 255 fields, otherwise 8.
 */
function chooseSparseFieldIdWidth(fieldCount) {
    if (fieldCount > 255) {
        return 16;
    }
    return 8;
}
|
|
1813
|
-
function
|
|
1814
|
-
const
|
|
1815
|
-
const
|
|
1838
|
+
/**
 * Chooses between the dense and sparse postings layouts by comparing estimated index sizes.
 *
 * Dense is costed at 8 bytes for every (term, field) slot. Sparse is costed at 4 bytes per
 * term start (`termCount + 1` entries) plus, for each non-empty slot, one field id (1 or 2
 * bytes) and 8 bytes. Ties go to dense.
 *
 * @param {number} fieldCount - Number of indexed fields.
 * @param {number} termCount - Number of terms.
 * @param {number} nonEmptySlots - Number of (term, field) slots holding at least one posting.
 * @returns {'dense'|'sparse'} The cheaper layout.
 */
function choosePostingsLayout(fieldCount, termCount, nonEmptySlots) {
    const fieldIdBytes = chooseSparseFieldIdWidth(fieldCount) === 16 ? 2 : 1;
    const denseCost = termCount * fieldCount * 8;
    const sparseCost = (termCount + 1) * 4 + nonEmptySlots * (fieldIdBytes + 8);
    if (denseCost <= sparseCost) {
        return 'dense';
    }
    return 'sparse';
}
|
|
1844
|
+
/** Shared dense/sparse layout emission; callers supply per-slot length and copy. */
|
|
1845
|
+
function buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, source) {
|
|
1846
|
+
const layout = choosePostingsLayout(fieldCount, termCount, source.nonEmptySlots);
|
|
1816
1847
|
const docIdWidth = nextId <= 65535 ? 16 : 32;
|
|
1848
|
+
const allDocIds = docIdWidth === 16
|
|
1849
|
+
? new Uint16Array(totalPostings)
|
|
1850
|
+
: new Uint32Array(totalPostings);
|
|
1851
|
+
const allFreqs = allocateFreqs(totalPostings, maxFreq);
|
|
1852
|
+
const targets = { allDocIds, allFreqs, docIdWidth };
|
|
1817
1853
|
if (layout === 'dense') {
|
|
1818
|
-
const
|
|
1854
|
+
const slotCount = termCount * fieldCount;
|
|
1855
|
+
const denseOffsets = new Uint32Array(slotCount);
|
|
1856
|
+
const denseLengths = new Uint32Array(slotCount);
|
|
1857
|
+
let write = 0;
|
|
1858
|
+
for (let ti = 0; ti < termCount; ti++) {
|
|
1859
|
+
const base = ti * fieldCount;
|
|
1860
|
+
for (let f = 0; f < fieldCount; f++) {
|
|
1861
|
+
const slot = base + f;
|
|
1862
|
+
const len = source.slotLength(ti, f);
|
|
1863
|
+
denseOffsets[slot] = write;
|
|
1864
|
+
denseLengths[slot] = len;
|
|
1865
|
+
if (len > 0) {
|
|
1866
|
+
write = source.writeSlot(ti, f, write, targets);
|
|
1867
|
+
}
|
|
1868
|
+
}
|
|
1869
|
+
}
|
|
1819
1870
|
return {
|
|
1820
1871
|
fieldCount,
|
|
1821
1872
|
termCount,
|
|
@@ -1823,10 +1874,10 @@ function materializeFrozenPostings(params) {
|
|
|
1823
1874
|
layout,
|
|
1824
1875
|
docIdWidth,
|
|
1825
1876
|
sparseFieldIdWidth: null,
|
|
1826
|
-
allDocIds
|
|
1827
|
-
allFreqs
|
|
1828
|
-
denseOffsets
|
|
1829
|
-
denseLengths
|
|
1877
|
+
allDocIds,
|
|
1878
|
+
allFreqs,
|
|
1879
|
+
denseOffsets,
|
|
1880
|
+
denseLengths,
|
|
1830
1881
|
sparseTermStarts: null,
|
|
1831
1882
|
sparseFieldIds: null,
|
|
1832
1883
|
sparseOffsets: null,
|
|
@@ -1838,60 +1889,23 @@ function materializeFrozenPostings(params) {
|
|
|
1838
1889
|
const sparseOffsets = [];
|
|
1839
1890
|
const sparseLengths = [];
|
|
1840
1891
|
const termStarts = new Array(termCount + 1).fill(0);
|
|
1841
|
-
|
|
1842
|
-
// Non-empty slots per term are emitted with fieldId in ascending order (f loops 0..fieldCount-1).
|
|
1843
|
-
let totalPostings = 0;
|
|
1844
|
-
let maxFreq = 0;
|
|
1892
|
+
let write = 0;
|
|
1845
1893
|
for (let ti = 0; ti < termCount; ti++) {
|
|
1846
1894
|
termStarts[ti] = sparseFieldIdsScratch.length;
|
|
1847
1895
|
for (let f = 0; f < fieldCount; f++) {
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1851
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1852
|
-
return;
|
|
1853
|
-
count++;
|
|
1854
|
-
const v = postingFreqValue(freq, clampFrequencies);
|
|
1855
|
-
if (v > maxFreq)
|
|
1856
|
-
maxFreq = v;
|
|
1857
|
-
});
|
|
1858
|
-
if (count === 0)
|
|
1896
|
+
const len = source.slotLength(ti, f);
|
|
1897
|
+
if (len === 0)
|
|
1859
1898
|
continue;
|
|
1860
1899
|
sparseFieldIdsScratch.push(f);
|
|
1861
|
-
sparseOffsets.push(
|
|
1862
|
-
sparseLengths.push(
|
|
1863
|
-
|
|
1900
|
+
sparseOffsets.push(write);
|
|
1901
|
+
sparseLengths.push(len);
|
|
1902
|
+
write = source.writeSlot(ti, f, write, targets);
|
|
1864
1903
|
}
|
|
1865
1904
|
termStarts[ti + 1] = sparseFieldIdsScratch.length;
|
|
1866
1905
|
}
|
|
1867
|
-
const allDocIds = docIdWidth === 16
|
|
1868
|
-
? new Uint16Array(totalPostings)
|
|
1869
|
-
: new Uint32Array(totalPostings);
|
|
1870
|
-
const allFreqs = allocateFreqs(totalPostings, maxFreq);
|
|
1871
1906
|
const sparseFieldIds = sparseFieldIdWidth === 16
|
|
1872
1907
|
? new Uint16Array(sparseFieldIdsScratch)
|
|
1873
1908
|
: new Uint8Array(sparseFieldIdsScratch);
|
|
1874
|
-
let write = 0;
|
|
1875
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
1876
|
-
const start = termStarts[ti];
|
|
1877
|
-
const end = termStarts[ti + 1];
|
|
1878
|
-
for (let s = start; s < end; s++) {
|
|
1879
|
-
const f = readFieldId(sparseFieldIds, s);
|
|
1880
|
-
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1881
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1882
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1883
|
-
return;
|
|
1884
|
-
if (docIdWidth === 16) {
|
|
1885
|
-
allDocIds[write] = docId;
|
|
1886
|
-
}
|
|
1887
|
-
else {
|
|
1888
|
-
allDocIds[write] = docId;
|
|
1889
|
-
}
|
|
1890
|
-
allFreqs[write] = postingFreqValue(freq, clampFrequencies);
|
|
1891
|
-
write++;
|
|
1892
|
-
});
|
|
1893
|
-
}
|
|
1894
|
-
}
|
|
1895
1909
|
return {
|
|
1896
1910
|
fieldCount,
|
|
1897
1911
|
termCount,
|
|
@@ -1909,6 +1923,58 @@ function materializeFrozenPostings(params) {
|
|
|
1909
1923
|
sparseLengths: new Uint32Array(sparseLengths),
|
|
1910
1924
|
};
|
|
1911
1925
|
}
|
|
1926
|
+
/**
 * Builds the frozen postings layout from a posting enumerator.
 *
 * A first pass over every (term, field) slot counts live postings and tracks the largest
 * stored frequency; the totals and a per-slot source are then handed to
 * `buildFrozenPostingsLayout`, which calls back into `writeSlot` to copy the postings.
 * Postings whose (optionally remapped) doc id equals `DISCARDED_DOC_ID` are dropped in both
 * passes.
 *
 * @param {object} params
 * @param {number} params.fieldCount - Number of indexed fields.
 * @param {number} params.termCount - Number of terms.
 * @param {number} params.nextId - Forwarded to `buildFrozenPostingsLayout`.
 * @param {(ti: number, f: number, emit: (rawDocId: number, freq: number) => void) => void}
 *   params.forEachPosting - Enumerates the postings of one slot; called once per pass.
 * @param {(rawDocId: number) => number} [params.remapDocId] - Optional doc-id translation.
 * @returns The layout object produced by `buildFrozenPostingsLayout`.
 */
function materializeFrozenPostings(params) {
    const { fieldCount, termCount, nextId, forEachPosting, remapDocId } = params;
    const slotLengths = new Uint32Array(termCount * fieldCount);
    let totalPostings = 0;
    let maxFreq = 0;
    let nonEmptySlots = 0;
    // Shared by both passes: remaps doc ids, drops discarded ones, normalizes the frequency.
    const forEachLivePosting = (ti, f, emit) => {
        forEachPosting(ti, f, (rawDocId, freq) => {
            const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
            if (docId === DISCARDED_DOC_ID) {
                return;
            }
            emit(docId, postingFreqValue(freq));
        });
    };
    for (let ti = 0; ti < termCount; ti++) {
        const base = ti * fieldCount;
        for (let f = 0; f < fieldCount; f++) {
            let count = 0;
            forEachLivePosting(ti, f, (docId, storedFreq) => {
                count++;
                if (storedFreq > maxFreq) {
                    maxFreq = storedFreq;
                }
            });
            if (count === 0) {
                continue;
            }
            slotLengths[base + f] = count;
            totalPostings += count;
            nonEmptySlots++;
        }
    }
    return buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, {
        nonEmptySlots,
        slotLength(ti, f) {
            return slotLengths[ti * fieldCount + f];
        },
        writeSlot(ti, f, write, targets) {
            const { allDocIds: outDocIds, allFreqs: outFreqs } = targets;
            let cursor = write;
            // The store is identical for 16- and 32-bit doc-id arrays, so no width branch.
            forEachLivePosting(ti, f, (docId, storedFreq) => {
                outDocIds[cursor] = docId;
                outFreqs[cursor] = storedFreq;
                cursor++;
            });
            return cursor;
        },
    });
}
|
|
1912
1978
|
function postingsTypedBytes(layout) {
|
|
1913
1979
|
const allDocIdsBytes = layout.allDocIds.byteLength;
|
|
1914
1980
|
const allFreqsBytes = layout.allFreqs.byteLength;
|
|
@@ -2671,7 +2737,6 @@ function buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, nextId, s
|
|
|
2671
2737
|
fieldCount,
|
|
2672
2738
|
termCount,
|
|
2673
2739
|
nextId,
|
|
2674
|
-
clampFrequencies: true,
|
|
2675
2740
|
remapDocId,
|
|
2676
2741
|
forEachPosting(ti, f, emit) {
|
|
2677
2742
|
var _a;
|
|
@@ -2915,8 +2980,8 @@ async function zlibPayloadChoiceAsync(uncompressed) {
|
|
|
2915
2980
|
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2916
2981
|
}
|
|
2917
2982
|
/** Synchronous compressors keyed by codec name; each maps an uncompressed buffer to its compressed form. */
const autoSyncCompressors = {
    zstd: (input) => {
        const zstdOptions = msv5ZstdCompressOptions(input);
        return zlib.zstdCompressSync(input, zstdOptions);
    },
    zlib: (input) => {
        return zlib.deflateSync(input);
    },
};
|
|
2921
2986
|
const autoAsyncCompressors = {
|
|
2922
2987
|
zstd: zstdCompressAsync,
|
|
@@ -3859,93 +3924,23 @@ class IncrementalPostingsAccumulator {
|
|
|
3859
3924
|
const totalPostings = this._totalPostings;
|
|
3860
3925
|
const maxFreq = this._maxFreq;
|
|
3861
3926
|
const slots = this._slots;
|
|
3862
|
-
const layout =
|
|
3863
|
-
|
|
3864
|
-
|
|
3865
|
-
|
|
3866
|
-
|
|
3867
|
-
|
|
3868
|
-
|
|
3869
|
-
const slotCount = termCount * fieldCount;
|
|
3870
|
-
const denseOffsets = new Uint32Array(slotCount);
|
|
3871
|
-
const denseLengths = new Uint32Array(slotCount);
|
|
3872
|
-
let write = 0;
|
|
3873
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
3874
|
-
const base = ti * fieldCount;
|
|
3875
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
3876
|
-
const slot = base + f;
|
|
3877
|
-
const ranges = slots.get(slot);
|
|
3878
|
-
const len = ranges == null ? 0 : this.slotLength(ranges);
|
|
3879
|
-
denseOffsets[slot] = write;
|
|
3880
|
-
denseLengths[slot] = len;
|
|
3881
|
-
if (len > 0) {
|
|
3882
|
-
write = this.copySlot(ranges, allDocIds, allFreqs, write, docIdWidth);
|
|
3883
|
-
slots.delete(slot);
|
|
3884
|
-
}
|
|
3885
|
-
}
|
|
3886
|
-
}
|
|
3887
|
-
slots.clear();
|
|
3888
|
-
this.clear();
|
|
3889
|
-
return {
|
|
3890
|
-
fieldCount,
|
|
3891
|
-
termCount,
|
|
3892
|
-
nextId,
|
|
3893
|
-
layout,
|
|
3894
|
-
docIdWidth,
|
|
3895
|
-
sparseFieldIdWidth: null,
|
|
3896
|
-
allDocIds,
|
|
3897
|
-
allFreqs,
|
|
3898
|
-
denseOffsets,
|
|
3899
|
-
denseLengths,
|
|
3900
|
-
sparseTermStarts: null,
|
|
3901
|
-
sparseFieldIds: null,
|
|
3902
|
-
sparseOffsets: null,
|
|
3903
|
-
sparseLengths: null,
|
|
3904
|
-
};
|
|
3905
|
-
}
|
|
3906
|
-
const sparseFieldIdWidth = chooseSparseFieldIdWidth(fieldCount);
|
|
3907
|
-
const sparseFieldIdsScratch = [];
|
|
3908
|
-
const sparseOffsets = [];
|
|
3909
|
-
const sparseLengths = [];
|
|
3910
|
-
const termStarts = new Array(termCount + 1).fill(0);
|
|
3911
|
-
let write = 0;
|
|
3912
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
3913
|
-
termStarts[ti] = sparseFieldIdsScratch.length;
|
|
3914
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
3927
|
+
const layout = buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, {
|
|
3928
|
+
nonEmptySlots: slots.size,
|
|
3929
|
+
slotLength: (ti, f) => {
|
|
3930
|
+
const ranges = slots.get(ti * fieldCount + f);
|
|
3931
|
+
return ranges == null ? 0 : this.slotLength(ranges);
|
|
3932
|
+
},
|
|
3933
|
+
writeSlot: (ti, f, write, targets) => {
|
|
3915
3934
|
const slot = ti * fieldCount + f;
|
|
3916
3935
|
const ranges = slots.get(slot);
|
|
3917
|
-
const
|
|
3918
|
-
if (len === 0)
|
|
3919
|
-
continue;
|
|
3920
|
-
sparseFieldIdsScratch.push(f);
|
|
3921
|
-
sparseOffsets.push(write);
|
|
3922
|
-
sparseLengths.push(len);
|
|
3923
|
-
write = this.copySlot(ranges, allDocIds, allFreqs, write, docIdWidth);
|
|
3936
|
+
const next = this.copySlot(ranges, targets.allDocIds, targets.allFreqs, write, targets.docIdWidth);
|
|
3924
3937
|
slots.delete(slot);
|
|
3925
|
-
|
|
3926
|
-
|
|
3927
|
-
}
|
|
3938
|
+
return next;
|
|
3939
|
+
},
|
|
3940
|
+
});
|
|
3928
3941
|
slots.clear();
|
|
3929
3942
|
this.clear();
|
|
3930
|
-
|
|
3931
|
-
? new Uint16Array(sparseFieldIdsScratch)
|
|
3932
|
-
: new Uint8Array(sparseFieldIdsScratch);
|
|
3933
|
-
return {
|
|
3934
|
-
fieldCount,
|
|
3935
|
-
termCount,
|
|
3936
|
-
nextId,
|
|
3937
|
-
layout,
|
|
3938
|
-
docIdWidth,
|
|
3939
|
-
sparseFieldIdWidth,
|
|
3940
|
-
allDocIds,
|
|
3941
|
-
allFreqs,
|
|
3942
|
-
denseOffsets: null,
|
|
3943
|
-
denseLengths: null,
|
|
3944
|
-
sparseTermStarts: new Uint32Array(termStarts),
|
|
3945
|
-
sparseFieldIds,
|
|
3946
|
-
sparseOffsets: new Uint32Array(sparseOffsets),
|
|
3947
|
-
sparseLengths: new Uint32Array(sparseLengths),
|
|
3948
|
-
};
|
|
3943
|
+
return layout;
|
|
3949
3944
|
}
|
|
3950
3945
|
}
|
|
3951
3946
|
|
|
@@ -4125,22 +4120,6 @@ function buildFrozenParamsFromDocuments(documents, options) {
|
|
|
4125
4120
|
return builder.freezeParams();
|
|
4126
4121
|
}
|
|
4127
4122
|
|
|
4128
|
-
/**
|
|
4129
|
-
* Internal AND / AND_NOT gate thresholds (not exported from the public package entry).
|
|
4130
|
-
*/
|
|
4131
|
-
const DEFAULT_AND_GATE_LIMITS = {
|
|
4132
|
-
maxAbsolute: 5000,
|
|
4133
|
-
maxFraction: 0.1,
|
|
4134
|
-
};
|
|
4135
|
-
function resolveGateMaxSize(documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
|
|
4136
|
-
return Math.min(limits.maxAbsolute, Math.max(100, Math.floor(documentCount * limits.maxFraction)));
|
|
4137
|
-
}
|
|
4138
|
-
function gateIsSelectiveEnough(gateSize, documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
|
|
4139
|
-
if (gateSize === 0)
|
|
4140
|
-
return true;
|
|
4141
|
-
return gateSize <= resolveGateMaxSize(documentCount, limits);
|
|
4142
|
-
}
|
|
4143
|
-
|
|
4144
4123
|
/**
 * Thin wrapper around `shouldUseGatedEvaluation`.
 * The `run` argument is accepted but not consulted by this function.
 */
function useGatedEvaluation(run, branchCount, operator, hasWildcard) {
    const gated = shouldUseGatedEvaluation(branchCount, operator, hasWildcard);
    return gated;
}
|
|
@@ -4182,80 +4161,155 @@ function normalizeStringQuery(query, searchOptions, params) {
|
|
|
4182
4161
|
...params.globalSearchOptions,
|
|
4183
4162
|
...searchOptions,
|
|
4184
4163
|
};
|
|
4185
|
-
const
|
|
4186
|
-
|
|
4187
|
-
|
|
4164
|
+
const tokens = options.tokenize(query);
|
|
4165
|
+
const terms = [];
|
|
4166
|
+
for (const token of tokens) {
|
|
4167
|
+
const processed = options.processTerm(token);
|
|
4168
|
+
if (Array.isArray(processed)) {
|
|
4169
|
+
for (const term of processed) {
|
|
4170
|
+
if (term)
|
|
4171
|
+
terms.push(term);
|
|
4172
|
+
}
|
|
4173
|
+
}
|
|
4174
|
+
else if (processed) {
|
|
4175
|
+
terms.push(processed);
|
|
4176
|
+
}
|
|
4177
|
+
}
|
|
4178
|
+
const toSpec = termToQuerySpec(options);
|
|
4179
|
+
const specs = new Array(terms.length);
|
|
4180
|
+
for (let i = 0; i < terms.length; i++) {
|
|
4181
|
+
specs[i] = toSpec(terms[i], i, terms);
|
|
4182
|
+
}
|
|
4183
|
+
const { fuzzy: fuzzyWeight, prefix: prefixWeight } = {
|
|
4184
|
+
...defaultSearchOptions.weights,
|
|
4185
|
+
...options.weights,
|
|
4186
|
+
};
|
|
4188
4187
|
return {
|
|
4189
4188
|
options,
|
|
4190
|
-
specs
|
|
4189
|
+
specs,
|
|
4191
4190
|
operator: options.combineWith,
|
|
4191
|
+
fieldBoosts: fieldBoostsForQuery(options, params.fields),
|
|
4192
|
+
fuzzyWeight,
|
|
4193
|
+
prefixWeight,
|
|
4192
4194
|
};
|
|
4193
4195
|
}
|
|
4194
4196
|
/**
 * Wraps a term index in a lazy derived-term descriptor; the term string is only looked up
 * (via `indexView.resolveTermByIndex`) when `resolve()` is called.
 */
function lazyIndexedTerm(indexView, termIndex) {
    const resolve = () => indexView.resolveTermByIndex(termIndex);
    return { kind: 'lazy', resolve };
}
|
|
4197
|
-
function
|
|
4199
|
+
/**
 * Enumerates the index terms a query spec expands to, calling `visit` with one reference each:
 * 1. `{ kind: 'exact', termIndex }` — always emitted; `termIndex` is whatever
 *    `indexView.resolveTermIndex` returns for the query term;
 * 2. `{ kind: 'prefix', termIndex, length, distance }` — when `query.prefix` is set, for every
 *    prefix match longer than the query term;
 * 3. `{ kind: 'fuzzy', termIndex, length, distance }` — when the fuzzy distance is non-zero,
 *    for every fuzzy match with non-zero distance not already emitted as a prefix match.
 */
function forEachQuerySpecTermRef(query, normalized, params, visit) {
    const indexView = params.indexView;
    const fuzzyBudget = maxFuzzyDistance(query, normalized.options.maxFuzzy);
    visit({ kind: 'exact', termIndex: indexView.resolveTermIndex(query.term) });
    // Prefix hits only need remembering when a fuzzy pass follows and must skip them.
    let prefixHits;
    if (query.prefix) {
        if (fuzzyBudget) {
            prefixHits = new Set();
        }
        for (const match of indexView.getPrefixMatchesByIndex(query.term)) {
            const extra = match.length - query.term.length;
            if (!extra) {
                continue;
            }
            if (prefixHits !== undefined) {
                prefixHits.add(match.termIndex);
            }
            visit({ kind: 'prefix', termIndex: match.termIndex, length: match.length, distance: extra });
        }
    }
    if (!fuzzyBudget) {
        return;
    }
    for (const { termIndex, length, distance } of indexView.getFuzzyMatchesByIndex(query.term, fuzzyBudget)) {
        const alreadyPrefix = prefixHits !== undefined && prefixHits.has(termIndex);
        if (distance && !alreadyPrefix) {
            visit({ kind: 'fuzzy', termIndex, length, distance });
        }
    }
}
|
|
4222
|
-
function
|
|
4223
|
-
const
|
|
4224
|
-
const
|
|
4222
|
+
/**
 * Turns each term reference of a query spec into a scoring visit:
 * `visit(fieldTermData, derivedTerm, termWeight)`.
 *
 * - exact: data is `undefined` when the term is not indexed; the term is the query term and
 *   the weight is 1;
 * - prefix: weight is `prefixWeight * length / (length + 0.3 * distance)`;
 * - fuzzy: weight is `fuzzyWeight * length / (length + distance)`.
 * Prefix and fuzzy visits receive a lazily resolved derived term.
 */
function visitQuerySpecForScoring(query, normalized, params, visit) {
    const { indexView } = params;
    const { fuzzyWeight, prefixWeight } = normalized;
    forEachQuerySpecTermRef(query, normalized, params, (ref) => {
        const { kind, termIndex } = ref;
        if (kind === 'exact') {
            const exactData = termIndex == null ? undefined : indexView.fieldTermData(termIndex);
            visit(exactData, query.term, 1);
            return;
        }
        const data = indexView.fieldTermData(termIndex);
        const lazyTerm = lazyIndexedTerm(indexView, termIndex);
        const weight = kind === 'prefix'
            ? prefixWeight * ref.length / (ref.length + 0.3 * ref.distance)
            : fuzzyWeight * ref.length / (ref.length + ref.distance);
        visit(data, lazyTerm, weight);
    });
}
|
|
4237
|
+
/**
 * Scores one query spec: every term the spec expands to is aggregated into a fresh result map.
 *
 * @param query - Query spec (`term`, `termBoost`, …).
 * @param normalized - Normalized query carrying `fieldBoosts` and `options`.
 * @param params - Search params; `params.aggregateContext` is forwarded to `aggregateTerm`.
 * @param {Set<number>} [allowedDocs] - When given, passed to `aggregateTerm` as
 *   `{ allowedDocs }`.
 * @returns {Map} The populated result map.
 */
function executeQuerySpecInternal(query, normalized, params, allowedDocs) {
    const { fieldBoosts, options } = normalized;
    const results = new Map();
    const termOptions = allowedDocs == null ? undefined : { allowedDocs };
    const scoreTerm = (data, derivedTerm, termWeight) => {
        aggregateTerm(query.term, derivedTerm, termWeight, query.termBoost, data, fieldBoosts, params.aggregateContext, options.boostDocument, options.bm25, results, termOptions);
    };
    visitQuerySpecForScoring(query, normalized, params, scoreTerm);
    return results;
}
|
|
4232
|
-
function
|
|
4233
|
-
|
|
4234
|
-
|
|
4235
|
-
|
|
4236
|
-
const
|
|
4237
|
-
|
|
4238
|
-
|
|
4239
|
-
|
|
4240
|
-
|
|
4246
|
+
/**
 * Finds the longest posting list a term has among the queried fields.
 *
 * @param data - Per-field posting data of one term (exposes `get(fieldId)`), or null/undefined.
 * @param fieldBoosts - Query field boosts; only `fieldBoosts.names` is read.
 * @param fieldIds - Maps field name to field id.
 * @returns {number} The largest length (`length` for `SegmentPostingList`, `size` otherwise),
 *   or 0 when there is no data or no posting list in any queried field.
 */
function maxPostingLengthForFieldTermData(data, fieldBoosts, fieldIds) {
    if (data == null) {
        return 0;
    }
    let longest = 0;
    for (const field of fieldBoosts.names) {
        const postingList = data.get(fieldIds[field]);
        if (postingList == null) {
            continue;
        }
        const size = postingList instanceof SegmentPostingList
            ? postingList.length
            : postingList.size;
        longest = size > longest ? size : longest;
    }
    return longest;
}
|
|
4261
|
+
function estimateMaxPostingLengthForQuerySpec(query, normalized, params) {
|
|
4262
|
+
const { indexView, aggregateContext } = params;
|
|
4263
|
+
const { fieldBoosts } = normalized;
|
|
4264
|
+
const { fieldIds } = aggregateContext;
|
|
4265
|
+
let maxLen = 0;
|
|
4266
|
+
const consider = (data) => {
|
|
4267
|
+
maxLen = Math.max(maxLen, maxPostingLengthForFieldTermData(data, fieldBoosts, fieldIds));
|
|
4268
|
+
};
|
|
4269
|
+
forEachQuerySpecTermRef(query, normalized, params, (ref) => {
|
|
4270
|
+
if (ref.kind === 'exact') {
|
|
4271
|
+
if (ref.termIndex != null)
|
|
4272
|
+
consider(indexView.fieldTermData(ref.termIndex));
|
|
4273
|
+
return;
|
|
4250
4274
|
}
|
|
4275
|
+
consider(indexView.fieldTermData(ref.termIndex));
|
|
4276
|
+
});
|
|
4277
|
+
return maxLen;
|
|
4278
|
+
}
|
|
4279
|
+
function estimateMaxPostingLengthForQuery(query, searchOptions, params) {
|
|
4280
|
+
if (isWildcardQuery(query)) {
|
|
4281
|
+
return params.aggregateContext.documentCount;
|
|
4251
4282
|
}
|
|
4252
|
-
if (
|
|
4253
|
-
|
|
4254
|
-
|
|
4255
|
-
|
|
4256
|
-
|
|
4283
|
+
if (isQueryCombination(query)) {
|
|
4284
|
+
const options = { ...searchOptions, ...query, queries: undefined };
|
|
4285
|
+
let maxLen = 0;
|
|
4286
|
+
for (const branch of query.queries) {
|
|
4287
|
+
maxLen = Math.max(maxLen, estimateMaxPostingLengthForQuery(branch, options, params));
|
|
4257
4288
|
}
|
|
4289
|
+
return maxLen;
|
|
4258
4290
|
}
|
|
4291
|
+
if (typeof query !== 'string')
|
|
4292
|
+
return 0;
|
|
4293
|
+
const normalized = normalizeStringQuery(query, searchOptions, params);
|
|
4294
|
+
let maxLen = 0;
|
|
4295
|
+
for (const spec of normalized.specs) {
|
|
4296
|
+
maxLen = Math.max(maxLen, estimateMaxPostingLengthForQuerySpec(spec, normalized, params));
|
|
4297
|
+
}
|
|
4298
|
+
return maxLen;
|
|
4299
|
+
}
|
|
4300
|
+
function collectDocIdsForQuerySpec(query, normalized, params, allowedDocs) {
|
|
4301
|
+
const { fieldBoosts } = normalized;
|
|
4302
|
+
const docIds = new Set();
|
|
4303
|
+
const { indexView, aggregateContext } = params;
|
|
4304
|
+
forEachQuerySpecTermRef(query, normalized, params, (ref) => {
|
|
4305
|
+
if (ref.kind === 'exact') {
|
|
4306
|
+
if (ref.termIndex != null) {
|
|
4307
|
+
indexView.collectDocIds(ref.termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
|
|
4308
|
+
}
|
|
4309
|
+
return;
|
|
4310
|
+
}
|
|
4311
|
+
indexView.collectDocIds(ref.termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
|
|
4312
|
+
});
|
|
4259
4313
|
return docIds;
|
|
4260
4314
|
}
|
|
4261
4315
|
function intersectDocIdsInPlace(docIds, branchDocIds) {
|
|
@@ -4305,7 +4359,8 @@ function collectCombinedDocIds(branches, operator, collectBranch, allowedDocs) {
|
|
|
4305
4359
|
* AND_NOT: score the positive branch only; negated branches are collected as docId sets and
|
|
4306
4360
|
* subtracted without scoring (avoids term materialization on excluded branches).
|
|
4307
4361
|
*/
|
|
4308
|
-
function executeCombinedBranches(branches, operator, params, executeBranch, collectBranch, allowedDocs, run) {
|
|
4362
|
+
function executeCombinedBranches(branches, operator, params, executeBranch, collectBranch, allowedDocs, run, estimateBranchPostingLength) {
|
|
4363
|
+
var _a;
|
|
4309
4364
|
if (branches.length === 0)
|
|
4310
4365
|
return new Map();
|
|
4311
4366
|
const op = operator.toLowerCase();
|
|
@@ -4317,8 +4372,16 @@ function executeCombinedBranches(branches, operator, params, executeBranch, coll
|
|
|
4317
4372
|
if (op === 'and') {
|
|
4318
4373
|
const limits = void 0 ;
|
|
4319
4374
|
const documentCount = params.aggregateContext.documentCount;
|
|
4375
|
+
const postingGatePolicy = (_a = void 0 ) !== null && _a !== void 0 ? _a : DEFAULT_POSTING_GATE_POLICY;
|
|
4376
|
+
const maxGateSize = resolveGateMaxSize(documentCount, limits);
|
|
4320
4377
|
for (let i = 1; i < branches.length; i++) {
|
|
4321
|
-
|
|
4378
|
+
if (gate.size === 0)
|
|
4379
|
+
return result;
|
|
4380
|
+
const ratioPath = gate.size > maxGateSize;
|
|
4381
|
+
const postingListLength = ratioPath
|
|
4382
|
+
? estimateBranchPostingLength === null || estimateBranchPostingLength === void 0 ? void 0 : estimateBranchPostingLength(branches[i])
|
|
4383
|
+
: undefined;
|
|
4384
|
+
const selective = gateIsSelectiveEnough(gate.size, documentCount, limits, postingListLength, postingGatePolicy);
|
|
4322
4385
|
const branchAllowed = selective ? gate : allowedDocs;
|
|
4323
4386
|
result = combineResults([result, executeBranch(branches[i], branchAllowed)], AND);
|
|
4324
4387
|
gate = docIdsFromResult(result);
|
|
@@ -4382,14 +4445,15 @@ function collectDocIdsForQueryInternal(query, searchOptions, params, allowedDocs
|
|
|
4382
4445
|
if (typeof query !== 'string') {
|
|
4383
4446
|
throw new Error('FrozenMiniSearch: invalid query');
|
|
4384
4447
|
}
|
|
4385
|
-
const
|
|
4448
|
+
const normalized = normalizeStringQuery(query, searchOptions, params);
|
|
4449
|
+
const { specs, operator } = normalized;
|
|
4386
4450
|
const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
|
|
4387
4451
|
if (specs.length <= 1) {
|
|
4388
4452
|
return specs.length === 1
|
|
4389
|
-
? collectDocIdsForQuerySpec(specs[0],
|
|
4453
|
+
? collectDocIdsForQuerySpec(specs[0], normalized, params, allowedDocs)
|
|
4390
4454
|
: new Set();
|
|
4391
4455
|
}
|
|
4392
|
-
return collectCombinedDocIds(specs, combineWith, (spec, branchAllowed) => collectDocIdsForQuerySpec(spec,
|
|
4456
|
+
return collectCombinedDocIds(specs, combineWith, (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs);
|
|
4393
4457
|
}
|
|
4394
4458
|
function executeWildcardQuery(searchOptions, params) {
|
|
4395
4459
|
const results = new Map();
|
|
@@ -4411,7 +4475,7 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
|
|
|
4411
4475
|
const options = { ...searchOptions, ...query, queries: undefined };
|
|
4412
4476
|
const operator = ((_b = (_a = query.combineWith) !== null && _a !== void 0 ? _a : options.combineWith) !== null && _b !== void 0 ? _b : params.globalSearchOptions.combineWith);
|
|
4413
4477
|
if (useGatedEvaluation(run, query.queries.length, operator, combinationHasWildcard(query))) {
|
|
4414
|
-
return executeCombinedBranches(query.queries, operator, params, (branch, branchAllowed) => executeQueryInternal(branch, options, params, branchAllowed, run), (branch, branchAllowed) => collectDocIdsForQueryInternal(branch, options, params, branchAllowed), allowedDocs);
|
|
4478
|
+
return executeCombinedBranches(query.queries, operator, params, (branch, branchAllowed) => executeQueryInternal(branch, options, params, branchAllowed, run), (branch, branchAllowed) => collectDocIdsForQueryInternal(branch, options, params, branchAllowed), allowedDocs, run, branch => estimateMaxPostingLengthForQuery(branch, options, params));
|
|
4415
4479
|
}
|
|
4416
4480
|
const results = query.queries.map(subquery => executeQueryInternal(subquery, options, params, allowedDocs, run));
|
|
4417
4481
|
return combineResults(results, operator);
|
|
@@ -4419,12 +4483,13 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
|
|
|
4419
4483
|
if (typeof query !== 'string') {
|
|
4420
4484
|
throw new Error('FrozenMiniSearch: invalid query');
|
|
4421
4485
|
}
|
|
4422
|
-
const
|
|
4486
|
+
const normalized = normalizeStringQuery(query, searchOptions, params);
|
|
4487
|
+
const { specs, operator } = normalized;
|
|
4423
4488
|
const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
|
|
4424
4489
|
if (useGatedEvaluation(run, specs.length, combineWith, false)) {
|
|
4425
|
-
return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec,
|
|
4490
|
+
return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec, normalized, params, branchAllowed), (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs, run, spec => estimateMaxPostingLengthForQuerySpec(spec, normalized, params));
|
|
4426
4491
|
}
|
|
4427
|
-
const results = specs.map(spec => executeQuerySpecInternal(spec,
|
|
4492
|
+
const results = specs.map(spec => executeQuerySpecInternal(spec, normalized, params, allowedDocs));
|
|
4428
4493
|
return combineResults(results, combineWith);
|
|
4429
4494
|
}
|
|
4430
4495
|
function executeQuery(query, searchOptions, params) {
|
|
@@ -4615,6 +4680,7 @@ function materializeOwnedSnapshot(params, mode) {
|
|
|
4615
4680
|
function frozenMemoryBreakdown(frozen) {
|
|
4616
4681
|
return frozen.memoryBreakdown();
|
|
4617
4682
|
}
|
|
4683
|
+
const noStoredFields = () => undefined;
|
|
4618
4684
|
function assertFieldsMatchSnapshot(optionsFields, snapFieldIds) {
|
|
4619
4685
|
const snapNames = Object.keys(snapFieldIds).sort();
|
|
4620
4686
|
const optNames = [...optionsFields].sort();
|
|
@@ -4668,24 +4734,31 @@ class FrozenMiniSearch {
|
|
|
4668
4734
|
this._termCount = params.termCount;
|
|
4669
4735
|
this._postings = params.postings;
|
|
4670
4736
|
this._fieldTermFlyweight = createFrozenFieldTermFlyweight(this._postings);
|
|
4737
|
+
this._hasStoredFields = this._storedFields.kind !== 'none';
|
|
4671
4738
|
this._aggregateContext = {
|
|
4672
4739
|
documentCount: this._documentCount,
|
|
4673
4740
|
avgFieldLength: this._avgFieldLength,
|
|
4674
4741
|
fieldIds: this._fieldIds,
|
|
4675
4742
|
getFieldLength: (docId, fieldId) => this.getFieldLength(docId, fieldId),
|
|
4676
4743
|
getExternalId: docId => this._externalIds[docId],
|
|
4677
|
-
getStoredFields:
|
|
4744
|
+
getStoredFields: this._hasStoredFields
|
|
4745
|
+
? docId => readStoredFields(this._storedFields, docId)
|
|
4746
|
+
: noStoredFields,
|
|
4678
4747
|
};
|
|
4679
4748
|
this._queryEngineParams = {
|
|
4680
4749
|
fields: this._options.fields,
|
|
4681
4750
|
globalSearchOptions: this._options.searchOptions,
|
|
4682
4751
|
tokenize: this._options.tokenize,
|
|
4683
4752
|
processTerm: this._options.processTerm,
|
|
4684
|
-
indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight,
|
|
4685
|
-
|
|
4686
|
-
|
|
4687
|
-
|
|
4688
|
-
|
|
4753
|
+
indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight, this._hasStoredFields
|
|
4754
|
+
? (callback) => {
|
|
4755
|
+
forEachLiveShortId(this._nextId, this._externalIds, (shortId, id) => {
|
|
4756
|
+
callback(shortId, id, readStoredFields(this._storedFields, shortId));
|
|
4757
|
+
});
|
|
4758
|
+
}
|
|
4759
|
+
: (callback) => {
|
|
4760
|
+
forEachLiveShortId(this._nextId, this._externalIds, callback);
|
|
4761
|
+
}),
|
|
4689
4762
|
aggregateContext: this._aggregateContext,
|
|
4690
4763
|
};
|
|
4691
4764
|
}
|
|
@@ -4742,7 +4815,9 @@ class FrozenMiniSearch {
|
|
|
4742
4815
|
return shortId == null ? undefined : readStoredFields(this._storedFields, shortId);
|
|
4743
4816
|
}
|
|
4744
4817
|
search(query, searchOptions = {}) {
|
|
4745
|
-
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId],
|
|
4818
|
+
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], this._hasStoredFields
|
|
4819
|
+
? docId => readStoredFields(this._storedFields, docId)
|
|
4820
|
+
: undefined);
|
|
4746
4821
|
}
|
|
4747
4822
|
autoSuggest(queryString, options = {}) {
|
|
4748
4823
|
const merged = { ...this._options.autoSuggestOptions, ...options };
|