@yoch/frozenminisearch 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/README.md +12 -13
- package/dist/cjs/index.cjs +356 -281
- package/dist/es/index.d.ts +1 -0
- package/dist/es/index.js +356 -281
- package/package.json +2 -1
package/dist/cjs/index.cjs
CHANGED
|
@@ -4,10 +4,67 @@ Object.defineProperty(exports, '__esModule', { value: true });
|
|
|
4
4
|
|
|
5
5
|
var zlib = require('node:zlib');
|
|
6
6
|
|
|
7
|
+
/**
 * Internal thresholds for AND / AND_NOT gating (kept out of the public package entry).
 */
const DEFAULT_POSTING_GATE_MIN_LENGTH = 2048;
const DEFAULT_POSTING_GATE_RATIO_SHIFT = 2;
const DEFAULT_POSTING_GATE_POLICY = {
    minLength: DEFAULT_POSTING_GATE_MIN_LENGTH,
    ratioShift: DEFAULT_POSTING_GATE_RATIO_SHIFT,
};
/**
 * Posting-ratio test: the list must reach `policy.minLength`, and the gate must be
 * no larger than the list length shifted right by `policy.ratioShift`.
 */
function passGateByPostingRatio(gateSize, postingListLength, policy = DEFAULT_POSTING_GATE_POLICY) {
    const { minLength, ratioShift } = policy;
    return postingListLength < minLength
        ? false
        : gateSize <= (postingListLength >>> ratioShift);
}
const DEFAULT_AND_GATE_LIMITS = {
    maxAbsolute: 5000,
    maxFraction: 0.1,
};
/**
 * Largest gate size accepted outright: a fraction of the document count,
 * never below 100 and never above `limits.maxAbsolute`.
 */
function resolveGateMaxSize(documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
    const proportional = Math.floor(documentCount * limits.maxFraction);
    const floored = Math.max(100, proportional);
    return Math.min(limits.maxAbsolute, floored);
}
/**
 * Decides whether a gate is selective enough: an empty gate or one within the
 * size limit always qualifies; otherwise a positive `postingListLength` may
 * still qualify it through the posting-ratio test.
 */
function gateIsSelectiveEnough(gateSize, documentCount, limits = DEFAULT_AND_GATE_LIMITS, postingListLength, postingGatePolicy = DEFAULT_POSTING_GATE_POLICY) {
    if (gateSize === 0 || gateSize <= resolveGateMaxSize(documentCount, limits)) {
        return true;
    }
    if (postingListLength == null || !(postingListLength > 0)) {
        return false;
    }
    return passGateByPostingRatio(gateSize, postingListLength, postingGatePolicy);
}
|
|
40
|
+
|
|
7
41
|
const MAX_FREQ = 65535;
/** Returns the document id stored at position `index` of `docIds`. */
function readDocId(docIds, index) {
    const docId = docIds[index];
    return docId;
}
|
|
45
|
+
/**
 * Binary search for `docId` inside the sorted window `[offset, offset + length)`
 * of `docIds`. Returns the global index of the match, or -1 when absent.
 */
function findDocIndexInSortedSegment(docIds, offset, length, docId) {
    for (let low = 0, high = length - 1; low <= high;) {
        const middle = (low + high) >>> 1;
        const candidate = readDocId(docIds, offset + middle);
        if (candidate < docId) {
            low = middle + 1;
            continue;
        }
        if (candidate > docId) {
            high = middle - 1;
            continue;
        }
        return offset + middle;
    }
    return -1;
}
|
|
61
|
+
/**
 * Scoring-layer choice, made once `allowedDocs` is already in effect, between
 * scanning a posting list and binary-searching it per allowed document.
 * Currently shares the numeric policy of {@link passGateByPostingRatio}, but
 * remains a separate decision point.
 */
function shouldSeekAllowedDocs(gateSize, listLength) {
    const seek = passGateByPostingRatio(gateSize, listLength);
    return seek;
}
|
|
11
68
|
function allocateFreqs(length, maxValue) {
|
|
12
69
|
if (maxValue <= 0xff)
|
|
13
70
|
return new Uint8Array(length);
|
|
@@ -69,10 +126,15 @@ function bm25FieldConstants(bm25params, avgFieldLength) {
|
|
|
69
126
|
const { k, b, d } = bm25params;
|
|
70
127
|
return { k, d, k1: k + 1, oneMinusB: 1 - b, bOverAvg: b / avgFieldLength };
|
|
71
128
|
}
|
|
72
|
-
function
|
|
129
|
+
/**
 * Inverse-document-frequency factor used by the BM25 score:
 * log(1 + (total - matching + 0.5) / (matching + 0.5)).
 */
function bm25Idf(matchingCount, totalCount) {
    const absent = totalCount - matchingCount + 0.5;
    const present = matchingCount + 0.5;
    return Math.log(1 + absent / present);
}
|
|
132
|
+
/**
 * Term-frequency part of the BM25 score multiplied by an already computed `idf`.
 * `constants` carries the per-field values { k, d, k1, oneMinusB, bOverAvg }.
 */
function calcBm25TfWithConstants(termFreq, fieldLength, constants, idf) {
    const lengthNorm = constants.oneMinusB + constants.bOverAvg * fieldLength;
    const saturation = (termFreq * constants.k1) / (termFreq + constants.k * lengthNorm);
    return idf * (constants.d + saturation);
}
|
|
136
|
+
/**
 * Full BM25 score: derives the IDF from the matching/total counts and feeds it
 * to the term-frequency computation.
 */
function calcBM25ScoreWithConstants(termFreq, matchingCount, totalCount, fieldLength, constants) {
    const idf = bm25Idf(matchingCount, totalCount);
    return calcBm25TfWithConstants(termFreq, fieldLength, constants, idf);
}
|
|
77
139
|
/** Reads `property` from `object` only when it is an own property; otherwise yields undefined. */
const getOwnProperty = (object, property) => {
    const owns = Object.prototype.hasOwnProperty.call(object, property);
    return owns ? object[property] : undefined;
};
|
|
78
140
|
function fieldBoostsForQuery(options, fields) {
|
|
@@ -101,7 +163,7 @@ function getDerivedTerm(derivedTerm, cache) {
|
|
|
101
163
|
cache.value = derivedTerm.resolve();
|
|
102
164
|
return cache.value;
|
|
103
165
|
}
|
|
104
|
-
function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache) {
|
|
166
|
+
function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf) {
|
|
105
167
|
const resolvedDerivedTerm = getDerivedTerm(derivedTerm, derivedTermCache);
|
|
106
168
|
const docBoost = boostDocumentFn
|
|
107
169
|
? boostDocumentFn(context.getExternalId(docId), resolvedDerivedTerm, context.getStoredFields(docId))
|
|
@@ -109,7 +171,9 @@ function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFre
|
|
|
109
171
|
if (!docBoost)
|
|
110
172
|
return;
|
|
111
173
|
const fieldLength = context.getFieldLength(docId, fieldId);
|
|
112
|
-
const rawScore =
|
|
174
|
+
const rawScore = hoistedIdf !== undefined
|
|
175
|
+
? calcBm25TfWithConstants(termFreq, fieldLength, bm25, hoistedIdf)
|
|
176
|
+
: calcBM25ScoreWithConstants(termFreq, matchingFields, context.documentCount, fieldLength, bm25);
|
|
113
177
|
const weightedScore = termWeight * termBoost * fieldBoost * docBoost * rawScore;
|
|
114
178
|
const result = results.get(docId);
|
|
115
179
|
if (result) {
|
|
@@ -132,22 +196,39 @@ function scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFre
|
|
|
132
196
|
}
|
|
133
197
|
}
|
|
134
198
|
/**
 * Scores the documents of one segment posting list (`list` = { docIds, freqs,
 * offset, length }) for a single term/field pair, accumulating into `results`.
 *
 * When `allowedDocs` is given and `shouldSeekAllowedDocs` says so, the allowed
 * documents are looked up by binary search instead of scanning the whole list.
 * Documents rejected by `context.isDocActive` are reported through
 * `context.onInactiveDoc` (when present) and subtracted from the match count.
 *
 * @returns the number of matching documents left after removing the inactive
 *          ones that were encountered.
 */
function aggregateSegmentPostingList(sourceTerm, derivedTerm, termWeight, termBoost, field, fieldId, fieldBoost, list, context, boostDocumentFn, bm25params, results, allowedDocs) {
    let matchingFields = list.length;
    const bm25 = bm25FieldConstants(bm25params, context.avgFieldLength[fieldId]);
    // The IDF is only stable when no document can turn out inactive during the walk.
    const hoistedIdf = context.isDocActive == null
        ? bm25Idf(matchingFields, context.documentCount)
        : undefined;
    const { docIds, freqs, offset, length } = list;
    const derivedTermCache = {};
    const isInactive = (docId) => context.isDocActive != null && !context.isDocActive(docId);
    const reportInactive = (docId) => {
        if (context.onInactiveDoc != null) {
            context.onInactiveDoc(docId, fieldId, getDerivedTerm(derivedTerm, derivedTermCache));
        }
    };
    if (allowedDocs != null && shouldSeekAllowedDocs(allowedDocs.size, length)) {
        for (const docId of allowedDocs) {
            // Look the document up first: an allowed document that is not part of
            // this posting list must neither trigger onInactiveDoc nor lower the
            // match count, exactly as in the scan path below.
            const index = findDocIndexInSortedSegment(docIds, offset, length, docId);
            if (index < 0)
                continue;
            if (isInactive(docId)) {
                reportInactive(docId);
                matchingFields -= 1;
                continue;
            }
            scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, freqs[index], termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
        }
        return matchingFields;
    }
    for (let i = 0; i < length; i++) {
        const docId = readDocId(docIds, offset + i);
        const termFreq = freqs[offset + i];
        if (isInactive(docId)) {
            reportInactive(docId);
            matchingFields -= 1;
            continue;
        }
        if (allowedDocs != null && !allowedDocs.has(docId))
            continue;
        scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
    }
    return matchingFields;
}
|
|
@@ -167,6 +248,9 @@ function aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTerm
|
|
|
167
248
|
}
|
|
168
249
|
let matchingFields = postingList.size;
|
|
169
250
|
const bm25 = bm25FieldConstants(bm25params, context.avgFieldLength[fieldId]);
|
|
251
|
+
const hoistedIdf = context.isDocActive == null
|
|
252
|
+
? bm25Idf(matchingFields, context.documentCount)
|
|
253
|
+
: undefined;
|
|
170
254
|
const derivedTermCache = {};
|
|
171
255
|
postingList.forEachDoc((docId, termFreq) => {
|
|
172
256
|
var _a;
|
|
@@ -177,7 +261,7 @@ function aggregateTerm(sourceTerm, derivedTerm, termWeight, termBoost, fieldTerm
|
|
|
177
261
|
}
|
|
178
262
|
if (allowedDocs != null && !allowedDocs.has(docId))
|
|
179
263
|
return;
|
|
180
|
-
scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache);
|
|
264
|
+
scorePostingDoc(sourceTerm, derivedTerm, field, fieldId, docId, termFreq, termWeight, termBoost, fieldBoost, matchingFields, context, boostDocumentFn, bm25, results, derivedTermCache, hoistedIdf);
|
|
181
265
|
});
|
|
182
266
|
}
|
|
183
267
|
return results;
|
|
@@ -260,7 +344,9 @@ function finalizeSearchResults(params) {
|
|
|
260
344
|
queryTerms: terms,
|
|
261
345
|
match,
|
|
262
346
|
};
|
|
263
|
-
|
|
347
|
+
if (getStoredFields != null) {
|
|
348
|
+
Object.assign(result, getStoredFields(docId));
|
|
349
|
+
}
|
|
264
350
|
if (filter == null || filter(result)) {
|
|
265
351
|
results.push(result);
|
|
266
352
|
}
|
|
@@ -1744,82 +1830,47 @@ function readFieldLengthMatrixSection(buf, flags, cellCount) {
|
|
|
1744
1830
|
|
|
1745
1831
|
const DISCARDED_DOC_ID = 0xffffffff;
|
|
1746
1832
|
function postingFreqValue(freq, clampFrequencies) {
|
|
1747
|
-
return
|
|
1748
|
-
}
|
|
1749
|
-
function materializeFlatPostings(params) {
|
|
1750
|
-
const { fieldCount, termCount, forEachPosting, remapDocId, clampFrequencies } = params;
|
|
1751
|
-
const slotCount = termCount * fieldCount;
|
|
1752
|
-
const postingsOffsets = new Uint32Array(slotCount);
|
|
1753
|
-
const postingsLengths = new Uint32Array(slotCount);
|
|
1754
|
-
let totalPostings = 0;
|
|
1755
|
-
let maxFreq = 0;
|
|
1756
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
1757
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
1758
|
-
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1759
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1760
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1761
|
-
return;
|
|
1762
|
-
totalPostings++;
|
|
1763
|
-
const v = postingFreqValue(freq, clampFrequencies);
|
|
1764
|
-
if (v > maxFreq)
|
|
1765
|
-
maxFreq = v;
|
|
1766
|
-
});
|
|
1767
|
-
}
|
|
1768
|
-
}
|
|
1769
|
-
const useUint16 = params.nextId != null && params.nextId <= 65535;
|
|
1770
|
-
const allDocIds = useUint16
|
|
1771
|
-
? new Uint16Array(totalPostings)
|
|
1772
|
-
: new Uint32Array(totalPostings);
|
|
1773
|
-
const allFreqs = allocateFreqs(totalPostings, maxFreq);
|
|
1774
|
-
// Slots are visited in ascending fieldId (0..fieldCount-1) per term. Sparse layouts
|
|
1775
|
-
// rely on this ordering so field ids per term stay sorted for binary lookup.
|
|
1776
|
-
let write = 0;
|
|
1777
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
1778
|
-
const base = ti * fieldCount;
|
|
1779
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
1780
|
-
const offset = write;
|
|
1781
|
-
let count = 0;
|
|
1782
|
-
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1783
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1784
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1785
|
-
return;
|
|
1786
|
-
if (useUint16) {
|
|
1787
|
-
allDocIds[write] = docId;
|
|
1788
|
-
}
|
|
1789
|
-
else {
|
|
1790
|
-
allDocIds[write] = docId;
|
|
1791
|
-
}
|
|
1792
|
-
allFreqs[write] = postingFreqValue(freq, clampFrequencies);
|
|
1793
|
-
write++;
|
|
1794
|
-
count++;
|
|
1795
|
-
});
|
|
1796
|
-
postingsOffsets[base + f] = offset;
|
|
1797
|
-
postingsLengths[base + f] = count;
|
|
1798
|
-
}
|
|
1799
|
-
}
|
|
1800
|
-
return {
|
|
1801
|
-
postingsOffsets,
|
|
1802
|
-
postingsLengths,
|
|
1803
|
-
allDocIds,
|
|
1804
|
-
allFreqs,
|
|
1805
|
-
};
|
|
1833
|
+
return clampFreq(freq) ;
|
|
1806
1834
|
}
|
|
1807
1835
|
|
|
1808
1836
|
/** Returns the field id stored at position `index` of `fieldIds`. */
function readFieldId(fieldIds, index) {
    const fieldId = fieldIds[index];
    return fieldId;
}
|
|
1811
|
-
function choosePostingsLayout(fieldCount) {
|
|
1812
|
-
return fieldCount === 1 ? 'dense' : 'sparse';
|
|
1813
|
-
}
|
|
1814
1839
|
/** Bit width for sparse field ids: 16 once `fieldCount` exceeds 255, otherwise 8. */
function chooseSparseFieldIdWidth(fieldCount) {
    if (fieldCount > 255) {
        return 16;
    }
    return 8;
}
|
|
1817
|
-
function
|
|
1818
|
-
const
|
|
1819
|
-
const
|
|
1842
|
+
/**
 * Picks 'dense' or 'sparse' by comparing estimated byte costs:
 * dense  = 8 bytes per (term, field) slot;
 * sparse = 4 bytes per term start (termCount + 1 entries) plus, per non-empty
 *          slot, the field id bytes and 8 more bytes.
 * Ties go to 'dense'.
 */
function choosePostingsLayout(fieldCount, termCount, nonEmptySlots) {
    const bytesPerFieldId = chooseSparseFieldIdWidth(fieldCount) === 16 ? 2 : 1;
    const denseCost = termCount * fieldCount * 8;
    const sparseCost = (termCount + 1) * 4 + nonEmptySlots * (bytesPerFieldId + 8);
    if (denseCost <= sparseCost) {
        return 'dense';
    }
    return 'sparse';
}
|
|
1848
|
+
/** Shared dense/sparse layout emission; callers supply per-slot length and copy. */
|
|
1849
|
+
function buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, source) {
|
|
1850
|
+
const layout = choosePostingsLayout(fieldCount, termCount, source.nonEmptySlots);
|
|
1820
1851
|
const docIdWidth = nextId <= 65535 ? 16 : 32;
|
|
1852
|
+
const allDocIds = docIdWidth === 16
|
|
1853
|
+
? new Uint16Array(totalPostings)
|
|
1854
|
+
: new Uint32Array(totalPostings);
|
|
1855
|
+
const allFreqs = allocateFreqs(totalPostings, maxFreq);
|
|
1856
|
+
const targets = { allDocIds, allFreqs, docIdWidth };
|
|
1821
1857
|
if (layout === 'dense') {
|
|
1822
|
-
const
|
|
1858
|
+
const slotCount = termCount * fieldCount;
|
|
1859
|
+
const denseOffsets = new Uint32Array(slotCount);
|
|
1860
|
+
const denseLengths = new Uint32Array(slotCount);
|
|
1861
|
+
let write = 0;
|
|
1862
|
+
for (let ti = 0; ti < termCount; ti++) {
|
|
1863
|
+
const base = ti * fieldCount;
|
|
1864
|
+
for (let f = 0; f < fieldCount; f++) {
|
|
1865
|
+
const slot = base + f;
|
|
1866
|
+
const len = source.slotLength(ti, f);
|
|
1867
|
+
denseOffsets[slot] = write;
|
|
1868
|
+
denseLengths[slot] = len;
|
|
1869
|
+
if (len > 0) {
|
|
1870
|
+
write = source.writeSlot(ti, f, write, targets);
|
|
1871
|
+
}
|
|
1872
|
+
}
|
|
1873
|
+
}
|
|
1823
1874
|
return {
|
|
1824
1875
|
fieldCount,
|
|
1825
1876
|
termCount,
|
|
@@ -1827,10 +1878,10 @@ function materializeFrozenPostings(params) {
|
|
|
1827
1878
|
layout,
|
|
1828
1879
|
docIdWidth,
|
|
1829
1880
|
sparseFieldIdWidth: null,
|
|
1830
|
-
allDocIds
|
|
1831
|
-
allFreqs
|
|
1832
|
-
denseOffsets
|
|
1833
|
-
denseLengths
|
|
1881
|
+
allDocIds,
|
|
1882
|
+
allFreqs,
|
|
1883
|
+
denseOffsets,
|
|
1884
|
+
denseLengths,
|
|
1834
1885
|
sparseTermStarts: null,
|
|
1835
1886
|
sparseFieldIds: null,
|
|
1836
1887
|
sparseOffsets: null,
|
|
@@ -1842,60 +1893,23 @@ function materializeFrozenPostings(params) {
|
|
|
1842
1893
|
const sparseOffsets = [];
|
|
1843
1894
|
const sparseLengths = [];
|
|
1844
1895
|
const termStarts = new Array(termCount + 1).fill(0);
|
|
1845
|
-
|
|
1846
|
-
// Non-empty slots per term are emitted with fieldId in ascending order (f loops 0..fieldCount-1).
|
|
1847
|
-
let totalPostings = 0;
|
|
1848
|
-
let maxFreq = 0;
|
|
1896
|
+
let write = 0;
|
|
1849
1897
|
for (let ti = 0; ti < termCount; ti++) {
|
|
1850
1898
|
termStarts[ti] = sparseFieldIdsScratch.length;
|
|
1851
1899
|
for (let f = 0; f < fieldCount; f++) {
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1855
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1856
|
-
return;
|
|
1857
|
-
count++;
|
|
1858
|
-
const v = postingFreqValue(freq, clampFrequencies);
|
|
1859
|
-
if (v > maxFreq)
|
|
1860
|
-
maxFreq = v;
|
|
1861
|
-
});
|
|
1862
|
-
if (count === 0)
|
|
1900
|
+
const len = source.slotLength(ti, f);
|
|
1901
|
+
if (len === 0)
|
|
1863
1902
|
continue;
|
|
1864
1903
|
sparseFieldIdsScratch.push(f);
|
|
1865
|
-
sparseOffsets.push(
|
|
1866
|
-
sparseLengths.push(
|
|
1867
|
-
|
|
1904
|
+
sparseOffsets.push(write);
|
|
1905
|
+
sparseLengths.push(len);
|
|
1906
|
+
write = source.writeSlot(ti, f, write, targets);
|
|
1868
1907
|
}
|
|
1869
1908
|
termStarts[ti + 1] = sparseFieldIdsScratch.length;
|
|
1870
1909
|
}
|
|
1871
|
-
const allDocIds = docIdWidth === 16
|
|
1872
|
-
? new Uint16Array(totalPostings)
|
|
1873
|
-
: new Uint32Array(totalPostings);
|
|
1874
|
-
const allFreqs = allocateFreqs(totalPostings, maxFreq);
|
|
1875
1910
|
const sparseFieldIds = sparseFieldIdWidth === 16
|
|
1876
1911
|
? new Uint16Array(sparseFieldIdsScratch)
|
|
1877
1912
|
: new Uint8Array(sparseFieldIdsScratch);
|
|
1878
|
-
let write = 0;
|
|
1879
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
1880
|
-
const start = termStarts[ti];
|
|
1881
|
-
const end = termStarts[ti + 1];
|
|
1882
|
-
for (let s = start; s < end; s++) {
|
|
1883
|
-
const f = readFieldId(sparseFieldIds, s);
|
|
1884
|
-
forEachPosting(ti, f, (rawDocId, freq) => {
|
|
1885
|
-
const docId = remapDocId != null ? remapDocId(rawDocId) : rawDocId;
|
|
1886
|
-
if (docId === DISCARDED_DOC_ID)
|
|
1887
|
-
return;
|
|
1888
|
-
if (docIdWidth === 16) {
|
|
1889
|
-
allDocIds[write] = docId;
|
|
1890
|
-
}
|
|
1891
|
-
else {
|
|
1892
|
-
allDocIds[write] = docId;
|
|
1893
|
-
}
|
|
1894
|
-
allFreqs[write] = postingFreqValue(freq, clampFrequencies);
|
|
1895
|
-
write++;
|
|
1896
|
-
});
|
|
1897
|
-
}
|
|
1898
|
-
}
|
|
1899
1913
|
return {
|
|
1900
1914
|
fieldCount,
|
|
1901
1915
|
termCount,
|
|
@@ -1913,6 +1927,58 @@ function materializeFrozenPostings(params) {
|
|
|
1913
1927
|
sparseLengths: new Uint32Array(sparseLengths),
|
|
1914
1928
|
};
|
|
1915
1929
|
}
|
|
1930
|
+
/**
 * Builds the frozen postings layout from a posting enumerator.
 *
 * First pass: walks every (term, field) slot through `params.forEachPosting`,
 * drops postings whose (optionally remapped) doc id equals DISCARDED_DOC_ID,
 * and records per-slot lengths, the total posting count, the largest stored
 * frequency and the number of non-empty slots.
 * Second pass: delegated to `buildFrozenPostingsLayout`, which calls back into
 * `slotLength` / `writeSlot` to copy the surviving postings into its targets.
 */
function materializeFrozenPostings(params) {
    const { fieldCount, termCount, nextId, forEachPosting, remapDocId } = params;
    const resolveDocId = (rawDocId) => (remapDocId != null ? remapDocId(rawDocId) : rawDocId);
    const slotLengths = new Uint32Array(termCount * fieldCount);
    let totalPostings = 0;
    let maxFreq = 0;
    let nonEmptySlots = 0;
    for (let ti = 0; ti < termCount; ti++) {
        const rowStart = ti * fieldCount;
        for (let f = 0; f < fieldCount; f++) {
            let kept = 0;
            forEachPosting(ti, f, (rawDocId, freq) => {
                if (resolveDocId(rawDocId) === DISCARDED_DOC_ID)
                    return;
                kept++;
                const stored = postingFreqValue(freq);
                if (stored > maxFreq)
                    maxFreq = stored;
            });
            if (kept > 0) {
                slotLengths[rowStart + f] = kept;
                totalPostings += kept;
                nonEmptySlots++;
            }
        }
    }
    const source = {
        nonEmptySlots,
        slotLength(ti, f) {
            return slotLengths[ti * fieldCount + f];
        },
        writeSlot(ti, f, write, targets) {
            const { allDocIds: outDocIds, allFreqs: outFreqs } = targets;
            let cursor = write;
            forEachPosting(ti, f, (rawDocId, freq) => {
                const docId = resolveDocId(rawDocId);
                if (docId === DISCARDED_DOC_ID)
                    return;
                // The same store serves both doc-id widths; the typed array narrows the value.
                outDocIds[cursor] = docId;
                outFreqs[cursor] = postingFreqValue(freq);
                cursor++;
            });
            return cursor;
        },
    };
    return buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, source);
}
|
|
1916
1982
|
function postingsTypedBytes(layout) {
|
|
1917
1983
|
const allDocIdsBytes = layout.allDocIds.byteLength;
|
|
1918
1984
|
const allFreqsBytes = layout.allFreqs.byteLength;
|
|
@@ -2675,7 +2741,6 @@ function buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, nextId, s
|
|
|
2675
2741
|
fieldCount,
|
|
2676
2742
|
termCount,
|
|
2677
2743
|
nextId,
|
|
2678
|
-
clampFrequencies: true,
|
|
2679
2744
|
remapDocId,
|
|
2680
2745
|
forEachPosting(ti, f, emit) {
|
|
2681
2746
|
var _a;
|
|
@@ -2919,8 +2984,8 @@ async function zlibPayloadChoiceAsync(uncompressed) {
|
|
|
2919
2984
|
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2920
2985
|
}
|
|
2921
2986
|
/** Synchronous compressors keyed by codec name; each maps an uncompressed buffer to its compressed form. */
const autoSyncCompressors = {
    zstd: (uncompressed) => {
        const options = msv5ZstdCompressOptions(uncompressed);
        return zlib.zstdCompressSync(uncompressed, options);
    },
    zlib: (uncompressed) => {
        return zlib.deflateSync(uncompressed);
    },
};
|
|
2925
2990
|
const autoAsyncCompressors = {
|
|
2926
2991
|
zstd: zstdCompressAsync,
|
|
@@ -3863,93 +3928,23 @@ class IncrementalPostingsAccumulator {
|
|
|
3863
3928
|
const totalPostings = this._totalPostings;
|
|
3864
3929
|
const maxFreq = this._maxFreq;
|
|
3865
3930
|
const slots = this._slots;
|
|
3866
|
-
const layout =
|
|
3867
|
-
|
|
3868
|
-
|
|
3869
|
-
|
|
3870
|
-
|
|
3871
|
-
|
|
3872
|
-
|
|
3873
|
-
const slotCount = termCount * fieldCount;
|
|
3874
|
-
const denseOffsets = new Uint32Array(slotCount);
|
|
3875
|
-
const denseLengths = new Uint32Array(slotCount);
|
|
3876
|
-
let write = 0;
|
|
3877
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
3878
|
-
const base = ti * fieldCount;
|
|
3879
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
3880
|
-
const slot = base + f;
|
|
3881
|
-
const ranges = slots.get(slot);
|
|
3882
|
-
const len = ranges == null ? 0 : this.slotLength(ranges);
|
|
3883
|
-
denseOffsets[slot] = write;
|
|
3884
|
-
denseLengths[slot] = len;
|
|
3885
|
-
if (len > 0) {
|
|
3886
|
-
write = this.copySlot(ranges, allDocIds, allFreqs, write, docIdWidth);
|
|
3887
|
-
slots.delete(slot);
|
|
3888
|
-
}
|
|
3889
|
-
}
|
|
3890
|
-
}
|
|
3891
|
-
slots.clear();
|
|
3892
|
-
this.clear();
|
|
3893
|
-
return {
|
|
3894
|
-
fieldCount,
|
|
3895
|
-
termCount,
|
|
3896
|
-
nextId,
|
|
3897
|
-
layout,
|
|
3898
|
-
docIdWidth,
|
|
3899
|
-
sparseFieldIdWidth: null,
|
|
3900
|
-
allDocIds,
|
|
3901
|
-
allFreqs,
|
|
3902
|
-
denseOffsets,
|
|
3903
|
-
denseLengths,
|
|
3904
|
-
sparseTermStarts: null,
|
|
3905
|
-
sparseFieldIds: null,
|
|
3906
|
-
sparseOffsets: null,
|
|
3907
|
-
sparseLengths: null,
|
|
3908
|
-
};
|
|
3909
|
-
}
|
|
3910
|
-
const sparseFieldIdWidth = chooseSparseFieldIdWidth(fieldCount);
|
|
3911
|
-
const sparseFieldIdsScratch = [];
|
|
3912
|
-
const sparseOffsets = [];
|
|
3913
|
-
const sparseLengths = [];
|
|
3914
|
-
const termStarts = new Array(termCount + 1).fill(0);
|
|
3915
|
-
let write = 0;
|
|
3916
|
-
for (let ti = 0; ti < termCount; ti++) {
|
|
3917
|
-
termStarts[ti] = sparseFieldIdsScratch.length;
|
|
3918
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
3931
|
+
const layout = buildFrozenPostingsLayout(fieldCount, termCount, nextId, totalPostings, maxFreq, {
|
|
3932
|
+
nonEmptySlots: slots.size,
|
|
3933
|
+
slotLength: (ti, f) => {
|
|
3934
|
+
const ranges = slots.get(ti * fieldCount + f);
|
|
3935
|
+
return ranges == null ? 0 : this.slotLength(ranges);
|
|
3936
|
+
},
|
|
3937
|
+
writeSlot: (ti, f, write, targets) => {
|
|
3919
3938
|
const slot = ti * fieldCount + f;
|
|
3920
3939
|
const ranges = slots.get(slot);
|
|
3921
|
-
const
|
|
3922
|
-
if (len === 0)
|
|
3923
|
-
continue;
|
|
3924
|
-
sparseFieldIdsScratch.push(f);
|
|
3925
|
-
sparseOffsets.push(write);
|
|
3926
|
-
sparseLengths.push(len);
|
|
3927
|
-
write = this.copySlot(ranges, allDocIds, allFreqs, write, docIdWidth);
|
|
3940
|
+
const next = this.copySlot(ranges, targets.allDocIds, targets.allFreqs, write, targets.docIdWidth);
|
|
3928
3941
|
slots.delete(slot);
|
|
3929
|
-
|
|
3930
|
-
|
|
3931
|
-
}
|
|
3942
|
+
return next;
|
|
3943
|
+
},
|
|
3944
|
+
});
|
|
3932
3945
|
slots.clear();
|
|
3933
3946
|
this.clear();
|
|
3934
|
-
|
|
3935
|
-
? new Uint16Array(sparseFieldIdsScratch)
|
|
3936
|
-
: new Uint8Array(sparseFieldIdsScratch);
|
|
3937
|
-
return {
|
|
3938
|
-
fieldCount,
|
|
3939
|
-
termCount,
|
|
3940
|
-
nextId,
|
|
3941
|
-
layout,
|
|
3942
|
-
docIdWidth,
|
|
3943
|
-
sparseFieldIdWidth,
|
|
3944
|
-
allDocIds,
|
|
3945
|
-
allFreqs,
|
|
3946
|
-
denseOffsets: null,
|
|
3947
|
-
denseLengths: null,
|
|
3948
|
-
sparseTermStarts: new Uint32Array(termStarts),
|
|
3949
|
-
sparseFieldIds,
|
|
3950
|
-
sparseOffsets: new Uint32Array(sparseOffsets),
|
|
3951
|
-
sparseLengths: new Uint32Array(sparseLengths),
|
|
3952
|
-
};
|
|
3947
|
+
return layout;
|
|
3953
3948
|
}
|
|
3954
3949
|
}
|
|
3955
3950
|
|
|
@@ -4129,22 +4124,6 @@ function buildFrozenParamsFromDocuments(documents, options) {
|
|
|
4129
4124
|
return builder.freezeParams();
|
|
4130
4125
|
}
|
|
4131
4126
|
|
|
4132
|
-
/**
|
|
4133
|
-
* Internal AND / AND_NOT gate thresholds (not exported from the public package entry).
|
|
4134
|
-
*/
|
|
4135
|
-
const DEFAULT_AND_GATE_LIMITS = {
|
|
4136
|
-
maxAbsolute: 5000,
|
|
4137
|
-
maxFraction: 0.1,
|
|
4138
|
-
};
|
|
4139
|
-
function resolveGateMaxSize(documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
|
|
4140
|
-
return Math.min(limits.maxAbsolute, Math.max(100, Math.floor(documentCount * limits.maxFraction)));
|
|
4141
|
-
}
|
|
4142
|
-
function gateIsSelectiveEnough(gateSize, documentCount, limits = DEFAULT_AND_GATE_LIMITS) {
|
|
4143
|
-
if (gateSize === 0)
|
|
4144
|
-
return true;
|
|
4145
|
-
return gateSize <= resolveGateMaxSize(documentCount, limits);
|
|
4146
|
-
}
|
|
4147
|
-
|
|
4148
4127
|
/**
 * Forwards to `shouldUseGatedEvaluation`; the `run` argument is accepted but
 * not consulted.
 */
function useGatedEvaluation(run, branchCount, operator, hasWildcard) {
    const gated = shouldUseGatedEvaluation(branchCount, operator, hasWildcard);
    return gated;
}
|
|
@@ -4186,80 +4165,155 @@ function normalizeStringQuery(query, searchOptions, params) {
|
|
|
4186
4165
|
...params.globalSearchOptions,
|
|
4187
4166
|
...searchOptions,
|
|
4188
4167
|
};
|
|
4189
|
-
const
|
|
4190
|
-
|
|
4191
|
-
|
|
4168
|
+
const tokens = options.tokenize(query);
|
|
4169
|
+
const terms = [];
|
|
4170
|
+
for (const token of tokens) {
|
|
4171
|
+
const processed = options.processTerm(token);
|
|
4172
|
+
if (Array.isArray(processed)) {
|
|
4173
|
+
for (const term of processed) {
|
|
4174
|
+
if (term)
|
|
4175
|
+
terms.push(term);
|
|
4176
|
+
}
|
|
4177
|
+
}
|
|
4178
|
+
else if (processed) {
|
|
4179
|
+
terms.push(processed);
|
|
4180
|
+
}
|
|
4181
|
+
}
|
|
4182
|
+
const toSpec = termToQuerySpec(options);
|
|
4183
|
+
const specs = new Array(terms.length);
|
|
4184
|
+
for (let i = 0; i < terms.length; i++) {
|
|
4185
|
+
specs[i] = toSpec(terms[i], i, terms);
|
|
4186
|
+
}
|
|
4187
|
+
const { fuzzy: fuzzyWeight, prefix: prefixWeight } = {
|
|
4188
|
+
...defaultSearchOptions.weights,
|
|
4189
|
+
...options.weights,
|
|
4190
|
+
};
|
|
4192
4191
|
return {
|
|
4193
4192
|
options,
|
|
4194
|
-
specs
|
|
4193
|
+
specs,
|
|
4195
4194
|
operator: options.combineWith,
|
|
4195
|
+
fieldBoosts: fieldBoostsForQuery(options, params.fields),
|
|
4196
|
+
fuzzyWeight,
|
|
4197
|
+
prefixWeight,
|
|
4196
4198
|
};
|
|
4197
4199
|
}
|
|
4198
4200
|
/**
 * Wraps a term index in a lazy descriptor: the term string is only looked up
 * (via `indexView.resolveTermByIndex`) when `resolve()` is called.
 */
function lazyIndexedTerm(indexView, termIndex) {
    const resolve = () => indexView.resolveTermByIndex(termIndex);
    return { kind: 'lazy', resolve };
}
|
|
4201
|
-
function
|
|
4203
|
+
/**
 * Enumerates the index terms a query spec refers to, calling `visit` with
 * { kind: 'exact' | 'prefix' | 'fuzzy', termIndex, ... } in that order.
 *
 * - The exact reference is always emitted (its termIndex comes straight from
 *   `indexView.resolveTermIndex`).
 * - Prefix matches are emitted when `query.prefix` is set, skipping zero-distance hits.
 * - Fuzzy matches are emitted when the resolved max distance is truthy, skipping
 *   zero-distance hits and terms already emitted as prefix matches.
 */
function forEachQuerySpecTermRef(query, normalized, params, visit) {
    const { indexView } = params;
    const { options } = normalized;
    const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
    visit({ kind: 'exact', termIndex: indexView.resolveTermIndex(query.term) });
    // The dedup set is only needed when both expansions are going to run.
    let prefixSeen;
    if (query.prefix && maxDistance) {
        prefixSeen = new Set();
    }
    if (query.prefix) {
        for (const match of indexView.getPrefixMatchesByIndex(query.term)) {
            const { termIndex, length } = match;
            const distance = length - query.term.length;
            if (!distance)
                continue;
            if (prefixSeen !== undefined) {
                prefixSeen.add(termIndex);
            }
            visit({ kind: 'prefix', termIndex, length, distance });
        }
    }
    if (!maxDistance)
        return;
    for (const match of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
        const { termIndex, length, distance } = match;
        const alreadyPrefix = prefixSeen !== undefined && prefixSeen.has(termIndex);
        if (!distance || alreadyPrefix)
            continue;
        visit({ kind: 'fuzzy', termIndex, length, distance });
    }
}
|
|
4226
|
-
function
|
|
4227
|
-
const
|
|
4228
|
-
const
|
|
4226
|
+
/**
 * Turns each term reference of a query spec into a scoring visit:
 * `visit(fieldTermData, derivedTerm, termWeight)`.
 *
 * - exact:  weight 1, derived term is the query term itself; data is undefined
 *           when the term has no index.
 * - prefix: weight = prefixWeight * length / (length + 0.3 * distance).
 * - fuzzy:  weight = fuzzyWeight * length / (length + distance).
 * Prefix and fuzzy derived terms are resolved lazily.
 */
function visitQuerySpecForScoring(query, normalized, params, visit) {
    const { indexView } = params;
    const { fuzzyWeight, prefixWeight } = normalized;
    forEachQuerySpecTermRef(query, normalized, params, (ref) => {
        switch (ref.kind) {
            case 'exact': {
                const data = ref.termIndex == null ? undefined : indexView.fieldTermData(ref.termIndex);
                visit(data, query.term, 1);
                return;
            }
            case 'prefix': {
                const data = indexView.fieldTermData(ref.termIndex);
                const derived = lazyIndexedTerm(indexView, ref.termIndex);
                visit(data, derived, prefixWeight * ref.length / (ref.length + 0.3 * ref.distance));
                return;
            }
            default: {
                const data = indexView.fieldTermData(ref.termIndex);
                const derived = lazyIndexedTerm(indexView, ref.termIndex);
                visit(data, derived, fuzzyWeight * ref.length / (ref.length + ref.distance));
            }
        }
    });
}
|
|
4241
|
+
/**
 * Runs one query spec: every term reference produced by
 * `visitQuerySpecForScoring` is aggregated into a fresh results Map, optionally
 * restricted to `allowedDocs`.
 */
function executeQuerySpecInternal(query, normalized, params, allowedDocs) {
    const { fieldBoosts, options } = normalized;
    let termOptions;
    if (allowedDocs != null) {
        termOptions = { allowedDocs };
    }
    const results = new Map();
    const scoreTerm = (data, derivedTerm, termWeight) => {
        aggregateTerm(query.term, derivedTerm, termWeight, query.termBoost, data, fieldBoosts, params.aggregateContext, options.boostDocument, options.bm25, results, termOptions);
    };
    visitQuerySpecForScoring(query, normalized, params, scoreTerm);
    return results;
}
|
|
4236
|
-
function
|
|
4237
|
-
|
|
4238
|
-
|
|
4239
|
-
|
|
4240
|
-
const
|
|
4241
|
-
|
|
4242
|
-
|
|
4243
|
-
|
|
4244
|
-
|
|
4250
|
+
function maxPostingLengthForFieldTermData(data, fieldBoosts, fieldIds) {
|
|
4251
|
+
if (data == null)
|
|
4252
|
+
return 0;
|
|
4253
|
+
let maxLen = 0;
|
|
4254
|
+
for (const field of fieldBoosts.names) {
|
|
4255
|
+
const fieldId = fieldIds[field];
|
|
4256
|
+
const postingList = data.get(fieldId);
|
|
4257
|
+
if (postingList == null)
|
|
4258
|
+
continue;
|
|
4259
|
+
const len = postingList instanceof SegmentPostingList ? postingList.length : postingList.size;
|
|
4260
|
+
if (len > maxLen)
|
|
4261
|
+
maxLen = len;
|
|
4245
4262
|
}
|
|
4246
|
-
|
|
4247
|
-
|
|
4248
|
-
|
|
4249
|
-
|
|
4250
|
-
|
|
4251
|
-
|
|
4252
|
-
|
|
4253
|
-
|
|
4263
|
+
return maxLen;
|
|
4264
|
+
}
|
|
4265
|
+
function estimateMaxPostingLengthForQuerySpec(query, normalized, params) {
|
|
4266
|
+
const { indexView, aggregateContext } = params;
|
|
4267
|
+
const { fieldBoosts } = normalized;
|
|
4268
|
+
const { fieldIds } = aggregateContext;
|
|
4269
|
+
let maxLen = 0;
|
|
4270
|
+
const consider = (data) => {
|
|
4271
|
+
maxLen = Math.max(maxLen, maxPostingLengthForFieldTermData(data, fieldBoosts, fieldIds));
|
|
4272
|
+
};
|
|
4273
|
+
forEachQuerySpecTermRef(query, normalized, params, (ref) => {
|
|
4274
|
+
if (ref.kind === 'exact') {
|
|
4275
|
+
if (ref.termIndex != null)
|
|
4276
|
+
consider(indexView.fieldTermData(ref.termIndex));
|
|
4277
|
+
return;
|
|
4254
4278
|
}
|
|
4279
|
+
consider(indexView.fieldTermData(ref.termIndex));
|
|
4280
|
+
});
|
|
4281
|
+
return maxLen;
|
|
4282
|
+
}
|
|
4283
|
+
function estimateMaxPostingLengthForQuery(query, searchOptions, params) {
|
|
4284
|
+
if (isWildcardQuery(query)) {
|
|
4285
|
+
return params.aggregateContext.documentCount;
|
|
4255
4286
|
}
|
|
4256
|
-
if (
|
|
4257
|
-
|
|
4258
|
-
|
|
4259
|
-
|
|
4260
|
-
|
|
4287
|
+
if (isQueryCombination(query)) {
|
|
4288
|
+
const options = { ...searchOptions, ...query, queries: undefined };
|
|
4289
|
+
let maxLen = 0;
|
|
4290
|
+
for (const branch of query.queries) {
|
|
4291
|
+
maxLen = Math.max(maxLen, estimateMaxPostingLengthForQuery(branch, options, params));
|
|
4261
4292
|
}
|
|
4293
|
+
return maxLen;
|
|
4262
4294
|
}
|
|
4295
|
+
if (typeof query !== 'string')
|
|
4296
|
+
return 0;
|
|
4297
|
+
const normalized = normalizeStringQuery(query, searchOptions, params);
|
|
4298
|
+
let maxLen = 0;
|
|
4299
|
+
for (const spec of normalized.specs) {
|
|
4300
|
+
maxLen = Math.max(maxLen, estimateMaxPostingLengthForQuerySpec(spec, normalized, params));
|
|
4301
|
+
}
|
|
4302
|
+
return maxLen;
|
|
4303
|
+
}
|
|
4304
|
+
function collectDocIdsForQuerySpec(query, normalized, params, allowedDocs) {
|
|
4305
|
+
const { fieldBoosts } = normalized;
|
|
4306
|
+
const docIds = new Set();
|
|
4307
|
+
const { indexView, aggregateContext } = params;
|
|
4308
|
+
forEachQuerySpecTermRef(query, normalized, params, (ref) => {
|
|
4309
|
+
if (ref.kind === 'exact') {
|
|
4310
|
+
if (ref.termIndex != null) {
|
|
4311
|
+
indexView.collectDocIds(ref.termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
|
|
4312
|
+
}
|
|
4313
|
+
return;
|
|
4314
|
+
}
|
|
4315
|
+
indexView.collectDocIds(ref.termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
|
|
4316
|
+
});
|
|
4263
4317
|
return docIds;
|
|
4264
4318
|
}
|
|
4265
4319
|
function intersectDocIdsInPlace(docIds, branchDocIds) {
|
|
@@ -4309,7 +4363,8 @@ function collectCombinedDocIds(branches, operator, collectBranch, allowedDocs) {
|
|
|
4309
4363
|
* AND_NOT: score the positive branch only; negated branches are collected as docId sets and
|
|
4310
4364
|
* subtracted without scoring (avoids term materialization on excluded branches).
|
|
4311
4365
|
*/
|
|
4312
|
-
function executeCombinedBranches(branches, operator, params, executeBranch, collectBranch, allowedDocs, run) {
|
|
4366
|
+
function executeCombinedBranches(branches, operator, params, executeBranch, collectBranch, allowedDocs, run, estimateBranchPostingLength) {
|
|
4367
|
+
var _a;
|
|
4313
4368
|
if (branches.length === 0)
|
|
4314
4369
|
return new Map();
|
|
4315
4370
|
const op = operator.toLowerCase();
|
|
@@ -4321,8 +4376,16 @@ function executeCombinedBranches(branches, operator, params, executeBranch, coll
|
|
|
4321
4376
|
if (op === 'and') {
|
|
4322
4377
|
const limits = void 0 ;
|
|
4323
4378
|
const documentCount = params.aggregateContext.documentCount;
|
|
4379
|
+
const postingGatePolicy = (_a = void 0 ) !== null && _a !== void 0 ? _a : DEFAULT_POSTING_GATE_POLICY;
|
|
4380
|
+
const maxGateSize = resolveGateMaxSize(documentCount, limits);
|
|
4324
4381
|
for (let i = 1; i < branches.length; i++) {
|
|
4325
|
-
|
|
4382
|
+
if (gate.size === 0)
|
|
4383
|
+
return result;
|
|
4384
|
+
const ratioPath = gate.size > maxGateSize;
|
|
4385
|
+
const postingListLength = ratioPath
|
|
4386
|
+
? estimateBranchPostingLength === null || estimateBranchPostingLength === void 0 ? void 0 : estimateBranchPostingLength(branches[i])
|
|
4387
|
+
: undefined;
|
|
4388
|
+
const selective = gateIsSelectiveEnough(gate.size, documentCount, limits, postingListLength, postingGatePolicy);
|
|
4326
4389
|
const branchAllowed = selective ? gate : allowedDocs;
|
|
4327
4390
|
result = combineResults([result, executeBranch(branches[i], branchAllowed)], AND);
|
|
4328
4391
|
gate = docIdsFromResult(result);
|
|
@@ -4386,14 +4449,15 @@ function collectDocIdsForQueryInternal(query, searchOptions, params, allowedDocs
|
|
|
4386
4449
|
if (typeof query !== 'string') {
|
|
4387
4450
|
throw new Error('FrozenMiniSearch: invalid query');
|
|
4388
4451
|
}
|
|
4389
|
-
const
|
|
4452
|
+
const normalized = normalizeStringQuery(query, searchOptions, params);
|
|
4453
|
+
const { specs, operator } = normalized;
|
|
4390
4454
|
const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
|
|
4391
4455
|
if (specs.length <= 1) {
|
|
4392
4456
|
return specs.length === 1
|
|
4393
|
-
? collectDocIdsForQuerySpec(specs[0],
|
|
4457
|
+
? collectDocIdsForQuerySpec(specs[0], normalized, params, allowedDocs)
|
|
4394
4458
|
: new Set();
|
|
4395
4459
|
}
|
|
4396
|
-
return collectCombinedDocIds(specs, combineWith, (spec, branchAllowed) => collectDocIdsForQuerySpec(spec,
|
|
4460
|
+
return collectCombinedDocIds(specs, combineWith, (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs);
|
|
4397
4461
|
}
|
|
4398
4462
|
function executeWildcardQuery(searchOptions, params) {
|
|
4399
4463
|
const results = new Map();
|
|
@@ -4415,7 +4479,7 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
|
|
|
4415
4479
|
const options = { ...searchOptions, ...query, queries: undefined };
|
|
4416
4480
|
const operator = ((_b = (_a = query.combineWith) !== null && _a !== void 0 ? _a : options.combineWith) !== null && _b !== void 0 ? _b : params.globalSearchOptions.combineWith);
|
|
4417
4481
|
if (useGatedEvaluation(run, query.queries.length, operator, combinationHasWildcard(query))) {
|
|
4418
|
-
return executeCombinedBranches(query.queries, operator, params, (branch, branchAllowed) => executeQueryInternal(branch, options, params, branchAllowed, run), (branch, branchAllowed) => collectDocIdsForQueryInternal(branch, options, params, branchAllowed), allowedDocs);
|
|
4482
|
+
return executeCombinedBranches(query.queries, operator, params, (branch, branchAllowed) => executeQueryInternal(branch, options, params, branchAllowed, run), (branch, branchAllowed) => collectDocIdsForQueryInternal(branch, options, params, branchAllowed), allowedDocs, run, branch => estimateMaxPostingLengthForQuery(branch, options, params));
|
|
4419
4483
|
}
|
|
4420
4484
|
const results = query.queries.map(subquery => executeQueryInternal(subquery, options, params, allowedDocs, run));
|
|
4421
4485
|
return combineResults(results, operator);
|
|
@@ -4423,12 +4487,13 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
|
|
|
4423
4487
|
if (typeof query !== 'string') {
|
|
4424
4488
|
throw new Error('FrozenMiniSearch: invalid query');
|
|
4425
4489
|
}
|
|
4426
|
-
const
|
|
4490
|
+
const normalized = normalizeStringQuery(query, searchOptions, params);
|
|
4491
|
+
const { specs, operator } = normalized;
|
|
4427
4492
|
const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
|
|
4428
4493
|
if (useGatedEvaluation(run, specs.length, combineWith, false)) {
|
|
4429
|
-
return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec,
|
|
4494
|
+
return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec, normalized, params, branchAllowed), (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs, run, spec => estimateMaxPostingLengthForQuerySpec(spec, normalized, params));
|
|
4430
4495
|
}
|
|
4431
|
-
const results = specs.map(spec => executeQuerySpecInternal(spec,
|
|
4496
|
+
const results = specs.map(spec => executeQuerySpecInternal(spec, normalized, params, allowedDocs));
|
|
4432
4497
|
return combineResults(results, combineWith);
|
|
4433
4498
|
}
|
|
4434
4499
|
function executeQuery(query, searchOptions, params) {
|
|
@@ -4619,6 +4684,7 @@ function materializeOwnedSnapshot(params, mode) {
|
|
|
4619
4684
|
function frozenMemoryBreakdown(frozen) {
|
|
4620
4685
|
return frozen.memoryBreakdown();
|
|
4621
4686
|
}
|
|
4687
|
+
const noStoredFields = () => undefined;
|
|
4622
4688
|
function assertFieldsMatchSnapshot(optionsFields, snapFieldIds) {
|
|
4623
4689
|
const snapNames = Object.keys(snapFieldIds).sort();
|
|
4624
4690
|
const optNames = [...optionsFields].sort();
|
|
@@ -4672,24 +4738,31 @@ class FrozenMiniSearch {
|
|
|
4672
4738
|
this._termCount = params.termCount;
|
|
4673
4739
|
this._postings = params.postings;
|
|
4674
4740
|
this._fieldTermFlyweight = createFrozenFieldTermFlyweight(this._postings);
|
|
4741
|
+
this._hasStoredFields = this._storedFields.kind !== 'none';
|
|
4675
4742
|
this._aggregateContext = {
|
|
4676
4743
|
documentCount: this._documentCount,
|
|
4677
4744
|
avgFieldLength: this._avgFieldLength,
|
|
4678
4745
|
fieldIds: this._fieldIds,
|
|
4679
4746
|
getFieldLength: (docId, fieldId) => this.getFieldLength(docId, fieldId),
|
|
4680
4747
|
getExternalId: docId => this._externalIds[docId],
|
|
4681
|
-
getStoredFields:
|
|
4748
|
+
getStoredFields: this._hasStoredFields
|
|
4749
|
+
? docId => readStoredFields(this._storedFields, docId)
|
|
4750
|
+
: noStoredFields,
|
|
4682
4751
|
};
|
|
4683
4752
|
this._queryEngineParams = {
|
|
4684
4753
|
fields: this._options.fields,
|
|
4685
4754
|
globalSearchOptions: this._options.searchOptions,
|
|
4686
4755
|
tokenize: this._options.tokenize,
|
|
4687
4756
|
processTerm: this._options.processTerm,
|
|
4688
|
-
indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight,
|
|
4689
|
-
|
|
4690
|
-
|
|
4691
|
-
|
|
4692
|
-
|
|
4757
|
+
indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight, this._hasStoredFields
|
|
4758
|
+
? (callback) => {
|
|
4759
|
+
forEachLiveShortId(this._nextId, this._externalIds, (shortId, id) => {
|
|
4760
|
+
callback(shortId, id, readStoredFields(this._storedFields, shortId));
|
|
4761
|
+
});
|
|
4762
|
+
}
|
|
4763
|
+
: (callback) => {
|
|
4764
|
+
forEachLiveShortId(this._nextId, this._externalIds, callback);
|
|
4765
|
+
}),
|
|
4693
4766
|
aggregateContext: this._aggregateContext,
|
|
4694
4767
|
};
|
|
4695
4768
|
}
|
|
@@ -4746,7 +4819,9 @@ class FrozenMiniSearch {
|
|
|
4746
4819
|
return shortId == null ? undefined : readStoredFields(this._storedFields, shortId);
|
|
4747
4820
|
}
|
|
4748
4821
|
search(query, searchOptions = {}) {
|
|
4749
|
-
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId],
|
|
4822
|
+
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], this._hasStoredFields
|
|
4823
|
+
? docId => readStoredFields(this._storedFields, docId)
|
|
4824
|
+
: undefined);
|
|
4750
4825
|
}
|
|
4751
4826
|
autoSuggest(queryString, options = {}) {
|
|
4752
4827
|
const merged = { ...this._options.autoSuggestOptions, ...options };
|