@yoch/frozenminisearch 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +19 -1
- package/dist/cjs/index.cjs +242 -99
- package/dist/es/index.d.ts +22 -7
- package/dist/es/index.js +242 -99
- package/package.json +7 -4
package/dist/es/index.js
CHANGED
|
@@ -256,7 +256,9 @@ function finalizeSearchResults(params) {
|
|
|
256
256
|
queryTerms: terms,
|
|
257
257
|
match,
|
|
258
258
|
};
|
|
259
|
-
|
|
259
|
+
if (getStoredFields != null) {
|
|
260
|
+
Object.assign(result, getStoredFields(docId));
|
|
261
|
+
}
|
|
260
262
|
if (filter == null || filter(result)) {
|
|
261
263
|
results.push(result);
|
|
262
264
|
}
|
|
@@ -1640,9 +1642,11 @@ const FLAG_FL_U8 = 8;
|
|
|
1640
1642
|
const FLAG_FL_U16 = 16;
|
|
1641
1643
|
const FLAG_FREQ_U16 = 32;
|
|
1642
1644
|
const CODEC_RAW = 0;
|
|
1645
|
+
/** Deflate/inflate (`node:zlib`) on the whole payload. */
|
|
1646
|
+
const CODEC_ZLIB = 1;
|
|
1643
1647
|
/** Zstandard (`node:zlib`) on the whole payload. */
|
|
1644
1648
|
const CODEC_ZSTD = 3;
|
|
1645
|
-
/** Single concatenated payload, one
|
|
1649
|
+
/** Single concatenated payload, one compressed stream (or raw). */
|
|
1646
1650
|
const MSV5_FORMAT_REV_PAYLOAD = 1;
|
|
1647
1651
|
/** Do not compress payloads smaller than this (bytes). */
|
|
1648
1652
|
const MSV5_MIN_COMPRESS_BYTES = 64;
|
|
@@ -2762,33 +2766,43 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2762
2766
|
};
|
|
2763
2767
|
}
|
|
2764
2768
|
|
|
2765
|
-
/** Hard cap on the uncompressed payload, rejected before allocation (
|
|
2769
|
+
/** Hard cap on the uncompressed payload, rejected before allocation (compressed-bomb guard).
|
|
2766
2770
|
* This is the single trust boundary for untrusted snapshots: {@link readPayloadMeta} rejects
|
|
2767
2771
|
* headers above this size; sync decompress uses the same cap via `maxOutputLength`.
|
|
2768
2772
|
* A malicious header can still declare up to 1 GiB — no tighter native limit helps without
|
|
2769
2773
|
* trusting `uncompressedLength` from that same header. Semantic integrity (length match,
|
|
2770
2774
|
* payload CRC, per-section CRC) is enforced after decode. */
|
|
2771
2775
|
const MSV5_MAX_UNCOMPRESSED_BYTES = 1024 * 1024 * 1024;
|
|
2776
|
+
const MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH = 'MSv5 compressed payload exceeds declared length';
|
|
2777
|
+
const MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH = 'MSv5 decompressed payload length mismatch';
|
|
2772
2778
|
// zstd landed in node:zlib at Node 22.15.0 (22.x line) / 23.8.0, where the whole family
|
|
2773
2779
|
// (zstdCompress[Sync], zstdDecompressSync, createZstdDecompress) ships together — so probing one
|
|
2774
2780
|
// member is enough to know if the runtime supports zstd. Checked at call time (not captured at
|
|
2775
|
-
// module load) so it stays mockable in tests. On older runtimes we degrade gracefully:
|
|
2776
|
-
//
|
|
2781
|
+
// module load) so it stays mockable in tests. On older runtimes we degrade gracefully: `auto`
|
|
2782
|
+
// tries zlib once (or raw if it does not help). When zstd is available, `auto` tries zstd once
|
|
2783
|
+
// and stays raw if it does not shrink — no second pass. Reads of a zstd payload throw a clear,
|
|
2784
|
+
// actionable error on runtimes without zstd.
|
|
2777
2785
|
function zstdAvailable() {
|
|
2778
2786
|
return typeof zlib.zstdCompressSync === 'function';
|
|
2779
2787
|
}
|
|
2788
|
+
function zstdUnavailableWriteError() {
|
|
2789
|
+
return new Error('MSv5 snapshot requested zstd compression, but this Node.js runtime lacks node:zlib zstd '
|
|
2790
|
+
+ 'support (added in Node 22.15.0). Upgrade Node.js, or use compression: "auto", "raw", '
|
|
2791
|
+
+ 'or "zlib".');
|
|
2792
|
+
}
|
|
2780
2793
|
function zstdUnavailableReadError() {
|
|
2781
2794
|
return new Error('MSv5 snapshot is zstd-compressed, but this Node.js runtime lacks node:zlib zstd support '
|
|
2782
2795
|
+ '(added in Node 22.15.0). Upgrade Node.js to read this snapshot, or re-save it from a '
|
|
2783
|
-
+ 'newer runtime
|
|
2796
|
+
+ 'newer runtime with compression: "raw" or "zlib".');
|
|
2784
2797
|
}
|
|
2785
2798
|
let warnedZstdSaveFallback = false;
|
|
2786
2799
|
function warnZstdSaveFallbackOnce() {
|
|
2787
2800
|
if (warnedZstdSaveFallback)
|
|
2788
2801
|
return;
|
|
2789
2802
|
warnedZstdSaveFallback = true;
|
|
2790
|
-
process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0);
|
|
2791
|
-
+ 'raw (uncompressed)
|
|
2803
|
+
process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0); compression: "auto" falls back to '
|
|
2804
|
+
+ 'zlib when it shrinks the payload, otherwise raw (uncompressed). Upgrade to Node 22.15.0+ '
|
|
2805
|
+
+ 'for zstd.', { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
|
|
2792
2806
|
}
|
|
2793
2807
|
function assertPayloadFormatRev(buf) {
|
|
2794
2808
|
const rev = buf.readUInt16LE(MSV5_FORMAT_REV_OFFSET);
|
|
@@ -2838,23 +2852,26 @@ function msv5ZstdCompressOptions(uncompressed) {
|
|
|
2838
2852
|
},
|
|
2839
2853
|
};
|
|
2840
2854
|
}
|
|
2841
|
-
|
|
2842
|
-
function pickPayloadCodec(uncompressed, compressed) {
|
|
2843
|
-
if (compressed.length < uncompressed.length) {
|
|
2844
|
-
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2845
|
-
}
|
|
2855
|
+
function rawPayloadChoice(uncompressed) {
|
|
2846
2856
|
return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
|
|
2847
2857
|
}
|
|
2848
|
-
|
|
2849
|
-
|
|
2850
|
-
|
|
2858
|
+
/** Auto mode: one compression attempt; keep it only when strictly smaller than raw. */
|
|
2859
|
+
function pickAutoPayloadCodec(uncompressed, compressed, codec) {
|
|
2860
|
+
if (compressed.length < uncompressed.length) {
|
|
2861
|
+
return {
|
|
2862
|
+
payload: compressed,
|
|
2863
|
+
codec,
|
|
2864
|
+
zstdLevel: codec === CODEC_ZSTD ? MSV5_ZSTD_LEVEL : 0,
|
|
2865
|
+
};
|
|
2851
2866
|
}
|
|
2867
|
+
return rawPayloadChoice(uncompressed);
|
|
2868
|
+
}
|
|
2869
|
+
function zstdPayloadChoiceSync(uncompressed) {
|
|
2852
2870
|
if (!zstdAvailable()) {
|
|
2853
|
-
|
|
2854
|
-
return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
|
|
2871
|
+
throw zstdUnavailableWriteError();
|
|
2855
2872
|
}
|
|
2856
2873
|
const compressed = zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed));
|
|
2857
|
-
return
|
|
2874
|
+
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2858
2875
|
}
|
|
2859
2876
|
/**
|
|
2860
2877
|
* Async zstd via {@link zstdCompress} (not {@link zstdCompressSync}).
|
|
@@ -2873,16 +2890,91 @@ function zstdCompressAsync(uncompressed) {
|
|
|
2873
2890
|
});
|
|
2874
2891
|
});
|
|
2875
2892
|
}
|
|
2876
|
-
async function
|
|
2893
|
+
async function zstdPayloadChoiceAsync(uncompressed) {
|
|
2894
|
+
if (!zstdAvailable()) {
|
|
2895
|
+
throw zstdUnavailableWriteError();
|
|
2896
|
+
}
|
|
2897
|
+
const compressed = await zstdCompressAsync(uncompressed);
|
|
2898
|
+
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2899
|
+
}
|
|
2900
|
+
function zlibPayloadChoiceSync(uncompressed) {
|
|
2901
|
+
const compressed = zlib.deflateSync(uncompressed);
|
|
2902
|
+
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2903
|
+
}
|
|
2904
|
+
function zlibCompressAsync(uncompressed) {
|
|
2905
|
+
return new Promise((resolve, reject) => {
|
|
2906
|
+
zlib.deflate(uncompressed, (err, compressed) => {
|
|
2907
|
+
if (err != null) {
|
|
2908
|
+
reject(err);
|
|
2909
|
+
return;
|
|
2910
|
+
}
|
|
2911
|
+
resolve(compressed);
|
|
2912
|
+
});
|
|
2913
|
+
});
|
|
2914
|
+
}
|
|
2915
|
+
async function zlibPayloadChoiceAsync(uncompressed) {
|
|
2916
|
+
const compressed = await zlibCompressAsync(uncompressed);
|
|
2917
|
+
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2918
|
+
}
|
|
2919
|
+
const autoSyncCompressors = {
|
|
2920
|
+
zstd: (uncompressed) => zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed)),
|
|
2921
|
+
zlib: (uncompressed) => zlib.deflateSync(uncompressed),
|
|
2922
|
+
};
|
|
2923
|
+
const autoAsyncCompressors = {
|
|
2924
|
+
zstd: zstdCompressAsync,
|
|
2925
|
+
zlib: zlibCompressAsync,
|
|
2926
|
+
};
|
|
2927
|
+
function autoPayloadChoice(uncompressed, compressors) {
|
|
2877
2928
|
if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
|
|
2878
|
-
return
|
|
2929
|
+
return rawPayloadChoice(uncompressed);
|
|
2879
2930
|
}
|
|
2880
2931
|
if (!zstdAvailable()) {
|
|
2881
2932
|
warnZstdSaveFallbackOnce();
|
|
2882
|
-
return
|
|
2933
|
+
return pickAutoPayloadCodec(uncompressed, compressors.zlib(uncompressed), CODEC_ZLIB);
|
|
2934
|
+
}
|
|
2935
|
+
return pickAutoPayloadCodec(uncompressed, compressors.zstd(uncompressed), CODEC_ZSTD);
|
|
2936
|
+
}
|
|
2937
|
+
async function autoPayloadChoiceAsync(uncompressed, compressors) {
|
|
2938
|
+
if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
|
|
2939
|
+
return rawPayloadChoice(uncompressed);
|
|
2940
|
+
}
|
|
2941
|
+
if (!zstdAvailable()) {
|
|
2942
|
+
warnZstdSaveFallbackOnce();
|
|
2943
|
+
return pickAutoPayloadCodec(uncompressed, await compressors.zlib(uncompressed), CODEC_ZLIB);
|
|
2944
|
+
}
|
|
2945
|
+
return pickAutoPayloadCodec(uncompressed, await compressors.zstd(uncompressed), CODEC_ZSTD);
|
|
2946
|
+
}
|
|
2947
|
+
function choosePayloadCodecSync(uncompressed, compression = 'auto') {
|
|
2948
|
+
switch (compression) {
|
|
2949
|
+
case 'raw':
|
|
2950
|
+
return rawPayloadChoice(uncompressed);
|
|
2951
|
+
case 'zstd':
|
|
2952
|
+
return zstdPayloadChoiceSync(uncompressed);
|
|
2953
|
+
case 'zlib':
|
|
2954
|
+
return zlibPayloadChoiceSync(uncompressed);
|
|
2955
|
+
case 'auto':
|
|
2956
|
+
return autoPayloadChoice(uncompressed, autoSyncCompressors);
|
|
2957
|
+
default: {
|
|
2958
|
+
const _exhaustive = compression;
|
|
2959
|
+
return _exhaustive;
|
|
2960
|
+
}
|
|
2961
|
+
}
|
|
2962
|
+
}
|
|
2963
|
+
async function choosePayloadCodecAsync(uncompressed, compression = 'auto') {
|
|
2964
|
+
switch (compression) {
|
|
2965
|
+
case 'raw':
|
|
2966
|
+
return rawPayloadChoice(uncompressed);
|
|
2967
|
+
case 'zstd':
|
|
2968
|
+
return await zstdPayloadChoiceAsync(uncompressed);
|
|
2969
|
+
case 'zlib':
|
|
2970
|
+
return await zlibPayloadChoiceAsync(uncompressed);
|
|
2971
|
+
case 'auto':
|
|
2972
|
+
return await autoPayloadChoiceAsync(uncompressed, autoAsyncCompressors);
|
|
2973
|
+
default: {
|
|
2974
|
+
const _exhaustive = compression;
|
|
2975
|
+
return _exhaustive;
|
|
2976
|
+
}
|
|
2883
2977
|
}
|
|
2884
|
-
const compressed = await zstdCompressAsync(uncompressed);
|
|
2885
|
-
return pickPayloadCodec(uncompressed, compressed);
|
|
2886
2978
|
}
|
|
2887
2979
|
function concatAndValidateSections(rawSections) {
|
|
2888
2980
|
if (rawSections.length !== MSV5_SECTION_COUNT) {
|
|
@@ -2937,16 +3029,16 @@ function buildMsv5AssembledFile(globalFlags, entries, uncompressedLength, payloa
|
|
|
2937
3029
|
}
|
|
2938
3030
|
/**
|
|
2939
3031
|
* MSv5 on disk: header + catalogue (uncompressed offsets) + **one** payload blob
|
|
2940
|
-
* (raw concatenation or a single
|
|
3032
|
+
* (raw concatenation or a single compressed stream over it).
|
|
2941
3033
|
*/
|
|
2942
|
-
function assembleMsv5File(globalFlags, rawSections) {
|
|
3034
|
+
function assembleMsv5File(globalFlags, rawSections, compression = 'auto') {
|
|
2943
3035
|
const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
|
|
2944
|
-
const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed);
|
|
3036
|
+
const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed, compression);
|
|
2945
3037
|
return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
|
|
2946
3038
|
}
|
|
2947
|
-
async function assembleMsv5FileAsync(globalFlags, rawSections) {
|
|
3039
|
+
async function assembleMsv5FileAsync(globalFlags, rawSections, compression = 'auto') {
|
|
2948
3040
|
const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
|
|
2949
|
-
const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed);
|
|
3041
|
+
const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed, compression);
|
|
2950
3042
|
return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
|
|
2951
3043
|
}
|
|
2952
3044
|
function readMsv5SectionDirectory(buf) {
|
|
@@ -2990,11 +3082,11 @@ function sectionsFromPayload(payload, directory, payloadCrc32) {
|
|
|
2990
3082
|
return out;
|
|
2991
3083
|
});
|
|
2992
3084
|
}
|
|
2993
|
-
/** Streaming
|
|
2994
|
-
* No `maxOutputLength` on
|
|
2995
|
-
*
|
|
2996
|
-
*
|
|
2997
|
-
function
|
|
3085
|
+
/** Streaming compressed reader: keeps only one section in memory at a time.
|
|
3086
|
+
* No `maxOutputLength` on Transform streams: output is bounded by accumulating `streamOffset`
|
|
3087
|
+
* against the header's `uncompressedLength` (same 1 GiB cap checked upfront). Sync load uses
|
|
3088
|
+
* `maxOutputLength` because it materializes the whole payload at once. */
|
|
3089
|
+
function collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32) {
|
|
2998
3090
|
if (uncompressedLength > MSV5_MAX_UNCOMPRESSED_BYTES) {
|
|
2999
3091
|
throw new Error('MSv5 payload exceeds 1 GiB limit');
|
|
3000
3092
|
}
|
|
@@ -3014,7 +3106,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3014
3106
|
}
|
|
3015
3107
|
function consume(chunk) {
|
|
3016
3108
|
if (streamOffset + chunk.length > uncompressedLength) {
|
|
3017
|
-
throw new Error(
|
|
3109
|
+
throw new Error(MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH);
|
|
3018
3110
|
}
|
|
3019
3111
|
payloadCrc = crc32Update(payloadCrc, chunk);
|
|
3020
3112
|
let off = 0;
|
|
@@ -3050,7 +3142,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3050
3142
|
function finish() {
|
|
3051
3143
|
emitEmptySections();
|
|
3052
3144
|
if (streamOffset !== uncompressedLength || sectionId !== directory.length) {
|
|
3053
|
-
throw new Error(
|
|
3145
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3054
3146
|
}
|
|
3055
3147
|
if (payloadCrc !== payloadCrc32) {
|
|
3056
3148
|
throw new Error('MSv5 payload CRC mismatch');
|
|
@@ -3059,9 +3151,15 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3059
3151
|
return { sections, consume, finish };
|
|
3060
3152
|
}
|
|
3061
3153
|
function loadMsv5SectionsFromZstdStream(compressed, directory, uncompressedLength, payloadCrc32) {
|
|
3154
|
+
return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createZstdDecompress());
|
|
3155
|
+
}
|
|
3156
|
+
function loadMsv5SectionsFromZlibStream(compressed, directory, uncompressedLength, payloadCrc32) {
|
|
3157
|
+
return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createInflate());
|
|
3158
|
+
}
|
|
3159
|
+
function loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createStream) {
|
|
3062
3160
|
return new Promise((resolve, reject) => {
|
|
3063
|
-
const collector =
|
|
3064
|
-
const stream =
|
|
3161
|
+
const collector = collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32);
|
|
3162
|
+
const stream = createStream();
|
|
3065
3163
|
stream.on('data', (chunk) => {
|
|
3066
3164
|
try {
|
|
3067
3165
|
collector.consume(chunk);
|
|
@@ -3119,29 +3217,39 @@ function preparePayload(fileBuf, directory) {
|
|
|
3119
3217
|
payloadCrc32,
|
|
3120
3218
|
};
|
|
3121
3219
|
}
|
|
3122
|
-
|
|
3123
|
-
function loadMsv5Sections(fileBuf, directory) {
|
|
3124
|
-
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
3125
|
-
if (payloadCodec === CODEC_RAW) {
|
|
3126
|
-
return sectionsFromPayload(slice, directory, payloadCrc32);
|
|
3127
|
-
}
|
|
3220
|
+
function decompressPayloadSync(payloadCodec, slice, uncompressedLength) {
|
|
3128
3221
|
if (payloadCodec === CODEC_ZSTD) {
|
|
3129
3222
|
if (!zstdAvailable()) {
|
|
3130
3223
|
throw zstdUnavailableReadError();
|
|
3131
3224
|
}
|
|
3132
|
-
// Native cap matches readPayloadMeta's 1 GiB limit (see MSV5_MAX_UNCOMPRESSED_BYTES).
|
|
3133
|
-
// Using header `uncompressedLength` here would only help when the header understates
|
|
3134
|
-
// the zstd stream but the attacker can inflate the header too — same worst case.
|
|
3135
3225
|
const decoded = zlib.zstdDecompressSync(slice, {
|
|
3136
3226
|
maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
|
|
3137
3227
|
});
|
|
3138
3228
|
if (decoded.length !== uncompressedLength) {
|
|
3139
|
-
throw new Error(
|
|
3229
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3230
|
+
}
|
|
3231
|
+
return decoded;
|
|
3232
|
+
}
|
|
3233
|
+
if (payloadCodec === CODEC_ZLIB) {
|
|
3234
|
+
const decoded = zlib.inflateSync(slice, {
|
|
3235
|
+
maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
|
|
3236
|
+
});
|
|
3237
|
+
if (decoded.length !== uncompressedLength) {
|
|
3238
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3140
3239
|
}
|
|
3141
|
-
return
|
|
3240
|
+
return decoded;
|
|
3142
3241
|
}
|
|
3143
3242
|
throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
|
|
3144
3243
|
}
|
|
3244
|
+
/** Synchronous load; peak RAM ≈ full uncompressed payload (use the async path to bound it). */
|
|
3245
|
+
function loadMsv5Sections(fileBuf, directory) {
|
|
3246
|
+
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
3247
|
+
if (payloadCodec === CODEC_RAW) {
|
|
3248
|
+
return sectionsFromPayload(slice, directory, payloadCrc32);
|
|
3249
|
+
}
|
|
3250
|
+
const decoded = decompressPayloadSync(payloadCodec, slice, uncompressedLength);
|
|
3251
|
+
return sectionsFromPayload(decoded, directory, payloadCrc32);
|
|
3252
|
+
}
|
|
3145
3253
|
/** Streaming load; peak main-thread RAM ≈ largest single section (+ file buffer). */
|
|
3146
3254
|
async function loadMsv5SectionsAsync(fileBuf, directory) {
|
|
3147
3255
|
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
@@ -3154,6 +3262,9 @@ async function loadMsv5SectionsAsync(fileBuf, directory) {
|
|
|
3154
3262
|
}
|
|
3155
3263
|
return loadMsv5SectionsFromZstdStream(slice, directory, uncompressedLength, payloadCrc32);
|
|
3156
3264
|
}
|
|
3265
|
+
if (payloadCodec === CODEC_ZLIB) {
|
|
3266
|
+
return loadMsv5SectionsFromZlibStream(slice, directory, uncompressedLength, payloadCrc32);
|
|
3267
|
+
}
|
|
3157
3268
|
throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
|
|
3158
3269
|
}
|
|
3159
3270
|
function isMsv5Buffer(buf) {
|
|
@@ -3442,7 +3553,7 @@ function resolvePackedTree(snap, termTree, packedTermIndex) {
|
|
|
3442
3553
|
validateTermTreeLeaves(tree, termCount);
|
|
3443
3554
|
return fromRadixTree(tree, termCount);
|
|
3444
3555
|
}
|
|
3445
|
-
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
3556
|
+
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression) {
|
|
3446
3557
|
var _a;
|
|
3447
3558
|
validateFrozenSnapshotNumeric(snap);
|
|
3448
3559
|
const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
@@ -3471,9 +3582,9 @@ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
|
3471
3582
|
postingsWire.docIds,
|
|
3472
3583
|
postingsWire.freqs,
|
|
3473
3584
|
];
|
|
3474
|
-
return assembleMsv5File(globalFlags, rawSections).buffer;
|
|
3585
|
+
return assembleMsv5File(globalFlags, rawSections, compression).buffer;
|
|
3475
3586
|
}
|
|
3476
|
-
async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
3587
|
+
async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression) {
|
|
3477
3588
|
var _a;
|
|
3478
3589
|
validateFrozenSnapshotNumeric(snap);
|
|
3479
3590
|
const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
@@ -3502,7 +3613,7 @@ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
|
3502
3613
|
postingsWire.docIds,
|
|
3503
3614
|
postingsWire.freqs,
|
|
3504
3615
|
];
|
|
3505
|
-
return (await assembleMsv5FileAsync(globalFlags, rawSections)).buffer;
|
|
3616
|
+
return (await assembleMsv5FileAsync(globalFlags, rawSections, compression)).buffer;
|
|
3506
3617
|
}
|
|
3507
3618
|
|
|
3508
3619
|
function validateMsv5Container(buf) {
|
|
@@ -3579,12 +3690,12 @@ async function decodeFrozenSnapshotMsv5Async(buf, hints) {
|
|
|
3579
3690
|
}
|
|
3580
3691
|
|
|
3581
3692
|
/** Encode a frozen snapshot as a binary buffer. */
|
|
3582
|
-
function encodeFrozenSnapshot(snap, termTree, packedTermIndex) {
|
|
3583
|
-
return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex);
|
|
3693
|
+
function encodeFrozenSnapshot(snap, termTree, packedTermIndex, compression) {
|
|
3694
|
+
return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression);
|
|
3584
3695
|
}
|
|
3585
|
-
/** Async encoder; uses
|
|
3586
|
-
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
|
|
3587
|
-
return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex);
|
|
3696
|
+
/** Async encoder; uses the selected payload compression without blocking the event loop. */
|
|
3697
|
+
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex, compression) {
|
|
3698
|
+
return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression);
|
|
3588
3699
|
}
|
|
3589
3700
|
|
|
3590
3701
|
const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
|
|
@@ -3601,7 +3712,7 @@ function decodeFrozenSnapshot(buf, hints) {
|
|
|
3601
3712
|
}
|
|
3602
3713
|
throw invalidFrozenIndex('Unsupported frozen binary snapshot');
|
|
3603
3714
|
}
|
|
3604
|
-
/** Async frozen snapshot decode (streaming
|
|
3715
|
+
/** Async frozen snapshot decode (streaming decompression when needed). */
|
|
3605
3716
|
async function decodeFrozenSnapshotAsync(buf, hints) {
|
|
3606
3717
|
assertBufferLength(buf, 8);
|
|
3607
3718
|
const version = buf.readUInt16LE(4);
|
|
@@ -4073,56 +4184,76 @@ function normalizeStringQuery(query, searchOptions, params) {
|
|
|
4073
4184
|
...params.globalSearchOptions,
|
|
4074
4185
|
...searchOptions,
|
|
4075
4186
|
};
|
|
4076
|
-
const
|
|
4077
|
-
|
|
4078
|
-
|
|
4187
|
+
const tokens = options.tokenize(query);
|
|
4188
|
+
const terms = [];
|
|
4189
|
+
for (const token of tokens) {
|
|
4190
|
+
const processed = options.processTerm(token);
|
|
4191
|
+
if (Array.isArray(processed)) {
|
|
4192
|
+
for (const term of processed) {
|
|
4193
|
+
if (term)
|
|
4194
|
+
terms.push(term);
|
|
4195
|
+
}
|
|
4196
|
+
}
|
|
4197
|
+
else if (processed) {
|
|
4198
|
+
terms.push(processed);
|
|
4199
|
+
}
|
|
4200
|
+
}
|
|
4201
|
+
const toSpec = termToQuerySpec(options);
|
|
4202
|
+
const specs = new Array(terms.length);
|
|
4203
|
+
for (let i = 0; i < terms.length; i++) {
|
|
4204
|
+
specs[i] = toSpec(terms[i], i, terms);
|
|
4205
|
+
}
|
|
4206
|
+
const { fuzzy: fuzzyWeight, prefix: prefixWeight } = {
|
|
4207
|
+
...defaultSearchOptions.weights,
|
|
4208
|
+
...options.weights,
|
|
4209
|
+
};
|
|
4079
4210
|
return {
|
|
4080
4211
|
options,
|
|
4081
|
-
specs
|
|
4212
|
+
specs,
|
|
4082
4213
|
operator: options.combineWith,
|
|
4214
|
+
fieldBoosts: fieldBoostsForQuery(options, params.fields),
|
|
4215
|
+
fuzzyWeight,
|
|
4216
|
+
prefixWeight,
|
|
4083
4217
|
};
|
|
4084
4218
|
}
|
|
4085
4219
|
function lazyIndexedTerm(indexView, termIndex) {
|
|
4086
4220
|
return { kind: 'lazy', resolve: () => indexView.resolveTermByIndex(termIndex) };
|
|
4087
4221
|
}
|
|
4088
|
-
function visitQuerySpecForScoring(query,
|
|
4222
|
+
function visitQuerySpecForScoring(query, normalized, params, visit) {
|
|
4089
4223
|
const { indexView } = params;
|
|
4090
|
-
const {
|
|
4091
|
-
const
|
|
4092
|
-
const maxDistance = maxFuzzyDistance(query, maxFuzzy);
|
|
4224
|
+
const { fuzzyWeight, options, prefixWeight } = normalized;
|
|
4225
|
+
const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
|
|
4093
4226
|
const exactTi = indexView.resolveTermIndex(query.term);
|
|
4094
4227
|
visit(exactTi == null ? undefined : indexView.fieldTermData(exactTi), query.term, 1);
|
|
4095
|
-
const seenPrefix = new Set();
|
|
4228
|
+
const seenPrefix = query.prefix && maxDistance ? new Set() : undefined;
|
|
4096
4229
|
if (query.prefix) {
|
|
4097
4230
|
for (const { termIndex, length } of indexView.getPrefixMatchesByIndex(query.term)) {
|
|
4098
4231
|
const distance = length - query.term.length;
|
|
4099
4232
|
if (!distance)
|
|
4100
4233
|
continue;
|
|
4101
|
-
seenPrefix.add(termIndex);
|
|
4234
|
+
seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.add(termIndex);
|
|
4102
4235
|
visit(indexView.fieldTermData(termIndex), lazyIndexedTerm(indexView, termIndex), prefixWeight * length / (length + 0.3 * distance));
|
|
4103
4236
|
}
|
|
4104
4237
|
}
|
|
4105
4238
|
if (!maxDistance)
|
|
4106
4239
|
return;
|
|
4107
4240
|
for (const { termIndex, length, distance } of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
|
|
4108
|
-
if (!distance || seenPrefix.has(termIndex))
|
|
4241
|
+
if (!distance || (seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.has(termIndex)))
|
|
4109
4242
|
continue;
|
|
4110
4243
|
visit(indexView.fieldTermData(termIndex), lazyIndexedTerm(indexView, termIndex), fuzzyWeight * length / (length + distance));
|
|
4111
4244
|
}
|
|
4112
4245
|
}
|
|
4113
|
-
function executeQuerySpecInternal(query,
|
|
4114
|
-
const
|
|
4115
|
-
const fieldBoosts = fieldBoostsForQuery(options, params.fields);
|
|
4246
|
+
function executeQuerySpecInternal(query, normalized, params, allowedDocs) {
|
|
4247
|
+
const { fieldBoosts, options } = normalized;
|
|
4116
4248
|
const termOptions = allowedDocs == null ? undefined : { allowedDocs };
|
|
4117
4249
|
const results = new Map();
|
|
4118
|
-
visitQuerySpecForScoring(query,
|
|
4250
|
+
visitQuerySpecForScoring(query, normalized, params, (data, derivedTerm, termWeight) => {
|
|
4119
4251
|
aggregateTerm(query.term, derivedTerm, termWeight, query.termBoost, data, fieldBoosts, params.aggregateContext, options.boostDocument, options.bm25, results, termOptions);
|
|
4120
4252
|
});
|
|
4121
4253
|
return results;
|
|
4122
4254
|
}
|
|
4123
|
-
function collectDocIdsForQuerySpec(query,
|
|
4124
|
-
const
|
|
4125
|
-
const fieldBoosts = fieldBoostsForQuery(options, params.fields);
|
|
4255
|
+
function collectDocIdsForQuerySpec(query, normalized, params, allowedDocs) {
|
|
4256
|
+
const { fieldBoosts, options } = normalized;
|
|
4126
4257
|
const docIds = new Set();
|
|
4127
4258
|
const { indexView, aggregateContext } = params;
|
|
4128
4259
|
const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
|
|
@@ -4130,19 +4261,19 @@ function collectDocIdsForQuerySpec(query, searchOptions, params, allowedDocs) {
|
|
|
4130
4261
|
if (exactTi != null) {
|
|
4131
4262
|
indexView.collectDocIds(exactTi, fieldBoosts, aggregateContext, docIds, allowedDocs);
|
|
4132
4263
|
}
|
|
4133
|
-
const seenPrefix = new Set();
|
|
4264
|
+
const seenPrefix = query.prefix && maxDistance ? new Set() : undefined;
|
|
4134
4265
|
if (query.prefix) {
|
|
4135
4266
|
for (const { termIndex, length } of indexView.getPrefixMatchesByIndex(query.term)) {
|
|
4136
4267
|
const distance = length - query.term.length;
|
|
4137
4268
|
if (!distance)
|
|
4138
4269
|
continue;
|
|
4139
|
-
seenPrefix.add(termIndex);
|
|
4270
|
+
seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.add(termIndex);
|
|
4140
4271
|
indexView.collectDocIds(termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
|
|
4141
4272
|
}
|
|
4142
4273
|
}
|
|
4143
4274
|
if (maxDistance) {
|
|
4144
4275
|
for (const { termIndex, distance } of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
|
|
4145
|
-
if (!distance || seenPrefix.has(termIndex))
|
|
4276
|
+
if (!distance || (seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.has(termIndex)))
|
|
4146
4277
|
continue;
|
|
4147
4278
|
indexView.collectDocIds(termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
|
|
4148
4279
|
}
|
|
@@ -4273,14 +4404,15 @@ function collectDocIdsForQueryInternal(query, searchOptions, params, allowedDocs
|
|
|
4273
4404
|
if (typeof query !== 'string') {
|
|
4274
4405
|
throw new Error('FrozenMiniSearch: invalid query');
|
|
4275
4406
|
}
|
|
4276
|
-
const
|
|
4407
|
+
const normalized = normalizeStringQuery(query, searchOptions, params);
|
|
4408
|
+
const { specs, operator } = normalized;
|
|
4277
4409
|
const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
|
|
4278
4410
|
if (specs.length <= 1) {
|
|
4279
4411
|
return specs.length === 1
|
|
4280
|
-
? collectDocIdsForQuerySpec(specs[0],
|
|
4412
|
+
? collectDocIdsForQuerySpec(specs[0], normalized, params, allowedDocs)
|
|
4281
4413
|
: new Set();
|
|
4282
4414
|
}
|
|
4283
|
-
return collectCombinedDocIds(specs, combineWith, (spec, branchAllowed) => collectDocIdsForQuerySpec(spec,
|
|
4415
|
+
return collectCombinedDocIds(specs, combineWith, (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs);
|
|
4284
4416
|
}
|
|
4285
4417
|
function executeWildcardQuery(searchOptions, params) {
|
|
4286
4418
|
const results = new Map();
|
|
@@ -4310,12 +4442,13 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
|
|
|
4310
4442
|
if (typeof query !== 'string') {
|
|
4311
4443
|
throw new Error('FrozenMiniSearch: invalid query');
|
|
4312
4444
|
}
|
|
4313
|
-
const
|
|
4445
|
+
const normalized = normalizeStringQuery(query, searchOptions, params);
|
|
4446
|
+
const { specs, operator } = normalized;
|
|
4314
4447
|
const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
|
|
4315
4448
|
if (useGatedEvaluation(run, specs.length, combineWith, false)) {
|
|
4316
|
-
return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec,
|
|
4449
|
+
return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec, normalized, params, branchAllowed), (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs);
|
|
4317
4450
|
}
|
|
4318
|
-
const results = specs.map(spec => executeQuerySpecInternal(spec,
|
|
4451
|
+
const results = specs.map(spec => executeQuerySpecInternal(spec, normalized, params, allowedDocs));
|
|
4319
4452
|
return combineResults(results, combineWith);
|
|
4320
4453
|
}
|
|
4321
4454
|
function executeQuery(query, searchOptions, params) {
|
|
@@ -4506,6 +4639,7 @@ function materializeOwnedSnapshot(params, mode) {
|
|
|
4506
4639
|
function frozenMemoryBreakdown(frozen) {
|
|
4507
4640
|
return frozen.memoryBreakdown();
|
|
4508
4641
|
}
|
|
4642
|
+
const noStoredFields = () => undefined;
|
|
4509
4643
|
function assertFieldsMatchSnapshot(optionsFields, snapFieldIds) {
|
|
4510
4644
|
const snapNames = Object.keys(snapFieldIds).sort();
|
|
4511
4645
|
const optNames = [...optionsFields].sort();
|
|
@@ -4559,24 +4693,31 @@ class FrozenMiniSearch {
|
|
|
4559
4693
|
this._termCount = params.termCount;
|
|
4560
4694
|
this._postings = params.postings;
|
|
4561
4695
|
this._fieldTermFlyweight = createFrozenFieldTermFlyweight(this._postings);
|
|
4696
|
+
this._hasStoredFields = this._storedFields.kind !== 'none';
|
|
4562
4697
|
this._aggregateContext = {
|
|
4563
4698
|
documentCount: this._documentCount,
|
|
4564
4699
|
avgFieldLength: this._avgFieldLength,
|
|
4565
4700
|
fieldIds: this._fieldIds,
|
|
4566
4701
|
getFieldLength: (docId, fieldId) => this.getFieldLength(docId, fieldId),
|
|
4567
4702
|
getExternalId: docId => this._externalIds[docId],
|
|
4568
|
-
getStoredFields:
|
|
4703
|
+
getStoredFields: this._hasStoredFields
|
|
4704
|
+
? docId => readStoredFields(this._storedFields, docId)
|
|
4705
|
+
: noStoredFields,
|
|
4569
4706
|
};
|
|
4570
4707
|
this._queryEngineParams = {
|
|
4571
4708
|
fields: this._options.fields,
|
|
4572
4709
|
globalSearchOptions: this._options.searchOptions,
|
|
4573
4710
|
tokenize: this._options.tokenize,
|
|
4574
4711
|
processTerm: this._options.processTerm,
|
|
4575
|
-
indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight,
|
|
4576
|
-
|
|
4577
|
-
|
|
4578
|
-
|
|
4579
|
-
|
|
4712
|
+
indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight, this._hasStoredFields
|
|
4713
|
+
? (callback) => {
|
|
4714
|
+
forEachLiveShortId(this._nextId, this._externalIds, (shortId, id) => {
|
|
4715
|
+
callback(shortId, id, readStoredFields(this._storedFields, shortId));
|
|
4716
|
+
});
|
|
4717
|
+
}
|
|
4718
|
+
: (callback) => {
|
|
4719
|
+
forEachLiveShortId(this._nextId, this._externalIds, callback);
|
|
4720
|
+
}),
|
|
4580
4721
|
aggregateContext: this._aggregateContext,
|
|
4581
4722
|
};
|
|
4582
4723
|
}
|
|
@@ -4633,14 +4774,16 @@ class FrozenMiniSearch {
|
|
|
4633
4774
|
return shortId == null ? undefined : readStoredFields(this._storedFields, shortId);
|
|
4634
4775
|
}
|
|
4635
4776
|
search(query, searchOptions = {}) {
|
|
4636
|
-
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId],
|
|
4777
|
+
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], this._hasStoredFields
|
|
4778
|
+
? docId => readStoredFields(this._storedFields, docId)
|
|
4779
|
+
: undefined);
|
|
4637
4780
|
}
|
|
4638
4781
|
autoSuggest(queryString, options = {}) {
|
|
4639
4782
|
const merged = { ...this._options.autoSuggestOptions, ...options };
|
|
4640
4783
|
return autoSuggestFromSearch((q, o) => this.search(q, o), queryString, merged);
|
|
4641
4784
|
}
|
|
4642
4785
|
/** Serialize this index as a frozen binary snapshot (synchronous). */
|
|
4643
|
-
saveBinarySync() {
|
|
4786
|
+
saveBinarySync(saveOptions = {}) {
|
|
4644
4787
|
return encodeFrozenSnapshot({
|
|
4645
4788
|
documentCount: this._documentCount,
|
|
4646
4789
|
nextId: this._nextId,
|
|
@@ -4654,10 +4797,10 @@ class FrozenMiniSearch {
|
|
|
4654
4797
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4655
4798
|
treeShape: [],
|
|
4656
4799
|
postings: this._postings,
|
|
4657
|
-
}, undefined, this._index);
|
|
4800
|
+
}, undefined, this._index, saveOptions.compression);
|
|
4658
4801
|
}
|
|
4659
|
-
/** Non-blocking
|
|
4660
|
-
async saveBinaryAsync() {
|
|
4802
|
+
/** Non-blocking snapshot serialization with the selected compression codec. */
|
|
4803
|
+
async saveBinaryAsync(saveOptions = {}) {
|
|
4661
4804
|
return encodeFrozenSnapshotAsync({
|
|
4662
4805
|
documentCount: this._documentCount,
|
|
4663
4806
|
nextId: this._nextId,
|
|
@@ -4671,7 +4814,7 @@ class FrozenMiniSearch {
|
|
|
4671
4814
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4672
4815
|
treeShape: [],
|
|
4673
4816
|
postings: this._postings,
|
|
4674
|
-
}, undefined, this._index);
|
|
4817
|
+
}, undefined, this._index, saveOptions.compression);
|
|
4675
4818
|
}
|
|
4676
4819
|
/** Load a frozen binary snapshot. */
|
|
4677
4820
|
static loadBinarySync(buffer, options = {}) {
|
|
@@ -4680,7 +4823,7 @@ class FrozenMiniSearch {
|
|
|
4680
4823
|
const snap = decodeFrozenSnapshot(buffer, { storeFields });
|
|
4681
4824
|
return FrozenMiniSearch.fromBinarySnapshot(snap, options);
|
|
4682
4825
|
}
|
|
4683
|
-
/** Load a frozen binary snapshot with streaming
|
|
4826
|
+
/** Load a frozen binary snapshot with streaming decompression when needed (bounded memory). */
|
|
4684
4827
|
static async loadBinaryAsync(buffer, options = {}) {
|
|
4685
4828
|
var _a;
|
|
4686
4829
|
const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
|