@yoch/frozenminisearch 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +19 -1
- package/dist/cjs/index.cjs +242 -99
- package/dist/es/index.d.ts +22 -7
- package/dist/es/index.js +242 -99
- package/package.json +7 -4
package/dist/cjs/index.cjs
CHANGED
|
@@ -260,7 +260,9 @@ function finalizeSearchResults(params) {
|
|
|
260
260
|
queryTerms: terms,
|
|
261
261
|
match,
|
|
262
262
|
};
|
|
263
|
-
|
|
263
|
+
if (getStoredFields != null) {
|
|
264
|
+
Object.assign(result, getStoredFields(docId));
|
|
265
|
+
}
|
|
264
266
|
if (filter == null || filter(result)) {
|
|
265
267
|
results.push(result);
|
|
266
268
|
}
|
|
@@ -1644,9 +1646,11 @@ const FLAG_FL_U8 = 8;
|
|
|
1644
1646
|
const FLAG_FL_U16 = 16;
const FLAG_FREQ_U16 = 32;
/** Payload is stored as-is (no compression). */
const CODEC_RAW = 0;
/** Whole payload goes through deflate/inflate from `node:zlib`. */
const CODEC_ZLIB = 1;
/** Whole payload goes through Zstandard from `node:zlib`. */
const CODEC_ZSTD = 3;
/** Format revision: a single concatenated payload, either raw or one compressed stream. */
const MSV5_FORMAT_REV_PAYLOAD = 1;
/** Payloads below this many bytes are never compressed. */
const MSV5_MIN_COMPRESS_BYTES = 64;
|
|
@@ -2766,33 +2770,43 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2766
2770
|
};
|
|
2767
2771
|
}
|
|
2768
2772
|
|
|
2769
|
-
/**
 * Upper bound (1 GiB) on the uncompressed payload, checked before any allocation so a
 * compressed bomb is rejected early.
 *
 * This is the single trust boundary for untrusted snapshots: {@link readPayloadMeta} refuses
 * headers that declare more than this, and synchronous decompression passes the same value as
 * `maxOutputLength`. A hostile header may still declare anything up to 1 GiB; no tighter native
 * limit is possible without trusting `uncompressedLength` from that very header. Semantic
 * integrity (length match, payload CRC, per-section CRC) is verified after decoding.
 */
const MSV5_MAX_UNCOMPRESSED_BYTES = 2 ** 30;
/** Error text used when a compressed stream yields more bytes than the header declared. */
const MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH = 'MSv5 compressed payload exceeds declared length';
/** Error text used when the decoded size differs from the header's declared length. */
const MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH = 'MSv5 decompressed payload length mismatch';
|
|
2776
2782
|
// Runtime probe for zstd support in node:zlib.
//
// zstd arrived in node:zlib with Node 22.15.0 (22.x line) / 23.8.0, and the whole family
// (zstdCompress[Sync], zstdDecompressSync, createZstdDecompress) shipped together, so testing a
// single member is sufficient. The lookup happens on every call rather than once at module load
// so tests can mock it.
//
// Behaviour on runtimes without zstd: `auto` makes one zlib attempt (falling back to raw when
// that does not help). With zstd present, `auto` makes one zstd attempt and stays raw if the
// result is not smaller — there is no second pass. Reading a zstd payload without zstd support
// throws a clear, actionable error.
function zstdAvailable() {
    const probe = zlib.zstdCompressSync;
    return typeof probe === 'function';
}
|
|
2792
|
+
/**
 * Builds the error raised when zstd compression is requested on a runtime without zstd.
 * @returns {Error} A new error; the caller is responsible for throwing it.
 */
function zstdUnavailableWriteError() {
    const message = [
        'MSv5 snapshot requested zstd compression, but this Node.js runtime lacks node:zlib zstd',
        'support (added in Node 22.15.0). Upgrade Node.js, or use compression: "auto", "raw",',
        'or "zlib".',
    ].join(' ');
    return new Error(message);
}
|
|
2784
2797
|
/**
 * Builds the error raised when a zstd-compressed snapshot is read on a runtime without zstd.
 * @returns {Error} A new error; the caller is responsible for throwing it.
 */
function zstdUnavailableReadError() {
    const message = [
        'MSv5 snapshot is zstd-compressed, but this Node.js runtime lacks node:zlib zstd support',
        '(added in Node 22.15.0). Upgrade Node.js to read this snapshot, or re-save it from a',
        'newer runtime with compression: "raw" or "zlib".',
    ].join(' ');
    return new Error(message);
}
|
|
2789
2802
|
/** Set once the zstd-fallback warning has been emitted, so it is never repeated. */
let warnedZstdSaveFallback = false;
/**
 * Emits a process warning, at most once per module instance, explaining that `auto`
 * compression cannot use zstd on this runtime.
 */
function warnZstdSaveFallbackOnce() {
    if (!warnedZstdSaveFallback) {
        // Flip the flag before emitting so the warning can never be issued twice.
        warnedZstdSaveFallback = true;
        const text = [
            'node:zlib zstd APIs are unavailable (Node.js < 22.15.0); compression: "auto" falls back to',
            'zlib when it shrinks the payload, otherwise raw (uncompressed). Upgrade to Node 22.15.0+',
            'for zstd.',
        ].join(' ');
        process.emitWarning(text, { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
    }
}
|
|
2797
2811
|
function assertPayloadFormatRev(buf) {
|
|
2798
2812
|
const rev = buf.readUInt16LE(MSV5_FORMAT_REV_OFFSET);
|
|
@@ -2842,23 +2856,26 @@ function msv5ZstdCompressOptions(uncompressed) {
|
|
|
2842
2856
|
},
|
|
2843
2857
|
};
|
|
2844
2858
|
}
|
|
2845
|
-
|
|
2846
|
-
function pickPayloadCodec(uncompressed, compressed) {
|
|
2847
|
-
if (compressed.length < uncompressed.length) {
|
|
2848
|
-
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2849
|
-
}
|
|
2859
|
+
/**
 * Describes storing the payload uncompressed.
 * @param uncompressed The payload bytes, used unchanged.
 * @returns `{ payload, codec, zstdLevel }` with the raw codec and a zstd level of 0.
 */
function rawPayloadChoice(uncompressed) {
    const choice = { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
    return choice;
}
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2862
|
+
/**
 * Auto mode decision after a single compression attempt: the compressed form is kept only
 * when it is strictly smaller than the raw payload; otherwise the payload stays raw.
 * @param uncompressed Original payload bytes.
 * @param compressed Result of the one compression attempt.
 * @param codec Codec id that produced `compressed`.
 * @returns `{ payload, codec, zstdLevel }` for whichever form was chosen.
 */
function pickAutoPayloadCodec(uncompressed, compressed, codec) {
    const shrinks = compressed.length < uncompressed.length;
    if (!shrinks) {
        return rawPayloadChoice(uncompressed);
    }
    // The zstd level is only recorded for zstd payloads.
    const zstdLevel = codec === CODEC_ZSTD ? MSV5_ZSTD_LEVEL : 0;
    return { payload: compressed, codec, zstdLevel };
}
|
|
2873
|
+
/**
 * Explicit `zstd` mode (sync): always compresses with zstd, even if the result is not smaller.
 * @param uncompressed Payload bytes to compress.
 * @returns `{ payload, codec, zstdLevel }` for the zstd-compressed payload.
 * @throws {Error} When the runtime has no zstd support.
 */
function zstdPayloadChoiceSync(uncompressed) {
    if (zstdAvailable()) {
        const payload = zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed));
        return { payload, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
    }
    throw zstdUnavailableWriteError();
}
|
|
2863
2880
|
/**
|
|
2864
2881
|
* Async zstd via {@link zstdCompress} (not {@link zstdCompressSync}).
|
|
@@ -2877,16 +2894,91 @@ function zstdCompressAsync(uncompressed) {
|
|
|
2877
2894
|
});
|
|
2878
2895
|
});
|
|
2879
2896
|
}
|
|
2880
|
-
async function
|
|
2897
|
+
/**
 * Explicit `zstd` mode (async): always compresses with zstd via the asynchronous helper.
 * @param uncompressed Payload bytes to compress.
 * @returns Promise of `{ payload, codec, zstdLevel }` for the zstd-compressed payload.
 * @throws {Error} (as a rejection) when the runtime has no zstd support.
 */
async function zstdPayloadChoiceAsync(uncompressed) {
    if (zstdAvailable()) {
        const payload = await zstdCompressAsync(uncompressed);
        return { payload, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
    }
    throw zstdUnavailableWriteError();
}
|
|
2904
|
+
/**
 * Explicit `zlib` mode (sync): always deflates the payload, regardless of resulting size.
 * @param uncompressed Payload bytes to compress.
 * @returns `{ payload, codec, zstdLevel }` with the zlib codec and a zstd level of 0.
 */
function zlibPayloadChoiceSync(uncompressed) {
    return {
        payload: zlib.deflateSync(uncompressed),
        codec: CODEC_ZLIB,
        zstdLevel: 0,
    };
}
|
|
2908
|
+
/**
 * Promise adapter around the callback-style `zlib.deflate`.
 * @param uncompressed Bytes to deflate.
 * @returns Promise resolving to the deflated bytes, or rejecting with zlib's error.
 */
function zlibCompressAsync(uncompressed) {
    return new Promise((resolve, reject) => {
        const settle = (err, compressed) => {
            if (err == null) {
                resolve(compressed);
            }
            else {
                reject(err);
            }
        };
        zlib.deflate(uncompressed, settle);
    });
}
|
|
2919
|
+
/**
 * Explicit `zlib` mode (async): always deflates the payload, regardless of resulting size.
 * @param uncompressed Payload bytes to compress.
 * @returns Promise of `{ payload, codec, zstdLevel }` with the zlib codec and zstd level 0.
 */
async function zlibPayloadChoiceAsync(uncompressed) {
    const payload = await zlibCompressAsync(uncompressed);
    return { payload, codec: CODEC_ZLIB, zstdLevel: 0 };
}
|
|
2923
|
+
/** Synchronous compressors used by `auto` mode, keyed by codec name. */
const autoSyncCompressors = {
    zstd: (bytes) => {
        return zlib.zstdCompressSync(bytes, msv5ZstdCompressOptions(bytes));
    },
    zlib: (bytes) => {
        return zlib.deflateSync(bytes);
    },
};
/** Promise-returning compressors used by `auto` mode, keyed by codec name. */
const autoAsyncCompressors = { zstd: zstdCompressAsync, zlib: zlibCompressAsync };
|
|
2931
|
+
/**
 * `auto` mode (sync): skips compression for tiny payloads, otherwise makes exactly one
 * attempt — zstd when the runtime has it, zlib (after a one-time warning) when it does not —
 * and keeps the result only if it is smaller than raw.
 * @param uncompressed Payload bytes.
 * @param compressors Object with synchronous `zstd` and `zlib` compress functions.
 * @returns `{ payload, codec, zstdLevel }` for the chosen representation.
 */
function autoPayloadChoice(uncompressed, compressors) {
    const tooSmall = uncompressed.length < MSV5_MIN_COMPRESS_BYTES;
    if (tooSmall) {
        return rawPayloadChoice(uncompressed);
    }
    if (zstdAvailable()) {
        const viaZstd = compressors.zstd(uncompressed);
        return pickAutoPayloadCodec(uncompressed, viaZstd, CODEC_ZSTD);
    }
    warnZstdSaveFallbackOnce();
    const viaZlib = compressors.zlib(uncompressed);
    return pickAutoPayloadCodec(uncompressed, viaZlib, CODEC_ZLIB);
}
|
|
2941
|
+
/**
 * `auto` mode (async): skips compression for tiny payloads, otherwise awaits exactly one
 * attempt — zstd when the runtime has it, zlib (after a one-time warning) when it does not —
 * and keeps the result only if it is smaller than raw.
 * @param uncompressed Payload bytes.
 * @param compressors Object with promise-returning `zstd` and `zlib` compress functions.
 * @returns Promise of `{ payload, codec, zstdLevel }` for the chosen representation.
 */
async function autoPayloadChoiceAsync(uncompressed, compressors) {
    const tooSmall = uncompressed.length < MSV5_MIN_COMPRESS_BYTES;
    if (tooSmall) {
        return rawPayloadChoice(uncompressed);
    }
    if (zstdAvailable()) {
        const viaZstd = await compressors.zstd(uncompressed);
        return pickAutoPayloadCodec(uncompressed, viaZstd, CODEC_ZSTD);
    }
    warnZstdSaveFallbackOnce();
    const viaZlib = await compressors.zlib(uncompressed);
    return pickAutoPayloadCodec(uncompressed, viaZlib, CODEC_ZLIB);
}
|
|
2951
|
+
/**
 * Selects and applies the payload codec for a synchronous save.
 * @param uncompressed Concatenated, uncompressed payload bytes.
 * @param compression One of `'raw'`, `'zstd'`, `'zlib'` or `'auto'` (the default).
 * @returns `{ payload, codec, zstdLevel }` describing the bytes to write.
 * @throws {TypeError} When `compression` is not a recognised mode.
 * @throws {Error} When `'zstd'` is requested on a runtime without zstd support.
 */
function choosePayloadCodecSync(uncompressed, compression = 'auto') {
    switch (compression) {
        case 'raw':
            return rawPayloadChoice(uncompressed);
        case 'zstd':
            return zstdPayloadChoiceSync(uncompressed);
        case 'zlib':
            return zlibPayloadChoiceSync(uncompressed);
        case 'auto':
            return autoPayloadChoice(uncompressed, autoSyncCompressors);
        default:
            // The compiled exhaustiveness check used to return the unknown value itself, which
            // callers then destructured into undefined payload/codec. Fail loudly instead.
            throw new TypeError(`MSv5 unknown compression mode: ${String(compression)}`);
    }
}
|
|
2967
|
+
/**
 * Selects and applies the payload codec for an asynchronous save.
 * @param uncompressed Concatenated, uncompressed payload bytes.
 * @param compression One of `'raw'`, `'zstd'`, `'zlib'` or `'auto'` (the default).
 * @returns Promise of `{ payload, codec, zstdLevel }` describing the bytes to write.
 * @throws {TypeError} (as a rejection) when `compression` is not a recognised mode.
 * @throws {Error} (as a rejection) when `'zstd'` is requested without zstd support.
 */
async function choosePayloadCodecAsync(uncompressed, compression = 'auto') {
    switch (compression) {
        case 'raw':
            return rawPayloadChoice(uncompressed);
        case 'zstd':
            return await zstdPayloadChoiceAsync(uncompressed);
        case 'zlib':
            return await zlibPayloadChoiceAsync(uncompressed);
        case 'auto':
            return await autoPayloadChoiceAsync(uncompressed, autoAsyncCompressors);
        default:
            // The compiled exhaustiveness check used to resolve with the unknown value itself,
            // which callers then destructured into undefined payload/codec. Reject instead.
            throw new TypeError(`MSv5 unknown compression mode: ${String(compression)}`);
    }
}
|
|
2891
2983
|
function concatAndValidateSections(rawSections) {
|
|
2892
2984
|
if (rawSections.length !== MSV5_SECTION_COUNT) {
|
|
@@ -2941,16 +3033,16 @@ function buildMsv5AssembledFile(globalFlags, entries, uncompressedLength, payloa
|
|
|
2941
3033
|
}
|
|
2942
3034
|
/**
 * Assembles an MSv5 file synchronously. On disk the layout is: header + catalogue
 * (uncompressed offsets) + **one** payload blob, which is either the raw concatenation of the
 * sections or a single compressed stream over it.
 * @param globalFlags Flags forwarded to the file builder.
 * @param rawSections Section buffers to concatenate and validate.
 * @param compression Payload compression mode; defaults to `'auto'`.
 * @returns Whatever `buildMsv5AssembledFile` produces for the assembled file.
 */
function assembleMsv5File(globalFlags, rawSections, compression = 'auto') {
    const combined = concatAndValidateSections(rawSections);
    const choice = choosePayloadCodecSync(combined.uncompressed, compression);
    return buildMsv5AssembledFile(globalFlags, combined.entries, combined.uncompressed.length, combined.payloadCrc32, choice.payload, choice.codec, choice.zstdLevel);
}
|
|
2951
|
-
async function assembleMsv5FileAsync(globalFlags, rawSections) {
|
|
3043
|
+
/**
 * Asynchronous counterpart of the MSv5 file assembler: same validation and layout, but the
 * payload codec is chosen and applied through the promise-based path.
 * @param globalFlags Flags forwarded to the file builder.
 * @param rawSections Section buffers to concatenate and validate.
 * @param compression Payload compression mode; defaults to `'auto'`.
 * @returns Promise of whatever `buildMsv5AssembledFile` produces.
 */
async function assembleMsv5FileAsync(globalFlags, rawSections, compression = 'auto') {
    const combined = concatAndValidateSections(rawSections);
    const choice = await choosePayloadCodecAsync(combined.uncompressed, compression);
    return buildMsv5AssembledFile(globalFlags, combined.entries, combined.uncompressed.length, combined.payloadCrc32, choice.payload, choice.codec, choice.zstdLevel);
}
|
|
2956
3048
|
function readMsv5SectionDirectory(buf) {
|
|
@@ -2994,11 +3086,11 @@ function sectionsFromPayload(payload, directory, payloadCrc32) {
|
|
|
2994
3086
|
return out;
|
|
2995
3087
|
});
|
|
2996
3088
|
}
|
|
2997
|
-
/** Streaming
|
|
2998
|
-
* No `maxOutputLength` on
|
|
2999
|
-
*
|
|
3000
|
-
*
|
|
3001
|
-
function
|
|
3089
|
+
/** Streaming compressed reader: keeps only one section in memory at a time.
|
|
3090
|
+
* No `maxOutputLength` on Transform streams: output is bounded by accumulating `streamOffset`
|
|
3091
|
+
* against the header's `uncompressedLength` (same 1 GiB cap checked upfront). Sync load uses
|
|
3092
|
+
* `maxOutputLength` because it materializes the whole payload at once. */
|
|
3093
|
+
function collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32) {
|
|
3002
3094
|
if (uncompressedLength > MSV5_MAX_UNCOMPRESSED_BYTES) {
|
|
3003
3095
|
throw new Error('MSv5 payload exceeds 1 GiB limit');
|
|
3004
3096
|
}
|
|
@@ -3018,7 +3110,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3018
3110
|
}
|
|
3019
3111
|
function consume(chunk) {
|
|
3020
3112
|
if (streamOffset + chunk.length > uncompressedLength) {
|
|
3021
|
-
throw new Error(
|
|
3113
|
+
throw new Error(MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH);
|
|
3022
3114
|
}
|
|
3023
3115
|
payloadCrc = crc32Update(payloadCrc, chunk);
|
|
3024
3116
|
let off = 0;
|
|
@@ -3054,7 +3146,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3054
3146
|
function finish() {
|
|
3055
3147
|
emitEmptySections();
|
|
3056
3148
|
if (streamOffset !== uncompressedLength || sectionId !== directory.length) {
|
|
3057
|
-
throw new Error(
|
|
3149
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3058
3150
|
}
|
|
3059
3151
|
if (payloadCrc !== payloadCrc32) {
|
|
3060
3152
|
throw new Error('MSv5 payload CRC mismatch');
|
|
@@ -3063,9 +3155,15 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3063
3155
|
return { sections, consume, finish };
|
|
3064
3156
|
}
|
|
3065
3157
|
/**
 * Streams a zstd-compressed payload through the shared compressed-stream loader.
 * @returns Whatever `loadMsv5SectionsFromCompressedStream` returns for these arguments.
 */
function loadMsv5SectionsFromZstdStream(compressed, directory, uncompressedLength, payloadCrc32) {
    // The decompressor is created lazily by the shared loader via this factory.
    const createZstdStream = () => zlib.createZstdDecompress();
    return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createZstdStream);
}
|
|
3160
|
+
/**
 * Streams a zlib (deflate) payload through the shared compressed-stream loader.
 * @returns Whatever `loadMsv5SectionsFromCompressedStream` returns for these arguments.
 */
function loadMsv5SectionsFromZlibStream(compressed, directory, uncompressedLength, payloadCrc32) {
    // The inflater is created lazily by the shared loader via this factory.
    const createInflateStream = () => zlib.createInflate();
    return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createInflateStream);
}
|
|
3163
|
+
function loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createStream) {
|
|
3066
3164
|
return new Promise((resolve, reject) => {
|
|
3067
|
-
const collector =
|
|
3068
|
-
const stream =
|
|
3165
|
+
const collector = collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32);
|
|
3166
|
+
const stream = createStream();
|
|
3069
3167
|
stream.on('data', (chunk) => {
|
|
3070
3168
|
try {
|
|
3071
3169
|
collector.consume(chunk);
|
|
@@ -3123,29 +3221,39 @@ function preparePayload(fileBuf, directory) {
|
|
|
3123
3221
|
payloadCrc32,
|
|
3124
3222
|
};
|
|
3125
3223
|
}
|
|
3126
|
-
|
|
3127
|
-
function loadMsv5Sections(fileBuf, directory) {
|
|
3128
|
-
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
3129
|
-
if (payloadCodec === CODEC_RAW) {
|
|
3130
|
-
return sectionsFromPayload(slice, directory, payloadCrc32);
|
|
3131
|
-
}
|
|
3224
|
+
/**
 * Decompresses a whole payload in one synchronous call.
 * @param payloadCodec Codec id read from the file (zstd or zlib).
 * @param slice Compressed payload bytes.
 * @param uncompressedLength Length the header declares for the decoded payload.
 * @returns The decoded payload.
 * @throws {Error} For an unknown codec, for zstd without runtime support, or when the decoded
 *   length differs from `uncompressedLength`.
 */
function decompressPayloadSync(payloadCodec, slice, uncompressedLength) {
    // Native output cap equals the global payload limit for both codecs.
    const limits = { maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES };
    let decoded;
    switch (payloadCodec) {
        case CODEC_ZSTD: {
            if (!zstdAvailable()) {
                throw zstdUnavailableReadError();
            }
            decoded = zlib.zstdDecompressSync(slice, limits);
            break;
        }
        case CODEC_ZLIB: {
            decoded = zlib.inflateSync(slice, limits);
            break;
        }
        default:
            throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
    }
    if (decoded.length !== uncompressedLength) {
        throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
    }
    return decoded;
}
|
|
3248
|
+
/**
 * Synchronous section load. The whole uncompressed payload is materialized at once, so peak
 * RAM is roughly the full payload size (the async path exists to bound it).
 * @param fileBuf Complete file bytes.
 * @param directory Section directory describing the payload layout.
 * @returns The sections produced by `sectionsFromPayload`.
 */
function loadMsv5Sections(fileBuf, directory) {
    const prepared = preparePayload(fileBuf, directory);
    // Raw payloads are used directly; anything else is decoded first.
    const payload = prepared.payloadCodec === CODEC_RAW
        ? prepared.slice
        : decompressPayloadSync(prepared.payloadCodec, prepared.slice, prepared.uncompressedLength);
    return sectionsFromPayload(payload, directory, prepared.payloadCrc32);
}
|
|
3149
3257
|
/** Streaming load; peak main-thread RAM ≈ largest single section (+ file buffer). */
|
|
3150
3258
|
async function loadMsv5SectionsAsync(fileBuf, directory) {
|
|
3151
3259
|
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
@@ -3158,6 +3266,9 @@ async function loadMsv5SectionsAsync(fileBuf, directory) {
|
|
|
3158
3266
|
}
|
|
3159
3267
|
return loadMsv5SectionsFromZstdStream(slice, directory, uncompressedLength, payloadCrc32);
|
|
3160
3268
|
}
|
|
3269
|
+
if (payloadCodec === CODEC_ZLIB) {
|
|
3270
|
+
return loadMsv5SectionsFromZlibStream(slice, directory, uncompressedLength, payloadCrc32);
|
|
3271
|
+
}
|
|
3161
3272
|
throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
|
|
3162
3273
|
}
|
|
3163
3274
|
function isMsv5Buffer(buf) {
|
|
@@ -3446,7 +3557,7 @@ function resolvePackedTree(snap, termTree, packedTermIndex) {
|
|
|
3446
3557
|
validateTermTreeLeaves(tree, termCount);
|
|
3447
3558
|
return fromRadixTree(tree, termCount);
|
|
3448
3559
|
}
|
|
3449
|
-
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
3560
|
+
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression) {
|
|
3450
3561
|
var _a;
|
|
3451
3562
|
validateFrozenSnapshotNumeric(snap);
|
|
3452
3563
|
const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
@@ -3475,9 +3586,9 @@ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
|
3475
3586
|
postingsWire.docIds,
|
|
3476
3587
|
postingsWire.freqs,
|
|
3477
3588
|
];
|
|
3478
|
-
return assembleMsv5File(globalFlags, rawSections).buffer;
|
|
3589
|
+
return assembleMsv5File(globalFlags, rawSections, compression).buffer;
|
|
3479
3590
|
}
|
|
3480
|
-
async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
3591
|
+
async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression) {
|
|
3481
3592
|
var _a;
|
|
3482
3593
|
validateFrozenSnapshotNumeric(snap);
|
|
3483
3594
|
const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
@@ -3506,7 +3617,7 @@ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
|
3506
3617
|
postingsWire.docIds,
|
|
3507
3618
|
postingsWire.freqs,
|
|
3508
3619
|
];
|
|
3509
|
-
return (await assembleMsv5FileAsync(globalFlags, rawSections)).buffer;
|
|
3620
|
+
return (await assembleMsv5FileAsync(globalFlags, rawSections, compression)).buffer;
|
|
3510
3621
|
}
|
|
3511
3622
|
|
|
3512
3623
|
function validateMsv5Container(buf) {
|
|
@@ -3583,12 +3694,12 @@ async function decodeFrozenSnapshotMsv5Async(buf, hints) {
|
|
|
3583
3694
|
}
|
|
3584
3695
|
|
|
3585
3696
|
/**
 * Serializes a frozen snapshot into a binary buffer by delegating to the MSv5 encoder.
 * @param compression Payload compression mode, passed through unchanged.
 */
function encodeFrozenSnapshot(snap, termTree, packedTermIndex, compression) {
    const encoded = encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression);
    return encoded;
}
|
|
3589
|
-
/** Async encoder; uses
|
|
3590
|
-
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
|
|
3591
|
-
return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex);
|
|
3700
|
+
/**
 * Asynchronous encoder: applies the selected payload compression without blocking the event
 * loop, by delegating to the async MSv5 encoder.
 * @param compression Payload compression mode, passed through unchanged.
 * @returns The value returned by the async MSv5 encoder.
 */
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex, compression) {
    const pending = encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression);
    return pending;
}
|
|
3593
3704
|
|
|
3594
3705
|
const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
|
|
@@ -3605,7 +3716,7 @@ function decodeFrozenSnapshot(buf, hints) {
|
|
|
3605
3716
|
}
|
|
3606
3717
|
throw invalidFrozenIndex('Unsupported frozen binary snapshot');
|
|
3607
3718
|
}
|
|
3608
|
-
/** Async frozen snapshot decode (streaming
|
|
3719
|
+
/** Async frozen snapshot decode (streaming decompression when needed). */
|
|
3609
3720
|
async function decodeFrozenSnapshotAsync(buf, hints) {
|
|
3610
3721
|
assertBufferLength(buf, 8);
|
|
3611
3722
|
const version = buf.readUInt16LE(4);
|
|
@@ -4077,56 +4188,76 @@ function normalizeStringQuery(query, searchOptions, params) {
|
|
|
4077
4188
|
...params.globalSearchOptions,
|
|
4078
4189
|
...searchOptions,
|
|
4079
4190
|
};
|
|
4080
|
-
const
|
|
4081
|
-
|
|
4082
|
-
|
|
4191
|
+
const tokens = options.tokenize(query);
|
|
4192
|
+
const terms = [];
|
|
4193
|
+
for (const token of tokens) {
|
|
4194
|
+
const processed = options.processTerm(token);
|
|
4195
|
+
if (Array.isArray(processed)) {
|
|
4196
|
+
for (const term of processed) {
|
|
4197
|
+
if (term)
|
|
4198
|
+
terms.push(term);
|
|
4199
|
+
}
|
|
4200
|
+
}
|
|
4201
|
+
else if (processed) {
|
|
4202
|
+
terms.push(processed);
|
|
4203
|
+
}
|
|
4204
|
+
}
|
|
4205
|
+
const toSpec = termToQuerySpec(options);
|
|
4206
|
+
const specs = new Array(terms.length);
|
|
4207
|
+
for (let i = 0; i < terms.length; i++) {
|
|
4208
|
+
specs[i] = toSpec(terms[i], i, terms);
|
|
4209
|
+
}
|
|
4210
|
+
const { fuzzy: fuzzyWeight, prefix: prefixWeight } = {
|
|
4211
|
+
...defaultSearchOptions.weights,
|
|
4212
|
+
...options.weights,
|
|
4213
|
+
};
|
|
4083
4214
|
return {
|
|
4084
4215
|
options,
|
|
4085
|
-
specs
|
|
4216
|
+
specs,
|
|
4086
4217
|
operator: options.combineWith,
|
|
4218
|
+
fieldBoosts: fieldBoostsForQuery(options, params.fields),
|
|
4219
|
+
fuzzyWeight,
|
|
4220
|
+
prefixWeight,
|
|
4087
4221
|
};
|
|
4088
4222
|
}
|
|
4089
4223
|
/**
 * Wraps a term index in a deferred descriptor so the term string is only looked up on demand.
 * @param indexView Object exposing `resolveTermByIndex(termIndex)`.
 * @param termIndex Index of the term to resolve later.
 * @returns `{ kind: 'lazy', resolve }`, where `resolve()` performs the lookup.
 */
function lazyIndexedTerm(indexView, termIndex) {
    const resolve = () => indexView.resolveTermByIndex(termIndex);
    return { kind: 'lazy', resolve };
}
|
|
4092
|
-
function visitQuerySpecForScoring(query,
|
|
4226
|
+
/**
 * Walks every index term relevant to one query spec and reports each to `visit` with a weight:
 * first the exact term (weight 1), then prefix expansions, then fuzzy matches.
 * @param query Query spec; `term` and `prefix` are read here.
 * @param normalized Normalized query; supplies `options`, `prefixWeight` and `fuzzyWeight`.
 * @param params Engine params; supplies `indexView`.
 * @param visit Callback `(fieldTermData, term, weight)`; for the exact term the data is
 *   `undefined` when the term is not in the index.
 */
function visitQuerySpecForScoring(query, normalized, params, visit) {
    const { indexView } = params;
    const { fuzzyWeight, options, prefixWeight } = normalized;
    const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);

    // Exact term: always visited, even when absent from the index.
    const exactTi = indexView.resolveTermIndex(query.term);
    let exactData;
    if (exactTi != null) {
        exactData = indexView.fieldTermData(exactTi);
    }
    visit(exactData, query.term, 1);

    // Prefix hits only need remembering when a fuzzy pass will follow and could repeat them.
    const seenPrefix = query.prefix && maxDistance ? new Set() : undefined;
    if (query.prefix) {
        for (const hit of indexView.getPrefixMatchesByIndex(query.term)) {
            const { termIndex, length } = hit;
            const distance = length - query.term.length;
            if (distance) {
                if (seenPrefix !== undefined) {
                    seenPrefix.add(termIndex);
                }
                visit(indexView.fieldTermData(termIndex), lazyIndexedTerm(indexView, termIndex), prefixWeight * length / (length + 0.3 * distance));
            }
        }
    }

    if (maxDistance) {
        for (const hit of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
            const { termIndex, length, distance } = hit;
            // Skip the exact term (distance 0).
            if (!distance) {
                continue;
            }
            // Skip terms the prefix pass already reported.
            if (seenPrefix !== undefined && seenPrefix.has(termIndex)) {
                continue;
            }
            visit(indexView.fieldTermData(termIndex), lazyIndexedTerm(indexView, termIndex), fuzzyWeight * length / (length + distance));
        }
    }
}
|
|
4117
|
-
function executeQuerySpecInternal(query,
|
|
4118
|
-
const
|
|
4119
|
-
const fieldBoosts = fieldBoostsForQuery(options, params.fields);
|
|
4250
|
+
/**
 * Scores one query spec: every term reported by `visitQuerySpecForScoring` is folded into a
 * result map through `aggregateTerm`.
 * @param query Query spec being evaluated.
 * @param normalized Normalized query; supplies `fieldBoosts` and `options`.
 * @param params Engine params; supplies `aggregateContext`.
 * @param allowedDocs Optional document restriction; forwarded only when non-null.
 * @returns The Map populated by `aggregateTerm`.
 */
function executeQuerySpecInternal(query, normalized, params, allowedDocs) {
    const { fieldBoosts, options } = normalized;
    const results = new Map();
    let termOptions;
    if (allowedDocs != null) {
        termOptions = { allowedDocs };
    }
    const scoreTerm = (data, derivedTerm, termWeight) => {
        aggregateTerm(query.term, derivedTerm, termWeight, query.termBoost, data, fieldBoosts, params.aggregateContext, options.boostDocument, options.bm25, results, termOptions);
    };
    visitQuerySpecForScoring(query, normalized, params, scoreTerm);
    return results;
}
|
|
4127
|
-
function collectDocIdsForQuerySpec(query,
|
|
4128
|
-
const
|
|
4129
|
-
const fieldBoosts = fieldBoostsForQuery(options, params.fields);
|
|
4259
|
+
function collectDocIdsForQuerySpec(query, normalized, params, allowedDocs) {
|
|
4260
|
+
const { fieldBoosts, options } = normalized;
|
|
4130
4261
|
const docIds = new Set();
|
|
4131
4262
|
const { indexView, aggregateContext } = params;
|
|
4132
4263
|
const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
|
|
@@ -4134,19 +4265,19 @@ function collectDocIdsForQuerySpec(query, searchOptions, params, allowedDocs) {
|
|
|
4134
4265
|
if (exactTi != null) {
|
|
4135
4266
|
indexView.collectDocIds(exactTi, fieldBoosts, aggregateContext, docIds, allowedDocs);
|
|
4136
4267
|
}
|
|
4137
|
-
const seenPrefix = new Set();
|
|
4268
|
+
const seenPrefix = query.prefix && maxDistance ? new Set() : undefined;
|
|
4138
4269
|
if (query.prefix) {
|
|
4139
4270
|
for (const { termIndex, length } of indexView.getPrefixMatchesByIndex(query.term)) {
|
|
4140
4271
|
const distance = length - query.term.length;
|
|
4141
4272
|
if (!distance)
|
|
4142
4273
|
continue;
|
|
4143
|
-
seenPrefix.add(termIndex);
|
|
4274
|
+
seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.add(termIndex);
|
|
4144
4275
|
indexView.collectDocIds(termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
|
|
4145
4276
|
}
|
|
4146
4277
|
}
|
|
4147
4278
|
if (maxDistance) {
|
|
4148
4279
|
for (const { termIndex, distance } of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
|
|
4149
|
-
if (!distance || seenPrefix.has(termIndex))
|
|
4280
|
+
if (!distance || (seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.has(termIndex)))
|
|
4150
4281
|
continue;
|
|
4151
4282
|
indexView.collectDocIds(termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
|
|
4152
4283
|
}
|
|
@@ -4277,14 +4408,15 @@ function collectDocIdsForQueryInternal(query, searchOptions, params, allowedDocs
|
|
|
4277
4408
|
if (typeof query !== 'string') {
|
|
4278
4409
|
throw new Error('FrozenMiniSearch: invalid query');
|
|
4279
4410
|
}
|
|
4280
|
-
const
|
|
4411
|
+
const normalized = normalizeStringQuery(query, searchOptions, params);
|
|
4412
|
+
const { specs, operator } = normalized;
|
|
4281
4413
|
const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
|
|
4282
4414
|
if (specs.length <= 1) {
|
|
4283
4415
|
return specs.length === 1
|
|
4284
|
-
? collectDocIdsForQuerySpec(specs[0],
|
|
4416
|
+
? collectDocIdsForQuerySpec(specs[0], normalized, params, allowedDocs)
|
|
4285
4417
|
: new Set();
|
|
4286
4418
|
}
|
|
4287
|
-
return collectCombinedDocIds(specs, combineWith, (spec, branchAllowed) => collectDocIdsForQuerySpec(spec,
|
|
4419
|
+
return collectCombinedDocIds(specs, combineWith, (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs);
|
|
4288
4420
|
}
|
|
4289
4421
|
function executeWildcardQuery(searchOptions, params) {
|
|
4290
4422
|
const results = new Map();
|
|
@@ -4314,12 +4446,13 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
|
|
|
4314
4446
|
if (typeof query !== 'string') {
|
|
4315
4447
|
throw new Error('FrozenMiniSearch: invalid query');
|
|
4316
4448
|
}
|
|
4317
|
-
const
|
|
4449
|
+
const normalized = normalizeStringQuery(query, searchOptions, params);
|
|
4450
|
+
const { specs, operator } = normalized;
|
|
4318
4451
|
const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
|
|
4319
4452
|
if (useGatedEvaluation(run, specs.length, combineWith, false)) {
|
|
4320
|
-
return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec,
|
|
4453
|
+
return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec, normalized, params, branchAllowed), (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs);
|
|
4321
4454
|
}
|
|
4322
|
-
const results = specs.map(spec => executeQuerySpecInternal(spec,
|
|
4455
|
+
const results = specs.map(spec => executeQuerySpecInternal(spec, normalized, params, allowedDocs));
|
|
4323
4456
|
return combineResults(results, combineWith);
|
|
4324
4457
|
}
|
|
4325
4458
|
function executeQuery(query, searchOptions, params) {
|
|
@@ -4510,6 +4643,7 @@ function materializeOwnedSnapshot(params, mode) {
|
|
|
4510
4643
|
/**
 * Returns the memory breakdown reported by a frozen index.
 * @param frozen Object exposing a `memoryBreakdown()` method.
 * @returns Whatever `frozen.memoryBreakdown()` returns.
 */
function frozenMemoryBreakdown(frozen) {
    const breakdown = frozen.memoryBreakdown();
    return breakdown;
}
|
|
4646
|
+
/** Shared stored-fields getter for indexes without stored fields: always yields `undefined`. */
const noStoredFields = () => {
    return undefined;
};
|
|
4513
4647
|
function assertFieldsMatchSnapshot(optionsFields, snapFieldIds) {
|
|
4514
4648
|
const snapNames = Object.keys(snapFieldIds).sort();
|
|
4515
4649
|
const optNames = [...optionsFields].sort();
|
|
@@ -4563,24 +4697,31 @@ class FrozenMiniSearch {
|
|
|
4563
4697
|
this._termCount = params.termCount;
|
|
4564
4698
|
this._postings = params.postings;
|
|
4565
4699
|
this._fieldTermFlyweight = createFrozenFieldTermFlyweight(this._postings);
|
|
4700
|
+
this._hasStoredFields = this._storedFields.kind !== 'none';
|
|
4566
4701
|
this._aggregateContext = {
|
|
4567
4702
|
documentCount: this._documentCount,
|
|
4568
4703
|
avgFieldLength: this._avgFieldLength,
|
|
4569
4704
|
fieldIds: this._fieldIds,
|
|
4570
4705
|
getFieldLength: (docId, fieldId) => this.getFieldLength(docId, fieldId),
|
|
4571
4706
|
getExternalId: docId => this._externalIds[docId],
|
|
4572
|
-
getStoredFields:
|
|
4707
|
+
getStoredFields: this._hasStoredFields
|
|
4708
|
+
? docId => readStoredFields(this._storedFields, docId)
|
|
4709
|
+
: noStoredFields,
|
|
4573
4710
|
};
|
|
4574
4711
|
this._queryEngineParams = {
|
|
4575
4712
|
fields: this._options.fields,
|
|
4576
4713
|
globalSearchOptions: this._options.searchOptions,
|
|
4577
4714
|
tokenize: this._options.tokenize,
|
|
4578
4715
|
processTerm: this._options.processTerm,
|
|
4579
|
-
indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight,
|
|
4580
|
-
|
|
4581
|
-
|
|
4582
|
-
|
|
4583
|
-
|
|
4716
|
+
indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight, this._hasStoredFields
|
|
4717
|
+
? (callback) => {
|
|
4718
|
+
forEachLiveShortId(this._nextId, this._externalIds, (shortId, id) => {
|
|
4719
|
+
callback(shortId, id, readStoredFields(this._storedFields, shortId));
|
|
4720
|
+
});
|
|
4721
|
+
}
|
|
4722
|
+
: (callback) => {
|
|
4723
|
+
forEachLiveShortId(this._nextId, this._externalIds, callback);
|
|
4724
|
+
}),
|
|
4584
4725
|
aggregateContext: this._aggregateContext,
|
|
4585
4726
|
};
|
|
4586
4727
|
}
|
|
@@ -4637,14 +4778,16 @@ class FrozenMiniSearch {
|
|
|
4637
4778
|
return shortId == null ? undefined : readStoredFields(this._storedFields, shortId);
|
|
4638
4779
|
}
|
|
4639
4780
|
search(query, searchOptions = {}) {
|
|
4640
|
-
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId],
|
|
4781
|
+
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], this._hasStoredFields
|
|
4782
|
+
? docId => readStoredFields(this._storedFields, docId)
|
|
4783
|
+
: undefined);
|
|
4641
4784
|
}
|
|
4642
4785
|
autoSuggest(queryString, options = {}) {
|
|
4643
4786
|
const merged = { ...this._options.autoSuggestOptions, ...options };
|
|
4644
4787
|
return autoSuggestFromSearch((q, o) => this.search(q, o), queryString, merged);
|
|
4645
4788
|
}
|
|
4646
4789
|
/** Serialize this index as a frozen binary snapshot (synchronous). */
|
|
4647
|
-
saveBinarySync() {
|
|
4790
|
+
saveBinarySync(saveOptions = {}) {
|
|
4648
4791
|
return encodeFrozenSnapshot({
|
|
4649
4792
|
documentCount: this._documentCount,
|
|
4650
4793
|
nextId: this._nextId,
|
|
@@ -4658,10 +4801,10 @@ class FrozenMiniSearch {
|
|
|
4658
4801
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4659
4802
|
treeShape: [],
|
|
4660
4803
|
postings: this._postings,
|
|
4661
|
-
}, undefined, this._index);
|
|
4804
|
+
}, undefined, this._index, saveOptions.compression);
|
|
4662
4805
|
}
|
|
4663
|
-
/** Non-blocking
|
|
4664
|
-
async saveBinaryAsync() {
|
|
4806
|
+
/** Non-blocking snapshot serialization with the selected compression codec. */
|
|
4807
|
+
async saveBinaryAsync(saveOptions = {}) {
|
|
4665
4808
|
return encodeFrozenSnapshotAsync({
|
|
4666
4809
|
documentCount: this._documentCount,
|
|
4667
4810
|
nextId: this._nextId,
|
|
@@ -4675,7 +4818,7 @@ class FrozenMiniSearch {
|
|
|
4675
4818
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4676
4819
|
treeShape: [],
|
|
4677
4820
|
postings: this._postings,
|
|
4678
|
-
}, undefined, this._index);
|
|
4821
|
+
}, undefined, this._index, saveOptions.compression);
|
|
4679
4822
|
}
|
|
4680
4823
|
/** Load a frozen binary snapshot. */
|
|
4681
4824
|
static loadBinarySync(buffer, options = {}) {
|
|
@@ -4684,7 +4827,7 @@ class FrozenMiniSearch {
|
|
|
4684
4827
|
const snap = decodeFrozenSnapshot(buffer, { storeFields });
|
|
4685
4828
|
return FrozenMiniSearch.fromBinarySnapshot(snap, options);
|
|
4686
4829
|
}
|
|
4687
|
-
/** Load a frozen binary snapshot with streaming
|
|
4830
|
+
/** Load a frozen binary snapshot with streaming decompression when needed (bounded memory). */
|
|
4688
4831
|
static async loadBinaryAsync(buffer, options = {}) {
|
|
4689
4832
|
var _a;
|
|
4690
4833
|
const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
|