@yoch/frozenminisearch 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/index.js CHANGED
@@ -256,7 +256,9 @@ function finalizeSearchResults(params) {
256
256
  queryTerms: terms,
257
257
  match,
258
258
  };
259
- Object.assign(result, getStoredFields(docId));
259
+ if (getStoredFields != null) {
260
+ Object.assign(result, getStoredFields(docId));
261
+ }
260
262
  if (filter == null || filter(result)) {
261
263
  results.push(result);
262
264
  }
@@ -1640,9 +1642,11 @@ const FLAG_FL_U8 = 8;
1640
1642
  const FLAG_FL_U16 = 16;
1641
1643
  const FLAG_FREQ_U16 = 32;
1642
1644
  const CODEC_RAW = 0;
1645
+ /** Deflate/inflate (`node:zlib`) on the whole payload. */
1646
+ const CODEC_ZLIB = 1;
1643
1647
  /** Zstandard (`node:zlib`) on the whole payload. */
1644
1648
  const CODEC_ZSTD = 3;
1645
- /** Single concatenated payload, one zstd stream (or raw). */
1649
+ /** Single concatenated payload, one compressed stream (or raw). */
1646
1650
  const MSV5_FORMAT_REV_PAYLOAD = 1;
1647
1651
  /** Do not compress payloads smaller than this (bytes). */
1648
1652
  const MSV5_MIN_COMPRESS_BYTES = 64;
@@ -2762,33 +2766,43 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
2762
2766
  };
2763
2767
  }
2764
2768
 
2765
- /** Hard cap on the uncompressed payload, rejected before allocation (zstd-bomb guard).
2769
+ /** Hard cap on the uncompressed payload, rejected before allocation (decompression-bomb guard).
2766
2770
  * This is the single trust boundary for untrusted snapshots: {@link readPayloadMeta} rejects
2767
2771
  * headers above this size; sync decompress uses the same cap via `maxOutputLength`.
2768
2772
  * A malicious header can still declare up to 1 GiB — no tighter native limit helps without
2769
2773
  * trusting `uncompressedLength` from that same header. Semantic integrity (length match,
2770
2774
  * payload CRC, per-section CRC) is enforced after decode. */
2771
2775
  const MSV5_MAX_UNCOMPRESSED_BYTES = 1024 * 1024 * 1024;
2776
+ const MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH = 'MSv5 compressed payload exceeds declared length';
2777
+ const MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH = 'MSv5 decompressed payload length mismatch';
2772
2778
  // zstd landed in node:zlib at Node 22.15.0 (22.x line) / 23.8.0, where the whole family
2773
2779
  // (zstdCompress[Sync], zstdDecompressSync, createZstdDecompress) ships together — so probing one
2774
2780
  // member is enough to know if the runtime supports zstd. Checked at call time (not captured at
2775
- // module load) so it stays mockable in tests. On older runtimes we degrade gracefully: writes fall
2776
- // back to a raw (uncompressed) payload, reads of a zstd payload throw a clear, actionable error.
2781
+ // module load) so it stays mockable in tests. On older runtimes we degrade gracefully: `auto`
2782
+ // tries zlib once (or raw if it does not help). When zstd is available, `auto` tries zstd once
2783
+ // and stays raw if it does not shrink — no second pass. Reads of a zstd payload throw a clear,
2784
+ // actionable error on runtimes without zstd.
2777
2785
  function zstdAvailable() {
2778
2786
  return typeof zlib.zstdCompressSync === 'function';
2779
2787
  }
2788
+ function zstdUnavailableWriteError() {
2789
+ return new Error('MSv5 snapshot requested zstd compression, but this Node.js runtime lacks node:zlib zstd '
2790
+ + 'support (added in Node 22.15.0). Upgrade Node.js, or use compression: "auto", "raw", '
2791
+ + 'or "zlib".');
2792
+ }
2780
2793
  function zstdUnavailableReadError() {
2781
2794
  return new Error('MSv5 snapshot is zstd-compressed, but this Node.js runtime lacks node:zlib zstd support '
2782
2795
  + '(added in Node 22.15.0). Upgrade Node.js to read this snapshot, or re-save it from a '
2783
- + 'newer runtime to embed a raw (uncompressed) payload.');
2796
+ + 'newer runtime with compression: "raw" or "zlib".');
2784
2797
  }
2785
2798
  let warnedZstdSaveFallback = false;
2786
2799
  function warnZstdSaveFallbackOnce() {
2787
2800
  if (warnedZstdSaveFallback)
2788
2801
  return;
2789
2802
  warnedZstdSaveFallback = true;
2790
- process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0); MSv5 snapshots are written with a '
2791
- + 'raw (uncompressed) payload. Upgrade to Node 22.15.0+ for compressed snapshots.', { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
2803
+ process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0); compression: "auto" falls back to '
2804
+ + 'zlib when it shrinks the payload, otherwise raw (uncompressed). Upgrade to Node 22.15.0+ '
2805
+ + 'for zstd.', { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
2792
2806
  }
2793
2807
  function assertPayloadFormatRev(buf) {
2794
2808
  const rev = buf.readUInt16LE(MSV5_FORMAT_REV_OFFSET);
@@ -2838,23 +2852,26 @@ function msv5ZstdCompressOptions(uncompressed) {
2838
2852
  },
2839
2853
  };
2840
2854
  }
2841
- /** Raw if below {@link MSV5_MIN_COMPRESS_BYTES}; else zstd when strictly smaller than raw. */
2842
- function pickPayloadCodec(uncompressed, compressed) {
2843
- if (compressed.length < uncompressed.length) {
2844
- return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
2845
- }
2855
+ function rawPayloadChoice(uncompressed) {
2846
2856
  return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2847
2857
  }
2848
- function choosePayloadCodecSync(uncompressed) {
2849
- if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
2850
- return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2858
+ /** Auto mode: one compression attempt; keep it only when strictly smaller than raw. */
2859
+ function pickAutoPayloadCodec(uncompressed, compressed, codec) {
2860
+ if (compressed.length < uncompressed.length) {
2861
+ return {
2862
+ payload: compressed,
2863
+ codec,
2864
+ zstdLevel: codec === CODEC_ZSTD ? MSV5_ZSTD_LEVEL : 0,
2865
+ };
2851
2866
  }
2867
+ return rawPayloadChoice(uncompressed);
2868
+ }
2869
+ function zstdPayloadChoiceSync(uncompressed) {
2852
2870
  if (!zstdAvailable()) {
2853
- warnZstdSaveFallbackOnce();
2854
- return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2871
+ throw zstdUnavailableWriteError();
2855
2872
  }
2856
2873
  const compressed = zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed));
2857
- return pickPayloadCodec(uncompressed, compressed);
2874
+ return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
2858
2875
  }
2859
2876
  /**
2860
2877
  * Async zstd via {@link zstdCompress} (not {@link zstdCompressSync}).
@@ -2873,16 +2890,91 @@ function zstdCompressAsync(uncompressed) {
2873
2890
  });
2874
2891
  });
2875
2892
  }
2876
- async function choosePayloadCodecAsync(uncompressed) {
2893
+ async function zstdPayloadChoiceAsync(uncompressed) {
2894
+ if (!zstdAvailable()) {
2895
+ throw zstdUnavailableWriteError();
2896
+ }
2897
+ const compressed = await zstdCompressAsync(uncompressed);
2898
+ return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
2899
+ }
2900
+ function zlibPayloadChoiceSync(uncompressed) {
2901
+ const compressed = zlib.deflateSync(uncompressed);
2902
+ return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
2903
+ }
2904
+ function zlibCompressAsync(uncompressed) {
2905
+ return new Promise((resolve, reject) => {
2906
+ zlib.deflate(uncompressed, (err, compressed) => {
2907
+ if (err != null) {
2908
+ reject(err);
2909
+ return;
2910
+ }
2911
+ resolve(compressed);
2912
+ });
2913
+ });
2914
+ }
2915
+ async function zlibPayloadChoiceAsync(uncompressed) {
2916
+ const compressed = await zlibCompressAsync(uncompressed);
2917
+ return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
2918
+ }
2919
+ const autoSyncCompressors = {
2920
+ zstd: (uncompressed) => zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed)),
2921
+ zlib: (uncompressed) => zlib.deflateSync(uncompressed),
2922
+ };
2923
+ const autoAsyncCompressors = {
2924
+ zstd: zstdCompressAsync,
2925
+ zlib: zlibCompressAsync,
2926
+ };
2927
+ function autoPayloadChoice(uncompressed, compressors) {
2877
2928
  if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
2878
- return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2929
+ return rawPayloadChoice(uncompressed);
2879
2930
  }
2880
2931
  if (!zstdAvailable()) {
2881
2932
  warnZstdSaveFallbackOnce();
2882
- return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2933
+ return pickAutoPayloadCodec(uncompressed, compressors.zlib(uncompressed), CODEC_ZLIB);
2934
+ }
2935
+ return pickAutoPayloadCodec(uncompressed, compressors.zstd(uncompressed), CODEC_ZSTD);
2936
+ }
2937
+ async function autoPayloadChoiceAsync(uncompressed, compressors) {
2938
+ if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
2939
+ return rawPayloadChoice(uncompressed);
2940
+ }
2941
+ if (!zstdAvailable()) {
2942
+ warnZstdSaveFallbackOnce();
2943
+ return pickAutoPayloadCodec(uncompressed, await compressors.zlib(uncompressed), CODEC_ZLIB);
2944
+ }
2945
+ return pickAutoPayloadCodec(uncompressed, await compressors.zstd(uncompressed), CODEC_ZSTD);
2946
+ }
2947
+ function choosePayloadCodecSync(uncompressed, compression = 'auto') {
2948
+ switch (compression) {
2949
+ case 'raw':
2950
+ return rawPayloadChoice(uncompressed);
2951
+ case 'zstd':
2952
+ return zstdPayloadChoiceSync(uncompressed);
2953
+ case 'zlib':
2954
+ return zlibPayloadChoiceSync(uncompressed);
2955
+ case 'auto':
2956
+ return autoPayloadChoice(uncompressed, autoSyncCompressors);
2957
+ default: {
2958
+ const _exhaustive = compression;
2959
+ return _exhaustive;
2960
+ }
2961
+ }
2962
+ }
2963
+ async function choosePayloadCodecAsync(uncompressed, compression = 'auto') {
2964
+ switch (compression) {
2965
+ case 'raw':
2966
+ return rawPayloadChoice(uncompressed);
2967
+ case 'zstd':
2968
+ return await zstdPayloadChoiceAsync(uncompressed);
2969
+ case 'zlib':
2970
+ return await zlibPayloadChoiceAsync(uncompressed);
2971
+ case 'auto':
2972
+ return await autoPayloadChoiceAsync(uncompressed, autoAsyncCompressors);
2973
+ default: {
2974
+ const _exhaustive = compression;
2975
+ return _exhaustive;
2976
+ }
2883
2977
  }
2884
- const compressed = await zstdCompressAsync(uncompressed);
2885
- return pickPayloadCodec(uncompressed, compressed);
2886
2978
  }
2887
2979
  function concatAndValidateSections(rawSections) {
2888
2980
  if (rawSections.length !== MSV5_SECTION_COUNT) {
@@ -2937,16 +3029,16 @@ function buildMsv5AssembledFile(globalFlags, entries, uncompressedLength, payloa
2937
3029
  }
2938
3030
  /**
2939
3031
  * MSv5 on disk: header + catalogue (uncompressed offsets) + **one** payload blob
2940
- * (raw concatenation or a single zstd stream over it).
3032
+ * (raw concatenation or a single compressed stream over it).
2941
3033
  */
2942
- function assembleMsv5File(globalFlags, rawSections) {
3034
+ function assembleMsv5File(globalFlags, rawSections, compression = 'auto') {
2943
3035
  const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
2944
- const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed);
3036
+ const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed, compression);
2945
3037
  return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
2946
3038
  }
2947
- async function assembleMsv5FileAsync(globalFlags, rawSections) {
3039
+ async function assembleMsv5FileAsync(globalFlags, rawSections, compression = 'auto') {
2948
3040
  const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
2949
- const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed);
3041
+ const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed, compression);
2950
3042
  return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
2951
3043
  }
2952
3044
  function readMsv5SectionDirectory(buf) {
@@ -2990,11 +3082,11 @@ function sectionsFromPayload(payload, directory, payloadCrc32) {
2990
3082
  return out;
2991
3083
  });
2992
3084
  }
2993
- /** Streaming zstd reader: keeps only one section in memory at a time.
2994
- * No `maxOutputLength` on {@link createZstdDecompress}: output is bounded by accumulating
2995
- * `streamOffset` against the header's `uncompressedLength` (same 1 GiB cap checked upfront).
2996
- * Sync load uses `maxOutputLength` instead because it materializes the whole payload at once. */
2997
- function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32) {
3085
+ /** Streaming reader for compressed payloads: keeps only one section in memory at a time.
3086
+ * No `maxOutputLength` on Transform streams: output is bounded by accumulating `streamOffset`
3087
+ * against the header's `uncompressedLength` (same 1 GiB cap checked upfront). Sync load uses
3088
+ * `maxOutputLength` because it materializes the whole payload at once. */
3089
+ function collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32) {
2998
3090
  if (uncompressedLength > MSV5_MAX_UNCOMPRESSED_BYTES) {
2999
3091
  throw new Error('MSv5 payload exceeds 1 GiB limit');
3000
3092
  }
@@ -3014,7 +3106,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
3014
3106
  }
3015
3107
  function consume(chunk) {
3016
3108
  if (streamOffset + chunk.length > uncompressedLength) {
3017
- throw new Error('MSv5 zstd payload exceeds declared length');
3109
+ throw new Error(MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH);
3018
3110
  }
3019
3111
  payloadCrc = crc32Update(payloadCrc, chunk);
3020
3112
  let off = 0;
@@ -3050,7 +3142,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
3050
3142
  function finish() {
3051
3143
  emitEmptySections();
3052
3144
  if (streamOffset !== uncompressedLength || sectionId !== directory.length) {
3053
- throw new Error('MSv5 zstd decompressed length mismatch');
3145
+ throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
3054
3146
  }
3055
3147
  if (payloadCrc !== payloadCrc32) {
3056
3148
  throw new Error('MSv5 payload CRC mismatch');
@@ -3059,9 +3151,15 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
3059
3151
  return { sections, consume, finish };
3060
3152
  }
3061
3153
  function loadMsv5SectionsFromZstdStream(compressed, directory, uncompressedLength, payloadCrc32) {
3154
+ return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createZstdDecompress());
3155
+ }
3156
+ function loadMsv5SectionsFromZlibStream(compressed, directory, uncompressedLength, payloadCrc32) {
3157
+ return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createInflate());
3158
+ }
3159
+ function loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createStream) {
3062
3160
  return new Promise((resolve, reject) => {
3063
- const collector = collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32);
3064
- const stream = zlib.createZstdDecompress();
3161
+ const collector = collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32);
3162
+ const stream = createStream();
3065
3163
  stream.on('data', (chunk) => {
3066
3164
  try {
3067
3165
  collector.consume(chunk);
@@ -3119,29 +3217,39 @@ function preparePayload(fileBuf, directory) {
3119
3217
  payloadCrc32,
3120
3218
  };
3121
3219
  }
3122
- /** Synchronous load; peak RAM ≈ full uncompressed payload (use the async path to bound it). */
3123
- function loadMsv5Sections(fileBuf, directory) {
3124
- const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
3125
- if (payloadCodec === CODEC_RAW) {
3126
- return sectionsFromPayload(slice, directory, payloadCrc32);
3127
- }
3220
+ function decompressPayloadSync(payloadCodec, slice, uncompressedLength) {
3128
3221
  if (payloadCodec === CODEC_ZSTD) {
3129
3222
  if (!zstdAvailable()) {
3130
3223
  throw zstdUnavailableReadError();
3131
3224
  }
3132
- // Native cap matches readPayloadMeta's 1 GiB limit (see MSV5_MAX_UNCOMPRESSED_BYTES).
3133
- // Using header `uncompressedLength` here would only help when the header understates
3134
- // the zstd stream but the attacker can inflate the header too — same worst case.
3135
3225
  const decoded = zlib.zstdDecompressSync(slice, {
3136
3226
  maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
3137
3227
  });
3138
3228
  if (decoded.length !== uncompressedLength) {
3139
- throw new Error('MSv5 zstd decompressed length mismatch');
3229
+ throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
3230
+ }
3231
+ return decoded;
3232
+ }
3233
+ if (payloadCodec === CODEC_ZLIB) {
3234
+ const decoded = zlib.inflateSync(slice, {
3235
+ maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
3236
+ });
3237
+ if (decoded.length !== uncompressedLength) {
3238
+ throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
3140
3239
  }
3141
- return sectionsFromPayload(decoded, directory, payloadCrc32);
3240
+ return decoded;
3142
3241
  }
3143
3242
  throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
3144
3243
  }
3244
+ /** Synchronous load; peak RAM ≈ full uncompressed payload (use the async path to bound it). */
3245
+ function loadMsv5Sections(fileBuf, directory) {
3246
+ const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
3247
+ if (payloadCodec === CODEC_RAW) {
3248
+ return sectionsFromPayload(slice, directory, payloadCrc32);
3249
+ }
3250
+ const decoded = decompressPayloadSync(payloadCodec, slice, uncompressedLength);
3251
+ return sectionsFromPayload(decoded, directory, payloadCrc32);
3252
+ }
3145
3253
  /** Streaming load; peak main-thread RAM ≈ largest single section (+ file buffer). */
3146
3254
  async function loadMsv5SectionsAsync(fileBuf, directory) {
3147
3255
  const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
@@ -3154,6 +3262,9 @@ async function loadMsv5SectionsAsync(fileBuf, directory) {
3154
3262
  }
3155
3263
  return loadMsv5SectionsFromZstdStream(slice, directory, uncompressedLength, payloadCrc32);
3156
3264
  }
3265
+ if (payloadCodec === CODEC_ZLIB) {
3266
+ return loadMsv5SectionsFromZlibStream(slice, directory, uncompressedLength, payloadCrc32);
3267
+ }
3157
3268
  throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
3158
3269
  }
3159
3270
  function isMsv5Buffer(buf) {
@@ -3442,7 +3553,7 @@ function resolvePackedTree(snap, termTree, packedTermIndex) {
3442
3553
  validateTermTreeLeaves(tree, termCount);
3443
3554
  return fromRadixTree(tree, termCount);
3444
3555
  }
3445
- function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
3556
+ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression) {
3446
3557
  var _a;
3447
3558
  validateFrozenSnapshotNumeric(snap);
3448
3559
  const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
@@ -3471,9 +3582,9 @@ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
3471
3582
  postingsWire.docIds,
3472
3583
  postingsWire.freqs,
3473
3584
  ];
3474
- return assembleMsv5File(globalFlags, rawSections).buffer;
3585
+ return assembleMsv5File(globalFlags, rawSections, compression).buffer;
3475
3586
  }
3476
- async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
3587
+ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression) {
3477
3588
  var _a;
3478
3589
  validateFrozenSnapshotNumeric(snap);
3479
3590
  const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
@@ -3502,7 +3613,7 @@ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
3502
3613
  postingsWire.docIds,
3503
3614
  postingsWire.freqs,
3504
3615
  ];
3505
- return (await assembleMsv5FileAsync(globalFlags, rawSections)).buffer;
3616
+ return (await assembleMsv5FileAsync(globalFlags, rawSections, compression)).buffer;
3506
3617
  }
3507
3618
 
3508
3619
  function validateMsv5Container(buf) {
@@ -3579,12 +3690,12 @@ async function decodeFrozenSnapshotMsv5Async(buf, hints) {
3579
3690
  }
3580
3691
 
3581
3692
  /** Encode a frozen snapshot as a binary buffer. */
3582
- function encodeFrozenSnapshot(snap, termTree, packedTermIndex) {
3583
- return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex);
3693
+ function encodeFrozenSnapshot(snap, termTree, packedTermIndex, compression) {
3694
+ return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression);
3584
3695
  }
3585
- /** Async encoder; uses non-blocking zstd compression for large payloads. */
3586
- function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
3587
- return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex);
3696
+ /** Async encoder; uses the selected payload compression without blocking the event loop. */
3697
+ function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex, compression) {
3698
+ return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression);
3588
3699
  }
3589
3700
 
3590
3701
  const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
@@ -3601,7 +3712,7 @@ function decodeFrozenSnapshot(buf, hints) {
3601
3712
  }
3602
3713
  throw invalidFrozenIndex('Unsupported frozen binary snapshot');
3603
3714
  }
3604
- /** Async frozen snapshot decode (streaming zstd). */
3715
+ /** Async frozen snapshot decode (streaming decompression when needed). */
3605
3716
  async function decodeFrozenSnapshotAsync(buf, hints) {
3606
3717
  assertBufferLength(buf, 8);
3607
3718
  const version = buf.readUInt16LE(4);
@@ -4073,56 +4184,76 @@ function normalizeStringQuery(query, searchOptions, params) {
4073
4184
  ...params.globalSearchOptions,
4074
4185
  ...searchOptions,
4075
4186
  };
4076
- const terms = options.tokenize(query)
4077
- .flatMap((term) => options.processTerm(term))
4078
- .filter(term => !!term);
4187
+ const tokens = options.tokenize(query);
4188
+ const terms = [];
4189
+ for (const token of tokens) {
4190
+ const processed = options.processTerm(token);
4191
+ if (Array.isArray(processed)) {
4192
+ for (const term of processed) {
4193
+ if (term)
4194
+ terms.push(term);
4195
+ }
4196
+ }
4197
+ else if (processed) {
4198
+ terms.push(processed);
4199
+ }
4200
+ }
4201
+ const toSpec = termToQuerySpec(options);
4202
+ const specs = new Array(terms.length);
4203
+ for (let i = 0; i < terms.length; i++) {
4204
+ specs[i] = toSpec(terms[i], i, terms);
4205
+ }
4206
+ const { fuzzy: fuzzyWeight, prefix: prefixWeight } = {
4207
+ ...defaultSearchOptions.weights,
4208
+ ...options.weights,
4209
+ };
4079
4210
  return {
4080
4211
  options,
4081
- specs: terms.map(termToQuerySpec(options)),
4212
+ specs,
4082
4213
  operator: options.combineWith,
4214
+ fieldBoosts: fieldBoostsForQuery(options, params.fields),
4215
+ fuzzyWeight,
4216
+ prefixWeight,
4083
4217
  };
4084
4218
  }
4085
4219
  function lazyIndexedTerm(indexView, termIndex) {
4086
4220
  return { kind: 'lazy', resolve: () => indexView.resolveTermByIndex(termIndex) };
4087
4221
  }
4088
- function visitQuerySpecForScoring(query, options, params, visit) {
4222
+ function visitQuerySpecForScoring(query, normalized, params, visit) {
4089
4223
  const { indexView } = params;
4090
- const { weights, maxFuzzy } = options;
4091
- const { fuzzy: fuzzyWeight, prefix: prefixWeight } = { ...defaultSearchOptions.weights, ...weights };
4092
- const maxDistance = maxFuzzyDistance(query, maxFuzzy);
4224
+ const { fuzzyWeight, options, prefixWeight } = normalized;
4225
+ const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
4093
4226
  const exactTi = indexView.resolveTermIndex(query.term);
4094
4227
  visit(exactTi == null ? undefined : indexView.fieldTermData(exactTi), query.term, 1);
4095
- const seenPrefix = new Set();
4228
+ const seenPrefix = query.prefix && maxDistance ? new Set() : undefined;
4096
4229
  if (query.prefix) {
4097
4230
  for (const { termIndex, length } of indexView.getPrefixMatchesByIndex(query.term)) {
4098
4231
  const distance = length - query.term.length;
4099
4232
  if (!distance)
4100
4233
  continue;
4101
- seenPrefix.add(termIndex);
4234
+ seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.add(termIndex);
4102
4235
  visit(indexView.fieldTermData(termIndex), lazyIndexedTerm(indexView, termIndex), prefixWeight * length / (length + 0.3 * distance));
4103
4236
  }
4104
4237
  }
4105
4238
  if (!maxDistance)
4106
4239
  return;
4107
4240
  for (const { termIndex, length, distance } of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
4108
- if (!distance || seenPrefix.has(termIndex))
4241
+ if (!distance || (seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.has(termIndex)))
4109
4242
  continue;
4110
4243
  visit(indexView.fieldTermData(termIndex), lazyIndexedTerm(indexView, termIndex), fuzzyWeight * length / (length + distance));
4111
4244
  }
4112
4245
  }
4113
- function executeQuerySpecInternal(query, searchOptions, params, allowedDocs) {
4114
- const options = { ...params.globalSearchOptions, ...searchOptions };
4115
- const fieldBoosts = fieldBoostsForQuery(options, params.fields);
4246
+ function executeQuerySpecInternal(query, normalized, params, allowedDocs) {
4247
+ const { fieldBoosts, options } = normalized;
4116
4248
  const termOptions = allowedDocs == null ? undefined : { allowedDocs };
4117
4249
  const results = new Map();
4118
- visitQuerySpecForScoring(query, options, params, (data, derivedTerm, termWeight) => {
4250
+ visitQuerySpecForScoring(query, normalized, params, (data, derivedTerm, termWeight) => {
4119
4251
  aggregateTerm(query.term, derivedTerm, termWeight, query.termBoost, data, fieldBoosts, params.aggregateContext, options.boostDocument, options.bm25, results, termOptions);
4120
4252
  });
4121
4253
  return results;
4122
4254
  }
4123
- function collectDocIdsForQuerySpec(query, searchOptions, params, allowedDocs) {
4124
- const options = { ...params.globalSearchOptions, ...searchOptions };
4125
- const fieldBoosts = fieldBoostsForQuery(options, params.fields);
4255
+ function collectDocIdsForQuerySpec(query, normalized, params, allowedDocs) {
4256
+ const { fieldBoosts, options } = normalized;
4126
4257
  const docIds = new Set();
4127
4258
  const { indexView, aggregateContext } = params;
4128
4259
  const maxDistance = maxFuzzyDistance(query, options.maxFuzzy);
@@ -4130,19 +4261,19 @@ function collectDocIdsForQuerySpec(query, searchOptions, params, allowedDocs) {
4130
4261
  if (exactTi != null) {
4131
4262
  indexView.collectDocIds(exactTi, fieldBoosts, aggregateContext, docIds, allowedDocs);
4132
4263
  }
4133
- const seenPrefix = new Set();
4264
+ const seenPrefix = query.prefix && maxDistance ? new Set() : undefined;
4134
4265
  if (query.prefix) {
4135
4266
  for (const { termIndex, length } of indexView.getPrefixMatchesByIndex(query.term)) {
4136
4267
  const distance = length - query.term.length;
4137
4268
  if (!distance)
4138
4269
  continue;
4139
- seenPrefix.add(termIndex);
4270
+ seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.add(termIndex);
4140
4271
  indexView.collectDocIds(termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
4141
4272
  }
4142
4273
  }
4143
4274
  if (maxDistance) {
4144
4275
  for (const { termIndex, distance } of indexView.getFuzzyMatchesByIndex(query.term, maxDistance)) {
4145
- if (!distance || seenPrefix.has(termIndex))
4276
+ if (!distance || (seenPrefix === null || seenPrefix === void 0 ? void 0 : seenPrefix.has(termIndex)))
4146
4277
  continue;
4147
4278
  indexView.collectDocIds(termIndex, fieldBoosts, aggregateContext, docIds, allowedDocs);
4148
4279
  }
@@ -4273,14 +4404,15 @@ function collectDocIdsForQueryInternal(query, searchOptions, params, allowedDocs
4273
4404
  if (typeof query !== 'string') {
4274
4405
  throw new Error('FrozenMiniSearch: invalid query');
4275
4406
  }
4276
- const { options, specs, operator } = normalizeStringQuery(query, searchOptions, params);
4407
+ const normalized = normalizeStringQuery(query, searchOptions, params);
4408
+ const { specs, operator } = normalized;
4277
4409
  const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
4278
4410
  if (specs.length <= 1) {
4279
4411
  return specs.length === 1
4280
- ? collectDocIdsForQuerySpec(specs[0], options, params, allowedDocs)
4412
+ ? collectDocIdsForQuerySpec(specs[0], normalized, params, allowedDocs)
4281
4413
  : new Set();
4282
4414
  }
4283
- return collectCombinedDocIds(specs, combineWith, (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, options, params, branchAllowed), allowedDocs);
4415
+ return collectCombinedDocIds(specs, combineWith, (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs);
4284
4416
  }
4285
4417
  function executeWildcardQuery(searchOptions, params) {
4286
4418
  const results = new Map();
@@ -4310,12 +4442,13 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
4310
4442
  if (typeof query !== 'string') {
4311
4443
  throw new Error('FrozenMiniSearch: invalid query');
4312
4444
  }
4313
- const { options, specs, operator } = normalizeStringQuery(query, searchOptions, params);
4445
+ const normalized = normalizeStringQuery(query, searchOptions, params);
4446
+ const { specs, operator } = normalized;
4314
4447
  const combineWith = (operator !== null && operator !== void 0 ? operator : params.globalSearchOptions.combineWith);
4315
4448
  if (useGatedEvaluation(run, specs.length, combineWith, false)) {
4316
- return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec, options, params, branchAllowed), (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, options, params, branchAllowed), allowedDocs);
4449
+ return executeCombinedBranches(specs, combineWith, params, (spec, branchAllowed) => executeQuerySpecInternal(spec, normalized, params, branchAllowed), (spec, branchAllowed) => collectDocIdsForQuerySpec(spec, normalized, params, branchAllowed), allowedDocs);
4317
4450
  }
4318
- const results = specs.map(spec => executeQuerySpecInternal(spec, options, params, allowedDocs));
4451
+ const results = specs.map(spec => executeQuerySpecInternal(spec, normalized, params, allowedDocs));
4319
4452
  return combineResults(results, combineWith);
4320
4453
  }
4321
4454
  function executeQuery(query, searchOptions, params) {
@@ -4506,6 +4639,7 @@ function materializeOwnedSnapshot(params, mode) {
4506
4639
  function frozenMemoryBreakdown(frozen) {
4507
4640
  return frozen.memoryBreakdown();
4508
4641
  }
4642
+ const noStoredFields = () => undefined;
4509
4643
  function assertFieldsMatchSnapshot(optionsFields, snapFieldIds) {
4510
4644
  const snapNames = Object.keys(snapFieldIds).sort();
4511
4645
  const optNames = [...optionsFields].sort();
@@ -4559,24 +4693,31 @@ class FrozenMiniSearch {
4559
4693
  this._termCount = params.termCount;
4560
4694
  this._postings = params.postings;
4561
4695
  this._fieldTermFlyweight = createFrozenFieldTermFlyweight(this._postings);
4696
+ this._hasStoredFields = this._storedFields.kind !== 'none';
4562
4697
  this._aggregateContext = {
4563
4698
  documentCount: this._documentCount,
4564
4699
  avgFieldLength: this._avgFieldLength,
4565
4700
  fieldIds: this._fieldIds,
4566
4701
  getFieldLength: (docId, fieldId) => this.getFieldLength(docId, fieldId),
4567
4702
  getExternalId: docId => this._externalIds[docId],
4568
- getStoredFields: docId => readStoredFields(this._storedFields, docId),
4703
+ getStoredFields: this._hasStoredFields
4704
+ ? docId => readStoredFields(this._storedFields, docId)
4705
+ : noStoredFields,
4569
4706
  };
4570
4707
  this._queryEngineParams = {
4571
4708
  fields: this._options.fields,
4572
4709
  globalSearchOptions: this._options.searchOptions,
4573
4710
  tokenize: this._options.tokenize,
4574
4711
  processTerm: this._options.processTerm,
4575
- indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight, (callback) => {
4576
- forEachLiveShortId(this._nextId, this._externalIds, (shortId, id) => {
4577
- callback(shortId, id, readStoredFields(this._storedFields, shortId));
4578
- });
4579
- }),
4712
+ indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight, this._hasStoredFields
4713
+ ? (callback) => {
4714
+ forEachLiveShortId(this._nextId, this._externalIds, (shortId, id) => {
4715
+ callback(shortId, id, readStoredFields(this._storedFields, shortId));
4716
+ });
4717
+ }
4718
+ : (callback) => {
4719
+ forEachLiveShortId(this._nextId, this._externalIds, callback);
4720
+ }),
4580
4721
  aggregateContext: this._aggregateContext,
4581
4722
  };
4582
4723
  }
@@ -4633,14 +4774,16 @@ class FrozenMiniSearch {
4633
4774
  return shortId == null ? undefined : readStoredFields(this._storedFields, shortId);
4634
4775
  }
4635
4776
  search(query, searchOptions = {}) {
4636
- return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], docId => readStoredFields(this._storedFields, docId));
4777
+ return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], this._hasStoredFields
4778
+ ? docId => readStoredFields(this._storedFields, docId)
4779
+ : undefined);
4637
4780
  }
4638
4781
  autoSuggest(queryString, options = {}) {
4639
4782
  const merged = { ...this._options.autoSuggestOptions, ...options };
4640
4783
  return autoSuggestFromSearch((q, o) => this.search(q, o), queryString, merged);
4641
4784
  }
4642
4785
  /** Serialize this index as a frozen binary snapshot (synchronous). */
4643
- saveBinarySync() {
4786
+ saveBinarySync(saveOptions = {}) {
4644
4787
  return encodeFrozenSnapshot({
4645
4788
  documentCount: this._documentCount,
4646
4789
  nextId: this._nextId,
@@ -4654,10 +4797,10 @@ class FrozenMiniSearch {
4654
4797
  fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
4655
4798
  treeShape: [],
4656
4799
  postings: this._postings,
4657
- }, undefined, this._index);
4800
+ }, undefined, this._index, saveOptions.compression);
4658
4801
  }
4659
- /** Non-blocking zstd compression; same output as {@link saveBinarySync}. */
4660
- async saveBinaryAsync() {
4802
+ /** Non-blocking snapshot serialization with the selected compression codec. */
4803
+ async saveBinaryAsync(saveOptions = {}) {
4661
4804
  return encodeFrozenSnapshotAsync({
4662
4805
  documentCount: this._documentCount,
4663
4806
  nextId: this._nextId,
@@ -4671,7 +4814,7 @@ class FrozenMiniSearch {
4671
4814
  fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
4672
4815
  treeShape: [],
4673
4816
  postings: this._postings,
4674
- }, undefined, this._index);
4817
+ }, undefined, this._index, saveOptions.compression);
4675
4818
  }
4676
4819
  /** Load a frozen binary snapshot. */
4677
4820
  static loadBinarySync(buffer, options = {}) {
@@ -4680,7 +4823,7 @@ class FrozenMiniSearch {
4680
4823
  const snap = decodeFrozenSnapshot(buffer, { storeFields });
4681
4824
  return FrozenMiniSearch.fromBinarySnapshot(snap, options);
4682
4825
  }
4683
- /** Load a frozen binary snapshot with streaming zstd decompression (bounded memory). */
4826
+ /** Load a frozen binary snapshot with streaming decompression when needed (bounded memory). */
4684
4827
  static async loadBinaryAsync(buffer, options = {}) {
4685
4828
  var _a;
4686
4829
  const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;