@yoch/frozenminisearch 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1644,9 +1644,11 @@ const FLAG_FL_U8 = 8;
1644
1644
  const FLAG_FL_U16 = 16;
1645
1645
  const FLAG_FREQ_U16 = 32;
1646
1646
  const CODEC_RAW = 0;
1647
+ /** Deflate/inflate (`node:zlib`) on the whole payload. */
1648
+ const CODEC_ZLIB = 1;
1647
1649
  /** Zstandard (`node:zlib`) on the whole payload. */
1648
1650
  const CODEC_ZSTD = 3;
1649
- /** Single concatenated payload, one zstd stream (or raw). */
1651
+ /** Single concatenated payload, one compressed stream (or raw). */
1650
1652
  const MSV5_FORMAT_REV_PAYLOAD = 1;
1651
1653
  /** Do not compress payloads smaller than this (bytes). */
1652
1654
  const MSV5_MIN_COMPRESS_BYTES = 64;
@@ -1938,7 +1940,7 @@ function postingsTypedBytes(layout) {
1938
1940
  slotCount,
1939
1941
  };
1940
1942
  }
1941
- function validateFrozenPostingsLayout(layout, documentCount, nextId, fail = detail => { throw new Error(detail); }) {
1943
+ function validateFrozenPostingsLayout(layout, documentCount, nextId, fail = (detail) => { throw new Error(detail); }) {
1942
1944
  if (layout.fieldCount <= 0)
1943
1945
  fail('fieldCount must be positive');
1944
1946
  if (layout.nextId !== nextId)
@@ -2209,7 +2211,7 @@ function forEachDefaultToken(text, onToken) {
2209
2211
  /** Default tokenizer into a reusable buffer (avoids `text.split()` array allocation). */
2210
2212
  function tokenizeDefaultInto(out, text) {
2211
2213
  out.length = 0;
2212
- forEachDefaultToken(text, (token) => out.push(token));
2214
+ forEachDefaultToken(text, token => out.push(token));
2213
2215
  }
2214
2216
  /** Tokenize field text into `out` (reused). Fast path when `tokenize` is the library default. */
2215
2217
  function tokenizeFieldInto(out, tokenize, text, fieldName) {
@@ -2288,7 +2290,7 @@ function validateFrozenSnapshotNumeric(snap) {
2288
2290
  if (snap.avgFieldLength.length !== snap.fieldCount) {
2289
2291
  throw invalidFrozenIndex('avgFieldLength size mismatch');
2290
2292
  }
2291
- validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, detail => {
2293
+ validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, (detail) => {
2292
2294
  throw invalidFrozenIndex(detail);
2293
2295
  });
2294
2296
  const indexedFields = Object.keys(snap.fieldIds);
@@ -2503,7 +2505,7 @@ function cloneStoredFields(layout) {
2503
2505
  }
2504
2506
  return { kind: 'multi', rows: layout.rows.slice() };
2505
2507
  }
2506
- /** Import from wire rows or lucaong snapshot. Empty storeFields + non-empty rows → multi (binary load without options). */
2508
+ /** Import from wire rows or MiniSearch snapshot. Empty storeFields + non-empty rows → multi (binary load without options). */
2507
2509
  function storedFieldsFromRows(rows, storeFields) {
2508
2510
  if (storeFields.length === 0) {
2509
2511
  const hasAny = rows.some(row => row != null && Object.keys(row).length > 0);
@@ -2687,7 +2689,7 @@ function buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, nextId, s
2687
2689
  });
2688
2690
  return { termCount, index: packedIndex, postings };
2689
2691
  }
2690
- /** Build frozen assemble params from a lucaong MiniSearch JSON snapshot. */
2692
+ /** Build frozen assemble params from a MiniSearch JSON snapshot. */
2691
2693
  function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
2692
2694
  var _a, _b, _c;
2693
2695
  if (!SUPPORTED_SERIALIZATION_VERSIONS.has(snapshot.serializationVersion)) {
@@ -2766,33 +2768,43 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
2766
2768
  };
2767
2769
  }
2768
2770
 
2769
- /** Hard cap on the uncompressed payload, rejected before allocation (zstd-bomb guard).
2771
+ /** Hard cap on the uncompressed payload, rejected before allocation (compressed-bomb guard).
2770
2772
  * This is the single trust boundary for untrusted snapshots: {@link readPayloadMeta} rejects
2771
2773
  * headers above this size; sync decompress uses the same cap via `maxOutputLength`.
2772
2774
  * A malicious header can still declare up to 1 GiB — no tighter native limit helps without
2773
2775
  * trusting `uncompressedLength` from that same header. Semantic integrity (length match,
2774
2776
  * payload CRC, per-section CRC) is enforced after decode. */
2775
2777
  const MSV5_MAX_UNCOMPRESSED_BYTES = 1024 * 1024 * 1024;
2778
+ const MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH = 'MSv5 compressed payload exceeds declared length';
2779
+ const MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH = 'MSv5 decompressed payload length mismatch';
2776
2780
  // zstd landed in node:zlib at Node 22.15.0 (22.x line) / 23.8.0, where the whole family
2777
2781
  // (zstdCompress[Sync], zstdDecompressSync, createZstdDecompress) ships together — so probing one
2778
2782
  // member is enough to know if the runtime supports zstd. Checked at call time (not captured at
2779
- // module load) so it stays mockable in tests. On older runtimes we degrade gracefully: writes fall
2780
- // back to a raw (uncompressed) payload, reads of a zstd payload throw a clear, actionable error.
2783
+ // module load) so it stays mockable in tests. On older runtimes we degrade gracefully: `auto`
2784
+ // tries zlib once (or raw if it does not help). When zstd is available, `auto` tries zstd once
2785
+ // and stays raw if it does not shrink — no second pass. Reads of a zstd payload throw a clear,
2786
+ // actionable error on runtimes without zstd.
2781
2787
  function zstdAvailable() {
2782
2788
  return typeof zlib.zstdCompressSync === 'function';
2783
2789
  }
2790
+ function zstdUnavailableWriteError() {
2791
+ return new Error('MSv5 snapshot requested zstd compression, but this Node.js runtime lacks node:zlib zstd '
2792
+ + 'support (added in Node 22.15.0). Upgrade Node.js, or use compression: "auto", "raw", '
2793
+ + 'or "zlib".');
2794
+ }
2784
2795
  function zstdUnavailableReadError() {
2785
2796
  return new Error('MSv5 snapshot is zstd-compressed, but this Node.js runtime lacks node:zlib zstd support '
2786
2797
  + '(added in Node 22.15.0). Upgrade Node.js to read this snapshot, or re-save it from a '
2787
- + 'newer runtime to embed a raw (uncompressed) payload.');
2798
+ + 'newer runtime with compression: "raw" or "zlib".');
2788
2799
  }
2789
2800
  let warnedZstdSaveFallback = false;
2790
2801
  function warnZstdSaveFallbackOnce() {
2791
2802
  if (warnedZstdSaveFallback)
2792
2803
  return;
2793
2804
  warnedZstdSaveFallback = true;
2794
- process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0); MSv5 snapshots are written with a '
2795
- + 'raw (uncompressed) payload. Upgrade to Node 22.15.0+ for compressed snapshots.', { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
2805
+ process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0); compression: "auto" falls back to '
2806
+ + 'zlib when it shrinks the payload, otherwise raw (uncompressed). Upgrade to Node 22.15.0+ '
2807
+ + 'for zstd.', { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
2796
2808
  }
2797
2809
  function assertPayloadFormatRev(buf) {
2798
2810
  const rev = buf.readUInt16LE(MSV5_FORMAT_REV_OFFSET);
@@ -2842,23 +2854,26 @@ function msv5ZstdCompressOptions(uncompressed) {
2842
2854
  },
2843
2855
  };
2844
2856
  }
2845
- /** Raw if below {@link MSV5_MIN_COMPRESS_BYTES}; else zstd when strictly smaller than raw. */
2846
- function pickPayloadCodec(uncompressed, compressed) {
2847
- if (compressed.length < uncompressed.length) {
2848
- return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
2849
- }
2857
+ function rawPayloadChoice(uncompressed) {
2850
2858
  return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2851
2859
  }
2852
- function choosePayloadCodecSync(uncompressed) {
2853
- if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
2854
- return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2860
+ /** Auto mode: one compression attempt; keep it only when strictly smaller than raw. */
2861
+ function pickAutoPayloadCodec(uncompressed, compressed, codec) {
2862
+ if (compressed.length < uncompressed.length) {
2863
+ return {
2864
+ payload: compressed,
2865
+ codec,
2866
+ zstdLevel: codec === CODEC_ZSTD ? MSV5_ZSTD_LEVEL : 0,
2867
+ };
2855
2868
  }
2869
+ return rawPayloadChoice(uncompressed);
2870
+ }
2871
+ function zstdPayloadChoiceSync(uncompressed) {
2856
2872
  if (!zstdAvailable()) {
2857
- warnZstdSaveFallbackOnce();
2858
- return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2873
+ throw zstdUnavailableWriteError();
2859
2874
  }
2860
2875
  const compressed = zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed));
2861
- return pickPayloadCodec(uncompressed, compressed);
2876
+ return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
2862
2877
  }
2863
2878
  /**
2864
2879
  * Async zstd via {@link zstdCompress} (not {@link zstdCompressSync}).
@@ -2877,16 +2892,91 @@ function zstdCompressAsync(uncompressed) {
2877
2892
  });
2878
2893
  });
2879
2894
  }
2880
- async function choosePayloadCodecAsync(uncompressed) {
2895
+ async function zstdPayloadChoiceAsync(uncompressed) {
2896
+ if (!zstdAvailable()) {
2897
+ throw zstdUnavailableWriteError();
2898
+ }
2899
+ const compressed = await zstdCompressAsync(uncompressed);
2900
+ return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
2901
+ }
2902
+ function zlibPayloadChoiceSync(uncompressed) {
2903
+ const compressed = zlib.deflateSync(uncompressed);
2904
+ return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
2905
+ }
2906
+ function zlibCompressAsync(uncompressed) {
2907
+ return new Promise((resolve, reject) => {
2908
+ zlib.deflate(uncompressed, (err, compressed) => {
2909
+ if (err != null) {
2910
+ reject(err);
2911
+ return;
2912
+ }
2913
+ resolve(compressed);
2914
+ });
2915
+ });
2916
+ }
2917
+ async function zlibPayloadChoiceAsync(uncompressed) {
2918
+ const compressed = await zlibCompressAsync(uncompressed);
2919
+ return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
2920
+ }
2921
+ const autoSyncCompressors = {
2922
+ zstd: (uncompressed) => zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed)),
2923
+ zlib: (uncompressed) => zlib.deflateSync(uncompressed),
2924
+ };
2925
+ const autoAsyncCompressors = {
2926
+ zstd: zstdCompressAsync,
2927
+ zlib: zlibCompressAsync,
2928
+ };
2929
+ function autoPayloadChoice(uncompressed, compressors) {
2881
2930
  if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
2882
- return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2931
+ return rawPayloadChoice(uncompressed);
2883
2932
  }
2884
2933
  if (!zstdAvailable()) {
2885
2934
  warnZstdSaveFallbackOnce();
2886
- return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2935
+ return pickAutoPayloadCodec(uncompressed, compressors.zlib(uncompressed), CODEC_ZLIB);
2936
+ }
2937
+ return pickAutoPayloadCodec(uncompressed, compressors.zstd(uncompressed), CODEC_ZSTD);
2938
+ }
2939
+ async function autoPayloadChoiceAsync(uncompressed, compressors) {
2940
+ if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
2941
+ return rawPayloadChoice(uncompressed);
2942
+ }
2943
+ if (!zstdAvailable()) {
2944
+ warnZstdSaveFallbackOnce();
2945
+ return pickAutoPayloadCodec(uncompressed, await compressors.zlib(uncompressed), CODEC_ZLIB);
2946
+ }
2947
+ return pickAutoPayloadCodec(uncompressed, await compressors.zstd(uncompressed), CODEC_ZSTD);
2948
+ }
2949
+ function choosePayloadCodecSync(uncompressed, compression = 'auto') {
2950
+ switch (compression) {
2951
+ case 'raw':
2952
+ return rawPayloadChoice(uncompressed);
2953
+ case 'zstd':
2954
+ return zstdPayloadChoiceSync(uncompressed);
2955
+ case 'zlib':
2956
+ return zlibPayloadChoiceSync(uncompressed);
2957
+ case 'auto':
2958
+ return autoPayloadChoice(uncompressed, autoSyncCompressors);
2959
+ default: {
2960
+ const _exhaustive = compression;
2961
+ return _exhaustive;
2962
+ }
2963
+ }
2964
+ }
2965
+ async function choosePayloadCodecAsync(uncompressed, compression = 'auto') {
2966
+ switch (compression) {
2967
+ case 'raw':
2968
+ return rawPayloadChoice(uncompressed);
2969
+ case 'zstd':
2970
+ return await zstdPayloadChoiceAsync(uncompressed);
2971
+ case 'zlib':
2972
+ return await zlibPayloadChoiceAsync(uncompressed);
2973
+ case 'auto':
2974
+ return await autoPayloadChoiceAsync(uncompressed, autoAsyncCompressors);
2975
+ default: {
2976
+ const _exhaustive = compression;
2977
+ return _exhaustive;
2978
+ }
2887
2979
  }
2888
- const compressed = await zstdCompressAsync(uncompressed);
2889
- return pickPayloadCodec(uncompressed, compressed);
2890
2980
  }
2891
2981
  function concatAndValidateSections(rawSections) {
2892
2982
  if (rawSections.length !== MSV5_SECTION_COUNT) {
@@ -2941,16 +3031,16 @@ function buildMsv5AssembledFile(globalFlags, entries, uncompressedLength, payloa
2941
3031
  }
2942
3032
  /**
2943
3033
  * MSv5 on disk: header + catalogue (uncompressed offsets) + **one** payload blob
2944
- * (raw concatenation or a single zstd stream over it).
3034
+ * (raw concatenation or a single compressed stream over it).
2945
3035
  */
2946
- function assembleMsv5File(globalFlags, rawSections) {
3036
+ function assembleMsv5File(globalFlags, rawSections, compression = 'auto') {
2947
3037
  const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
2948
- const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed);
3038
+ const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed, compression);
2949
3039
  return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
2950
3040
  }
2951
- async function assembleMsv5FileAsync(globalFlags, rawSections) {
3041
+ async function assembleMsv5FileAsync(globalFlags, rawSections, compression = 'auto') {
2952
3042
  const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
2953
- const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed);
3043
+ const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed, compression);
2954
3044
  return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
2955
3045
  }
2956
3046
  function readMsv5SectionDirectory(buf) {
@@ -2994,11 +3084,11 @@ function sectionsFromPayload(payload, directory, payloadCrc32) {
2994
3084
  return out;
2995
3085
  });
2996
3086
  }
2997
- /** Streaming zstd reader: keeps only one section in memory at a time.
2998
- * No `maxOutputLength` on {@link createZstdDecompress}: output is bounded by accumulating
2999
- * `streamOffset` against the header's `uncompressedLength` (same 1 GiB cap checked upfront).
3000
- * Sync load uses `maxOutputLength` instead because it materializes the whole payload at once. */
3001
- function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32) {
3087
+ /** Streaming compressed reader: keeps only one section in memory at a time.
3088
+ * No `maxOutputLength` on Transform streams: output is bounded by accumulating `streamOffset`
3089
+ * against the header's `uncompressedLength` (same 1 GiB cap checked upfront). Sync load uses
3090
+ * `maxOutputLength` because it materializes the whole payload at once. */
3091
+ function collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32) {
3002
3092
  if (uncompressedLength > MSV5_MAX_UNCOMPRESSED_BYTES) {
3003
3093
  throw new Error('MSv5 payload exceeds 1 GiB limit');
3004
3094
  }
@@ -3018,7 +3108,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
3018
3108
  }
3019
3109
  function consume(chunk) {
3020
3110
  if (streamOffset + chunk.length > uncompressedLength) {
3021
- throw new Error('MSv5 zstd payload exceeds declared length');
3111
+ throw new Error(MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH);
3022
3112
  }
3023
3113
  payloadCrc = crc32Update(payloadCrc, chunk);
3024
3114
  let off = 0;
@@ -3054,7 +3144,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
3054
3144
  function finish() {
3055
3145
  emitEmptySections();
3056
3146
  if (streamOffset !== uncompressedLength || sectionId !== directory.length) {
3057
- throw new Error('MSv5 zstd decompressed length mismatch');
3147
+ throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
3058
3148
  }
3059
3149
  if (payloadCrc !== payloadCrc32) {
3060
3150
  throw new Error('MSv5 payload CRC mismatch');
@@ -3063,9 +3153,15 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
3063
3153
  return { sections, consume, finish };
3064
3154
  }
3065
3155
  function loadMsv5SectionsFromZstdStream(compressed, directory, uncompressedLength, payloadCrc32) {
3156
+ return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createZstdDecompress());
3157
+ }
3158
+ function loadMsv5SectionsFromZlibStream(compressed, directory, uncompressedLength, payloadCrc32) {
3159
+ return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createInflate());
3160
+ }
3161
+ function loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createStream) {
3066
3162
  return new Promise((resolve, reject) => {
3067
- const collector = collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32);
3068
- const stream = zlib.createZstdDecompress();
3163
+ const collector = collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32);
3164
+ const stream = createStream();
3069
3165
  stream.on('data', (chunk) => {
3070
3166
  try {
3071
3167
  collector.consume(chunk);
@@ -3123,29 +3219,39 @@ function preparePayload(fileBuf, directory) {
3123
3219
  payloadCrc32,
3124
3220
  };
3125
3221
  }
3126
- /** Synchronous load; peak RAM ≈ full uncompressed payload (use the async path to bound it). */
3127
- function loadMsv5Sections(fileBuf, directory) {
3128
- const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
3129
- if (payloadCodec === CODEC_RAW) {
3130
- return sectionsFromPayload(slice, directory, payloadCrc32);
3131
- }
3222
+ function decompressPayloadSync(payloadCodec, slice, uncompressedLength) {
3132
3223
  if (payloadCodec === CODEC_ZSTD) {
3133
3224
  if (!zstdAvailable()) {
3134
3225
  throw zstdUnavailableReadError();
3135
3226
  }
3136
- // Native cap matches readPayloadMeta's 1 GiB limit (see MSV5_MAX_UNCOMPRESSED_BYTES).
3137
- // Using header `uncompressedLength` here would only help when the header understates
3138
- // the zstd stream but the attacker can inflate the header too — same worst case.
3139
3227
  const decoded = zlib.zstdDecompressSync(slice, {
3140
3228
  maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
3141
3229
  });
3142
3230
  if (decoded.length !== uncompressedLength) {
3143
- throw new Error('MSv5 zstd decompressed length mismatch');
3231
+ throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
3232
+ }
3233
+ return decoded;
3234
+ }
3235
+ if (payloadCodec === CODEC_ZLIB) {
3236
+ const decoded = zlib.inflateSync(slice, {
3237
+ maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
3238
+ });
3239
+ if (decoded.length !== uncompressedLength) {
3240
+ throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
3144
3241
  }
3145
- return sectionsFromPayload(decoded, directory, payloadCrc32);
3242
+ return decoded;
3146
3243
  }
3147
3244
  throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
3148
3245
  }
3246
+ /** Synchronous load; peak RAM ≈ full uncompressed payload (use the async path to bound it). */
3247
+ function loadMsv5Sections(fileBuf, directory) {
3248
+ const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
3249
+ if (payloadCodec === CODEC_RAW) {
3250
+ return sectionsFromPayload(slice, directory, payloadCrc32);
3251
+ }
3252
+ const decoded = decompressPayloadSync(payloadCodec, slice, uncompressedLength);
3253
+ return sectionsFromPayload(decoded, directory, payloadCrc32);
3254
+ }
3149
3255
  /** Streaming load; peak main-thread RAM ≈ largest single section (+ file buffer). */
3150
3256
  async function loadMsv5SectionsAsync(fileBuf, directory) {
3151
3257
  const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
@@ -3158,6 +3264,9 @@ async function loadMsv5SectionsAsync(fileBuf, directory) {
3158
3264
  }
3159
3265
  return loadMsv5SectionsFromZstdStream(slice, directory, uncompressedLength, payloadCrc32);
3160
3266
  }
3267
+ if (payloadCodec === CODEC_ZLIB) {
3268
+ return loadMsv5SectionsFromZlibStream(slice, directory, uncompressedLength, payloadCrc32);
3269
+ }
3161
3270
  throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
3162
3271
  }
3163
3272
  function isMsv5Buffer(buf) {
@@ -3446,7 +3555,7 @@ function resolvePackedTree(snap, termTree, packedTermIndex) {
3446
3555
  validateTermTreeLeaves(tree, termCount);
3447
3556
  return fromRadixTree(tree, termCount);
3448
3557
  }
3449
- function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
3558
+ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression) {
3450
3559
  var _a;
3451
3560
  validateFrozenSnapshotNumeric(snap);
3452
3561
  const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
@@ -3475,9 +3584,9 @@ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
3475
3584
  postingsWire.docIds,
3476
3585
  postingsWire.freqs,
3477
3586
  ];
3478
- return assembleMsv5File(globalFlags, rawSections).buffer;
3587
+ return assembleMsv5File(globalFlags, rawSections, compression).buffer;
3479
3588
  }
3480
- async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
3589
+ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression) {
3481
3590
  var _a;
3482
3591
  validateFrozenSnapshotNumeric(snap);
3483
3592
  const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
@@ -3506,7 +3615,7 @@ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
3506
3615
  postingsWire.docIds,
3507
3616
  postingsWire.freqs,
3508
3617
  ];
3509
- return (await assembleMsv5FileAsync(globalFlags, rawSections)).buffer;
3618
+ return (await assembleMsv5FileAsync(globalFlags, rawSections, compression)).buffer;
3510
3619
  }
3511
3620
 
3512
3621
  function validateMsv5Container(buf) {
@@ -3583,12 +3692,12 @@ async function decodeFrozenSnapshotMsv5Async(buf, hints) {
3583
3692
  }
3584
3693
 
3585
3694
  /** Encode a frozen snapshot as a binary buffer. */
3586
- function encodeFrozenSnapshot(snap, termTree, packedTermIndex) {
3587
- return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex);
3695
+ function encodeFrozenSnapshot(snap, termTree, packedTermIndex, compression) {
3696
+ return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression);
3588
3697
  }
3589
- /** Async encoder; uses non-blocking zstd compression for large payloads. */
3590
- function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
3591
- return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex);
3698
+ /** Async encoder; uses the selected payload compression without blocking the event loop. */
3699
+ function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex, compression) {
3700
+ return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression);
3592
3701
  }
3593
3702
 
3594
3703
  const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
@@ -3601,11 +3710,11 @@ function decodeFrozenSnapshot(buf, hints) {
3601
3710
  return decodeFrozenSnapshotMsv5(buf, hints);
3602
3711
  }
3603
3712
  if (LEGACY_MAGICS.has(magic)) {
3604
- throw invalidFrozenIndex('Unsupported frozen binary snapshot; re-build with saveBinarySync() or from lucaong JSON');
3713
+ throw invalidFrozenIndex('Unsupported frozen binary snapshot; re-build with saveBinarySync() or from MiniSearch JSON');
3605
3714
  }
3606
3715
  throw invalidFrozenIndex('Unsupported frozen binary snapshot');
3607
3716
  }
3608
- /** Async frozen snapshot decode (streaming zstd). */
3717
+ /** Async frozen snapshot decode (streaming decompression when needed). */
3609
3718
  async function decodeFrozenSnapshotAsync(buf, hints) {
3610
3719
  assertBufferLength(buf, 8);
3611
3720
  const version = buf.readUInt16LE(4);
@@ -4302,7 +4411,7 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
4302
4411
  return executeWildcardQuery(searchOptions, params);
4303
4412
  }
4304
4413
  if (isQueryCombination(query)) {
4305
- // Spread inherits parent combineWith into child branches (lucaong 7.2 behavior).
4414
+ // Spread inherits parent combineWith into child branches (MiniSearch 7.2 behavior).
4306
4415
  const options = { ...searchOptions, ...query, queries: undefined };
4307
4416
  const operator = ((_b = (_a = query.combineWith) !== null && _a !== void 0 ? _a : options.combineWith) !== null && _b !== void 0 ? _b : params.globalSearchOptions.combineWith);
4308
4417
  if (useGatedEvaluation(run, query.queries.length, operator, combinationHasWildcard(query))) {
@@ -4352,6 +4461,73 @@ function autoSuggestFromSearch(search, queryString, options = {}) {
4352
4461
  return suggestFromSearchResults(search(queryString, options));
4353
4462
  }
4354
4463
 
4464
+ /** Visit shortIds with a defined external id (holes in `externalIds` are skipped). */
4465
+ function forEachLiveShortId(nextId, externalIds, callback) {
4466
+ for (let shortId = 0; shortId < nextId; shortId++) {
4467
+ const externalId = externalIds[shortId];
4468
+ if (externalId === undefined)
4469
+ continue;
4470
+ callback(shortId, externalId);
4471
+ }
4472
+ }
4473
+
4474
+ /**
4475
+ * Build a MiniSearch `toJSON` wire snapshot (`serializationVersion: 2`) from frozen index parts.
4476
+ * Alloc-heavy (plain objects per term/field) — migration/interop only, not production persistence.
4477
+ * All input parts must belong to the same frozen index instance.
4478
+ */
4479
+ function miniSearchSnapshotFromFrozen(input) {
4480
+ const { documentCount, nextId, fieldIds, fieldCount, externalIds, fieldLengthMatrix, avgFieldLength, storedFields, index, fieldTermFlyweight, } = input;
4481
+ const documentIds = {};
4482
+ const fieldLength = {};
4483
+ const storedFieldsOut = {};
4484
+ const hasStoredFields = storedFields.kind !== 'none';
4485
+ forEachLiveShortId(nextId, externalIds, (shortId, externalId) => {
4486
+ var _a;
4487
+ const shortIdStr = String(shortId);
4488
+ documentIds[shortIdStr] = externalId;
4489
+ const lengths = new Array(fieldCount);
4490
+ const rowBase = shortId * fieldCount;
4491
+ for (let f = 0; f < fieldCount; f++) {
4492
+ lengths[f] = (_a = fieldLengthMatrix[rowBase + f]) !== null && _a !== void 0 ? _a : 0;
4493
+ }
4494
+ fieldLength[shortIdStr] = lengths;
4495
+ if (hasStoredFields) {
4496
+ storedFieldsOut[shortIdStr] = readStoredFields(storedFields, shortId);
4497
+ }
4498
+ });
4499
+ const indexEntries = [];
4500
+ for (const [term, termIndex] of index.entries()) {
4501
+ fieldTermFlyweight.bind(termIndex);
4502
+ const fieldData = {};
4503
+ for (let f = 0; f < fieldCount; f++) {
4504
+ const segment = fieldTermFlyweight.get(f);
4505
+ if (segment == null || segment.size === 0)
4506
+ continue;
4507
+ const entry = {};
4508
+ segment.forEachDoc((docId, freq) => {
4509
+ entry[String(docId)] = freq;
4510
+ });
4511
+ fieldData[String(f)] = entry;
4512
+ }
4513
+ if (Object.keys(fieldData).length > 0) {
4514
+ indexEntries.push([term, fieldData]);
4515
+ }
4516
+ }
4517
+ return {
4518
+ documentCount,
4519
+ nextId,
4520
+ documentIds,
4521
+ fieldIds,
4522
+ fieldLength,
4523
+ averageFieldLength: Array.from(avgFieldLength),
4524
+ storedFields: storedFieldsOut,
4525
+ dirtCount: 0,
4526
+ index: indexEntries,
4527
+ serializationVersion: 2,
4528
+ };
4529
+ }
4530
+
4355
4531
  function ownedIndexArray(arr) {
4356
4532
  if (arr instanceof Uint8Array)
4357
4533
  return new Uint8Array(arr);
@@ -4510,12 +4686,9 @@ class FrozenMiniSearch {
4510
4686
  tokenize: this._options.tokenize,
4511
4687
  processTerm: this._options.processTerm,
4512
4688
  indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight, (callback) => {
4513
- for (let shortId = 0; shortId < this._nextId; shortId++) {
4514
- const id = this._externalIds[shortId];
4515
- if (id === undefined)
4516
- continue;
4689
+ forEachLiveShortId(this._nextId, this._externalIds, (shortId, id) => {
4517
4690
  callback(shortId, id, readStoredFields(this._storedFields, shortId));
4518
- }
4691
+ });
4519
4692
  }),
4520
4693
  aggregateContext: this._aggregateContext,
4521
4694
  };
@@ -4580,7 +4753,7 @@ class FrozenMiniSearch {
4580
4753
  return autoSuggestFromSearch((q, o) => this.search(q, o), queryString, merged);
4581
4754
  }
4582
4755
  /** Serialize this index as a frozen binary snapshot (synchronous). */
4583
- saveBinarySync() {
4756
+ saveBinarySync(saveOptions = {}) {
4584
4757
  return encodeFrozenSnapshot({
4585
4758
  documentCount: this._documentCount,
4586
4759
  nextId: this._nextId,
@@ -4594,10 +4767,10 @@ class FrozenMiniSearch {
4594
4767
  fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
4595
4768
  treeShape: [],
4596
4769
  postings: this._postings,
4597
- }, undefined, this._index);
4770
+ }, undefined, this._index, saveOptions.compression);
4598
4771
  }
4599
- /** Non-blocking zstd compression; same output as {@link saveBinarySync}. */
4600
- async saveBinaryAsync() {
4772
+ /** Non-blocking snapshot serialization with the selected compression codec. */
4773
+ async saveBinaryAsync(saveOptions = {}) {
4601
4774
  return encodeFrozenSnapshotAsync({
4602
4775
  documentCount: this._documentCount,
4603
4776
  nextId: this._nextId,
@@ -4611,7 +4784,7 @@ class FrozenMiniSearch {
4611
4784
  fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
4612
4785
  treeShape: [],
4613
4786
  postings: this._postings,
4614
- }, undefined, this._index);
4787
+ }, undefined, this._index, saveOptions.compression);
4615
4788
  }
4616
4789
  /** Load a frozen binary snapshot. */
4617
4790
  static loadBinarySync(buffer, options = {}) {
@@ -4620,7 +4793,7 @@ class FrozenMiniSearch {
4620
4793
  const snap = decodeFrozenSnapshot(buffer, { storeFields });
4621
4794
  return FrozenMiniSearch.fromBinarySnapshot(snap, options);
4622
4795
  }
4623
- /** Load a frozen binary snapshot with streaming zstd decompression (bounded memory). */
4796
+ /** Load a frozen binary snapshot with streaming decompression when needed (bounded memory). */
4624
4797
  static async loadBinaryAsync(buffer, options = {}) {
4625
4798
  var _a;
4626
4799
  const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
@@ -4669,21 +4842,43 @@ class FrozenMiniSearch {
4669
4842
  return buildFrozenFromDocuments(documents, options);
4670
4843
  }
4671
4844
  /**
4672
- * Convert a lucaong MiniSearch JSON snapshot (`toJSON` / `loadJSON` wire format) into a
4673
- * frozen index. No runtime dependency on the `minisearch` package.
4845
+ * Export this index as a MiniSearch wire snapshot (`serializationVersion: 2`).
4846
+ * Use for migration or interchange with the `minisearch` package (`JSON.stringify` works via this method).
4847
+ * Not the primary persistence format — prefer {@link saveBinarySync} for production (size and load time).
4848
+ * Term order in `index` may differ from MiniSearch native `toJSON`; search scores stay equivalent.
4849
+ */
4850
+ toJSON() {
4851
+ return miniSearchSnapshotFromFrozen({
4852
+ documentCount: this._documentCount,
4853
+ nextId: this._nextId,
4854
+ fieldIds: this._fieldIds,
4855
+ fieldCount: this._fieldCount,
4856
+ externalIds: this._externalIds,
4857
+ fieldLengthMatrix: this._fieldLengthMatrix,
4858
+ avgFieldLength: this._avgFieldLength,
4859
+ storedFields: this._storedFields,
4860
+ index: this._index,
4861
+ fieldTermFlyweight: this._fieldTermFlyweight,
4862
+ });
4863
+ }
4864
+ /**
4865
+ * Build a new frozen index **from** a MiniSearch JSON snapshot string (import / migration).
4866
+ * Accepts the wire format produced by MiniSearch `toJSON` or by {@link toJSON} on this class.
4867
+ * Distinct from {@link loadBinarySync}: JSON is MiniSearch interchange, not the native frozen binary.
4868
+ * No runtime dependency on the `minisearch` package.
4674
4869
  */
4675
- static fromMiniSearchJson(json, options = {}) {
4870
+ static fromJson(json, options = {}) {
4676
4871
  return FrozenMiniSearch.fromMiniSearchSnapshot(JSON.parse(json), options);
4677
4872
  }
4678
4873
  /**
4679
- * Same as {@link fromMiniSearchJson} with a pre-parsed snapshot object.
4874
+ * Same as {@link fromJson} with a pre-parsed snapshot object.
4680
4875
  * `storedFields` are shallow-copied; callers must not mutate nested values
4681
4876
  * after load if they intend to keep the index immutable.
4682
4877
  */
4683
4878
  static fromMiniSearchSnapshot(snapshot, options = {}) {
4684
4879
  return assembleFrozenTrusted(buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options), 'minisearch-json');
4685
4880
  }
4686
- /** Accepts any object exposing `toJSON()` in lucaong MiniSearch snapshot shape. */
4881
+ /** Accepts any object exposing `toJSON()` in MiniSearch snapshot shape. */
4687
4882
  static fromMiniSearch(source, options = {}) {
4688
4883
  return FrozenMiniSearch.fromMiniSearchSnapshot(source.toJSON(), options);
4689
4884
  }