@yoch/frozenminisearch 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/index.js CHANGED
@@ -1640,9 +1640,11 @@ const FLAG_FL_U8 = 8;
1640
1640
  const FLAG_FL_U16 = 16;
1641
1641
  const FLAG_FREQ_U16 = 32;
1642
1642
  const CODEC_RAW = 0;
1643
+ /** Deflate/inflate (`node:zlib`) on the whole payload. */
1644
+ const CODEC_ZLIB = 1;
1643
1645
  /** Zstandard (`node:zlib`) on the whole payload. */
1644
1646
  const CODEC_ZSTD = 3;
1645
- /** Single concatenated payload, one zstd stream (or raw). */
1647
+ /** Single concatenated payload, one compressed stream (or raw). */
1646
1648
  const MSV5_FORMAT_REV_PAYLOAD = 1;
1647
1649
  /** Do not compress payloads smaller than this (bytes). */
1648
1650
  const MSV5_MIN_COMPRESS_BYTES = 64;
@@ -1934,7 +1936,7 @@ function postingsTypedBytes(layout) {
1934
1936
  slotCount,
1935
1937
  };
1936
1938
  }
1937
- function validateFrozenPostingsLayout(layout, documentCount, nextId, fail = detail => { throw new Error(detail); }) {
1939
+ function validateFrozenPostingsLayout(layout, documentCount, nextId, fail = (detail) => { throw new Error(detail); }) {
1938
1940
  if (layout.fieldCount <= 0)
1939
1941
  fail('fieldCount must be positive');
1940
1942
  if (layout.nextId !== nextId)
@@ -2205,7 +2207,7 @@ function forEachDefaultToken(text, onToken) {
2205
2207
  /** Default tokenizer into a reusable buffer (avoids `text.split()` array allocation). */
2206
2208
  function tokenizeDefaultInto(out, text) {
2207
2209
  out.length = 0;
2208
- forEachDefaultToken(text, (token) => out.push(token));
2210
+ forEachDefaultToken(text, token => out.push(token));
2209
2211
  }
2210
2212
  /** Tokenize field text into `out` (reused). Fast path when `tokenize` is the library default. */
2211
2213
  function tokenizeFieldInto(out, tokenize, text, fieldName) {
@@ -2284,7 +2286,7 @@ function validateFrozenSnapshotNumeric(snap) {
2284
2286
  if (snap.avgFieldLength.length !== snap.fieldCount) {
2285
2287
  throw invalidFrozenIndex('avgFieldLength size mismatch');
2286
2288
  }
2287
- validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, detail => {
2289
+ validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, (detail) => {
2288
2290
  throw invalidFrozenIndex(detail);
2289
2291
  });
2290
2292
  const indexedFields = Object.keys(snap.fieldIds);
@@ -2499,7 +2501,7 @@ function cloneStoredFields(layout) {
2499
2501
  }
2500
2502
  return { kind: 'multi', rows: layout.rows.slice() };
2501
2503
  }
2502
- /** Import from wire rows or lucaong snapshot. Empty storeFields + non-empty rows → multi (binary load without options). */
2504
+ /** Import from wire rows or MiniSearch snapshot. Empty storeFields + non-empty rows → multi (binary load without options). */
2503
2505
  function storedFieldsFromRows(rows, storeFields) {
2504
2506
  if (storeFields.length === 0) {
2505
2507
  const hasAny = rows.some(row => row != null && Object.keys(row).length > 0);
@@ -2683,7 +2685,7 @@ function buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, nextId, s
2683
2685
  });
2684
2686
  return { termCount, index: packedIndex, postings };
2685
2687
  }
2686
- /** Build frozen assemble params from a lucaong MiniSearch JSON snapshot. */
2688
+ /** Build frozen assemble params from a MiniSearch JSON snapshot. */
2687
2689
  function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
2688
2690
  var _a, _b, _c;
2689
2691
  if (!SUPPORTED_SERIALIZATION_VERSIONS.has(snapshot.serializationVersion)) {
@@ -2762,33 +2764,43 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
2762
2764
  };
2763
2765
  }
2764
2766
 
2765
- /** Hard cap on the uncompressed payload, rejected before allocation (zstd-bomb guard).
2767
+ /** Hard cap on the uncompressed payload, rejected before allocation (compressed-bomb guard).
2766
2768
  * This is the single trust boundary for untrusted snapshots: {@link readPayloadMeta} rejects
2767
2769
  * headers above this size; sync decompress uses the same cap via `maxOutputLength`.
2768
2770
  * A malicious header can still declare up to 1 GiB — no tighter native limit helps without
2769
2771
  * trusting `uncompressedLength` from that same header. Semantic integrity (length match,
2770
2772
  * payload CRC, per-section CRC) is enforced after decode. */
2771
2773
  const MSV5_MAX_UNCOMPRESSED_BYTES = 1024 * 1024 * 1024;
2774
+ const MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH = 'MSv5 compressed payload exceeds declared length';
2775
+ const MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH = 'MSv5 decompressed payload length mismatch';
2772
2776
  // zstd landed in node:zlib at Node 22.15.0 (22.x line) / 23.8.0, where the whole family
2773
2777
  // (zstdCompress[Sync], zstdDecompressSync, createZstdDecompress) ships together — so probing one
2774
2778
  // member is enough to know if the runtime supports zstd. Checked at call time (not captured at
2775
- // module load) so it stays mockable in tests. On older runtimes we degrade gracefully: writes fall
2776
- // back to a raw (uncompressed) payload, reads of a zstd payload throw a clear, actionable error.
2779
+ // module load) so it stays mockable in tests. On older runtimes we degrade gracefully: `auto`
2780
+ // tries zlib once (or raw if it does not help). When zstd is available, `auto` tries zstd once
2781
+ // and stays raw if it does not shrink — no second pass. Reads of a zstd payload throw a clear,
2782
+ // actionable error on runtimes without zstd.
2777
2783
  function zstdAvailable() {
2778
2784
  return typeof zlib.zstdCompressSync === 'function';
2779
2785
  }
2786
/** Build the error thrown when `compression: "zstd"` is requested on a runtime without zstd. */
function zstdUnavailableWriteError() {
  const message = [
    'MSv5 snapshot requested zstd compression, but this Node.js runtime lacks node:zlib zstd ',
    'support (added in Node 22.15.0). Upgrade Node.js, or use compression: "auto", "raw", ',
    'or "zlib".',
  ].join('');
  return new Error(message);
}
2780
2791
  function zstdUnavailableReadError() {
2781
2792
  return new Error('MSv5 snapshot is zstd-compressed, but this Node.js runtime lacks node:zlib zstd support '
2782
2793
  + '(added in Node 22.15.0). Upgrade Node.js to read this snapshot, or re-save it from a '
2783
- + 'newer runtime to embed a raw (uncompressed) payload.');
2794
+ + 'newer runtime with compression: "raw" or "zlib".');
2784
2795
  }
2785
2796
  let warnedZstdSaveFallback = false;
2786
2797
  function warnZstdSaveFallbackOnce() {
2787
2798
  if (warnedZstdSaveFallback)
2788
2799
  return;
2789
2800
  warnedZstdSaveFallback = true;
2790
- process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0); MSv5 snapshots are written with a '
2791
- + 'raw (uncompressed) payload. Upgrade to Node 22.15.0+ for compressed snapshots.', { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
2801
+ process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0); compression: "auto" falls back to '
2802
+ + 'zlib when it shrinks the payload, otherwise raw (uncompressed). Upgrade to Node 22.15.0+ '
2803
+ + 'for zstd.', { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
2792
2804
  }
2793
2805
  function assertPayloadFormatRev(buf) {
2794
2806
  const rev = buf.readUInt16LE(MSV5_FORMAT_REV_OFFSET);
@@ -2838,23 +2850,26 @@ function msv5ZstdCompressOptions(uncompressed) {
2838
2850
  },
2839
2851
  };
2840
2852
  }
2841
- /** Raw if below {@link MSV5_MIN_COMPRESS_BYTES}; else zstd when strictly smaller than raw. */
2842
- function pickPayloadCodec(uncompressed, compressed) {
2843
- if (compressed.length < uncompressed.length) {
2844
- return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
2845
- }
2853
/** Codec choice that stores the payload as-is (no compression, zstd level 0). */
function rawPayloadChoice(uncompressed) {
  const choice = { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
  return choice;
}
2848
- function choosePayloadCodecSync(uncompressed) {
2849
- if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
2850
- return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
2856
/**
 * Auto mode decision after a single compression attempt: the compressed bytes are kept
 * only when they are strictly smaller than the raw payload; otherwise the raw choice wins.
 * `zstdLevel` is reported only for the zstd codec and is 0 for every other codec.
 */
function pickAutoPayloadCodec(uncompressed, compressed, codec) {
  const shrinks = compressed.length < uncompressed.length;
  if (!shrinks) {
    return rawPayloadChoice(uncompressed);
  }
  const zstdLevel = codec === CODEC_ZSTD ? MSV5_ZSTD_LEVEL : 0;
  return { payload: compressed, codec, zstdLevel };
}
2867
/**
 * Explicit `compression: "zstd"` (sync): always returns the zstd stream, without comparing
 * its size against the raw payload. Throws when the runtime has no zstd support.
 */
function zstdPayloadChoiceSync(uncompressed) {
  if (zstdAvailable()) {
    const options = msv5ZstdCompressOptions(uncompressed);
    const payload = zlib.zstdCompressSync(uncompressed, options);
    return { payload, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
  }
  throw zstdUnavailableWriteError();
}
2859
2874
  /**
2860
2875
  * Async zstd via {@link zstdCompress} (not {@link zstdCompressSync}).
@@ -2873,16 +2888,91 @@ function zstdCompressAsync(uncompressed) {
2873
2888
  });
2874
2889
  });
2875
2890
  }
2876
- async function choosePayloadCodecAsync(uncompressed) {
2891
/**
 * Explicit `compression: "zstd"` (async): always resolves with the zstd stream, without
 * comparing its size against the raw payload. Rejects when the runtime has no zstd support.
 */
async function zstdPayloadChoiceAsync(uncompressed) {
  if (zstdAvailable()) {
    const payload = await zstdCompressAsync(uncompressed);
    return { payload, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
  }
  throw zstdUnavailableWriteError();
}
2898
/** Explicit `compression: "zlib"` (sync): always returns the deflate stream; zstd level is 0. */
function zlibPayloadChoiceSync(uncompressed) {
  const payload = zlib.deflateSync(uncompressed);
  return { payload, codec: CODEC_ZLIB, zstdLevel: 0 };
}
2902
/** Promise adapter around the callback-style `zlib.deflate`. */
function zlibCompressAsync(uncompressed) {
  return new Promise((resolve, reject) => {
    const settle = (err, compressed) => {
      if (err == null) {
        resolve(compressed);
      }
      else {
        reject(err);
      }
    };
    zlib.deflate(uncompressed, settle);
  });
}
2913
/** Explicit `compression: "zlib"` (async): always resolves with the deflate stream; zstd level is 0. */
async function zlibPayloadChoiceAsync(uncompressed) {
  const payload = await zlibCompressAsync(uncompressed);
  return { payload, codec: CODEC_ZLIB, zstdLevel: 0 };
}
2917
/** Synchronous compressors used by auto mode, keyed by codec family (`zstd` / `zlib`). */
const autoSyncCompressors = {
  zstd: (bytes) => zlib.zstdCompressSync(bytes, msv5ZstdCompressOptions(bytes)),
  zlib: (bytes) => zlib.deflateSync(bytes),
};
/** Promise-returning counterparts of {@link autoSyncCompressors}, same keys. */
const autoAsyncCompressors = {
  zstd: zstdCompressAsync,
  zlib: zlibCompressAsync,
};
2925
/**
 * `compression: "auto"` (sync). Payloads under the minimum size stay raw. Otherwise exactly
 * one compression attempt is made — zstd when the runtime has it, else zlib (after a
 * one-time warning) — and the result is kept only if {@link pickAutoPayloadCodec} accepts it.
 */
function autoPayloadChoice(uncompressed, compressors) {
  const tooSmall = uncompressed.length < MSV5_MIN_COMPRESS_BYTES;
  if (tooSmall) {
    return rawPayloadChoice(uncompressed);
  }
  const hasZstd = zstdAvailable();
  if (!hasZstd) {
    warnZstdSaveFallbackOnce();
  }
  const family = hasZstd ? 'zstd' : 'zlib';
  const compressed = compressors[family](uncompressed);
  return pickAutoPayloadCodec(uncompressed, compressed, hasZstd ? CODEC_ZSTD : CODEC_ZLIB);
}
2935
/**
 * `compression: "auto"` (async). Payloads under the minimum size stay raw. Otherwise exactly
 * one awaited compression attempt is made — zstd when the runtime has it, else zlib (after a
 * one-time warning) — and the result is kept only if {@link pickAutoPayloadCodec} accepts it.
 */
async function autoPayloadChoiceAsync(uncompressed, compressors) {
  const tooSmall = uncompressed.length < MSV5_MIN_COMPRESS_BYTES;
  if (tooSmall) {
    return rawPayloadChoice(uncompressed);
  }
  const hasZstd = zstdAvailable();
  if (!hasZstd) {
    warnZstdSaveFallbackOnce();
  }
  const family = hasZstd ? 'zstd' : 'zlib';
  const compressed = await compressors[family](uncompressed);
  return pickAutoPayloadCodec(uncompressed, compressed, hasZstd ? CODEC_ZSTD : CODEC_ZLIB);
}
2945
/**
 * Pick and apply the payload codec for a synchronous MSv5 save.
 *
 * @param uncompressed Concatenated section payload to store.
 * @param {'auto'|'raw'|'zstd'|'zlib'} [compression='auto'] Requested compression mode.
 * @returns {{ payload, codec, zstdLevel }} Bytes to write plus the codec id and zstd level
 *   recorded alongside them.
 * @throws {TypeError} When `compression` is not one of the supported modes. The previous
 *   fallthrough returned the unrecognised value itself, which callers then destructured
 *   into `undefined` payload/codec fields.
 * @throws {Error} When `"zstd"` is requested on a runtime without zstd support.
 */
function choosePayloadCodecSync(uncompressed, compression = 'auto') {
  switch (compression) {
    case 'raw':
      return rawPayloadChoice(uncompressed);
    case 'zstd':
      return zstdPayloadChoiceSync(uncompressed);
    case 'zlib':
      return zlibPayloadChoiceSync(uncompressed);
    case 'auto':
      return autoPayloadChoice(uncompressed, autoSyncCompressors);
    default:
      // Untyped (plain JS) callers can reach this branch; fail loudly instead of
      // handing a non-choice value back to the assembler.
      throw new TypeError(`MSv5 unknown compression mode ${String(compression)}; `
        + 'expected "auto", "raw", "zstd", or "zlib"');
  }
}
2961
/**
 * Pick and apply the payload codec for an asynchronous MSv5 save.
 *
 * @param uncompressed Concatenated section payload to store.
 * @param {'auto'|'raw'|'zstd'|'zlib'} [compression='auto'] Requested compression mode.
 * @returns {Promise<{ payload, codec, zstdLevel }>} Bytes to write plus the codec id and
 *   zstd level recorded alongside them.
 * @throws {TypeError} (as a rejection) When `compression` is not one of the supported
 *   modes. The previous fallthrough resolved with the unrecognised value itself, which
 *   callers then destructured into `undefined` payload/codec fields.
 * @throws {Error} (as a rejection) When `"zstd"` is requested on a runtime without zstd.
 */
async function choosePayloadCodecAsync(uncompressed, compression = 'auto') {
  switch (compression) {
    case 'raw':
      return rawPayloadChoice(uncompressed);
    case 'zstd':
      return await zstdPayloadChoiceAsync(uncompressed);
    case 'zlib':
      return await zlibPayloadChoiceAsync(uncompressed);
    case 'auto':
      return await autoPayloadChoiceAsync(uncompressed, autoAsyncCompressors);
    default:
      // Untyped (plain JS) callers can reach this branch; reject loudly instead of
      // resolving with a non-choice value.
      throw new TypeError(`MSv5 unknown compression mode ${String(compression)}; `
        + 'expected "auto", "raw", "zstd", or "zlib"');
  }
}
2887
2977
  function concatAndValidateSections(rawSections) {
2888
2978
  if (rawSections.length !== MSV5_SECTION_COUNT) {
@@ -2937,16 +3027,16 @@ function buildMsv5AssembledFile(globalFlags, entries, uncompressedLength, payloa
2937
3027
  }
2938
3028
  /**
2939
3029
  * MSv5 on disk: header + catalogue (uncompressed offsets) + **one** payload blob
2940
- * (raw concatenation or a single zstd stream over it).
3030
+ * (raw concatenation or a single compressed stream over it).
2941
3031
  */
2942
- function assembleMsv5File(globalFlags, rawSections) {
3032
function assembleMsv5File(globalFlags, rawSections, compression = 'auto') {
  // Concatenate + validate first; the catalogue always describes the uncompressed layout.
  const sections = concatAndValidateSections(rawSections);
  const choice = choosePayloadCodecSync(sections.uncompressed, compression);
  return buildMsv5AssembledFile(
    globalFlags,
    sections.entries,
    sections.uncompressed.length,
    sections.payloadCrc32,
    choice.payload,
    choice.codec,
    choice.zstdLevel,
  );
}
2947
- async function assembleMsv5FileAsync(globalFlags, rawSections) {
3037
/** Async twin of `assembleMsv5File`: same layout, codec chosen via the async path. */
async function assembleMsv5FileAsync(globalFlags, rawSections, compression = 'auto') {
  const sections = concatAndValidateSections(rawSections);
  const choice = await choosePayloadCodecAsync(sections.uncompressed, compression);
  return buildMsv5AssembledFile(
    globalFlags,
    sections.entries,
    sections.uncompressed.length,
    sections.payloadCrc32,
    choice.payload,
    choice.codec,
    choice.zstdLevel,
  );
}
2952
3042
  function readMsv5SectionDirectory(buf) {
@@ -2990,11 +3080,11 @@ function sectionsFromPayload(payload, directory, payloadCrc32) {
2990
3080
  return out;
2991
3081
  });
2992
3082
  }
2993
- /** Streaming zstd reader: keeps only one section in memory at a time.
2994
- * No `maxOutputLength` on {@link createZstdDecompress}: output is bounded by accumulating
2995
- * `streamOffset` against the header's `uncompressedLength` (same 1 GiB cap checked upfront).
2996
- * Sync load uses `maxOutputLength` instead because it materializes the whole payload at once. */
2997
- function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32) {
3083
+ /** Streaming compressed reader: keeps only one section in memory at a time.
3084
+ * No `maxOutputLength` on Transform streams: output is bounded by accumulating `streamOffset`
3085
+ * against the header's `uncompressedLength` (same 1 GiB cap checked upfront). Sync load uses
3086
+ * `maxOutputLength` because it materializes the whole payload at once. */
3087
+ function collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32) {
2998
3088
  if (uncompressedLength > MSV5_MAX_UNCOMPRESSED_BYTES) {
2999
3089
  throw new Error('MSv5 payload exceeds 1 GiB limit');
3000
3090
  }
@@ -3014,7 +3104,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
3014
3104
  }
3015
3105
  function consume(chunk) {
3016
3106
  if (streamOffset + chunk.length > uncompressedLength) {
3017
- throw new Error('MSv5 zstd payload exceeds declared length');
3107
+ throw new Error(MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH);
3018
3108
  }
3019
3109
  payloadCrc = crc32Update(payloadCrc, chunk);
3020
3110
  let off = 0;
@@ -3050,7 +3140,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
3050
3140
  function finish() {
3051
3141
  emitEmptySections();
3052
3142
  if (streamOffset !== uncompressedLength || sectionId !== directory.length) {
3053
- throw new Error('MSv5 zstd decompressed length mismatch');
3143
+ throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
3054
3144
  }
3055
3145
  if (payloadCrc !== payloadCrc32) {
3056
3146
  throw new Error('MSv5 payload CRC mismatch');
@@ -3059,9 +3149,15 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
3059
3149
  return { sections, consume, finish };
3060
3150
  }
3061
3151
  function loadMsv5SectionsFromZstdStream(compressed, directory, uncompressedLength, payloadCrc32) {
3152
+ return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createZstdDecompress());
3153
+ }
3154
/** Stream-decode a zlib (deflate) payload; delegates to the shared compressed-stream loader. */
function loadMsv5SectionsFromZlibStream(compressed, directory, uncompressedLength, payloadCrc32) {
  const createInflateStream = () => zlib.createInflate();
  return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createInflateStream);
}
3157
+ function loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createStream) {
3062
3158
  return new Promise((resolve, reject) => {
3063
- const collector = collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32);
3064
- const stream = zlib.createZstdDecompress();
3159
+ const collector = collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32);
3160
+ const stream = createStream();
3065
3161
  stream.on('data', (chunk) => {
3066
3162
  try {
3067
3163
  collector.consume(chunk);
@@ -3119,29 +3215,39 @@ function preparePayload(fileBuf, directory) {
3119
3215
  payloadCrc32,
3120
3216
  };
3121
3217
  }
3122
- /** Synchronous load; peak RAM ≈ full uncompressed payload (use the async path to bound it). */
3123
- function loadMsv5Sections(fileBuf, directory) {
3124
- const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
3125
- if (payloadCodec === CODEC_RAW) {
3126
- return sectionsFromPayload(slice, directory, payloadCrc32);
3127
- }
3218
/**
 * Decode a whole compressed payload in one shot (zstd or zlib) and verify that the result
 * has exactly `uncompressedLength` bytes. Output is capped at MSV5_MAX_UNCOMPRESSED_BYTES
 * through `maxOutputLength`. Any other codec id is rejected as unknown.
 */
function decompressPayloadSync(payloadCodec, slice, uncompressedLength) {
  const limits = { maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES };
  let decoded;
  switch (payloadCodec) {
    case CODEC_ZSTD:
      if (!zstdAvailable()) {
        throw zstdUnavailableReadError();
      }
      decoded = zlib.zstdDecompressSync(slice, limits);
      break;
    case CODEC_ZLIB:
      decoded = zlib.inflateSync(slice, limits);
      break;
    default:
      throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
  }
  if (decoded.length !== uncompressedLength) {
    throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
  }
  return decoded;
}
3242
/**
 * Synchronous section load. A raw payload is sliced directly; any other codec is fully
 * decompressed first, so peak RAM is about the whole uncompressed payload (the async
 * loader exists to bound that).
 */
function loadMsv5Sections(fileBuf, directory) {
  const prepared = preparePayload(fileBuf, directory);
  const bytes = prepared.payloadCodec === CODEC_RAW
    ? prepared.slice
    : decompressPayloadSync(prepared.payloadCodec, prepared.slice, prepared.uncompressedLength);
  return sectionsFromPayload(bytes, directory, prepared.payloadCrc32);
}
3145
3251
  /** Streaming load; peak main-thread RAM ≈ largest single section (+ file buffer). */
3146
3252
  async function loadMsv5SectionsAsync(fileBuf, directory) {
3147
3253
  const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
@@ -3154,6 +3260,9 @@ async function loadMsv5SectionsAsync(fileBuf, directory) {
3154
3260
  }
3155
3261
  return loadMsv5SectionsFromZstdStream(slice, directory, uncompressedLength, payloadCrc32);
3156
3262
  }
3263
+ if (payloadCodec === CODEC_ZLIB) {
3264
+ return loadMsv5SectionsFromZlibStream(slice, directory, uncompressedLength, payloadCrc32);
3265
+ }
3157
3266
  throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
3158
3267
  }
3159
3268
  function isMsv5Buffer(buf) {
@@ -3442,7 +3551,7 @@ function resolvePackedTree(snap, termTree, packedTermIndex) {
3442
3551
  validateTermTreeLeaves(tree, termCount);
3443
3552
  return fromRadixTree(tree, termCount);
3444
3553
  }
3445
- function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
3554
+ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression) {
3446
3555
  var _a;
3447
3556
  validateFrozenSnapshotNumeric(snap);
3448
3557
  const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
@@ -3471,9 +3580,9 @@ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
3471
3580
  postingsWire.docIds,
3472
3581
  postingsWire.freqs,
3473
3582
  ];
3474
- return assembleMsv5File(globalFlags, rawSections).buffer;
3583
+ return assembleMsv5File(globalFlags, rawSections, compression).buffer;
3475
3584
  }
3476
- async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
3585
+ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression) {
3477
3586
  var _a;
3478
3587
  validateFrozenSnapshotNumeric(snap);
3479
3588
  const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
@@ -3502,7 +3611,7 @@ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
3502
3611
  postingsWire.docIds,
3503
3612
  postingsWire.freqs,
3504
3613
  ];
3505
- return (await assembleMsv5FileAsync(globalFlags, rawSections)).buffer;
3614
+ return (await assembleMsv5FileAsync(globalFlags, rawSections, compression)).buffer;
3506
3615
  }
3507
3616
 
3508
3617
  function validateMsv5Container(buf) {
@@ -3579,12 +3688,12 @@ async function decodeFrozenSnapshotMsv5Async(buf, hints) {
3579
3688
  }
3580
3689
 
3581
3690
  /** Encode a frozen snapshot as a binary buffer. */
3582
- function encodeFrozenSnapshot(snap, termTree, packedTermIndex) {
3583
- return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex);
3691
function encodeFrozenSnapshot(snap, termTree, packedTermIndex, compression) {
  // MSv5 is the only wire format written; `compression` is forwarded untouched.
  const encoded = encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression);
  return encoded;
}
3585
- /** Async encoder; uses non-blocking zstd compression for large payloads. */
3586
- function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
3587
- return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex);
3694
/** Async encoder: forwards to the MSv5 async encoder with the selected payload compression. */
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex, compression) {
  const pending = encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression);
  return pending;
}
3589
3698
 
3590
3699
  const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
@@ -3597,11 +3706,11 @@ function decodeFrozenSnapshot(buf, hints) {
3597
3706
  return decodeFrozenSnapshotMsv5(buf, hints);
3598
3707
  }
3599
3708
  if (LEGACY_MAGICS.has(magic)) {
3600
- throw invalidFrozenIndex('Unsupported frozen binary snapshot; re-build with saveBinarySync() or from lucaong JSON');
3709
+ throw invalidFrozenIndex('Unsupported frozen binary snapshot; re-build with saveBinarySync() or from MiniSearch JSON');
3601
3710
  }
3602
3711
  throw invalidFrozenIndex('Unsupported frozen binary snapshot');
3603
3712
  }
3604
- /** Async frozen snapshot decode (streaming zstd). */
3713
+ /** Async frozen snapshot decode (streaming decompression when needed). */
3605
3714
  async function decodeFrozenSnapshotAsync(buf, hints) {
3606
3715
  assertBufferLength(buf, 8);
3607
3716
  const version = buf.readUInt16LE(4);
@@ -4298,7 +4407,7 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
4298
4407
  return executeWildcardQuery(searchOptions, params);
4299
4408
  }
4300
4409
  if (isQueryCombination(query)) {
4301
- // Spread inherits parent combineWith into child branches (lucaong 7.2 behavior).
4410
+ // Spread inherits parent combineWith into child branches (MiniSearch 7.2 behavior).
4302
4411
  const options = { ...searchOptions, ...query, queries: undefined };
4303
4412
  const operator = ((_b = (_a = query.combineWith) !== null && _a !== void 0 ? _a : options.combineWith) !== null && _b !== void 0 ? _b : params.globalSearchOptions.combineWith);
4304
4413
  if (useGatedEvaluation(run, query.queries.length, operator, combinationHasWildcard(query))) {
@@ -4348,6 +4457,73 @@ function autoSuggestFromSearch(search, queryString, options = {}) {
4348
4457
  return suggestFromSearchResults(search(queryString, options));
4349
4458
  }
4350
4459
 
4460
/**
 * Invoke `callback(shortId, externalId)` for every shortId in `[0, nextId)` whose entry in
 * `externalIds` is not `undefined`; undefined slots (holes) are skipped.
 */
function forEachLiveShortId(nextId, externalIds, callback) {
  let shortId = 0;
  while (shortId < nextId) {
    const externalId = externalIds[shortId];
    if (externalId !== undefined) {
      callback(shortId, externalId);
    }
    shortId += 1;
  }
}
4469
+
4470
/**
 * Assemble a MiniSearch `toJSON`-shaped snapshot (`serializationVersion: 2`) from the parts
 * of a frozen index. Allocates plain objects per document, term and field, so it is meant
 * for migration/interop rather than production persistence.
 * All parts in `input` must come from the same frozen index instance.
 */
function miniSearchSnapshotFromFrozen(input) {
  const {
    documentCount,
    nextId,
    fieldIds,
    fieldCount,
    externalIds,
    fieldLengthMatrix,
    avgFieldLength,
    storedFields,
    index,
    fieldTermFlyweight,
  } = input;
  const keepStoredFields = storedFields.kind !== 'none';
  const documentIds = {};
  const fieldLength = {};
  const storedFieldsOut = {};
  // Per-document tables, keyed by the stringified shortId.
  forEachLiveShortId(nextId, externalIds, (shortId, externalId) => {
    const key = String(shortId);
    documentIds[key] = externalId;
    const firstCell = shortId * fieldCount;
    const lengths = new Array(fieldCount);
    for (let fieldId = 0; fieldId < fieldCount; fieldId += 1) {
      const cell = fieldLengthMatrix[firstCell + fieldId];
      // Missing matrix cells are exported as length 0.
      lengths[fieldId] = cell == null ? 0 : cell;
    }
    fieldLength[key] = lengths;
    if (keepStoredFields) {
      storedFieldsOut[key] = readStoredFields(storedFields, shortId);
    }
  });
  // Term table: one [term, { fieldId: { docId: freq } }] pair per term with postings.
  const indexEntries = [];
  for (const [term, termIndex] of index.entries()) {
    fieldTermFlyweight.bind(termIndex);
    const fieldData = {};
    let hasPostings = false;
    for (let fieldId = 0; fieldId < fieldCount; fieldId += 1) {
      const segment = fieldTermFlyweight.get(fieldId);
      if (segment != null && segment.size !== 0) {
        const frequencies = {};
        segment.forEachDoc((docId, freq) => {
          frequencies[String(docId)] = freq;
        });
        fieldData[String(fieldId)] = frequencies;
        hasPostings = true;
      }
    }
    if (hasPostings) {
      indexEntries.push([term, fieldData]);
    }
  }
  return {
    documentCount,
    nextId,
    documentIds,
    fieldIds,
    fieldLength,
    averageFieldLength: Array.from(avgFieldLength),
    storedFields: storedFieldsOut,
    dirtCount: 0,
    index: indexEntries,
    serializationVersion: 2,
  };
}
4526
+
4351
4527
  function ownedIndexArray(arr) {
4352
4528
  if (arr instanceof Uint8Array)
4353
4529
  return new Uint8Array(arr);
@@ -4506,12 +4682,9 @@ class FrozenMiniSearch {
4506
4682
  tokenize: this._options.tokenize,
4507
4683
  processTerm: this._options.processTerm,
4508
4684
  indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight, (callback) => {
4509
- for (let shortId = 0; shortId < this._nextId; shortId++) {
4510
- const id = this._externalIds[shortId];
4511
- if (id === undefined)
4512
- continue;
4685
+ forEachLiveShortId(this._nextId, this._externalIds, (shortId, id) => {
4513
4686
  callback(shortId, id, readStoredFields(this._storedFields, shortId));
4514
- }
4687
+ });
4515
4688
  }),
4516
4689
  aggregateContext: this._aggregateContext,
4517
4690
  };
@@ -4576,7 +4749,7 @@ class FrozenMiniSearch {
4576
4749
  return autoSuggestFromSearch((q, o) => this.search(q, o), queryString, merged);
4577
4750
  }
4578
4751
  /** Serialize this index as a frozen binary snapshot (synchronous). */
4579
- saveBinarySync() {
4752
+ saveBinarySync(saveOptions = {}) {
4580
4753
  return encodeFrozenSnapshot({
4581
4754
  documentCount: this._documentCount,
4582
4755
  nextId: this._nextId,
@@ -4590,10 +4763,10 @@ class FrozenMiniSearch {
4590
4763
  fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
4591
4764
  treeShape: [],
4592
4765
  postings: this._postings,
4593
- }, undefined, this._index);
4766
+ }, undefined, this._index, saveOptions.compression);
4594
4767
  }
4595
- /** Non-blocking zstd compression; same output as {@link saveBinarySync}. */
4596
- async saveBinaryAsync() {
4768
+ /** Non-blocking snapshot serialization with the selected compression codec. */
4769
+ async saveBinaryAsync(saveOptions = {}) {
4597
4770
  return encodeFrozenSnapshotAsync({
4598
4771
  documentCount: this._documentCount,
4599
4772
  nextId: this._nextId,
@@ -4607,7 +4780,7 @@ class FrozenMiniSearch {
4607
4780
  fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
4608
4781
  treeShape: [],
4609
4782
  postings: this._postings,
4610
- }, undefined, this._index);
4783
+ }, undefined, this._index, saveOptions.compression);
4611
4784
  }
4612
4785
  /** Load a frozen binary snapshot. */
4613
4786
  static loadBinarySync(buffer, options = {}) {
@@ -4616,7 +4789,7 @@ class FrozenMiniSearch {
4616
4789
  const snap = decodeFrozenSnapshot(buffer, { storeFields });
4617
4790
  return FrozenMiniSearch.fromBinarySnapshot(snap, options);
4618
4791
  }
4619
- /** Load a frozen binary snapshot with streaming zstd decompression (bounded memory). */
4792
+ /** Load a frozen binary snapshot with streaming decompression when needed (bounded memory). */
4620
4793
  static async loadBinaryAsync(buffer, options = {}) {
4621
4794
  var _a;
4622
4795
  const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
@@ -4665,21 +4838,43 @@ class FrozenMiniSearch {
4665
4838
  return buildFrozenFromDocuments(documents, options);
4666
4839
  }
4667
4840
  /**
4668
- * Convert a lucaong MiniSearch JSON snapshot (`toJSON` / `loadJSON` wire format) into a
4669
- * frozen index. No runtime dependency on the `minisearch` package.
4841
+ * Export this index as a MiniSearch wire snapshot (`serializationVersion: 2`).
4842
+ * Use for migration or interchange with the `minisearch` package (`JSON.stringify` works via this method).
4843
+ * Not the primary persistence format — prefer {@link saveBinarySync} for production (size and load time).
4844
+ * Term order in `index` may differ from MiniSearch native `toJSON`; search scores stay equivalent.
4845
+ */
4846
+ toJSON() {
4847
+ return miniSearchSnapshotFromFrozen({
4848
+ documentCount: this._documentCount,
4849
+ nextId: this._nextId,
4850
+ fieldIds: this._fieldIds,
4851
+ fieldCount: this._fieldCount,
4852
+ externalIds: this._externalIds,
4853
+ fieldLengthMatrix: this._fieldLengthMatrix,
4854
+ avgFieldLength: this._avgFieldLength,
4855
+ storedFields: this._storedFields,
4856
+ index: this._index,
4857
+ fieldTermFlyweight: this._fieldTermFlyweight,
4858
+ });
4859
+ }
4860
+ /**
4861
+ * Build a new frozen index **from** a MiniSearch JSON snapshot string (import / migration).
4862
+ * Accepts the wire format produced by MiniSearch `toJSON` or by {@link toJSON} on this class.
4863
+ * Distinct from {@link loadBinarySync}: JSON is MiniSearch interchange, not the native frozen binary.
4864
+ * No runtime dependency on the `minisearch` package.
4670
4865
  */
4671
- static fromMiniSearchJson(json, options = {}) {
4866
+ static fromJson(json, options = {}) {
4672
4867
  return FrozenMiniSearch.fromMiniSearchSnapshot(JSON.parse(json), options);
4673
4868
  }
4674
4869
  /**
4675
- * Same as {@link fromMiniSearchJson} with a pre-parsed snapshot object.
4870
+ * Same as {@link fromJson} with a pre-parsed snapshot object.
4676
4871
  * `storedFields` are shallow-copied; callers must not mutate nested values
4677
4872
  * after load if they intend to keep the index immutable.
4678
4873
  */
4679
4874
  static fromMiniSearchSnapshot(snapshot, options = {}) {
4680
4875
  return assembleFrozenTrusted(buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options), 'minisearch-json');
4681
4876
  }
4682
- /** Accepts any object exposing `toJSON()` in lucaong MiniSearch snapshot shape. */
4877
+ /** Accepts any object exposing `toJSON()` in MiniSearch snapshot shape. */
4683
4878
  static fromMiniSearch(source, options = {}) {
4684
4879
  return FrozenMiniSearch.fromMiniSearchSnapshot(source.toJSON(), options);
4685
4880
  }