@yoch/frozenminisearch 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +19 -1
- package/dist/cjs/index.cjs +173 -64
- package/dist/es/index.d.ts +21 -7
- package/dist/es/index.js +173 -64
- package/package.json +7 -4
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## v1.2.0 — `@yoch/frozenminisearch`
|
|
6
|
+
|
|
7
|
+
Minor release: configurable MSv5 snapshot compression and Node 20 support.
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- **`SaveBinaryOptions`** — `saveBinarySync()` / `saveBinaryAsync()` accept `{ compression: 'auto' | 'raw' | 'zstd' | 'zlib' }`.
|
|
12
|
+
- **`CODEC_ZLIB`** — portable deflate snapshots readable on Node 20+; explicit `compression: 'zlib'` always writes zlib on disk.
|
|
13
|
+
- **Exported types** — `BinaryCompression`, `SaveBinaryOptions`.
|
|
14
|
+
|
|
15
|
+
### Improved
|
|
16
|
+
|
|
17
|
+
- **`compression: 'auto'`** — one compression pass: payloads under 64 B stay raw; otherwise zstd when available (Node 22.15+) or zlib on Node 20–22.14, kept only when it strictly shrinks the payload (raw otherwise).
|
|
18
|
+
- **Node engine** — `>=20` (was `>=22.15`); zstd remains available on Node 22.15+, and Node 22.15+ is also required to read zstd snapshots.
|
|
19
|
+
|
|
5
20
|
## v1.1.0 — `@yoch/frozenminisearch`
|
|
6
21
|
|
|
7
22
|
Minor release: MiniSearch JSON wire export and clearer JSON import API. MSv5 binary format unchanged.
|
package/README.md
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
[](https://github.com/yoch/frozenminisearch/actions/workflows/main.yml)
|
|
6
6
|
[](https://socket.dev/npm/package/%40yoch%2Ffrozenminisearch)
|
|
7
7
|
|
|
8
|
+
[API documentation](https://yoch.github.io/frozenminisearch/)
|
|
9
|
+
|
|
8
10
|
**Memory-optimized, read-only full-text search for Node.js** — the same BM25, prefix/fuzzy, and `autoSuggest` API as [MiniSearch](https://github.com/lucaong/minisearch), with **up to ~98% less index RAM** on real corpora and compact binary snapshots you ship instead of JSON.
|
|
9
11
|
|
|
10
12
|
**Why it exists:** [MiniSearch](https://github.com/lucaong/minisearch) optimizes for a mutable in-memory index. FrozenMiniSearch optimizes for **retained heap, disk footprint, and cold load** once the corpus is fixed — packed radix postings, columnar `storeFields`, typed-array layouts, and MSv5 binary wire format instead of per-document JS objects.
|
|
@@ -186,7 +188,23 @@ const buf = index.saveBinarySync()
|
|
|
186
188
|
const loaded = FrozenMiniSearch.loadBinarySync(buf, {}) // field names embedded in snapshot
|
|
187
189
|
```
|
|
188
190
|
|
|
189
|
-
- **Node ≥
|
|
191
|
+
- **Node ≥ 20**
|
|
192
|
+
- Default snapshot compression (`compression: 'auto'`, one pass):
|
|
193
|
+
- payloads under 64 B stay raw
|
|
194
|
+
- `zstd` on Node 22.15+ when it strictly shrinks the payload
|
|
195
|
+
- otherwise `zlib` on Node 20–22.14 when it strictly shrinks the payload
|
|
196
|
+
- otherwise `raw` (uncompressed) — only one codec is attempted; there is no second pass with another codec
|
|
197
|
+
- Explicit snapshot compression always writes the chosen codec, even when compression would not shrink the payload (useful for portability):
|
|
198
|
+
|
|
199
|
+
```javascript
|
|
200
|
+
const portable = index.saveBinarySync({ compression: 'zlib' })
|
|
201
|
+
const uncompressed = index.saveBinarySync({ compression: 'raw' })
|
|
202
|
+
const bestRatio = index.saveBinarySync({ compression: 'zstd' }) // Node 22.15+
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
- Snapshot readability depends on the embedded codec:
|
|
206
|
+
- `raw` and `zlib` snapshots load on Node 20+
|
|
207
|
+
- `zstd` snapshots require Node 22.15+
|
|
190
208
|
- Snapshots produced by this package version are forward-compatible; re-build from MiniSearch JSON if an older binary fails to load
|
|
191
209
|
- `tokenize` / `processTerm` are not stored — pass the same functions at load when customized
|
|
192
210
|
|
package/dist/cjs/index.cjs
CHANGED
|
@@ -1644,9 +1644,11 @@ const FLAG_FL_U8 = 8;
|
|
|
1644
1644
|
const FLAG_FL_U16 = 16;
|
|
1645
1645
|
const FLAG_FREQ_U16 = 32;
|
|
1646
1646
|
const CODEC_RAW = 0;
|
|
1647
|
+
/** Deflate/inflate (`node:zlib`) on the whole payload. */
|
|
1648
|
+
const CODEC_ZLIB = 1;
|
|
1647
1649
|
/** Zstandard (`node:zlib`) on the whole payload. */
|
|
1648
1650
|
const CODEC_ZSTD = 3;
|
|
1649
|
-
/** Single concatenated payload, one
|
|
1651
|
+
/** Single concatenated payload, one compressed stream (or raw). */
|
|
1650
1652
|
const MSV5_FORMAT_REV_PAYLOAD = 1;
|
|
1651
1653
|
/** Do not compress payloads smaller than this (bytes). */
|
|
1652
1654
|
const MSV5_MIN_COMPRESS_BYTES = 64;
|
|
@@ -2766,33 +2768,43 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2766
2768
|
};
|
|
2767
2769
|
}
|
|
2768
2770
|
|
|
2769
|
-
/** Hard cap on the uncompressed payload, rejected before allocation (
|
|
2771
|
+
/** Hard cap on the uncompressed payload, rejected before allocation (compressed-bomb guard).
|
|
2770
2772
|
* This is the single trust boundary for untrusted snapshots: {@link readPayloadMeta} rejects
|
|
2771
2773
|
* headers above this size; sync decompress uses the same cap via `maxOutputLength`.
|
|
2772
2774
|
* A malicious header can still declare up to 1 GiB — no tighter native limit helps without
|
|
2773
2775
|
* trusting `uncompressedLength` from that same header. Semantic integrity (length match,
|
|
2774
2776
|
* payload CRC, per-section CRC) is enforced after decode. */
|
|
2775
2777
|
const MSV5_MAX_UNCOMPRESSED_BYTES = 1024 * 1024 * 1024;
|
|
2778
|
+
const MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH = 'MSv5 compressed payload exceeds declared length';
|
|
2779
|
+
const MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH = 'MSv5 decompressed payload length mismatch';
|
|
2776
2780
|
// zstd landed in node:zlib at Node 22.15.0 (22.x line) / 23.8.0, where the whole family
|
|
2777
2781
|
// (zstdCompress[Sync], zstdDecompressSync, createZstdDecompress) ships together — so probing one
|
|
2778
2782
|
// member is enough to know if the runtime supports zstd. Checked at call time (not captured at
|
|
2779
|
-
// module load) so it stays mockable in tests. On older runtimes we degrade gracefully:
|
|
2780
|
-
//
|
|
2783
|
+
// module load) so it stays mockable in tests. On older runtimes we degrade gracefully: `auto`
|
|
2784
|
+
// tries zlib once (or raw if it does not help). When zstd is available, `auto` tries zstd once
|
|
2785
|
+
// and stays raw if it does not shrink — no second pass. Reads of a zstd payload throw a clear,
|
|
2786
|
+
// actionable error on runtimes without zstd.
|
|
2781
2787
|
function zstdAvailable() {
|
|
2782
2788
|
return typeof zlib.zstdCompressSync === 'function';
|
|
2783
2789
|
}
|
|
2790
|
+
function zstdUnavailableWriteError() {
|
|
2791
|
+
return new Error('MSv5 snapshot requested zstd compression, but this Node.js runtime lacks node:zlib zstd '
|
|
2792
|
+
+ 'support (added in Node 22.15.0). Upgrade Node.js, or use compression: "auto", "raw", '
|
|
2793
|
+
+ 'or "zlib".');
|
|
2794
|
+
}
|
|
2784
2795
|
function zstdUnavailableReadError() {
|
|
2785
2796
|
return new Error('MSv5 snapshot is zstd-compressed, but this Node.js runtime lacks node:zlib zstd support '
|
|
2786
2797
|
+ '(added in Node 22.15.0). Upgrade Node.js to read this snapshot, or re-save it from a '
|
|
2787
|
-
+ 'newer runtime
|
|
2798
|
+
+ 'newer runtime with compression: "raw" or "zlib".');
|
|
2788
2799
|
}
|
|
2789
2800
|
let warnedZstdSaveFallback = false;
|
|
2790
2801
|
function warnZstdSaveFallbackOnce() {
|
|
2791
2802
|
if (warnedZstdSaveFallback)
|
|
2792
2803
|
return;
|
|
2793
2804
|
warnedZstdSaveFallback = true;
|
|
2794
|
-
process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0);
|
|
2795
|
-
+ 'raw (uncompressed)
|
|
2805
|
+
process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0); compression: "auto" falls back to '
|
|
2806
|
+
+ 'zlib when it shrinks the payload, otherwise raw (uncompressed). Upgrade to Node 22.15.0+ '
|
|
2807
|
+
+ 'for zstd.', { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
|
|
2796
2808
|
}
|
|
2797
2809
|
function assertPayloadFormatRev(buf) {
|
|
2798
2810
|
const rev = buf.readUInt16LE(MSV5_FORMAT_REV_OFFSET);
|
|
@@ -2842,23 +2854,26 @@ function msv5ZstdCompressOptions(uncompressed) {
|
|
|
2842
2854
|
},
|
|
2843
2855
|
};
|
|
2844
2856
|
}
|
|
2845
|
-
|
|
2846
|
-
function pickPayloadCodec(uncompressed, compressed) {
|
|
2847
|
-
if (compressed.length < uncompressed.length) {
|
|
2848
|
-
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2849
|
-
}
|
|
2857
|
+
function rawPayloadChoice(uncompressed) {
|
|
2850
2858
|
return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
|
|
2851
2859
|
}
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2860
|
+
/** Auto mode: one compression attempt; keep it only when strictly smaller than raw. */
|
|
2861
|
+
function pickAutoPayloadCodec(uncompressed, compressed, codec) {
|
|
2862
|
+
if (compressed.length < uncompressed.length) {
|
|
2863
|
+
return {
|
|
2864
|
+
payload: compressed,
|
|
2865
|
+
codec,
|
|
2866
|
+
zstdLevel: codec === CODEC_ZSTD ? MSV5_ZSTD_LEVEL : 0,
|
|
2867
|
+
};
|
|
2855
2868
|
}
|
|
2869
|
+
return rawPayloadChoice(uncompressed);
|
|
2870
|
+
}
|
|
2871
|
+
function zstdPayloadChoiceSync(uncompressed) {
|
|
2856
2872
|
if (!zstdAvailable()) {
|
|
2857
|
-
|
|
2858
|
-
return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
|
|
2873
|
+
throw zstdUnavailableWriteError();
|
|
2859
2874
|
}
|
|
2860
2875
|
const compressed = zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed));
|
|
2861
|
-
return
|
|
2876
|
+
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2862
2877
|
}
|
|
2863
2878
|
/**
|
|
2864
2879
|
* Async zstd via {@link zstdCompress} (not {@link zstdCompressSync}).
|
|
@@ -2877,16 +2892,91 @@ function zstdCompressAsync(uncompressed) {
|
|
|
2877
2892
|
});
|
|
2878
2893
|
});
|
|
2879
2894
|
}
|
|
2880
|
-
async function
|
|
2895
|
+
async function zstdPayloadChoiceAsync(uncompressed) {
|
|
2896
|
+
if (!zstdAvailable()) {
|
|
2897
|
+
throw zstdUnavailableWriteError();
|
|
2898
|
+
}
|
|
2899
|
+
const compressed = await zstdCompressAsync(uncompressed);
|
|
2900
|
+
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2901
|
+
}
|
|
2902
|
+
function zlibPayloadChoiceSync(uncompressed) {
|
|
2903
|
+
const compressed = zlib.deflateSync(uncompressed);
|
|
2904
|
+
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2905
|
+
}
|
|
2906
|
+
function zlibCompressAsync(uncompressed) {
|
|
2907
|
+
return new Promise((resolve, reject) => {
|
|
2908
|
+
zlib.deflate(uncompressed, (err, compressed) => {
|
|
2909
|
+
if (err != null) {
|
|
2910
|
+
reject(err);
|
|
2911
|
+
return;
|
|
2912
|
+
}
|
|
2913
|
+
resolve(compressed);
|
|
2914
|
+
});
|
|
2915
|
+
});
|
|
2916
|
+
}
|
|
2917
|
+
async function zlibPayloadChoiceAsync(uncompressed) {
|
|
2918
|
+
const compressed = await zlibCompressAsync(uncompressed);
|
|
2919
|
+
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2920
|
+
}
|
|
2921
|
+
const autoSyncCompressors = {
|
|
2922
|
+
zstd: (uncompressed) => zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed)),
|
|
2923
|
+
zlib: (uncompressed) => zlib.deflateSync(uncompressed),
|
|
2924
|
+
};
|
|
2925
|
+
const autoAsyncCompressors = {
|
|
2926
|
+
zstd: zstdCompressAsync,
|
|
2927
|
+
zlib: zlibCompressAsync,
|
|
2928
|
+
};
|
|
2929
|
+
function autoPayloadChoice(uncompressed, compressors) {
|
|
2881
2930
|
if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
|
|
2882
|
-
return
|
|
2931
|
+
return rawPayloadChoice(uncompressed);
|
|
2883
2932
|
}
|
|
2884
2933
|
if (!zstdAvailable()) {
|
|
2885
2934
|
warnZstdSaveFallbackOnce();
|
|
2886
|
-
return
|
|
2935
|
+
return pickAutoPayloadCodec(uncompressed, compressors.zlib(uncompressed), CODEC_ZLIB);
|
|
2936
|
+
}
|
|
2937
|
+
return pickAutoPayloadCodec(uncompressed, compressors.zstd(uncompressed), CODEC_ZSTD);
|
|
2938
|
+
}
|
|
2939
|
+
async function autoPayloadChoiceAsync(uncompressed, compressors) {
|
|
2940
|
+
if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
|
|
2941
|
+
return rawPayloadChoice(uncompressed);
|
|
2942
|
+
}
|
|
2943
|
+
if (!zstdAvailable()) {
|
|
2944
|
+
warnZstdSaveFallbackOnce();
|
|
2945
|
+
return pickAutoPayloadCodec(uncompressed, await compressors.zlib(uncompressed), CODEC_ZLIB);
|
|
2946
|
+
}
|
|
2947
|
+
return pickAutoPayloadCodec(uncompressed, await compressors.zstd(uncompressed), CODEC_ZSTD);
|
|
2948
|
+
}
|
|
2949
|
+
function choosePayloadCodecSync(uncompressed, compression = 'auto') {
|
|
2950
|
+
switch (compression) {
|
|
2951
|
+
case 'raw':
|
|
2952
|
+
return rawPayloadChoice(uncompressed);
|
|
2953
|
+
case 'zstd':
|
|
2954
|
+
return zstdPayloadChoiceSync(uncompressed);
|
|
2955
|
+
case 'zlib':
|
|
2956
|
+
return zlibPayloadChoiceSync(uncompressed);
|
|
2957
|
+
case 'auto':
|
|
2958
|
+
return autoPayloadChoice(uncompressed, autoSyncCompressors);
|
|
2959
|
+
default: {
|
|
2960
|
+
const _exhaustive = compression;
|
|
2961
|
+
return _exhaustive;
|
|
2962
|
+
}
|
|
2963
|
+
}
|
|
2964
|
+
}
|
|
2965
|
+
async function choosePayloadCodecAsync(uncompressed, compression = 'auto') {
|
|
2966
|
+
switch (compression) {
|
|
2967
|
+
case 'raw':
|
|
2968
|
+
return rawPayloadChoice(uncompressed);
|
|
2969
|
+
case 'zstd':
|
|
2970
|
+
return await zstdPayloadChoiceAsync(uncompressed);
|
|
2971
|
+
case 'zlib':
|
|
2972
|
+
return await zlibPayloadChoiceAsync(uncompressed);
|
|
2973
|
+
case 'auto':
|
|
2974
|
+
return await autoPayloadChoiceAsync(uncompressed, autoAsyncCompressors);
|
|
2975
|
+
default: {
|
|
2976
|
+
const _exhaustive = compression;
|
|
2977
|
+
return _exhaustive;
|
|
2978
|
+
}
|
|
2887
2979
|
}
|
|
2888
|
-
const compressed = await zstdCompressAsync(uncompressed);
|
|
2889
|
-
return pickPayloadCodec(uncompressed, compressed);
|
|
2890
2980
|
}
|
|
2891
2981
|
function concatAndValidateSections(rawSections) {
|
|
2892
2982
|
if (rawSections.length !== MSV5_SECTION_COUNT) {
|
|
@@ -2941,16 +3031,16 @@ function buildMsv5AssembledFile(globalFlags, entries, uncompressedLength, payloa
|
|
|
2941
3031
|
}
|
|
2942
3032
|
/**
|
|
2943
3033
|
* MSv5 on disk: header + catalogue (uncompressed offsets) + **one** payload blob
|
|
2944
|
-
* (raw concatenation or a single
|
|
3034
|
+
* (raw concatenation or a single compressed stream over it).
|
|
2945
3035
|
*/
|
|
2946
|
-
function assembleMsv5File(globalFlags, rawSections) {
|
|
3036
|
+
function assembleMsv5File(globalFlags, rawSections, compression = 'auto') {
|
|
2947
3037
|
const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
|
|
2948
|
-
const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed);
|
|
3038
|
+
const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed, compression);
|
|
2949
3039
|
return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
|
|
2950
3040
|
}
|
|
2951
|
-
async function assembleMsv5FileAsync(globalFlags, rawSections) {
|
|
3041
|
+
async function assembleMsv5FileAsync(globalFlags, rawSections, compression = 'auto') {
|
|
2952
3042
|
const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
|
|
2953
|
-
const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed);
|
|
3043
|
+
const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed, compression);
|
|
2954
3044
|
return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
|
|
2955
3045
|
}
|
|
2956
3046
|
function readMsv5SectionDirectory(buf) {
|
|
@@ -2994,11 +3084,11 @@ function sectionsFromPayload(payload, directory, payloadCrc32) {
|
|
|
2994
3084
|
return out;
|
|
2995
3085
|
});
|
|
2996
3086
|
}
|
|
2997
|
-
/** Streaming
|
|
2998
|
-
* No `maxOutputLength` on
|
|
2999
|
-
*
|
|
3000
|
-
*
|
|
3001
|
-
function
|
|
3087
|
+
/** Streaming compressed reader: keeps only one section in memory at a time.
|
|
3088
|
+
* No `maxOutputLength` on Transform streams: output is bounded by accumulating `streamOffset`
|
|
3089
|
+
* against the header's `uncompressedLength` (same 1 GiB cap checked upfront). Sync load uses
|
|
3090
|
+
* `maxOutputLength` because it materializes the whole payload at once. */
|
|
3091
|
+
function collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32) {
|
|
3002
3092
|
if (uncompressedLength > MSV5_MAX_UNCOMPRESSED_BYTES) {
|
|
3003
3093
|
throw new Error('MSv5 payload exceeds 1 GiB limit');
|
|
3004
3094
|
}
|
|
@@ -3018,7 +3108,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3018
3108
|
}
|
|
3019
3109
|
function consume(chunk) {
|
|
3020
3110
|
if (streamOffset + chunk.length > uncompressedLength) {
|
|
3021
|
-
throw new Error(
|
|
3111
|
+
throw new Error(MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH);
|
|
3022
3112
|
}
|
|
3023
3113
|
payloadCrc = crc32Update(payloadCrc, chunk);
|
|
3024
3114
|
let off = 0;
|
|
@@ -3054,7 +3144,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3054
3144
|
function finish() {
|
|
3055
3145
|
emitEmptySections();
|
|
3056
3146
|
if (streamOffset !== uncompressedLength || sectionId !== directory.length) {
|
|
3057
|
-
throw new Error(
|
|
3147
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3058
3148
|
}
|
|
3059
3149
|
if (payloadCrc !== payloadCrc32) {
|
|
3060
3150
|
throw new Error('MSv5 payload CRC mismatch');
|
|
@@ -3063,9 +3153,15 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3063
3153
|
return { sections, consume, finish };
|
|
3064
3154
|
}
|
|
3065
3155
|
function loadMsv5SectionsFromZstdStream(compressed, directory, uncompressedLength, payloadCrc32) {
|
|
3156
|
+
return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createZstdDecompress());
|
|
3157
|
+
}
|
|
3158
|
+
function loadMsv5SectionsFromZlibStream(compressed, directory, uncompressedLength, payloadCrc32) {
|
|
3159
|
+
return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createInflate());
|
|
3160
|
+
}
|
|
3161
|
+
function loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createStream) {
|
|
3066
3162
|
return new Promise((resolve, reject) => {
|
|
3067
|
-
const collector =
|
|
3068
|
-
const stream =
|
|
3163
|
+
const collector = collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32);
|
|
3164
|
+
const stream = createStream();
|
|
3069
3165
|
stream.on('data', (chunk) => {
|
|
3070
3166
|
try {
|
|
3071
3167
|
collector.consume(chunk);
|
|
@@ -3123,29 +3219,39 @@ function preparePayload(fileBuf, directory) {
|
|
|
3123
3219
|
payloadCrc32,
|
|
3124
3220
|
};
|
|
3125
3221
|
}
|
|
3126
|
-
|
|
3127
|
-
function loadMsv5Sections(fileBuf, directory) {
|
|
3128
|
-
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
3129
|
-
if (payloadCodec === CODEC_RAW) {
|
|
3130
|
-
return sectionsFromPayload(slice, directory, payloadCrc32);
|
|
3131
|
-
}
|
|
3222
|
+
function decompressPayloadSync(payloadCodec, slice, uncompressedLength) {
|
|
3132
3223
|
if (payloadCodec === CODEC_ZSTD) {
|
|
3133
3224
|
if (!zstdAvailable()) {
|
|
3134
3225
|
throw zstdUnavailableReadError();
|
|
3135
3226
|
}
|
|
3136
|
-
// Native cap matches readPayloadMeta's 1 GiB limit (see MSV5_MAX_UNCOMPRESSED_BYTES).
|
|
3137
|
-
// Using header `uncompressedLength` here would only help when the header understates
|
|
3138
|
-
// the zstd stream but the attacker can inflate the header too — same worst case.
|
|
3139
3227
|
const decoded = zlib.zstdDecompressSync(slice, {
|
|
3140
3228
|
maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
|
|
3141
3229
|
});
|
|
3142
3230
|
if (decoded.length !== uncompressedLength) {
|
|
3143
|
-
throw new Error(
|
|
3231
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3144
3232
|
}
|
|
3145
|
-
return
|
|
3233
|
+
return decoded;
|
|
3234
|
+
}
|
|
3235
|
+
if (payloadCodec === CODEC_ZLIB) {
|
|
3236
|
+
const decoded = zlib.inflateSync(slice, {
|
|
3237
|
+
maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
|
|
3238
|
+
});
|
|
3239
|
+
if (decoded.length !== uncompressedLength) {
|
|
3240
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3241
|
+
}
|
|
3242
|
+
return decoded;
|
|
3146
3243
|
}
|
|
3147
3244
|
throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
|
|
3148
3245
|
}
|
|
3246
|
+
/** Synchronous load; peak RAM ≈ full uncompressed payload (use the async path to bound it). */
|
|
3247
|
+
function loadMsv5Sections(fileBuf, directory) {
|
|
3248
|
+
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
3249
|
+
if (payloadCodec === CODEC_RAW) {
|
|
3250
|
+
return sectionsFromPayload(slice, directory, payloadCrc32);
|
|
3251
|
+
}
|
|
3252
|
+
const decoded = decompressPayloadSync(payloadCodec, slice, uncompressedLength);
|
|
3253
|
+
return sectionsFromPayload(decoded, directory, payloadCrc32);
|
|
3254
|
+
}
|
|
3149
3255
|
/** Streaming load; peak main-thread RAM ≈ largest single section (+ file buffer). */
|
|
3150
3256
|
async function loadMsv5SectionsAsync(fileBuf, directory) {
|
|
3151
3257
|
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
@@ -3158,6 +3264,9 @@ async function loadMsv5SectionsAsync(fileBuf, directory) {
|
|
|
3158
3264
|
}
|
|
3159
3265
|
return loadMsv5SectionsFromZstdStream(slice, directory, uncompressedLength, payloadCrc32);
|
|
3160
3266
|
}
|
|
3267
|
+
if (payloadCodec === CODEC_ZLIB) {
|
|
3268
|
+
return loadMsv5SectionsFromZlibStream(slice, directory, uncompressedLength, payloadCrc32);
|
|
3269
|
+
}
|
|
3161
3270
|
throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
|
|
3162
3271
|
}
|
|
3163
3272
|
function isMsv5Buffer(buf) {
|
|
@@ -3446,7 +3555,7 @@ function resolvePackedTree(snap, termTree, packedTermIndex) {
|
|
|
3446
3555
|
validateTermTreeLeaves(tree, termCount);
|
|
3447
3556
|
return fromRadixTree(tree, termCount);
|
|
3448
3557
|
}
|
|
3449
|
-
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
3558
|
+
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression) {
|
|
3450
3559
|
var _a;
|
|
3451
3560
|
validateFrozenSnapshotNumeric(snap);
|
|
3452
3561
|
const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
@@ -3475,9 +3584,9 @@ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
|
3475
3584
|
postingsWire.docIds,
|
|
3476
3585
|
postingsWire.freqs,
|
|
3477
3586
|
];
|
|
3478
|
-
return assembleMsv5File(globalFlags, rawSections).buffer;
|
|
3587
|
+
return assembleMsv5File(globalFlags, rawSections, compression).buffer;
|
|
3479
3588
|
}
|
|
3480
|
-
async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
3589
|
+
async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression) {
|
|
3481
3590
|
var _a;
|
|
3482
3591
|
validateFrozenSnapshotNumeric(snap);
|
|
3483
3592
|
const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
@@ -3506,7 +3615,7 @@ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
|
3506
3615
|
postingsWire.docIds,
|
|
3507
3616
|
postingsWire.freqs,
|
|
3508
3617
|
];
|
|
3509
|
-
return (await assembleMsv5FileAsync(globalFlags, rawSections)).buffer;
|
|
3618
|
+
return (await assembleMsv5FileAsync(globalFlags, rawSections, compression)).buffer;
|
|
3510
3619
|
}
|
|
3511
3620
|
|
|
3512
3621
|
function validateMsv5Container(buf) {
|
|
@@ -3583,12 +3692,12 @@ async function decodeFrozenSnapshotMsv5Async(buf, hints) {
|
|
|
3583
3692
|
}
|
|
3584
3693
|
|
|
3585
3694
|
/** Encode a frozen snapshot as a binary buffer. */
|
|
3586
|
-
function encodeFrozenSnapshot(snap, termTree, packedTermIndex) {
|
|
3587
|
-
return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex);
|
|
3695
|
+
function encodeFrozenSnapshot(snap, termTree, packedTermIndex, compression) {
|
|
3696
|
+
return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression);
|
|
3588
3697
|
}
|
|
3589
|
-
/** Async encoder; uses
|
|
3590
|
-
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
|
|
3591
|
-
return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex);
|
|
3698
|
+
/** Async encoder; uses the selected payload compression without blocking the event loop. */
|
|
3699
|
+
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex, compression) {
|
|
3700
|
+
return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression);
|
|
3592
3701
|
}
|
|
3593
3702
|
|
|
3594
3703
|
const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
|
|
@@ -3605,7 +3714,7 @@ function decodeFrozenSnapshot(buf, hints) {
|
|
|
3605
3714
|
}
|
|
3606
3715
|
throw invalidFrozenIndex('Unsupported frozen binary snapshot');
|
|
3607
3716
|
}
|
|
3608
|
-
/** Async frozen snapshot decode (streaming
|
|
3717
|
+
/** Async frozen snapshot decode (streaming decompression when needed). */
|
|
3609
3718
|
async function decodeFrozenSnapshotAsync(buf, hints) {
|
|
3610
3719
|
assertBufferLength(buf, 8);
|
|
3611
3720
|
const version = buf.readUInt16LE(4);
|
|
@@ -4644,7 +4753,7 @@ class FrozenMiniSearch {
|
|
|
4644
4753
|
return autoSuggestFromSearch((q, o) => this.search(q, o), queryString, merged);
|
|
4645
4754
|
}
|
|
4646
4755
|
/** Serialize this index as a frozen binary snapshot (synchronous). */
|
|
4647
|
-
saveBinarySync() {
|
|
4756
|
+
saveBinarySync(saveOptions = {}) {
|
|
4648
4757
|
return encodeFrozenSnapshot({
|
|
4649
4758
|
documentCount: this._documentCount,
|
|
4650
4759
|
nextId: this._nextId,
|
|
@@ -4658,10 +4767,10 @@ class FrozenMiniSearch {
|
|
|
4658
4767
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4659
4768
|
treeShape: [],
|
|
4660
4769
|
postings: this._postings,
|
|
4661
|
-
}, undefined, this._index);
|
|
4770
|
+
}, undefined, this._index, saveOptions.compression);
|
|
4662
4771
|
}
|
|
4663
|
-
/** Non-blocking
|
|
4664
|
-
async saveBinaryAsync() {
|
|
4772
|
+
/** Non-blocking snapshot serialization with the selected compression codec. */
|
|
4773
|
+
async saveBinaryAsync(saveOptions = {}) {
|
|
4665
4774
|
return encodeFrozenSnapshotAsync({
|
|
4666
4775
|
documentCount: this._documentCount,
|
|
4667
4776
|
nextId: this._nextId,
|
|
@@ -4675,7 +4784,7 @@ class FrozenMiniSearch {
|
|
|
4675
4784
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4676
4785
|
treeShape: [],
|
|
4677
4786
|
postings: this._postings,
|
|
4678
|
-
}, undefined, this._index);
|
|
4787
|
+
}, undefined, this._index, saveOptions.compression);
|
|
4679
4788
|
}
|
|
4680
4789
|
/** Load a frozen binary snapshot. */
|
|
4681
4790
|
static loadBinarySync(buffer, options = {}) {
|
|
@@ -4684,7 +4793,7 @@ class FrozenMiniSearch {
|
|
|
4684
4793
|
const snap = decodeFrozenSnapshot(buffer, { storeFields });
|
|
4685
4794
|
return FrozenMiniSearch.fromBinarySnapshot(snap, options);
|
|
4686
4795
|
}
|
|
4687
|
-
/** Load a frozen binary snapshot with streaming
|
|
4796
|
+
/** Load a frozen binary snapshot with streaming decompression when needed (bounded memory). */
|
|
4688
4797
|
static async loadBinaryAsync(buffer, options = {}) {
|
|
4689
4798
|
var _a;
|
|
4690
4799
|
const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
|
package/dist/es/index.d.ts
CHANGED
|
@@ -113,7 +113,7 @@ type SearchOptionsWithDefaults = SearchOptions & {
|
|
|
113
113
|
bm25: BM25Params;
|
|
114
114
|
};
|
|
115
115
|
/**
|
|
116
|
-
* Configuration options
|
|
116
|
+
* Configuration options compatible with the MiniSearch constructor.
|
|
117
117
|
*
|
|
118
118
|
* @typeParam T The type of documents being indexed.
|
|
119
119
|
*/
|
|
@@ -134,7 +134,7 @@ type Options<T = any> = {
|
|
|
134
134
|
processTerm?: (term: string, fieldName?: string) => string | string[] | null | undefined | false;
|
|
135
135
|
/** Function called to log messages from the library. */
|
|
136
136
|
logger?: (level: LogLevel, message: string, code?: string) => void;
|
|
137
|
-
/** Auto-vacuum behaviour after
|
|
137
|
+
/** Auto-vacuum behaviour after MiniSearch `discard`; defaults to `true`. */
|
|
138
138
|
autoVacuum?: boolean | AutoVacuumOptions;
|
|
139
139
|
/** Default search options. */
|
|
140
140
|
searchOptions?: SearchOptions;
|
|
@@ -158,6 +158,20 @@ type OptionsWithDefaults<T = any> = Options<T> & {
|
|
|
158
158
|
searchOptions: SearchOptionsWithDefaults;
|
|
159
159
|
autoSuggestOptions: SearchOptions;
|
|
160
160
|
};
|
|
161
|
+
/** Compression codec selection for frozen binary snapshots. */
|
|
162
|
+
type BinaryCompression = 'auto' | 'raw' | 'zstd' | 'zlib';
|
|
163
|
+
/** Options for `saveBinarySync()` / `saveBinaryAsync()`. */
|
|
164
|
+
type SaveBinaryOptions = {
|
|
165
|
+
/**
|
|
166
|
+
* Compression codec for the payload.
|
|
167
|
+
* - `auto`: one pass; payloads under 64 B stay raw; otherwise zstd when available
|
|
168
|
+
* (else zlib on Node < 22.15), kept only when strictly smaller than raw
|
|
169
|
+
* - `raw`: never compress
|
|
170
|
+
* - `zstd`: always zstd-compress, even when larger than raw; requires Node 22.15+ to write
|
|
171
|
+
* - `zlib`: always deflate, even when larger than raw; readable on Node 20+
|
|
172
|
+
*/
|
|
173
|
+
compression?: BinaryCompression;
|
|
174
|
+
};
|
|
161
175
|
/**
|
|
162
176
|
* A search-completion suggestion.
|
|
163
177
|
*/
|
|
@@ -596,12 +610,12 @@ declare class FrozenMiniSearch<T = any> {
|
|
|
596
610
|
search(query: Query, searchOptions?: SearchOptions): SearchResult[];
|
|
597
611
|
autoSuggest(queryString: string, options?: SearchOptions): Suggestion[];
|
|
598
612
|
/** Serialize this index as a frozen binary snapshot (synchronous). */
|
|
599
|
-
saveBinarySync(): Buffer;
|
|
600
|
-
/** Non-blocking
|
|
601
|
-
saveBinaryAsync(): Promise<Buffer>;
|
|
613
|
+
saveBinarySync(saveOptions?: SaveBinaryOptions): Buffer;
|
|
614
|
+
/** Non-blocking snapshot serialization with the selected compression codec. */
|
|
615
|
+
saveBinaryAsync(saveOptions?: SaveBinaryOptions): Promise<Buffer>;
|
|
602
616
|
/** Load a frozen binary snapshot. */
|
|
603
617
|
static loadBinarySync<T>(buffer: Buffer, options?: Options<T>): FrozenMiniSearch<T>;
|
|
604
|
-
/** Load a frozen binary snapshot with streaming
|
|
618
|
+
/** Load a frozen binary snapshot with streaming decompression when needed (bounded memory). */
|
|
605
619
|
static loadBinaryAsync<T>(buffer: Buffer, options?: Options<T>): Promise<FrozenMiniSearch<T>>;
|
|
606
620
|
private static fromBinarySnapshot;
|
|
607
621
|
/** Build a read-only index in one pass from documents. */
|
|
@@ -643,4 +657,4 @@ declare class FrozenMiniSearch<T = any> {
|
|
|
643
657
|
}
|
|
644
658
|
|
|
645
659
|
export { AND, AND_NOT, FrozenIndexBuilder, FrozenMiniSearch, OR, assembleFrozen, buildFrozenFromDocuments, createFrozenIndexBuilder, FrozenMiniSearch as default, freezeFrozenIndexBuilder, frozenMemoryBreakdown };
|
|
646
|
-
export type { BM25Params, CombinationOperator, FrozenAssembleParams, FrozenIndexBuilderHints, FrozenMemoryBreakdown, LogLevel, LowercaseCombinationOperator, MatchInfo, MiniSearchSnapshot, Options, Query, QueryCombination, SearchOptions, SearchResult, SerializedIndexEntry, Suggestion, Wildcard };
|
|
660
|
+
export type { BM25Params, BinaryCompression, CombinationOperator, FrozenAssembleParams, FrozenIndexBuilderHints, FrozenMemoryBreakdown, LogLevel, LowercaseCombinationOperator, MatchInfo, MiniSearchSnapshot, Options, Query, QueryCombination, SaveBinaryOptions, SearchOptions, SearchResult, SerializedIndexEntry, Suggestion, Wildcard };
|
package/dist/es/index.js
CHANGED
|
@@ -1640,9 +1640,11 @@ const FLAG_FL_U8 = 8;
|
|
|
1640
1640
|
const FLAG_FL_U16 = 16;
|
|
1641
1641
|
const FLAG_FREQ_U16 = 32;
|
|
1642
1642
|
const CODEC_RAW = 0;
|
|
1643
|
+
/** Deflate/inflate (`node:zlib`) on the whole payload. */
|
|
1644
|
+
const CODEC_ZLIB = 1;
|
|
1643
1645
|
/** Zstandard (`node:zlib`) on the whole payload. */
|
|
1644
1646
|
const CODEC_ZSTD = 3;
|
|
1645
|
-
/** Single concatenated payload, one
|
|
1647
|
+
/** Single concatenated payload, one compressed stream (or raw). */
|
|
1646
1648
|
const MSV5_FORMAT_REV_PAYLOAD = 1;
|
|
1647
1649
|
/** Do not compress payloads smaller than this (bytes). */
|
|
1648
1650
|
const MSV5_MIN_COMPRESS_BYTES = 64;
|
|
@@ -2762,33 +2764,43 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2762
2764
|
};
|
|
2763
2765
|
}
|
|
2764
2766
|
|
|
2765
|
-
/** Hard cap on the uncompressed payload, rejected before allocation (
|
|
2767
|
+
/** Hard cap on the uncompressed payload, rejected before allocation (compressed-bomb guard).
|
|
2766
2768
|
* This is the single trust boundary for untrusted snapshots: {@link readPayloadMeta} rejects
|
|
2767
2769
|
* headers above this size; sync decompress uses the same cap via `maxOutputLength`.
|
|
2768
2770
|
* A malicious header can still declare up to 1 GiB — no tighter native limit helps without
|
|
2769
2771
|
* trusting `uncompressedLength` from that same header. Semantic integrity (length match,
|
|
2770
2772
|
* payload CRC, per-section CRC) is enforced after decode. */
|
|
2771
2773
|
const MSV5_MAX_UNCOMPRESSED_BYTES = 1024 * 1024 * 1024;
|
|
2774
|
+
const MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH = 'MSv5 compressed payload exceeds declared length';
|
|
2775
|
+
const MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH = 'MSv5 decompressed payload length mismatch';
|
|
2772
2776
|
// zstd landed in node:zlib at Node 22.15.0 (22.x line) / 23.8.0, where the whole family
|
|
2773
2777
|
// (zstdCompress[Sync], zstdDecompressSync, createZstdDecompress) ships together — so probing one
|
|
2774
2778
|
// member is enough to know if the runtime supports zstd. Checked at call time (not captured at
|
|
2775
|
-
// module load) so it stays mockable in tests. On older runtimes we degrade gracefully:
|
|
2776
|
-
//
|
|
2779
|
+
// module load) so it stays mockable in tests. On older runtimes we degrade gracefully: `auto`
|
|
2780
|
+
// tries zlib once (or raw if it does not help). When zstd is available, `auto` tries zstd once
|
|
2781
|
+
// and stays raw if it does not shrink — no second pass. Reads of a zstd payload throw a clear,
|
|
2782
|
+
// actionable error on runtimes without zstd.
|
|
2777
2783
|
function zstdAvailable() {
|
|
2778
2784
|
return typeof zlib.zstdCompressSync === 'function';
|
|
2779
2785
|
}
|
|
2786
|
+
function zstdUnavailableWriteError() {
|
|
2787
|
+
return new Error('MSv5 snapshot requested zstd compression, but this Node.js runtime lacks node:zlib zstd '
|
|
2788
|
+
+ 'support (added in Node 22.15.0). Upgrade Node.js, or use compression: "auto", "raw", '
|
|
2789
|
+
+ 'or "zlib".');
|
|
2790
|
+
}
|
|
2780
2791
|
function zstdUnavailableReadError() {
|
|
2781
2792
|
return new Error('MSv5 snapshot is zstd-compressed, but this Node.js runtime lacks node:zlib zstd support '
|
|
2782
2793
|
+ '(added in Node 22.15.0). Upgrade Node.js to read this snapshot, or re-save it from a '
|
|
2783
|
-
+ 'newer runtime
|
|
2794
|
+
+ 'newer runtime with compression: "raw" or "zlib".');
|
|
2784
2795
|
}
|
|
2785
2796
|
let warnedZstdSaveFallback = false;
|
|
2786
2797
|
function warnZstdSaveFallbackOnce() {
|
|
2787
2798
|
if (warnedZstdSaveFallback)
|
|
2788
2799
|
return;
|
|
2789
2800
|
warnedZstdSaveFallback = true;
|
|
2790
|
-
process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0);
|
|
2791
|
-
+ 'raw (uncompressed)
|
|
2801
|
+
process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0); compression: "auto" falls back to '
|
|
2802
|
+
+ 'zlib when it shrinks the payload, otherwise raw (uncompressed). Upgrade to Node 22.15.0+ '
|
|
2803
|
+
+ 'for zstd.', { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
|
|
2792
2804
|
}
|
|
2793
2805
|
function assertPayloadFormatRev(buf) {
|
|
2794
2806
|
const rev = buf.readUInt16LE(MSV5_FORMAT_REV_OFFSET);
|
|
@@ -2838,23 +2850,26 @@ function msv5ZstdCompressOptions(uncompressed) {
|
|
|
2838
2850
|
},
|
|
2839
2851
|
};
|
|
2840
2852
|
}
|
|
2841
|
-
|
|
2842
|
-
function pickPayloadCodec(uncompressed, compressed) {
|
|
2843
|
-
if (compressed.length < uncompressed.length) {
|
|
2844
|
-
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2845
|
-
}
|
|
2853
|
+
function rawPayloadChoice(uncompressed) {
|
|
2846
2854
|
return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
|
|
2847
2855
|
}
|
|
2848
|
-
|
|
2849
|
-
|
|
2850
|
-
|
|
2856
|
+
/** Auto mode: one compression attempt; keep it only when strictly smaller than raw. */
|
|
2857
|
+
function pickAutoPayloadCodec(uncompressed, compressed, codec) {
|
|
2858
|
+
if (compressed.length < uncompressed.length) {
|
|
2859
|
+
return {
|
|
2860
|
+
payload: compressed,
|
|
2861
|
+
codec,
|
|
2862
|
+
zstdLevel: codec === CODEC_ZSTD ? MSV5_ZSTD_LEVEL : 0,
|
|
2863
|
+
};
|
|
2851
2864
|
}
|
|
2865
|
+
return rawPayloadChoice(uncompressed);
|
|
2866
|
+
}
|
|
2867
|
+
function zstdPayloadChoiceSync(uncompressed) {
|
|
2852
2868
|
if (!zstdAvailable()) {
|
|
2853
|
-
|
|
2854
|
-
return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
|
|
2869
|
+
throw zstdUnavailableWriteError();
|
|
2855
2870
|
}
|
|
2856
2871
|
const compressed = zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed));
|
|
2857
|
-
return pickPayloadCodec(uncompressed, compressed);
|
|
2872
|
+
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2858
2873
|
}
|
|
2859
2874
|
/**
|
|
2860
2875
|
* Async zstd via {@link zstdCompress} (not {@link zstdCompressSync}).
|
|
@@ -2873,16 +2888,91 @@ function zstdCompressAsync(uncompressed) {
|
|
|
2873
2888
|
});
|
|
2874
2889
|
});
|
|
2875
2890
|
}
|
|
2876
|
-
async function choosePayloadCodecAsync(uncompressed) {
|
|
2891
|
+
async function zstdPayloadChoiceAsync(uncompressed) {
|
|
2892
|
+
if (!zstdAvailable()) {
|
|
2893
|
+
throw zstdUnavailableWriteError();
|
|
2894
|
+
}
|
|
2895
|
+
const compressed = await zstdCompressAsync(uncompressed);
|
|
2896
|
+
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2897
|
+
}
|
|
2898
|
+
function zlibPayloadChoiceSync(uncompressed) {
|
|
2899
|
+
const compressed = zlib.deflateSync(uncompressed);
|
|
2900
|
+
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2901
|
+
}
|
|
2902
|
+
function zlibCompressAsync(uncompressed) {
|
|
2903
|
+
return new Promise((resolve, reject) => {
|
|
2904
|
+
zlib.deflate(uncompressed, (err, compressed) => {
|
|
2905
|
+
if (err != null) {
|
|
2906
|
+
reject(err);
|
|
2907
|
+
return;
|
|
2908
|
+
}
|
|
2909
|
+
resolve(compressed);
|
|
2910
|
+
});
|
|
2911
|
+
});
|
|
2912
|
+
}
|
|
2913
|
+
async function zlibPayloadChoiceAsync(uncompressed) {
|
|
2914
|
+
const compressed = await zlibCompressAsync(uncompressed);
|
|
2915
|
+
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2916
|
+
}
|
|
2917
|
+
const autoSyncCompressors = {
|
|
2918
|
+
zstd: (uncompressed) => zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed)),
|
|
2919
|
+
zlib: (uncompressed) => zlib.deflateSync(uncompressed),
|
|
2920
|
+
};
|
|
2921
|
+
const autoAsyncCompressors = {
|
|
2922
|
+
zstd: zstdCompressAsync,
|
|
2923
|
+
zlib: zlibCompressAsync,
|
|
2924
|
+
};
|
|
2925
|
+
function autoPayloadChoice(uncompressed, compressors) {
|
|
2877
2926
|
if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
|
|
2878
|
-
return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
|
|
2927
|
+
return rawPayloadChoice(uncompressed);
|
|
2879
2928
|
}
|
|
2880
2929
|
if (!zstdAvailable()) {
|
|
2881
2930
|
warnZstdSaveFallbackOnce();
|
|
2882
|
-
return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
|
|
2931
|
+
return pickAutoPayloadCodec(uncompressed, compressors.zlib(uncompressed), CODEC_ZLIB);
|
|
2932
|
+
}
|
|
2933
|
+
return pickAutoPayloadCodec(uncompressed, compressors.zstd(uncompressed), CODEC_ZSTD);
|
|
2934
|
+
}
|
|
2935
|
+
async function autoPayloadChoiceAsync(uncompressed, compressors) {
|
|
2936
|
+
if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
|
|
2937
|
+
return rawPayloadChoice(uncompressed);
|
|
2938
|
+
}
|
|
2939
|
+
if (!zstdAvailable()) {
|
|
2940
|
+
warnZstdSaveFallbackOnce();
|
|
2941
|
+
return pickAutoPayloadCodec(uncompressed, await compressors.zlib(uncompressed), CODEC_ZLIB);
|
|
2942
|
+
}
|
|
2943
|
+
return pickAutoPayloadCodec(uncompressed, await compressors.zstd(uncompressed), CODEC_ZSTD);
|
|
2944
|
+
}
|
|
2945
|
+
function choosePayloadCodecSync(uncompressed, compression = 'auto') {
|
|
2946
|
+
switch (compression) {
|
|
2947
|
+
case 'raw':
|
|
2948
|
+
return rawPayloadChoice(uncompressed);
|
|
2949
|
+
case 'zstd':
|
|
2950
|
+
return zstdPayloadChoiceSync(uncompressed);
|
|
2951
|
+
case 'zlib':
|
|
2952
|
+
return zlibPayloadChoiceSync(uncompressed);
|
|
2953
|
+
case 'auto':
|
|
2954
|
+
return autoPayloadChoice(uncompressed, autoSyncCompressors);
|
|
2955
|
+
default: {
|
|
2956
|
+
const _exhaustive = compression;
|
|
2957
|
+
return _exhaustive;
|
|
2958
|
+
}
|
|
2959
|
+
}
|
|
2960
|
+
}
|
|
2961
|
+
async function choosePayloadCodecAsync(uncompressed, compression = 'auto') {
|
|
2962
|
+
switch (compression) {
|
|
2963
|
+
case 'raw':
|
|
2964
|
+
return rawPayloadChoice(uncompressed);
|
|
2965
|
+
case 'zstd':
|
|
2966
|
+
return await zstdPayloadChoiceAsync(uncompressed);
|
|
2967
|
+
case 'zlib':
|
|
2968
|
+
return await zlibPayloadChoiceAsync(uncompressed);
|
|
2969
|
+
case 'auto':
|
|
2970
|
+
return await autoPayloadChoiceAsync(uncompressed, autoAsyncCompressors);
|
|
2971
|
+
default: {
|
|
2972
|
+
const _exhaustive = compression;
|
|
2973
|
+
return _exhaustive;
|
|
2974
|
+
}
|
|
2883
2975
|
}
|
|
2884
|
-
const compressed = await zstdCompressAsync(uncompressed);
|
|
2885
|
-
return pickPayloadCodec(uncompressed, compressed);
|
|
2886
2976
|
}
|
|
2887
2977
|
function concatAndValidateSections(rawSections) {
|
|
2888
2978
|
if (rawSections.length !== MSV5_SECTION_COUNT) {
|
|
@@ -2937,16 +3027,16 @@ function buildMsv5AssembledFile(globalFlags, entries, uncompressedLength, payloa
|
|
|
2937
3027
|
}
|
|
2938
3028
|
/**
|
|
2939
3029
|
* MSv5 on disk: header + catalogue (uncompressed offsets) + **one** payload blob
|
|
2940
|
-
* (raw concatenation or a single
|
|
3030
|
+
* (raw concatenation or a single compressed stream over it).
|
|
2941
3031
|
*/
|
|
2942
|
-
function assembleMsv5File(globalFlags, rawSections) {
|
|
3032
|
+
function assembleMsv5File(globalFlags, rawSections, compression = 'auto') {
|
|
2943
3033
|
const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
|
|
2944
|
-
const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed);
|
|
3034
|
+
const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed, compression);
|
|
2945
3035
|
return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
|
|
2946
3036
|
}
|
|
2947
|
-
async function assembleMsv5FileAsync(globalFlags, rawSections) {
|
|
3037
|
+
async function assembleMsv5FileAsync(globalFlags, rawSections, compression = 'auto') {
|
|
2948
3038
|
const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
|
|
2949
|
-
const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed);
|
|
3039
|
+
const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed, compression);
|
|
2950
3040
|
return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
|
|
2951
3041
|
}
|
|
2952
3042
|
function readMsv5SectionDirectory(buf) {
|
|
@@ -2990,11 +3080,11 @@ function sectionsFromPayload(payload, directory, payloadCrc32) {
|
|
|
2990
3080
|
return out;
|
|
2991
3081
|
});
|
|
2992
3082
|
}
|
|
2993
|
-
/** Streaming
|
|
2994
|
-
* No `maxOutputLength` on
|
|
2995
|
-
*
|
|
2996
|
-
*
|
|
2997
|
-
function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32) {
|
|
3083
|
+
/** Streaming compressed reader: keeps only one section in memory at a time.
|
|
3084
|
+
* No `maxOutputLength` on Transform streams: output is bounded by accumulating `streamOffset`
|
|
3085
|
+
* against the header's `uncompressedLength` (same 1 GiB cap checked upfront). Sync load uses
|
|
3086
|
+
* `maxOutputLength` because it materializes the whole payload at once. */
|
|
3087
|
+
function collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32) {
|
|
2998
3088
|
if (uncompressedLength > MSV5_MAX_UNCOMPRESSED_BYTES) {
|
|
2999
3089
|
throw new Error('MSv5 payload exceeds 1 GiB limit');
|
|
3000
3090
|
}
|
|
@@ -3014,7 +3104,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3014
3104
|
}
|
|
3015
3105
|
function consume(chunk) {
|
|
3016
3106
|
if (streamOffset + chunk.length > uncompressedLength) {
|
|
3017
|
-
throw new Error(
|
|
3107
|
+
throw new Error(MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH);
|
|
3018
3108
|
}
|
|
3019
3109
|
payloadCrc = crc32Update(payloadCrc, chunk);
|
|
3020
3110
|
let off = 0;
|
|
@@ -3050,7 +3140,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3050
3140
|
function finish() {
|
|
3051
3141
|
emitEmptySections();
|
|
3052
3142
|
if (streamOffset !== uncompressedLength || sectionId !== directory.length) {
|
|
3053
|
-
throw new Error(
|
|
3143
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3054
3144
|
}
|
|
3055
3145
|
if (payloadCrc !== payloadCrc32) {
|
|
3056
3146
|
throw new Error('MSv5 payload CRC mismatch');
|
|
@@ -3059,9 +3149,15 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3059
3149
|
return { sections, consume, finish };
|
|
3060
3150
|
}
|
|
3061
3151
|
function loadMsv5SectionsFromZstdStream(compressed, directory, uncompressedLength, payloadCrc32) {
|
|
3152
|
+
return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createZstdDecompress());
|
|
3153
|
+
}
|
|
3154
|
+
function loadMsv5SectionsFromZlibStream(compressed, directory, uncompressedLength, payloadCrc32) {
|
|
3155
|
+
return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createInflate());
|
|
3156
|
+
}
|
|
3157
|
+
function loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createStream) {
|
|
3062
3158
|
return new Promise((resolve, reject) => {
|
|
3063
|
-
const collector = collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32);
|
|
3064
|
-
const stream = zlib.createZstdDecompress();
|
|
3159
|
+
const collector = collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32);
|
|
3160
|
+
const stream = createStream();
|
|
3065
3161
|
stream.on('data', (chunk) => {
|
|
3066
3162
|
try {
|
|
3067
3163
|
collector.consume(chunk);
|
|
@@ -3119,29 +3215,39 @@ function preparePayload(fileBuf, directory) {
|
|
|
3119
3215
|
payloadCrc32,
|
|
3120
3216
|
};
|
|
3121
3217
|
}
|
|
3122
|
-
|
|
3123
|
-
function loadMsv5Sections(fileBuf, directory) {
|
|
3124
|
-
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
3125
|
-
if (payloadCodec === CODEC_RAW) {
|
|
3126
|
-
return sectionsFromPayload(slice, directory, payloadCrc32);
|
|
3127
|
-
}
|
|
3218
|
+
function decompressPayloadSync(payloadCodec, slice, uncompressedLength) {
|
|
3128
3219
|
if (payloadCodec === CODEC_ZSTD) {
|
|
3129
3220
|
if (!zstdAvailable()) {
|
|
3130
3221
|
throw zstdUnavailableReadError();
|
|
3131
3222
|
}
|
|
3132
|
-
// Native cap matches readPayloadMeta's 1 GiB limit (see MSV5_MAX_UNCOMPRESSED_BYTES).
|
|
3133
|
-
// Using header `uncompressedLength` here would only help when the header understates
|
|
3134
|
-
// the zstd stream but the attacker can inflate the header too — same worst case.
|
|
3135
3223
|
const decoded = zlib.zstdDecompressSync(slice, {
|
|
3136
3224
|
maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
|
|
3137
3225
|
});
|
|
3138
3226
|
if (decoded.length !== uncompressedLength) {
|
|
3139
|
-
throw new Error(
|
|
3227
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3140
3228
|
}
|
|
3141
|
-
return
|
|
3229
|
+
return decoded;
|
|
3230
|
+
}
|
|
3231
|
+
if (payloadCodec === CODEC_ZLIB) {
|
|
3232
|
+
const decoded = zlib.inflateSync(slice, {
|
|
3233
|
+
maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
|
|
3234
|
+
});
|
|
3235
|
+
if (decoded.length !== uncompressedLength) {
|
|
3236
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3237
|
+
}
|
|
3238
|
+
return decoded;
|
|
3142
3239
|
}
|
|
3143
3240
|
throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
|
|
3144
3241
|
}
|
|
3242
|
+
/** Synchronous load; peak RAM ≈ full uncompressed payload (use the async path to bound it). */
|
|
3243
|
+
function loadMsv5Sections(fileBuf, directory) {
|
|
3244
|
+
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
3245
|
+
if (payloadCodec === CODEC_RAW) {
|
|
3246
|
+
return sectionsFromPayload(slice, directory, payloadCrc32);
|
|
3247
|
+
}
|
|
3248
|
+
const decoded = decompressPayloadSync(payloadCodec, slice, uncompressedLength);
|
|
3249
|
+
return sectionsFromPayload(decoded, directory, payloadCrc32);
|
|
3250
|
+
}
|
|
3145
3251
|
/** Streaming load; peak main-thread RAM ≈ largest single section (+ file buffer). */
|
|
3146
3252
|
async function loadMsv5SectionsAsync(fileBuf, directory) {
|
|
3147
3253
|
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
@@ -3154,6 +3260,9 @@ async function loadMsv5SectionsAsync(fileBuf, directory) {
|
|
|
3154
3260
|
}
|
|
3155
3261
|
return loadMsv5SectionsFromZstdStream(slice, directory, uncompressedLength, payloadCrc32);
|
|
3156
3262
|
}
|
|
3263
|
+
if (payloadCodec === CODEC_ZLIB) {
|
|
3264
|
+
return loadMsv5SectionsFromZlibStream(slice, directory, uncompressedLength, payloadCrc32);
|
|
3265
|
+
}
|
|
3157
3266
|
throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
|
|
3158
3267
|
}
|
|
3159
3268
|
function isMsv5Buffer(buf) {
|
|
@@ -3442,7 +3551,7 @@ function resolvePackedTree(snap, termTree, packedTermIndex) {
|
|
|
3442
3551
|
validateTermTreeLeaves(tree, termCount);
|
|
3443
3552
|
return fromRadixTree(tree, termCount);
|
|
3444
3553
|
}
|
|
3445
|
-
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
3554
|
+
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression) {
|
|
3446
3555
|
var _a;
|
|
3447
3556
|
validateFrozenSnapshotNumeric(snap);
|
|
3448
3557
|
const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
@@ -3471,9 +3580,9 @@ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
|
3471
3580
|
postingsWire.docIds,
|
|
3472
3581
|
postingsWire.freqs,
|
|
3473
3582
|
];
|
|
3474
|
-
return assembleMsv5File(globalFlags, rawSections).buffer;
|
|
3583
|
+
return assembleMsv5File(globalFlags, rawSections, compression).buffer;
|
|
3475
3584
|
}
|
|
3476
|
-
async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
3585
|
+
async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression) {
|
|
3477
3586
|
var _a;
|
|
3478
3587
|
validateFrozenSnapshotNumeric(snap);
|
|
3479
3588
|
const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
@@ -3502,7 +3611,7 @@ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
|
3502
3611
|
postingsWire.docIds,
|
|
3503
3612
|
postingsWire.freqs,
|
|
3504
3613
|
];
|
|
3505
|
-
return (await assembleMsv5FileAsync(globalFlags, rawSections)).buffer;
|
|
3614
|
+
return (await assembleMsv5FileAsync(globalFlags, rawSections, compression)).buffer;
|
|
3506
3615
|
}
|
|
3507
3616
|
|
|
3508
3617
|
function validateMsv5Container(buf) {
|
|
@@ -3579,12 +3688,12 @@ async function decodeFrozenSnapshotMsv5Async(buf, hints) {
|
|
|
3579
3688
|
}
|
|
3580
3689
|
|
|
3581
3690
|
/** Encode a frozen snapshot as a binary buffer. */
|
|
3582
|
-
function encodeFrozenSnapshot(snap, termTree, packedTermIndex) {
|
|
3583
|
-
return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex);
|
|
3691
|
+
function encodeFrozenSnapshot(snap, termTree, packedTermIndex, compression) {
|
|
3692
|
+
return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression);
|
|
3584
3693
|
}
|
|
3585
|
-
/** Async encoder; uses
|
|
3586
|
-
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
|
|
3587
|
-
return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex);
|
|
3694
|
+
/** Async encoder; uses the selected payload compression without blocking the event loop. */
|
|
3695
|
+
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex, compression) {
|
|
3696
|
+
return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression);
|
|
3588
3697
|
}
|
|
3589
3698
|
|
|
3590
3699
|
const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
|
|
@@ -3601,7 +3710,7 @@ function decodeFrozenSnapshot(buf, hints) {
|
|
|
3601
3710
|
}
|
|
3602
3711
|
throw invalidFrozenIndex('Unsupported frozen binary snapshot');
|
|
3603
3712
|
}
|
|
3604
|
-
/** Async frozen snapshot decode (streaming
|
|
3713
|
+
/** Async frozen snapshot decode (streaming decompression when needed). */
|
|
3605
3714
|
async function decodeFrozenSnapshotAsync(buf, hints) {
|
|
3606
3715
|
assertBufferLength(buf, 8);
|
|
3607
3716
|
const version = buf.readUInt16LE(4);
|
|
@@ -4640,7 +4749,7 @@ class FrozenMiniSearch {
|
|
|
4640
4749
|
return autoSuggestFromSearch((q, o) => this.search(q, o), queryString, merged);
|
|
4641
4750
|
}
|
|
4642
4751
|
/** Serialize this index as a frozen binary snapshot (synchronous). */
|
|
4643
|
-
saveBinarySync() {
|
|
4752
|
+
saveBinarySync(saveOptions = {}) {
|
|
4644
4753
|
return encodeFrozenSnapshot({
|
|
4645
4754
|
documentCount: this._documentCount,
|
|
4646
4755
|
nextId: this._nextId,
|
|
@@ -4654,10 +4763,10 @@ class FrozenMiniSearch {
|
|
|
4654
4763
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4655
4764
|
treeShape: [],
|
|
4656
4765
|
postings: this._postings,
|
|
4657
|
-
}, undefined, this._index);
|
|
4766
|
+
}, undefined, this._index, saveOptions.compression);
|
|
4658
4767
|
}
|
|
4659
|
-
/** Non-blocking
|
|
4660
|
-
async saveBinaryAsync() {
|
|
4768
|
+
/** Non-blocking snapshot serialization with the selected compression codec. */
|
|
4769
|
+
async saveBinaryAsync(saveOptions = {}) {
|
|
4661
4770
|
return encodeFrozenSnapshotAsync({
|
|
4662
4771
|
documentCount: this._documentCount,
|
|
4663
4772
|
nextId: this._nextId,
|
|
@@ -4671,7 +4780,7 @@ class FrozenMiniSearch {
|
|
|
4671
4780
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4672
4781
|
treeShape: [],
|
|
4673
4782
|
postings: this._postings,
|
|
4674
|
-
}, undefined, this._index);
|
|
4783
|
+
}, undefined, this._index, saveOptions.compression);
|
|
4675
4784
|
}
|
|
4676
4785
|
/** Load a frozen binary snapshot. */
|
|
4677
4786
|
static loadBinarySync(buffer, options = {}) {
|
|
@@ -4680,7 +4789,7 @@ class FrozenMiniSearch {
|
|
|
4680
4789
|
const snap = decodeFrozenSnapshot(buffer, { storeFields });
|
|
4681
4790
|
return FrozenMiniSearch.fromBinarySnapshot(snap, options);
|
|
4682
4791
|
}
|
|
4683
|
-
/** Load a frozen binary snapshot with streaming
|
|
4792
|
+
/** Load a frozen binary snapshot with streaming decompression when needed (bounded memory). */
|
|
4684
4793
|
static async loadBinaryAsync(buffer, options = {}) {
|
|
4685
4794
|
var _a;
|
|
4686
4795
|
const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yoch/frozenminisearch",
|
|
3
|
-
"version": "1.1.0",
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Read-only Node.js full-text search — compact frozen indexes and binary snapshots",
|
|
5
5
|
"main": "dist/cjs/index.cjs",
|
|
6
6
|
"module": "dist/es/index.js",
|
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
],
|
|
38
38
|
"license": "MIT",
|
|
39
39
|
"engines": {
|
|
40
|
-
"node": ">=22.15"
|
|
40
|
+
"node": ">=20"
|
|
41
41
|
},
|
|
42
42
|
"publishConfig": {
|
|
43
43
|
"access": "public"
|
|
@@ -101,6 +101,8 @@
|
|
|
101
101
|
"scripts": {
|
|
102
102
|
"test": "jest",
|
|
103
103
|
"test-watch": "jest --watch",
|
|
104
|
+
"test:fuzzysearch": "jest --testMatch='<rootDir>/dev/internal/**/*.test.(ts|js)' --testPathIgnorePatterns=/node_modules/",
|
|
105
|
+
"test:benchmarks": "jest --testMatch='<rootDir>/benchmarks/**/*.test.(ts|js)' --testPathIgnorePatterns=/node_modules/",
|
|
104
106
|
"coverage": "jest --coverage",
|
|
105
107
|
"bench": "npm run build && node --expose-gc benchmarks/framework/cli.mjs",
|
|
106
108
|
"bench:record": "npm run build && node --expose-gc benchmarks/framework/cli.mjs record",
|
|
@@ -143,8 +145,9 @@
|
|
|
143
145
|
"clean-build": "rm -rf dist",
|
|
144
146
|
"build-minified": "MINIFY=true npm run build",
|
|
145
147
|
"sync-docs-media": "node scripts/sync-docs-media.cjs",
|
|
146
|
-
"build-docs": "
|
|
147
|
-
"build-
|
|
148
|
+
"build-docs": "node scripts/build-docs.cjs",
|
|
149
|
+
"build-docs:pages": "DOCS_PAGES=1 node scripts/build-docs.cjs",
|
|
150
|
+
"build-demo": "rm -rf ./docs/demo && mkdir -p ./docs/demo && cp -rL ./examples/plain_js/. ./docs/demo",
|
|
148
151
|
"lint": "eslint 'src/**/*.{js,ts}'",
|
|
149
152
|
"lintfix": "eslint --fix 'src/**/*.{js,ts}'",
|
|
150
153
|
"prepublishOnly": "npm test && npm run build && node scripts/verify-npm-pack.cjs",
|