@yoch/frozenminisearch 1.0.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -5
- package/README.md +57 -21
- package/dist/cjs/index.cjs +276 -81
- package/dist/es/index.d.ts +37 -14
- package/dist/es/index.js +276 -81
- package/package.json +16 -5
package/dist/es/index.js
CHANGED
|
@@ -1640,9 +1640,11 @@ const FLAG_FL_U8 = 8;
|
|
|
1640
1640
|
const FLAG_FL_U16 = 16;
|
|
1641
1641
|
const FLAG_FREQ_U16 = 32;
|
|
1642
1642
|
const CODEC_RAW = 0;
|
|
1643
|
+
/** Deflate/inflate (`node:zlib`) on the whole payload. */
|
|
1644
|
+
const CODEC_ZLIB = 1;
|
|
1643
1645
|
/** Zstandard (`node:zlib`) on the whole payload. */
|
|
1644
1646
|
const CODEC_ZSTD = 3;
|
|
1645
|
-
/** Single concatenated payload, one
|
|
1647
|
+
/** Single concatenated payload, one compressed stream (or raw). */
|
|
1646
1648
|
const MSV5_FORMAT_REV_PAYLOAD = 1;
|
|
1647
1649
|
/** Do not compress payloads smaller than this (bytes). */
|
|
1648
1650
|
const MSV5_MIN_COMPRESS_BYTES = 64;
|
|
@@ -1934,7 +1936,7 @@ function postingsTypedBytes(layout) {
|
|
|
1934
1936
|
slotCount,
|
|
1935
1937
|
};
|
|
1936
1938
|
}
|
|
1937
|
-
function validateFrozenPostingsLayout(layout, documentCount, nextId, fail = detail => { throw new Error(detail); }) {
|
|
1939
|
+
function validateFrozenPostingsLayout(layout, documentCount, nextId, fail = (detail) => { throw new Error(detail); }) {
|
|
1938
1940
|
if (layout.fieldCount <= 0)
|
|
1939
1941
|
fail('fieldCount must be positive');
|
|
1940
1942
|
if (layout.nextId !== nextId)
|
|
@@ -2205,7 +2207,7 @@ function forEachDefaultToken(text, onToken) {
|
|
|
2205
2207
|
/** Default tokenizer into a reusable buffer (avoids `text.split()` array allocation). */
|
|
2206
2208
|
function tokenizeDefaultInto(out, text) {
|
|
2207
2209
|
out.length = 0;
|
|
2208
|
-
forEachDefaultToken(text,
|
|
2210
|
+
forEachDefaultToken(text, token => out.push(token));
|
|
2209
2211
|
}
|
|
2210
2212
|
/** Tokenize field text into `out` (reused). Fast path when `tokenize` is the library default. */
|
|
2211
2213
|
function tokenizeFieldInto(out, tokenize, text, fieldName) {
|
|
@@ -2284,7 +2286,7 @@ function validateFrozenSnapshotNumeric(snap) {
|
|
|
2284
2286
|
if (snap.avgFieldLength.length !== snap.fieldCount) {
|
|
2285
2287
|
throw invalidFrozenIndex('avgFieldLength size mismatch');
|
|
2286
2288
|
}
|
|
2287
|
-
validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, detail => {
|
|
2289
|
+
validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, (detail) => {
|
|
2288
2290
|
throw invalidFrozenIndex(detail);
|
|
2289
2291
|
});
|
|
2290
2292
|
const indexedFields = Object.keys(snap.fieldIds);
|
|
@@ -2499,7 +2501,7 @@ function cloneStoredFields(layout) {
|
|
|
2499
2501
|
}
|
|
2500
2502
|
return { kind: 'multi', rows: layout.rows.slice() };
|
|
2501
2503
|
}
|
|
2502
|
-
/** Import from wire rows or
|
|
2504
|
+
/** Import from wire rows or MiniSearch snapshot. Empty storeFields + non-empty rows → multi (binary load without options). */
|
|
2503
2505
|
function storedFieldsFromRows(rows, storeFields) {
|
|
2504
2506
|
if (storeFields.length === 0) {
|
|
2505
2507
|
const hasAny = rows.some(row => row != null && Object.keys(row).length > 0);
|
|
@@ -2683,7 +2685,7 @@ function buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, nextId, s
|
|
|
2683
2685
|
});
|
|
2684
2686
|
return { termCount, index: packedIndex, postings };
|
|
2685
2687
|
}
|
|
2686
|
-
/** Build frozen assemble params from a
|
|
2688
|
+
/** Build frozen assemble params from a MiniSearch JSON snapshot. */
|
|
2687
2689
|
function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
2688
2690
|
var _a, _b, _c;
|
|
2689
2691
|
if (!SUPPORTED_SERIALIZATION_VERSIONS.has(snapshot.serializationVersion)) {
|
|
@@ -2762,33 +2764,43 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2762
2764
|
};
|
|
2763
2765
|
}
|
|
2764
2766
|
|
|
2765
|
-
/** Hard cap on the uncompressed payload, rejected before allocation (
|
|
2767
|
+
/** Hard cap on the uncompressed payload, rejected before allocation (compressed-bomb guard).
|
|
2766
2768
|
* This is the single trust boundary for untrusted snapshots: {@link readPayloadMeta} rejects
|
|
2767
2769
|
* headers above this size; sync decompress uses the same cap via `maxOutputLength`.
|
|
2768
2770
|
* A malicious header can still declare up to 1 GiB — no tighter native limit helps without
|
|
2769
2771
|
* trusting `uncompressedLength` from that same header. Semantic integrity (length match,
|
|
2770
2772
|
* payload CRC, per-section CRC) is enforced after decode. */
|
|
2771
2773
|
const MSV5_MAX_UNCOMPRESSED_BYTES = 1024 * 1024 * 1024;
|
|
2774
|
+
const MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH = 'MSv5 compressed payload exceeds declared length';
|
|
2775
|
+
const MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH = 'MSv5 decompressed payload length mismatch';
|
|
2772
2776
|
// zstd landed in node:zlib at Node 22.15.0 (22.x line) / 23.8.0, where the whole family
|
|
2773
2777
|
// (zstdCompress[Sync], zstdDecompressSync, createZstdDecompress) ships together — so probing one
|
|
2774
2778
|
// member is enough to know if the runtime supports zstd. Checked at call time (not captured at
|
|
2775
|
-
// module load) so it stays mockable in tests. On older runtimes we degrade gracefully:
|
|
2776
|
-
//
|
|
2779
|
+
// module load) so it stays mockable in tests. On older runtimes we degrade gracefully: `auto`
|
|
2780
|
+
// tries zlib once (or raw if it does not help). When zstd is available, `auto` tries zstd once
|
|
2781
|
+
// and stays raw if it does not shrink — no second pass. Reads of a zstd payload throw a clear,
|
|
2782
|
+
// actionable error on runtimes without zstd.
|
|
2777
2783
|
function zstdAvailable() {
|
|
2778
2784
|
return typeof zlib.zstdCompressSync === 'function';
|
|
2779
2785
|
}
|
|
2786
|
+
function zstdUnavailableWriteError() {
|
|
2787
|
+
return new Error('MSv5 snapshot requested zstd compression, but this Node.js runtime lacks node:zlib zstd '
|
|
2788
|
+
+ 'support (added in Node 22.15.0). Upgrade Node.js, or use compression: "auto", "raw", '
|
|
2789
|
+
+ 'or "zlib".');
|
|
2790
|
+
}
|
|
2780
2791
|
function zstdUnavailableReadError() {
|
|
2781
2792
|
return new Error('MSv5 snapshot is zstd-compressed, but this Node.js runtime lacks node:zlib zstd support '
|
|
2782
2793
|
+ '(added in Node 22.15.0). Upgrade Node.js to read this snapshot, or re-save it from a '
|
|
2783
|
-
+ 'newer runtime
|
|
2794
|
+
+ 'newer runtime with compression: "raw" or "zlib".');
|
|
2784
2795
|
}
|
|
2785
2796
|
let warnedZstdSaveFallback = false;
|
|
2786
2797
|
function warnZstdSaveFallbackOnce() {
|
|
2787
2798
|
if (warnedZstdSaveFallback)
|
|
2788
2799
|
return;
|
|
2789
2800
|
warnedZstdSaveFallback = true;
|
|
2790
|
-
process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0);
|
|
2791
|
-
+ 'raw (uncompressed)
|
|
2801
|
+
process.emitWarning('node:zlib zstd APIs are unavailable (Node.js < 22.15.0); compression: "auto" falls back to '
|
|
2802
|
+
+ 'zlib when it shrinks the payload, otherwise raw (uncompressed). Upgrade to Node 22.15.0+ '
|
|
2803
|
+
+ 'for zstd.', { code: 'MINISEARCH_MSV5_ZSTD_UNAVAILABLE' });
|
|
2792
2804
|
}
|
|
2793
2805
|
function assertPayloadFormatRev(buf) {
|
|
2794
2806
|
const rev = buf.readUInt16LE(MSV5_FORMAT_REV_OFFSET);
|
|
@@ -2838,23 +2850,26 @@ function msv5ZstdCompressOptions(uncompressed) {
|
|
|
2838
2850
|
},
|
|
2839
2851
|
};
|
|
2840
2852
|
}
|
|
2841
|
-
|
|
2842
|
-
function pickPayloadCodec(uncompressed, compressed) {
|
|
2843
|
-
if (compressed.length < uncompressed.length) {
|
|
2844
|
-
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2845
|
-
}
|
|
2853
|
+
function rawPayloadChoice(uncompressed) {
|
|
2846
2854
|
return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
|
|
2847
2855
|
}
|
|
2848
|
-
|
|
2849
|
-
|
|
2850
|
-
|
|
2856
|
+
/** Auto mode: one compression attempt; keep it only when strictly smaller than raw. */
|
|
2857
|
+
function pickAutoPayloadCodec(uncompressed, compressed, codec) {
|
|
2858
|
+
if (compressed.length < uncompressed.length) {
|
|
2859
|
+
return {
|
|
2860
|
+
payload: compressed,
|
|
2861
|
+
codec,
|
|
2862
|
+
zstdLevel: codec === CODEC_ZSTD ? MSV5_ZSTD_LEVEL : 0,
|
|
2863
|
+
};
|
|
2851
2864
|
}
|
|
2865
|
+
return rawPayloadChoice(uncompressed);
|
|
2866
|
+
}
|
|
2867
|
+
function zstdPayloadChoiceSync(uncompressed) {
|
|
2852
2868
|
if (!zstdAvailable()) {
|
|
2853
|
-
|
|
2854
|
-
return { payload: uncompressed, codec: CODEC_RAW, zstdLevel: 0 };
|
|
2869
|
+
throw zstdUnavailableWriteError();
|
|
2855
2870
|
}
|
|
2856
2871
|
const compressed = zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed));
|
|
2857
|
-
return
|
|
2872
|
+
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2858
2873
|
}
|
|
2859
2874
|
/**
|
|
2860
2875
|
* Async zstd via {@link zstdCompress} (not {@link zstdCompressSync}).
|
|
@@ -2873,16 +2888,91 @@ function zstdCompressAsync(uncompressed) {
|
|
|
2873
2888
|
});
|
|
2874
2889
|
});
|
|
2875
2890
|
}
|
|
2876
|
-
async function
|
|
2891
|
+
async function zstdPayloadChoiceAsync(uncompressed) {
|
|
2892
|
+
if (!zstdAvailable()) {
|
|
2893
|
+
throw zstdUnavailableWriteError();
|
|
2894
|
+
}
|
|
2895
|
+
const compressed = await zstdCompressAsync(uncompressed);
|
|
2896
|
+
return { payload: compressed, codec: CODEC_ZSTD, zstdLevel: MSV5_ZSTD_LEVEL };
|
|
2897
|
+
}
|
|
2898
|
+
function zlibPayloadChoiceSync(uncompressed) {
|
|
2899
|
+
const compressed = zlib.deflateSync(uncompressed);
|
|
2900
|
+
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2901
|
+
}
|
|
2902
|
+
function zlibCompressAsync(uncompressed) {
|
|
2903
|
+
return new Promise((resolve, reject) => {
|
|
2904
|
+
zlib.deflate(uncompressed, (err, compressed) => {
|
|
2905
|
+
if (err != null) {
|
|
2906
|
+
reject(err);
|
|
2907
|
+
return;
|
|
2908
|
+
}
|
|
2909
|
+
resolve(compressed);
|
|
2910
|
+
});
|
|
2911
|
+
});
|
|
2912
|
+
}
|
|
2913
|
+
async function zlibPayloadChoiceAsync(uncompressed) {
|
|
2914
|
+
const compressed = await zlibCompressAsync(uncompressed);
|
|
2915
|
+
return { payload: compressed, codec: CODEC_ZLIB, zstdLevel: 0 };
|
|
2916
|
+
}
|
|
2917
|
+
const autoSyncCompressors = {
|
|
2918
|
+
zstd: (uncompressed) => zlib.zstdCompressSync(uncompressed, msv5ZstdCompressOptions(uncompressed)),
|
|
2919
|
+
zlib: (uncompressed) => zlib.deflateSync(uncompressed),
|
|
2920
|
+
};
|
|
2921
|
+
const autoAsyncCompressors = {
|
|
2922
|
+
zstd: zstdCompressAsync,
|
|
2923
|
+
zlib: zlibCompressAsync,
|
|
2924
|
+
};
|
|
2925
|
+
function autoPayloadChoice(uncompressed, compressors) {
|
|
2877
2926
|
if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
|
|
2878
|
-
return
|
|
2927
|
+
return rawPayloadChoice(uncompressed);
|
|
2879
2928
|
}
|
|
2880
2929
|
if (!zstdAvailable()) {
|
|
2881
2930
|
warnZstdSaveFallbackOnce();
|
|
2882
|
-
return
|
|
2931
|
+
return pickAutoPayloadCodec(uncompressed, compressors.zlib(uncompressed), CODEC_ZLIB);
|
|
2932
|
+
}
|
|
2933
|
+
return pickAutoPayloadCodec(uncompressed, compressors.zstd(uncompressed), CODEC_ZSTD);
|
|
2934
|
+
}
|
|
2935
|
+
async function autoPayloadChoiceAsync(uncompressed, compressors) {
|
|
2936
|
+
if (uncompressed.length < MSV5_MIN_COMPRESS_BYTES) {
|
|
2937
|
+
return rawPayloadChoice(uncompressed);
|
|
2938
|
+
}
|
|
2939
|
+
if (!zstdAvailable()) {
|
|
2940
|
+
warnZstdSaveFallbackOnce();
|
|
2941
|
+
return pickAutoPayloadCodec(uncompressed, await compressors.zlib(uncompressed), CODEC_ZLIB);
|
|
2942
|
+
}
|
|
2943
|
+
return pickAutoPayloadCodec(uncompressed, await compressors.zstd(uncompressed), CODEC_ZSTD);
|
|
2944
|
+
}
|
|
2945
|
+
function choosePayloadCodecSync(uncompressed, compression = 'auto') {
|
|
2946
|
+
switch (compression) {
|
|
2947
|
+
case 'raw':
|
|
2948
|
+
return rawPayloadChoice(uncompressed);
|
|
2949
|
+
case 'zstd':
|
|
2950
|
+
return zstdPayloadChoiceSync(uncompressed);
|
|
2951
|
+
case 'zlib':
|
|
2952
|
+
return zlibPayloadChoiceSync(uncompressed);
|
|
2953
|
+
case 'auto':
|
|
2954
|
+
return autoPayloadChoice(uncompressed, autoSyncCompressors);
|
|
2955
|
+
default: {
|
|
2956
|
+
const _exhaustive = compression;
|
|
2957
|
+
return _exhaustive;
|
|
2958
|
+
}
|
|
2959
|
+
}
|
|
2960
|
+
}
|
|
2961
|
+
async function choosePayloadCodecAsync(uncompressed, compression = 'auto') {
|
|
2962
|
+
switch (compression) {
|
|
2963
|
+
case 'raw':
|
|
2964
|
+
return rawPayloadChoice(uncompressed);
|
|
2965
|
+
case 'zstd':
|
|
2966
|
+
return await zstdPayloadChoiceAsync(uncompressed);
|
|
2967
|
+
case 'zlib':
|
|
2968
|
+
return await zlibPayloadChoiceAsync(uncompressed);
|
|
2969
|
+
case 'auto':
|
|
2970
|
+
return await autoPayloadChoiceAsync(uncompressed, autoAsyncCompressors);
|
|
2971
|
+
default: {
|
|
2972
|
+
const _exhaustive = compression;
|
|
2973
|
+
return _exhaustive;
|
|
2974
|
+
}
|
|
2883
2975
|
}
|
|
2884
|
-
const compressed = await zstdCompressAsync(uncompressed);
|
|
2885
|
-
return pickPayloadCodec(uncompressed, compressed);
|
|
2886
2976
|
}
|
|
2887
2977
|
function concatAndValidateSections(rawSections) {
|
|
2888
2978
|
if (rawSections.length !== MSV5_SECTION_COUNT) {
|
|
@@ -2937,16 +3027,16 @@ function buildMsv5AssembledFile(globalFlags, entries, uncompressedLength, payloa
|
|
|
2937
3027
|
}
|
|
2938
3028
|
/**
|
|
2939
3029
|
* MSv5 on disk: header + catalogue (uncompressed offsets) + **one** payload blob
|
|
2940
|
-
* (raw concatenation or a single
|
|
3030
|
+
* (raw concatenation or a single compressed stream over it).
|
|
2941
3031
|
*/
|
|
2942
|
-
function assembleMsv5File(globalFlags, rawSections) {
|
|
3032
|
+
function assembleMsv5File(globalFlags, rawSections, compression = 'auto') {
|
|
2943
3033
|
const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
|
|
2944
|
-
const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed);
|
|
3034
|
+
const { payload, codec, zstdLevel } = choosePayloadCodecSync(uncompressed, compression);
|
|
2945
3035
|
return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
|
|
2946
3036
|
}
|
|
2947
|
-
async function assembleMsv5FileAsync(globalFlags, rawSections) {
|
|
3037
|
+
async function assembleMsv5FileAsync(globalFlags, rawSections, compression = 'auto') {
|
|
2948
3038
|
const { uncompressed, entries, payloadCrc32 } = concatAndValidateSections(rawSections);
|
|
2949
|
-
const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed);
|
|
3039
|
+
const { payload, codec, zstdLevel } = await choosePayloadCodecAsync(uncompressed, compression);
|
|
2950
3040
|
return buildMsv5AssembledFile(globalFlags, entries, uncompressed.length, payloadCrc32, payload, codec, zstdLevel);
|
|
2951
3041
|
}
|
|
2952
3042
|
function readMsv5SectionDirectory(buf) {
|
|
@@ -2990,11 +3080,11 @@ function sectionsFromPayload(payload, directory, payloadCrc32) {
|
|
|
2990
3080
|
return out;
|
|
2991
3081
|
});
|
|
2992
3082
|
}
|
|
2993
|
-
/** Streaming
|
|
2994
|
-
* No `maxOutputLength` on
|
|
2995
|
-
*
|
|
2996
|
-
*
|
|
2997
|
-
function
|
|
3083
|
+
/** Streaming compressed reader: keeps only one section in memory at a time.
|
|
3084
|
+
* No `maxOutputLength` on Transform streams: output is bounded by accumulating `streamOffset`
|
|
3085
|
+
* against the header's `uncompressedLength` (same 1 GiB cap checked upfront). Sync load uses
|
|
3086
|
+
* `maxOutputLength` because it materializes the whole payload at once. */
|
|
3087
|
+
function collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32) {
|
|
2998
3088
|
if (uncompressedLength > MSV5_MAX_UNCOMPRESSED_BYTES) {
|
|
2999
3089
|
throw new Error('MSv5 payload exceeds 1 GiB limit');
|
|
3000
3090
|
}
|
|
@@ -3014,7 +3104,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3014
3104
|
}
|
|
3015
3105
|
function consume(chunk) {
|
|
3016
3106
|
if (streamOffset + chunk.length > uncompressedLength) {
|
|
3017
|
-
throw new Error(
|
|
3107
|
+
throw new Error(MSV5_COMPRESSED_PAYLOAD_EXCEEDS_LENGTH);
|
|
3018
3108
|
}
|
|
3019
3109
|
payloadCrc = crc32Update(payloadCrc, chunk);
|
|
3020
3110
|
let off = 0;
|
|
@@ -3050,7 +3140,7 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3050
3140
|
function finish() {
|
|
3051
3141
|
emitEmptySections();
|
|
3052
3142
|
if (streamOffset !== uncompressedLength || sectionId !== directory.length) {
|
|
3053
|
-
throw new Error(
|
|
3143
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3054
3144
|
}
|
|
3055
3145
|
if (payloadCrc !== payloadCrc32) {
|
|
3056
3146
|
throw new Error('MSv5 payload CRC mismatch');
|
|
@@ -3059,9 +3149,15 @@ function collectZstdPayloadSections(directory, uncompressedLength, payloadCrc32)
|
|
|
3059
3149
|
return { sections, consume, finish };
|
|
3060
3150
|
}
|
|
3061
3151
|
function loadMsv5SectionsFromZstdStream(compressed, directory, uncompressedLength, payloadCrc32) {
|
|
3152
|
+
return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createZstdDecompress());
|
|
3153
|
+
}
|
|
3154
|
+
function loadMsv5SectionsFromZlibStream(compressed, directory, uncompressedLength, payloadCrc32) {
|
|
3155
|
+
return loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, () => zlib.createInflate());
|
|
3156
|
+
}
|
|
3157
|
+
function loadMsv5SectionsFromCompressedStream(compressed, directory, uncompressedLength, payloadCrc32, createStream) {
|
|
3062
3158
|
return new Promise((resolve, reject) => {
|
|
3063
|
-
const collector =
|
|
3064
|
-
const stream =
|
|
3159
|
+
const collector = collectCompressedPayloadSections(directory, uncompressedLength, payloadCrc32);
|
|
3160
|
+
const stream = createStream();
|
|
3065
3161
|
stream.on('data', (chunk) => {
|
|
3066
3162
|
try {
|
|
3067
3163
|
collector.consume(chunk);
|
|
@@ -3119,29 +3215,39 @@ function preparePayload(fileBuf, directory) {
|
|
|
3119
3215
|
payloadCrc32,
|
|
3120
3216
|
};
|
|
3121
3217
|
}
|
|
3122
|
-
|
|
3123
|
-
function loadMsv5Sections(fileBuf, directory) {
|
|
3124
|
-
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
3125
|
-
if (payloadCodec === CODEC_RAW) {
|
|
3126
|
-
return sectionsFromPayload(slice, directory, payloadCrc32);
|
|
3127
|
-
}
|
|
3218
|
+
function decompressPayloadSync(payloadCodec, slice, uncompressedLength) {
|
|
3128
3219
|
if (payloadCodec === CODEC_ZSTD) {
|
|
3129
3220
|
if (!zstdAvailable()) {
|
|
3130
3221
|
throw zstdUnavailableReadError();
|
|
3131
3222
|
}
|
|
3132
|
-
// Native cap matches readPayloadMeta's 1 GiB limit (see MSV5_MAX_UNCOMPRESSED_BYTES).
|
|
3133
|
-
// Using header `uncompressedLength` here would only help when the header understates
|
|
3134
|
-
// the zstd stream but the attacker can inflate the header too — same worst case.
|
|
3135
3223
|
const decoded = zlib.zstdDecompressSync(slice, {
|
|
3136
3224
|
maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
|
|
3137
3225
|
});
|
|
3138
3226
|
if (decoded.length !== uncompressedLength) {
|
|
3139
|
-
throw new Error(
|
|
3227
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3228
|
+
}
|
|
3229
|
+
return decoded;
|
|
3230
|
+
}
|
|
3231
|
+
if (payloadCodec === CODEC_ZLIB) {
|
|
3232
|
+
const decoded = zlib.inflateSync(slice, {
|
|
3233
|
+
maxOutputLength: MSV5_MAX_UNCOMPRESSED_BYTES,
|
|
3234
|
+
});
|
|
3235
|
+
if (decoded.length !== uncompressedLength) {
|
|
3236
|
+
throw new Error(MSV5_DECOMPRESSED_PAYLOAD_LENGTH_MISMATCH);
|
|
3140
3237
|
}
|
|
3141
|
-
return
|
|
3238
|
+
return decoded;
|
|
3142
3239
|
}
|
|
3143
3240
|
throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
|
|
3144
3241
|
}
|
|
3242
|
+
/** Synchronous load; peak RAM ≈ full uncompressed payload (use the async path to bound it). */
|
|
3243
|
+
function loadMsv5Sections(fileBuf, directory) {
|
|
3244
|
+
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
3245
|
+
if (payloadCodec === CODEC_RAW) {
|
|
3246
|
+
return sectionsFromPayload(slice, directory, payloadCrc32);
|
|
3247
|
+
}
|
|
3248
|
+
const decoded = decompressPayloadSync(payloadCodec, slice, uncompressedLength);
|
|
3249
|
+
return sectionsFromPayload(decoded, directory, payloadCrc32);
|
|
3250
|
+
}
|
|
3145
3251
|
/** Streaming load; peak main-thread RAM ≈ largest single section (+ file buffer). */
|
|
3146
3252
|
async function loadMsv5SectionsAsync(fileBuf, directory) {
|
|
3147
3253
|
const { payloadCodec, slice, uncompressedLength, payloadCrc32 } = preparePayload(fileBuf, directory);
|
|
@@ -3154,6 +3260,9 @@ async function loadMsv5SectionsAsync(fileBuf, directory) {
|
|
|
3154
3260
|
}
|
|
3155
3261
|
return loadMsv5SectionsFromZstdStream(slice, directory, uncompressedLength, payloadCrc32);
|
|
3156
3262
|
}
|
|
3263
|
+
if (payloadCodec === CODEC_ZLIB) {
|
|
3264
|
+
return loadMsv5SectionsFromZlibStream(slice, directory, uncompressedLength, payloadCrc32);
|
|
3265
|
+
}
|
|
3157
3266
|
throw new Error(`MSv5 unknown payload codec ${payloadCodec}`);
|
|
3158
3267
|
}
|
|
3159
3268
|
function isMsv5Buffer(buf) {
|
|
@@ -3442,7 +3551,7 @@ function resolvePackedTree(snap, termTree, packedTermIndex) {
|
|
|
3442
3551
|
validateTermTreeLeaves(tree, termCount);
|
|
3443
3552
|
return fromRadixTree(tree, termCount);
|
|
3444
3553
|
}
|
|
3445
|
-
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
3554
|
+
function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression) {
|
|
3446
3555
|
var _a;
|
|
3447
3556
|
validateFrozenSnapshotNumeric(snap);
|
|
3448
3557
|
const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
@@ -3471,9 +3580,9 @@ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
|
3471
3580
|
postingsWire.docIds,
|
|
3472
3581
|
postingsWire.freqs,
|
|
3473
3582
|
];
|
|
3474
|
-
return assembleMsv5File(globalFlags, rawSections).buffer;
|
|
3583
|
+
return assembleMsv5File(globalFlags, rawSections, compression).buffer;
|
|
3475
3584
|
}
|
|
3476
|
-
async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
3585
|
+
async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression) {
|
|
3477
3586
|
var _a;
|
|
3478
3587
|
validateFrozenSnapshotNumeric(snap);
|
|
3479
3588
|
const fieldNames = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
@@ -3502,7 +3611,7 @@ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
|
3502
3611
|
postingsWire.docIds,
|
|
3503
3612
|
postingsWire.freqs,
|
|
3504
3613
|
];
|
|
3505
|
-
return (await assembleMsv5FileAsync(globalFlags, rawSections)).buffer;
|
|
3614
|
+
return (await assembleMsv5FileAsync(globalFlags, rawSections, compression)).buffer;
|
|
3506
3615
|
}
|
|
3507
3616
|
|
|
3508
3617
|
function validateMsv5Container(buf) {
|
|
@@ -3579,12 +3688,12 @@ async function decodeFrozenSnapshotMsv5Async(buf, hints) {
|
|
|
3579
3688
|
}
|
|
3580
3689
|
|
|
3581
3690
|
/** Encode a frozen snapshot as a binary buffer. */
|
|
3582
|
-
function encodeFrozenSnapshot(snap, termTree, packedTermIndex) {
|
|
3583
|
-
return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex);
|
|
3691
|
+
function encodeFrozenSnapshot(snap, termTree, packedTermIndex, compression) {
|
|
3692
|
+
return encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex, compression);
|
|
3584
3693
|
}
|
|
3585
|
-
/** Async encoder; uses
|
|
3586
|
-
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
|
|
3587
|
-
return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex);
|
|
3694
|
+
/** Async encoder; uses the selected payload compression without blocking the event loop. */
|
|
3695
|
+
function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex, compression) {
|
|
3696
|
+
return encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex, compression);
|
|
3588
3697
|
}
|
|
3589
3698
|
|
|
3590
3699
|
const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
|
|
@@ -3597,11 +3706,11 @@ function decodeFrozenSnapshot(buf, hints) {
|
|
|
3597
3706
|
return decodeFrozenSnapshotMsv5(buf, hints);
|
|
3598
3707
|
}
|
|
3599
3708
|
if (LEGACY_MAGICS.has(magic)) {
|
|
3600
|
-
throw invalidFrozenIndex('Unsupported frozen binary snapshot; re-build with saveBinarySync() or from
|
|
3709
|
+
throw invalidFrozenIndex('Unsupported frozen binary snapshot; re-build with saveBinarySync() or from MiniSearch JSON');
|
|
3601
3710
|
}
|
|
3602
3711
|
throw invalidFrozenIndex('Unsupported frozen binary snapshot');
|
|
3603
3712
|
}
|
|
3604
|
-
/** Async frozen snapshot decode (streaming
|
|
3713
|
+
/** Async frozen snapshot decode (streaming decompression when needed). */
|
|
3605
3714
|
async function decodeFrozenSnapshotAsync(buf, hints) {
|
|
3606
3715
|
assertBufferLength(buf, 8);
|
|
3607
3716
|
const version = buf.readUInt16LE(4);
|
|
@@ -4298,7 +4407,7 @@ function executeQueryInternal(query, searchOptions, params, allowedDocs, run) {
|
|
|
4298
4407
|
return executeWildcardQuery(searchOptions, params);
|
|
4299
4408
|
}
|
|
4300
4409
|
if (isQueryCombination(query)) {
|
|
4301
|
-
// Spread inherits parent combineWith into child branches (
|
|
4410
|
+
// Spread inherits parent combineWith into child branches (MiniSearch 7.2 behavior).
|
|
4302
4411
|
const options = { ...searchOptions, ...query, queries: undefined };
|
|
4303
4412
|
const operator = ((_b = (_a = query.combineWith) !== null && _a !== void 0 ? _a : options.combineWith) !== null && _b !== void 0 ? _b : params.globalSearchOptions.combineWith);
|
|
4304
4413
|
if (useGatedEvaluation(run, query.queries.length, operator, combinationHasWildcard(query))) {
|
|
@@ -4348,6 +4457,73 @@ function autoSuggestFromSearch(search, queryString, options = {}) {
|
|
|
4348
4457
|
return suggestFromSearchResults(search(queryString, options));
|
|
4349
4458
|
}
|
|
4350
4459
|
|
|
4460
|
+
/** Visit shortIds with a defined external id (holes in `externalIds` are skipped). */
|
|
4461
|
+
function forEachLiveShortId(nextId, externalIds, callback) {
|
|
4462
|
+
for (let shortId = 0; shortId < nextId; shortId++) {
|
|
4463
|
+
const externalId = externalIds[shortId];
|
|
4464
|
+
if (externalId === undefined)
|
|
4465
|
+
continue;
|
|
4466
|
+
callback(shortId, externalId);
|
|
4467
|
+
}
|
|
4468
|
+
}
|
|
4469
|
+
|
|
4470
|
+
/**
|
|
4471
|
+
* Build a MiniSearch `toJSON` wire snapshot (`serializationVersion: 2`) from frozen index parts.
|
|
4472
|
+
* Alloc-heavy (plain objects per term/field) — migration/interop only, not production persistence.
|
|
4473
|
+
* All input parts must belong to the same frozen index instance.
|
|
4474
|
+
*/
|
|
4475
|
+
function miniSearchSnapshotFromFrozen(input) {
|
|
4476
|
+
const { documentCount, nextId, fieldIds, fieldCount, externalIds, fieldLengthMatrix, avgFieldLength, storedFields, index, fieldTermFlyweight, } = input;
|
|
4477
|
+
const documentIds = {};
|
|
4478
|
+
const fieldLength = {};
|
|
4479
|
+
const storedFieldsOut = {};
|
|
4480
|
+
const hasStoredFields = storedFields.kind !== 'none';
|
|
4481
|
+
forEachLiveShortId(nextId, externalIds, (shortId, externalId) => {
|
|
4482
|
+
var _a;
|
|
4483
|
+
const shortIdStr = String(shortId);
|
|
4484
|
+
documentIds[shortIdStr] = externalId;
|
|
4485
|
+
const lengths = new Array(fieldCount);
|
|
4486
|
+
const rowBase = shortId * fieldCount;
|
|
4487
|
+
for (let f = 0; f < fieldCount; f++) {
|
|
4488
|
+
lengths[f] = (_a = fieldLengthMatrix[rowBase + f]) !== null && _a !== void 0 ? _a : 0;
|
|
4489
|
+
}
|
|
4490
|
+
fieldLength[shortIdStr] = lengths;
|
|
4491
|
+
if (hasStoredFields) {
|
|
4492
|
+
storedFieldsOut[shortIdStr] = readStoredFields(storedFields, shortId);
|
|
4493
|
+
}
|
|
4494
|
+
});
|
|
4495
|
+
const indexEntries = [];
|
|
4496
|
+
for (const [term, termIndex] of index.entries()) {
|
|
4497
|
+
fieldTermFlyweight.bind(termIndex);
|
|
4498
|
+
const fieldData = {};
|
|
4499
|
+
for (let f = 0; f < fieldCount; f++) {
|
|
4500
|
+
const segment = fieldTermFlyweight.get(f);
|
|
4501
|
+
if (segment == null || segment.size === 0)
|
|
4502
|
+
continue;
|
|
4503
|
+
const entry = {};
|
|
4504
|
+
segment.forEachDoc((docId, freq) => {
|
|
4505
|
+
entry[String(docId)] = freq;
|
|
4506
|
+
});
|
|
4507
|
+
fieldData[String(f)] = entry;
|
|
4508
|
+
}
|
|
4509
|
+
if (Object.keys(fieldData).length > 0) {
|
|
4510
|
+
indexEntries.push([term, fieldData]);
|
|
4511
|
+
}
|
|
4512
|
+
}
|
|
4513
|
+
return {
|
|
4514
|
+
documentCount,
|
|
4515
|
+
nextId,
|
|
4516
|
+
documentIds,
|
|
4517
|
+
fieldIds,
|
|
4518
|
+
fieldLength,
|
|
4519
|
+
averageFieldLength: Array.from(avgFieldLength),
|
|
4520
|
+
storedFields: storedFieldsOut,
|
|
4521
|
+
dirtCount: 0,
|
|
4522
|
+
index: indexEntries,
|
|
4523
|
+
serializationVersion: 2,
|
|
4524
|
+
};
|
|
4525
|
+
}
|
|
4526
|
+
|
|
4351
4527
|
function ownedIndexArray(arr) {
|
|
4352
4528
|
if (arr instanceof Uint8Array)
|
|
4353
4529
|
return new Uint8Array(arr);
|
|
@@ -4506,12 +4682,9 @@ class FrozenMiniSearch {
|
|
|
4506
4682
|
tokenize: this._options.tokenize,
|
|
4507
4683
|
processTerm: this._options.processTerm,
|
|
4508
4684
|
indexView: createFrozenQueryIndexView(this._index, this._postings, this._fieldTermFlyweight, (callback) => {
|
|
4509
|
-
|
|
4510
|
-
const id = this._externalIds[shortId];
|
|
4511
|
-
if (id === undefined)
|
|
4512
|
-
continue;
|
|
4685
|
+
forEachLiveShortId(this._nextId, this._externalIds, (shortId, id) => {
|
|
4513
4686
|
callback(shortId, id, readStoredFields(this._storedFields, shortId));
|
|
4514
|
-
}
|
|
4687
|
+
});
|
|
4515
4688
|
}),
|
|
4516
4689
|
aggregateContext: this._aggregateContext,
|
|
4517
4690
|
};
|
|
@@ -4576,7 +4749,7 @@ class FrozenMiniSearch {
|
|
|
4576
4749
|
return autoSuggestFromSearch((q, o) => this.search(q, o), queryString, merged);
|
|
4577
4750
|
}
|
|
4578
4751
|
/** Serialize this index as a frozen binary snapshot (synchronous). */
|
|
4579
|
-
saveBinarySync() {
|
|
4752
|
+
saveBinarySync(saveOptions = {}) {
|
|
4580
4753
|
return encodeFrozenSnapshot({
|
|
4581
4754
|
documentCount: this._documentCount,
|
|
4582
4755
|
nextId: this._nextId,
|
|
@@ -4590,10 +4763,10 @@ class FrozenMiniSearch {
|
|
|
4590
4763
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4591
4764
|
treeShape: [],
|
|
4592
4765
|
postings: this._postings,
|
|
4593
|
-
}, undefined, this._index);
|
|
4766
|
+
}, undefined, this._index, saveOptions.compression);
|
|
4594
4767
|
}
|
|
4595
|
-
/** Non-blocking
|
|
4596
|
-
async saveBinaryAsync() {
|
|
4768
|
+
/** Non-blocking snapshot serialization with the selected compression codec. */
|
|
4769
|
+
async saveBinaryAsync(saveOptions = {}) {
|
|
4597
4770
|
return encodeFrozenSnapshotAsync({
|
|
4598
4771
|
documentCount: this._documentCount,
|
|
4599
4772
|
nextId: this._nextId,
|
|
@@ -4607,7 +4780,7 @@ class FrozenMiniSearch {
|
|
|
4607
4780
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4608
4781
|
treeShape: [],
|
|
4609
4782
|
postings: this._postings,
|
|
4610
|
-
}, undefined, this._index);
|
|
4783
|
+
}, undefined, this._index, saveOptions.compression);
|
|
4611
4784
|
}
|
|
4612
4785
|
/** Load a frozen binary snapshot. */
|
|
4613
4786
|
static loadBinarySync(buffer, options = {}) {
|
|
@@ -4616,7 +4789,7 @@ class FrozenMiniSearch {
|
|
|
4616
4789
|
const snap = decodeFrozenSnapshot(buffer, { storeFields });
|
|
4617
4790
|
return FrozenMiniSearch.fromBinarySnapshot(snap, options);
|
|
4618
4791
|
}
|
|
4619
|
-
/** Load a frozen binary snapshot with streaming
|
|
4792
|
+
/** Load a frozen binary snapshot with streaming decompression when needed (bounded memory). */
|
|
4620
4793
|
static async loadBinaryAsync(buffer, options = {}) {
|
|
4621
4794
|
var _a;
|
|
4622
4795
|
const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
|
|
@@ -4665,21 +4838,43 @@ class FrozenMiniSearch {
|
|
|
4665
4838
|
return buildFrozenFromDocuments(documents, options);
|
|
4666
4839
|
}
|
|
4667
4840
|
/**
|
|
4668
|
-
*
|
|
4669
|
-
*
|
|
4841
|
+
* Export this index as a MiniSearch wire snapshot (`serializationVersion: 2`).
|
|
4842
|
+
* Use for migration or interchange with the `minisearch` package (`JSON.stringify` works via this method).
|
|
4843
|
+
* Not the primary persistence format — prefer {@link saveBinarySync} for production (size and load time).
|
|
4844
|
+
* Term order in `index` may differ from MiniSearch native `toJSON`; search scores stay equivalent.
|
|
4845
|
+
*/
|
|
4846
|
+
toJSON() {
|
|
4847
|
+
return miniSearchSnapshotFromFrozen({
|
|
4848
|
+
documentCount: this._documentCount,
|
|
4849
|
+
nextId: this._nextId,
|
|
4850
|
+
fieldIds: this._fieldIds,
|
|
4851
|
+
fieldCount: this._fieldCount,
|
|
4852
|
+
externalIds: this._externalIds,
|
|
4853
|
+
fieldLengthMatrix: this._fieldLengthMatrix,
|
|
4854
|
+
avgFieldLength: this._avgFieldLength,
|
|
4855
|
+
storedFields: this._storedFields,
|
|
4856
|
+
index: this._index,
|
|
4857
|
+
fieldTermFlyweight: this._fieldTermFlyweight,
|
|
4858
|
+
});
|
|
4859
|
+
}
|
|
4860
|
+
/**
|
|
4861
|
+
* Build a new frozen index **from** a MiniSearch JSON snapshot string (import / migration).
|
|
4862
|
+
* Accepts the wire format produced by MiniSearch `toJSON` or by {@link toJSON} on this class.
|
|
4863
|
+
* Distinct from {@link loadBinarySync}: JSON is MiniSearch interchange, not the native frozen binary.
|
|
4864
|
+
* No runtime dependency on the `minisearch` package.
|
|
4670
4865
|
*/
|
|
4671
|
-
static
|
|
4866
|
+
static fromJson(json, options = {}) {
|
|
4672
4867
|
return FrozenMiniSearch.fromMiniSearchSnapshot(JSON.parse(json), options);
|
|
4673
4868
|
}
|
|
4674
4869
|
/**
|
|
4675
|
-
* Same as {@link
|
|
4870
|
+
* Same as {@link fromJson} with a pre-parsed snapshot object.
|
|
4676
4871
|
* `storedFields` are shallow-copied; callers must not mutate nested values
|
|
4677
4872
|
* after load if they intend to keep the index immutable.
|
|
4678
4873
|
*/
|
|
4679
4874
|
static fromMiniSearchSnapshot(snapshot, options = {}) {
|
|
4680
4875
|
return assembleFrozenTrusted(buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options), 'minisearch-json');
|
|
4681
4876
|
}
|
|
4682
|
-
/** Accepts any object exposing `toJSON()` in
|
|
4877
|
+
/** Accepts any object exposing `toJSON()` in MiniSearch snapshot shape. */
|
|
4683
4878
|
static fromMiniSearch(source, options = {}) {
|
|
4684
4879
|
return FrozenMiniSearch.fromMiniSearchSnapshot(source.toJSON(), options);
|
|
4685
4880
|
}
|