multi_compress 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -3
- data/GET_STARTED.md +3 -3
- data/README.md +75 -66
- data/THIRD_PARTY_NOTICES.md +24 -0
- data/ext/multi_compress/brotli_dec_static_init.c +3 -0
- data/ext/multi_compress/brotli_enc_static_init.c +3 -0
- data/ext/multi_compress/extconf.rb +22 -1
- data/ext/multi_compress/vendor/.vendored +2 -2
- data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
- data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
- data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
- data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
- data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
- data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
- data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
- data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
- data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
- data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
- data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
- data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
- data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
- data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
- data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
- data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
- data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
- data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
- data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
- data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
- data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
- data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
- data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
- data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
- data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
- data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
- data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
- data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
- data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
- data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
- data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
- data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
- data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
- data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
- data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
- data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
- data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
- data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
- data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
- data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
- data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
- data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
- data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
- data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
- data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
- data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
- data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
- data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
- data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
- data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
- data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
- data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
- data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
- data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
- data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
- data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
- data/ext/multi_compress/vendor/zstd/COPYING +339 -0
- data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
- data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
- data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
- data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
- data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
- data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
- data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
- data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
- data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
- data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
- data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
- data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
- data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
- data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
- data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
- data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
- data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
- data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
- data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
- data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
- data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
- data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
- data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
- data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
- data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
- data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
- data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
- data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
- data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
- data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
- data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
- data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
- data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
- data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
- data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
- data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
- data/lib/multi_compress/version.rb +1 -1
- metadata +29 -2

data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -10,14 +10,23 @@

 #include "zstd_compress_internal.h"
 #include "zstd_lazy.h"
+#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
+
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
+
+#define kLazySkippingStep 8


 /*-*************************************
 *  Binary Tree search
 ***************************************/

-static void
-ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_updateDUBT(ZSTD_MatchState_t* ms,
                 const BYTE* ip, const BYTE* iend,
                 U32 mls)
 {
@@ -60,8 +69,9 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
  *  sort one already inserted but unsorted position
  *  assumption : curr >= btlow == (curr - btmask)
  *  doesn't fail */
-static void
-ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_insertDUBT1(const ZSTD_MatchState_t* ms,
                  U32 curr, const BYTE* inputEnd,
                  U32 nbCompares, U32 btLow,
                  const ZSTD_dictMode_e dictMode)
@@ -149,9 +159,10 @@ ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
 }


-static size_t
-ZSTD_DUBT_findBetterDictMatch (
-        const ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_DUBT_findBetterDictMatch (
+        const ZSTD_MatchState_t* ms,
         const BYTE* const ip, const BYTE* const iend,
         size_t* offsetPtr,
         size_t bestLength,
@@ -159,7 +170,7 @@ ZSTD_DUBT_findBetterDictMatch (
         U32 const mls,
         const ZSTD_dictMode_e dictMode)
 {
-    const ZSTD_matchState_t * const dms = ms->dictMatchState;
+    const ZSTD_MatchState_t * const dms = ms->dictMatchState;
     const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
     const U32 * const dictHashTable = dms->hashTable;
     U32 const hashLog = dmsCParams->hashLog;
@@ -197,8 +208,8 @@ ZSTD_DUBT_findBetterDictMatch (
             U32 matchIndex = dictMatchIndex + dictIndexDelta;
             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
                 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
-                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr,
-                bestLength = matchLength, *offsetPtr =
+                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex);
+                bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
             }
             if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
@@ -218,7 +229,7 @@ ZSTD_DUBT_findBetterDictMatch (
     }

     if (bestLength >= MINMATCH) {
-        U32 const mIndex = curr - (U32)
+        U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex;
         DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                   curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
     }
@@ -227,10 +238,11 @@ ZSTD_DUBT_findBetterDictMatch (
 }


-static size_t
-ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_DUBT_findBestMatch(ZSTD_MatchState_t* ms,
                         const BYTE* const ip, const BYTE* const iend,
-                        size_t* offsetPtr,
+                        size_t* offBasePtr,
                         U32 const mls,
                         const ZSTD_dictMode_e dictMode)
 {
@@ -327,8 +339,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
         if (matchLength > bestLength) {
             if (matchLength > matchEndIdx - matchIndex)
                 matchEndIdx = matchIndex + (U32)matchLength;
-            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)
-                bestLength = matchLength, *
+            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) )
+                bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex);
             if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
                 if (dictMode == ZSTD_dictMatchState) {
                     nbCompares = 0; /* in addition to avoiding checking any
@@ -361,16 +373,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
     if (dictMode == ZSTD_dictMatchState && nbCompares) {
         bestLength = ZSTD_DUBT_findBetterDictMatch(
                 ms, ip, iend,
-                offsetPtr, bestLength, nbCompares,
+                offBasePtr, bestLength, nbCompares,
                 mls, dictMode);
     }

     assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
     ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
     if (bestLength >= MINMATCH) {
-        U32 const mIndex = curr - (U32)
+        U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex;
         DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
-                    curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
+                    curr, (U32)bestLength, (U32)*offBasePtr, mIndex);
     }
     return bestLength;
 }
@@ -378,24 +390,25 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,


 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_BtFindBestMatch( ZSTD_MatchState_t* ms,
                 const BYTE* const ip, const BYTE* const iLimit,
-                size_t* offsetPtr,
+                size_t* offBasePtr,
                 const U32 mls /* template */,
                 const ZSTD_dictMode_e dictMode)
 {
     DEBUGLOG(7, "ZSTD_BtFindBestMatch");
     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
     ZSTD_updateDUBT(ms, ip, iLimit, mls);
-    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
+    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode);
 }

 /***********************************
 *  Dedicated dict search
 ***********************************/

-void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip)
 {
     const BYTE* const base = ms->window.base;
     U32 const target = (U32)(ip - base);
@@ -514,7 +527,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B
  */
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
-                                            const ZSTD_matchState_t* const dms,
+                                            const ZSTD_MatchState_t* const dms,
                                             const BYTE* const ip, const BYTE* const iLimit,
                                             const BYTE* const prefixStart, const U32 curr,
                                             const U32 dictLimit, const size_t ddsIdx) {
@@ -561,7 +574,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
         /* save best solution */
         if (currentMl > ml) {
             ml = currentMl;
-            *offsetPtr =
+            *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
             if (ip+currentMl == iLimit) {
                 /* best possible, avoids read overflow on next attempt */
                 return ml;
@@ -598,7 +611,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
         /* save best solution */
         if (currentMl > ml) {
             ml = currentMl;
-            *offsetPtr =
+            *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
             if (ip+currentMl == iLimit) break;   /* best possible, avoids read overflow on next attempt */
         }
     }
@@ -614,10 +627,12 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb

 /* Update chains up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
-                        ZSTD_matchState_t* ms,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertAndFindFirstIndex_internal(
+                        ZSTD_MatchState_t* ms,
                         const ZSTD_compressionParameters* const cParams,
-                        const BYTE* ip, U32 const mls)
+                        const BYTE* ip, U32 const mls, U32 const lazySkipping)
 {
     U32* const hashTable  = ms->hashTable;
     const U32 hashLog = cParams->hashLog;
@@ -632,21 +647,25 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
         NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
         hashTable[h] = idx;
         idx++;
+        /* Stop inserting every position when in the lazy skipping mode. */
+        if (lazySkipping)
+            break;
     }

     ms->nextToUpdate = target;
     return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
 }

-U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip) {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
 }

 /* inlining is important to hardwire a hot branch (template emulation) */
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_HcFindBestMatch(
-                        ZSTD_matchState_t* ms,
+                        ZSTD_MatchState_t* ms,
                         const BYTE* const ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
                         const U32 mls, const ZSTD_dictMode_e dictMode)
@@ -670,7 +689,7 @@ size_t ZSTD_HcFindBestMatch(
     U32 nbAttempts = 1U << cParams->searchLog;
     size_t ml=4-1;

-    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_MatchState_t* const dms = ms->dictMatchState;
     const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
                          ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
     const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
@@ -684,14 +703,15 @@ size_t ZSTD_HcFindBestMatch(
     }

     /* HC4 match finder */
-    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);

     for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
         size_t currentMl=0;
         if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
             const BYTE* const match = base + matchIndex;
             assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
-            if (match[ml] == ip[ml])   /* potentially better */
+            /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
+            if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3))   /* potentially better */
                 currentMl = ZSTD_count(ip, match, iLimit);
         } else {
             const BYTE* const match = dictBase + matchIndex;
@@ -703,7 +723,7 @@ size_t ZSTD_HcFindBestMatch(
         /* save best solution */
         if (currentMl > ml) {
             ml = currentMl;
-            *offsetPtr =
+            *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
             if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
         }

@@ -739,7 +759,7 @@ size_t ZSTD_HcFindBestMatch(
         if (currentMl > ml) {
             ml = currentMl;
             assert(curr > matchIndex + dmsIndexDelta);
-            *offsetPtr =
+            *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
             if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
         }

@@ -756,8 +776,6 @@ size_t ZSTD_HcFindBestMatch(
 * (SIMD) Row-based matchfinder
 ***********************************/
 /* Constants for row-based hash */
-#define ZSTD_ROW_HASH_TAG_OFFSET 16     /* byte offset of hashes in the match state's tagTable from the beginning of a row */
-#define ZSTD_ROW_HASH_TAG_BITS 8        /* nb bits to use for the tag */
 #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
 #define ZSTD_ROW_HASH_MAX_ENTRIES 64    /* absolute maximum number of entries per row, for all configurations */
@@ -769,73 +787,19 @@ typedef U64 ZSTD_VecMask;   /* Clarifies when we are interacting with a U64 repr
  * Starting from the LSB, returns the idx of the next non-zero bit.
  * Basically counting the nb of trailing zeroes.
  */
-MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
-    assert(val != 0);
-#   if defined(_MSC_VER) && defined(_WIN64)
-    if (val != 0) {
-        unsigned long r;
-        _BitScanForward64(&r, val);
-        return (U32)(r);
-    } else {
-        /* Should not reach this code path */
-        __assume(0);
-    }
-#   elif (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))
-    if (sizeof(size_t) == 4) {
-        U32 mostSignificantWord = (U32)(val >> 32);
-        U32 leastSignificantWord = (U32)val;
-        if (leastSignificantWord == 0) {
-            return 32 + (U32)__builtin_ctz(mostSignificantWord);
-        } else {
-            return (U32)__builtin_ctz(leastSignificantWord);
-        }
-    } else {
-        return (U32)__builtin_ctzll(val);
-    }
-#   else
-    /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count
-     * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer
-     */
-    val = ~val & (val - 1ULL); /* Lowest set bit mask */
-    val = val - ((val >> 1) & 0x5555555555555555);
-    val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
-    return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
-#   endif
-}
-
-/* ZSTD_rotateRight_*():
- * Rotates a bitfield to the right by "count" bits.
- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
- */
-FORCE_INLINE_TEMPLATE
-U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
-    assert(count < 64);
-    count &= 0x3F; /* for fickle pattern recognition */
-    return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
-}
-
-FORCE_INLINE_TEMPLATE
-U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
-    assert(count < 32);
-    count &= 0x1F; /* for fickle pattern recognition */
-    return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
-}
-
-FORCE_INLINE_TEMPLATE
-U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
-    assert(count < 16);
-    count &= 0x0F; /* for fickle pattern recognition */
-    return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
+MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
+    return ZSTD_countTrailingZeros64(val);
 }

 /* ZSTD_row_nextIndex():
  * Returns the next index to insert at within a tagTable row, and updates the "head"
- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
  */
 FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
-    U32 const next = (*tagRow - 1) & rowMask;
-    *tagRow = (BYTE)next;
-    return next;
+    U32 next = (*tagRow-1) & rowMask;
+    next += (next == 0) ? rowMask : 0; /* skip first position */
+    *tagRow = (BYTE)next;
+    return next;
 }

 /* ZSTD_isAligned():
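
This hunk replaces the hand-rolled trailing-zero ladder with ZSTD_countTrailingZeros64() from common/bits.h, and row insertion now reserves position 0. For orientation, a minimal sketch (illustrative C, not zstd code) of the set-bit iteration idiom the updated search loops rely on:

    #include <stdint.h>
    #include <stdio.h>

    /* ctz64() is a stand-in for ZSTD_countTrailingZeros64(); this helper
     * and main() are illustrative, not part of zstd. */
    static unsigned ctz64(uint64_t val) {
        unsigned n = 0;
        while ((val & 1) == 0) { val >>= 1; n++; }  /* val must be non-zero */
        return n;
    }

    int main(void) {
        uint64_t matches = 0x90; /* example mask: bits 4 and 7 set */
        /* Lowest-set-bit iteration; `matches &= (matches - 1)` clears the
         * bit just visited, the same idiom the new for-loops use. */
        for (; matches > 0; matches &= (matches - 1))
            printf("match at bit %u\n", ctz64(matches));
        return 0;
    }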
@@ -849,7 +813,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
 /* ZSTD_row_prefetch():
  * Performs prefetching for the hashTable and tagTable at a given row.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
     PREFETCH_L1(hashTable + relRow);
     if (rowLog >= 5) {
         PREFETCH_L1(hashTable + relRow + 16);
@@ -868,18 +832,20 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* ta
  * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
  * but not beyond iLimit.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_fillHashCache(ZSTD_MatchState_t* ms, const BYTE* base,
                                    U32 const rowLog, U32 const mls,
                                    U32 idx, const BYTE* const iLimit)
 {
     U32 const* const hashTable = ms->hashTable;
-    U16 const* const tagTable = ms->tagTable;
+    BYTE const* const tagTable = ms->tagTable;
     U32 const hashLog = ms->rowHashLog;
     U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
     U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);

     for (; idx < lim; ++idx) {
-        U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
         U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
         ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
@@ -894,12 +860,15 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
  * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
  * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
  */
-FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
-                                                  U16 const* tagTable, BYTE const* base,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
+                            BYTE const* tagTable, BYTE const* base,
                             U32 idx, U32 const hashLog,
-                            U32 const rowLog, U32 const mls)
+                            U32 const rowLog, U32 const mls,
+                            U64 const hashSalt)
 {
-    U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+    U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
     U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
     ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
     {   U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
@@ -911,28 +880,29 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTab
 /* ZSTD_row_update_internalImpl():
  * Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
-                                                        U32 updateStartIdx, U32 const updateEndIdx,
-                                                        U32 const mls, U32 const rowLog,
-                                                        U32 const rowMask, U32 const useCache)
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internalImpl(ZSTD_MatchState_t* ms,
+                                  U32 updateStartIdx, U32 const updateEndIdx,
+                                  U32 const mls, U32 const rowLog,
+                                  U32 const rowMask, U32 const useCache)
 {
     U32* const hashTable = ms->hashTable;
-    U16* const tagTable = ms->tagTable;
+    BYTE* const tagTable = ms->tagTable;
     U32 const hashLog = ms->rowHashLog;
     const BYTE* const base = ms->window.base;

     DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
     for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
-        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
-                                  : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
+                                  : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
         U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         U32* const row = hashTable + relRow;
-        BYTE* tagRow = (BYTE*)(tagTable + relRow);  /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
-                                                       Explicit cast allows us to get exact desired position within each row */
+        BYTE* tagRow = tagTable + relRow;
         U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);

-        assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
-        tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
+        assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
+        tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
         row[pos] = updateStartIdx;
     }
 }
@@ -941,9 +911,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
  * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
  * Skips sections of long matches as is necessary.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
-                                                    U32 const mls, U32 const rowLog,
-                                                    U32 const rowMask, U32 const useCache)
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internal(ZSTD_MatchState_t* ms, const BYTE* ip,
+                              U32 const mls, U32 const rowLog,
+                              U32 const rowMask, U32 const useCache)
 {
     U32 idx = ms->nextToUpdate;
     const BYTE* const base = ms->window.base;
@@ -974,13 +946,41 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const
  * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
  * processing.
  */
-void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
+void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip) {
     const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
     const U32 rowMask = (1u << rowLog) - 1;
     const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);

     DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
-    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */);
+    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */);
+}
+
+/* Returns the mask width of bits group of which will be set to 1. Given not all
+ * architectures have easy movemask instruction, this helps to iterate over
+ * groups of bits easier and faster.
+ */
+FORCE_INLINE_TEMPLATE U32
+ZSTD_row_matchMaskGroupWidth(const U32 rowEntries)
+{
+    assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
+    assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
+    (void)rowEntries;
+#if defined(ZSTD_ARCH_ARM_NEON)
+    /* NEON path only works for little endian */
+    if (!MEM_isLittleEndian()) {
+        return 1;
+    }
+    if (rowEntries == 16) {
+        return 4;
+    }
+    if (rowEntries == 32) {
+        return 2;
+    }
+    if (rowEntries == 64) {
+        return 1;
+    }
+#endif
+    return 1;
 }

 #if defined(ZSTD_ARCH_X86_SSE2)
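
ZSTD_row_matchMaskGroupWidth() exists because NEON lacks a cheap movemask, so one row entry can own a group of 2 or 4 bits in the match mask. A small sketch of how a grouped mask maps back to a row position; helper names are hypothetical, and integer division is what makes any bit inside a group resolve to the same entry:

    #include <stdint.h>
    #include <assert.h>

    /* Stand-in for ZSTD_countTrailingZeros64(); v must be non-zero. */
    static unsigned ctz(uint64_t v) {
        unsigned n = 0;
        while (!(v & 1)) { v >>= 1; n++; }
        return n;
    }

    /* Mirrors ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask. */
    static unsigned match_pos(uint64_t matches, unsigned headGrouped,
                              unsigned groupWidth, unsigned rowMask) {
        return ((headGrouped + ctz(matches)) / groupWidth) & rowMask;
    }

    int main(void) {
        /* 16-entry NEON rows use groupWidth 4, so entry p lands in the
         * 4-bit group starting at bit 4*p. */
        uint64_t mask = (uint64_t)1 << (4 * 5); /* tag matched at entry 5 */
        assert(match_pos(mask, /*headGrouped*/ 0, /*groupWidth*/ 4, /*rowMask*/ 15) == 5);
        return 0;
    }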
@@ -1003,71 +1003,82 @@ ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U
 }
 #endif

-/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches
- * the hash at the nth position in a row of the tagTable.
- * Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield
- * to match up with the actual layout of the entries within the hashTable */
+#if defined(ZSTD_ARCH_ARM_NEON)
+FORCE_INLINE_TEMPLATE ZSTD_VecMask
+ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped)
+{
+    assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
+    if (rowEntries == 16) {
+        /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits.
+         * After that groups of 4 bits represent the equalMask. We lower
+         * all bits except the highest in these groups by doing AND with
+         * 0x88 = 0b10001000.
+         */
+        const uint8x16_t chunk = vld1q_u8(src);
+        const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
+        const uint8x8_t res = vshrn_n_u16(equalMask, 4);
+        const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
+        return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull;
+    } else if (rowEntries == 32) {
+        /* Same idea as with rowEntries == 16 but doing AND with
+         * 0x55 = 0b01010101.
+         */
+        const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src);
+        const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
+        const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
+        const uint8x16_t dup = vdupq_n_u8(tag);
+        const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6);
+        const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6);
+        const uint8x8_t res = vsli_n_u8(t0, t1, 4);
+        const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ;
+        return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull;
+    } else { /* rowEntries == 64 */
+        const uint8x16x4_t chunk = vld4q_u8(src);
+        const uint8x16_t dup = vdupq_n_u8(tag);
+        const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
+        const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
+        const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
+        const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
+
+        const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
+        const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
+        const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
+        const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
+        const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
+        const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
+        return ZSTD_rotateRight_U64(matches, headGrouped);
+    }
+}
+#endif
+
+/* Returns a ZSTD_VecMask (U64) that has the nth group (determined by
+ * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag"
+ * matches the hash at the nth position in a row of the tagTable.
+ * Each row is a circular buffer beginning at the value of "headGrouped". So we
+ * must rotate the "matches" bitfield to match up with the actual layout of the
+ * entries within the hashTable */
 FORCE_INLINE_TEMPLATE ZSTD_VecMask
-ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries)
+ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
 {
-    const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
+    const BYTE* const src = tagRow;
     assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
     assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
+    assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);

 #if defined(ZSTD_ARCH_X86_SSE2)

-    return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, head);
+    return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped);

 #else /* SW or NEON-LE */

 # if defined(ZSTD_ARCH_ARM_NEON)
     /* This NEON path only works for little endian - otherwise use SWAR below */
     if (MEM_isLittleEndian()) {
-        if (rowEntries == 16) {
-            const uint8x16_t chunk = vld1q_u8(src);
-            const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
-            const uint16x8_t t0 = vshlq_n_u16(equalMask, 7);
-            const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14));
-            const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14));
-            const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28));
-            const U16 hi = (U16)vgetq_lane_u8(t3, 8);
-            const U16 lo = (U16)vgetq_lane_u8(t3, 0);
-            return ZSTD_rotateRight_U16((hi << 8) | lo, head);
-        } else if (rowEntries == 32) {
-            const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src);
-            const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
-            const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
-            const uint8x16_t equalMask0 = vceqq_u8(chunk0, vdupq_n_u8(tag));
-            const uint8x16_t equalMask1 = vceqq_u8(chunk1, vdupq_n_u8(tag));
-            const int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0));
-            const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1));
-            const uint8x8_t t0 = vreinterpret_u8_s8(pack0);
-            const uint8x8_t t1 = vreinterpret_u8_s8(pack1);
-            const uint8x8_t t2 = vsri_n_u8(t1, t0, 2);
-            const uint8x8x2_t t3 = vuzp_u8(t2, t0);
-            const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4);
-            const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0);
-            return ZSTD_rotateRight_U32(matches, head);
-        } else { /* rowEntries == 64 */
-            const uint8x16x4_t chunk = vld4q_u8(src);
-            const uint8x16_t dup = vdupq_n_u8(tag);
-            const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
-            const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
-            const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
-            const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
-
-            const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
-            const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
-            const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
-            const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
-            const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
-            const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
-            return ZSTD_rotateRight_U64(matches, head);
-        }
+        return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
     }
 # endif /* ZSTD_ARCH_ARM_NEON */
     /* SWAR */
-    {   const size_t chunkSize = sizeof(size_t);
+    {   const int chunkSize = sizeof(size_t);
         const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
         const size_t xFF = ~((size_t)0);
         const size_t x01 = xFF / 0xFF;
@@ -1100,11 +1111,11 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head,
         }
         matches = ~matches;
         if (rowEntries == 16) {
-            return ZSTD_rotateRight_U16((U16)matches, head);
+            return ZSTD_rotateRight_U16((U16)matches, headGrouped);
         } else if (rowEntries == 32) {
-            return ZSTD_rotateRight_U32((U32)matches, head);
+            return ZSTD_rotateRight_U32((U32)matches, headGrouped);
         } else {
-            return ZSTD_rotateRight_U64((U64)matches, head);
+            return ZSTD_rotateRight_U64((U64)matches, headGrouped);
         }
     }
 #endif
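
The SWAR fallback referenced above compares a whole word of tag bytes at once. A self-contained sketch of the word-wide equal-byte detector it builds on; the helper name is hypothetical, and the real loop is inlined in ZSTD_row_getMatchMask():

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Sets the high bit of every byte of the loaded word that equals `tag`,
     * using only word-wide arithmetic (no per-byte branches). */
    static uint64_t swar_eq_mask(const uint8_t* src, uint8_t tag) {
        const uint64_t x01 = 0x0101010101010101ull; /* xFF / 0xFF */
        const uint64_t x80 = x01 << 7;
        uint64_t chunk;
        memcpy(&chunk, src, sizeof(chunk));
        chunk ^= (uint64_t)tag * x01;   /* bytes equal to tag become 0x00 */
        /* zero-byte detector: the high bit survives only where the byte is 0 */
        return ~(((chunk | x80) - x01) | chunk) & x80;
    }

    int main(void) {
        const uint8_t row[8] = { 1, 7, 7, 3, 7, 0, 9, 7 };
        printf("%016llx\n", (unsigned long long)swar_eq_mask(row, 7));
        return 0;
    }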
@@ -1112,29 +1123,30 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head,

 /* The high-level approach of the SIMD row based match finder is as follows:
  * - Figure out where to insert the new entry:
- *      - Generate a hash
- *
+ *      - Generate a hash for current input position and split it into a one byte of tag and `rowHashLog` bits of index.
+ *          - The hash is salted by a value that changes on every context reset, so when the same table is used
+ *            we will avoid collisions that would otherwise slow us down by introducing phantom matches.
+ *      - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
  *        which row to insert into.
- *      - Determine the correct position within the row to insert the entry into. Each row of
- *        be considered as a circular buffer with a "head" index that resides in the tagTable
- *
- *
- *        The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
- *        for alignment/performance reasons, leaving some bytes unused.
- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
+ *      - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
+ *        be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes
+ *        per row).
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
  *   generate a bitfield that we can cycle through to check the collisions in the hash table.
  * - Pick the longest match.
+ * - Insert the tag into the equivalent row and position in the tagTable.
  */
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_RowFindBestMatch(
-                        ZSTD_matchState_t* ms,
+                        ZSTD_MatchState_t* ms,
                         const BYTE* const ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
                         const U32 mls, const ZSTD_dictMode_e dictMode,
                         const U32 rowLog)
 {
     U32* const hashTable = ms->hashTable;
-    U16* const tagTable = ms->tagTable;
+    BYTE* const tagTable = ms->tagTable;
     U32* const hashCache = ms->hashCache;
     const U32 hashLog = ms->rowHashLog;
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
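
The salting described in the updated comment perturbs every row hash with a per-reset value, so a reused table stops replaying the same phantom collisions. A hedged sketch of the idea only; the function below is illustrative and is not zstd's ZSTD_hashPtrSalted():

    #include <stdint.h>

    /* Illustrative salted hash: fold a per-context salt into the input
     * before mixing, so every context reset re-randomizes the table. */
    static uint32_t row_hash_salted(const uint8_t* p, int len, uint64_t salt) {
        uint64_t v = salt;                              /* salt up front */
        for (int i = 0; i < len; i++)
            v = (v ^ p[i]) * 0x9E3779B185EBCA87ull;     /* multiplicative mix */
        return (uint32_t)(v >> 32);
    }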
@@ -1152,11 +1164,14 @@ size_t ZSTD_RowFindBestMatch(
     const U32 rowEntries = (1U << rowLog);
     const U32 rowMask = rowEntries - 1;
     const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
+    const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
+    const U64 hashSalt = ms->hashSalt;
     U32 nbAttempts = 1U << cappedSearchLog;
     size_t ml=4-1;
+    U32 hash;

     /* DMS/DDS variables that may be referenced laster */
-    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_MatchState_t* const dms = ms->dictMatchState;

     /* Initialize the following variables to satisfy static analyzer */
     size_t ddsIdx = 0;
@@ -1177,7 +1192,7 @@ size_t ZSTD_RowFindBestMatch(
     if (dictMode == ZSTD_dictMatchState) {
         /* Prefetch DMS rows */
         U32* const dmsHashTable = dms->hashTable;
-        U16* const dmsTagTable = dms->tagTable;
+        BYTE* const dmsTagTable = dms->tagTable;
         U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
         U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
@@ -1187,23 +1202,34 @@ size_t ZSTD_RowFindBestMatch(
     }

     /* Update the hashTable and tagTable up to (but not including) ip */
-    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+    if (!ms->lazySkipping) {
+        ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+        hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
+    } else {
+        /* Stop inserting every position when in the lazy skipping mode.
+         * The hash cache is also not kept up to date in this mode.
+         */
+        hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
+        ms->nextToUpdate = curr;
+    }
+    ms->hashSaltEntropy += hash; /* collect salt entropy */
+
     {   /* Get the hash for ip, compute the appropriate row */
-        U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
         U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
         U32* const row = hashTable + relRow;
         BYTE* tagRow = (BYTE*)(tagTable + relRow);
-        U32 const head = *tagRow & rowMask;
+        U32 const headGrouped = (*tagRow & rowMask) * groupWidth;
         U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
         size_t numMatches = 0;
         size_t currMatch = 0;
-        ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
+        ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);

         /* Cycle through the matches and prefetch */
-        for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
-            U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
+        for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
+            U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
             U32 const matchIndex = row[matchPos];
+            if(matchPos == 0) continue;
             assert(numMatches < rowEntries);
             if (matchIndex < lowLimit)
                 break;
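
The new ms->lazySkipping branch stops paying full table-update costs in barely-compressible regions. A rough sketch of the skipping idea with an invented stride rule; zstd keys the real behavior off kLazySkippingStep == 8 and the search loops above:

    #include <stddef.h>

    /* Hypothetical predicate standing in for a real match search. */
    typedef int (*search_fn)(const unsigned char* ip);

    /* Minimal sketch: the stride grows as consecutive misses accumulate,
     * and a hit resets it, so runs of incompressible data get cheaper. */
    static size_t scan_with_lazy_skipping(const unsigned char* ip,
                                          const unsigned char* iend,
                                          search_fn search) {
        size_t found = 0;
        size_t misses = 0;
        while (ip < iend) {
            if (search(ip)) {
                found++;
                misses = 0;               /* reset the stride on a hit */
                ip += 1;
            } else {
                misses++;
                ip += 1 + (misses >> 3);  /* stride grows every 8 misses */
            }
        }
        return found;
    }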
@@ -1213,13 +1239,14 @@ size_t ZSTD_RowFindBestMatch(
                 PREFETCH_L1(dictBase + matchIndex);
             }
             matchBuffer[numMatches++] = matchIndex;
+            --nbAttempts;
         }

         /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
            in ZSTD_row_update_internal() at the next search. */
         {
             U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
-            tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
+            tagRow[pos] = (BYTE)tag;
             row[pos] = ms->nextToUpdate++;
         }

@@ -1233,7 +1260,8 @@ size_t ZSTD_RowFindBestMatch(
             if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
                 const BYTE* const match = base + matchIndex;
                 assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
-                if (match[ml] == ip[ml])   /* potentially better */
+                /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
+                if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3))   /* potentially better */
                     currentMl = ZSTD_count(ip, match, iLimit);
             } else {
                 const BYTE* const match = dictBase + matchIndex;
@@ -1245,7 +1273,7 @@ size_t ZSTD_RowFindBestMatch(
             /* Save best solution */
             if (currentMl > ml) {
                 ml = currentMl;
-                *offsetPtr =
+                *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
             }
         }
@@ -1263,19 +1291,21 @@ size_t ZSTD_RowFindBestMatch(
         const U32 dmsSize = (U32)(dmsEnd - dmsBase);
         const U32 dmsIndexDelta = dictLimit - dmsSize;

-        {   U32 const head = *dmsTagRow & rowMask;
+        {   U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth;
             U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
             size_t numMatches = 0;
             size_t currMatch = 0;
-            ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
+            ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);

-            for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
-                U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
+            for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
+                U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
                 U32 const matchIndex = dmsRow[matchPos];
+                if(matchPos == 0) continue;
                 if (matchIndex < dmsLowestIndex)
                     break;
                 PREFETCH_L1(dmsBase + matchIndex);
                 matchBuffer[numMatches++] = matchIndex;
+                --nbAttempts;
             }

             /* Return the longest match */
@@ -1294,7 +1324,7 @@ size_t ZSTD_RowFindBestMatch(
                 if (currentMl > ml) {
                     ml = currentMl;
                     assert(curr > matchIndex + dmsIndexDelta);
-                    *offsetPtr =
+                    *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
                     if (ip+currentMl == iLimit) break;
                 }
             }
@@ -1304,17 +1334,13 @@ size_t ZSTD_RowFindBestMatch(
 }


-typedef size_t (*searchMax_f)(
-                    ZSTD_matchState_t* ms,
-                    const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
-
 /**
- *
- *
- *
+ * Generate search functions templated on (dictMode, mls, rowLog).
+ * These functions are outlined for code size & compilation time.
+ * ZSTD_searchMax() dispatches to the correct implementation function.
  *
  * TODO: The start of the search function involves loading and calculating a
- * bunch of constants from the
+ * bunch of constants from the ZSTD_MatchState_t. These computations could be
  * done in an initialization function, and saved somewhere in the match state.
  * Then we could pass a pointer to the saved state instead of the match state,
  * and avoid duplicate computations.
@@ -1329,39 +1355,36 @@ typedef size_t (*searchMax_f)(
  * the single segment loop. It should go in searchMax instead of its own
  * function to avoid having multiple virtual function calls per search.
  */
-typedef struct {
-    searchMax_f searchMax;
-} ZSTD_LazyVTable;

-#define
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+#define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls
+#define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls
+#define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog
+
+#define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE
+
+#define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \
+    ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \
+            ZSTD_MatchState_t* ms, \
+            const BYTE* ip, const BYTE* const iLimit, \
+            size_t* offBasePtr) \
+    { \
+        assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
+        return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \
+    } \
+
+#define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \
+    ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \
+            ZSTD_MatchState_t* ms, \
             const BYTE* ip, const BYTE* const iLimit, \
             size_t* offsetPtr) \
     { \
         assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
         return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
     } \
-    static const ZSTD_LazyVTable ZSTD_HcVTable_##dictMode##_##mls = { \
-        ZSTD_HcFindBestMatch_##dictMode##_##mls \
-    };

-#define
-
-
+#define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \
+    ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \
+            ZSTD_MatchState_t* ms, \
             const BYTE* ip, const BYTE* const iLimit, \
             size_t* offsetPtr) \
     { \
@@ -1369,9 +1392,6 @@ typedef struct {
         assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \
         return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \
     } \
-    static const ZSTD_LazyVTable ZSTD_RowVTable_##dictMode##_##mls##_##rowLog = { \
-        ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog \
-    };

 #define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \
     X(dictMode, mls, 4) \
@@ -1394,87 +1414,107 @@ typedef struct {
     X(__VA_ARGS__, dictMatchState) \
     X(__VA_ARGS__, dedicatedDictSearch)

-/* Generate
-ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG,
-/* Generate
-ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS,
-/* Generate
-ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS,
-
-#define GEN_ZSTD_BT_VTABLE_ARRAY(dictMode) \
-    { \
-        &ZSTD_BtVTable_##dictMode##_4, \
-        &ZSTD_BtVTable_##dictMode##_5, \
-        &ZSTD_BtVTable_##dictMode##_6 \
-    }
-
-#define GEN_ZSTD_HC_VTABLE_ARRAY(dictMode) \
-    { \
-        &ZSTD_HcVTable_##dictMode##_4, \
-        &ZSTD_HcVTable_##dictMode##_5, \
-        &ZSTD_HcVTable_##dictMode##_6 \
-    }
-
-#define GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, mls) \
-    { \
-        &ZSTD_RowVTable_##dictMode##_##mls##_4, \
-        &ZSTD_RowVTable_##dictMode##_##mls##_5, \
-        &ZSTD_RowVTable_##dictMode##_##mls##_6 \
-    }
+/* Generate row search fns for each combination of (dictMode, mls, rowLog) */
+ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN)
+/* Generate binary Tree search fns for each combination of (dictMode, mls) */
+ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN)
+/* Generate hash chain search fns for each combination of (dictMode, mls) */
+ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN)

-
-    { \
-        GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 4), \
-        GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 5), \
-        GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 6) \
-    }
+typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;

-#define
-
-
-
-
-
+#define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls) \
+    case mls: \
+        return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
+#define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls) \
+    case mls: \
+        return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
+#define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog) \
+    case rowLog: \
+        return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr);
+
+#define ZSTD_SWITCH_MLS(X, dictMode) \
+    switch (mls) { \
+        ZSTD_FOR_EACH_MLS(X, dictMode) \
     }

-
-
-
-
+#define ZSTD_SWITCH_ROWLOG(dictMode, mls) \
+    case mls: \
+        switch (rowLog) { \
+            ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \
+        } \
+        ZSTD_UNREACHABLE; \
+        break;
+
+#define ZSTD_SWITCH_SEARCH_METHOD(dictMode) \
+    switch (searchMethod) { \
+        case search_hashChain: \
+            ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \
+            break; \
+        case search_binaryTree: \
+            ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \
+            break; \
+        case search_rowHash: \
+            ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \
+            break; \
+    } \
+    ZSTD_UNREACHABLE;

 /**
- *
- *
- *
- *
+ * Searches for the longest match at @p ip.
+ * Dispatches to the correct implementation function based on the
+ * (searchMethod, dictMode, mls, rowLog). We use switch statements
+ * here instead of using an indirect function call through a function
+ * pointer because after Spectre and Meltdown mitigations, indirect
+ * function calls can be very costly, especially in the kernel.
+ *
+ * NOTE: dictMode and searchMethod should be templated, so those switch
+ * statements should be optimized out. Only the mls & rowLog switches
+ * should be left.
+ *
+ * @param ms The match state.
+ * @param ip The position to search at.
+ * @param iend The end of the input data.
+ * @param[out] offsetPtr Stores the match offset into this pointer.
+ * @param mls The minimum search length, in the range [4, 6].
+ * @param rowLog The row log (if applicable), in the range [4, 6].
+ * @param searchMethod The search method to use (templated).
+ * @param dictMode The dictMode (templated).
+ *
+ * @returns The length of the longest match found, or < mls if no match is found.
+ * If a match is found its offset is stored in @p offsetPtr.
  */
-
-
-
+FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
+    ZSTD_MatchState_t* ms,
+    const BYTE* ip,
+    const BYTE* iend,
+    size_t* offsetPtr,
+    U32 const mls,
+    U32 const rowLog,
+    searchMethod_e const searchMethod,
+    ZSTD_dictMode_e const dictMode)
 {
-
-
-
-
-
-
-
-
-    switch (searchMethod) {
-    case search_hashChain:
-        return hcVTables[dictMode][mls - 4];
-    case search_binaryTree:
-        return btVTables[dictMode][mls - 4];
-    case search_rowHash:
-        return rowVTables[dictMode][mls - 4][rowLog - 4];
-    default:
-        return NULL;
+    if (dictMode == ZSTD_noDict) {
+        ZSTD_SWITCH_SEARCH_METHOD(noDict)
+    } else if (dictMode == ZSTD_extDict) {
+        ZSTD_SWITCH_SEARCH_METHOD(extDict)
+    } else if (dictMode == ZSTD_dictMatchState) {
+        ZSTD_SWITCH_SEARCH_METHOD(dictMatchState)
+    } else if (dictMode == ZSTD_dedicatedDictSearch) {
+        ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch)
     }
+    ZSTD_UNREACHABLE;
+    return 0;
 }

-
-
-
+/* *******************************
+*  Common parser - lazy strategy
+*********************************/
+
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_lazy_generic(
+                        ZSTD_MatchState_t* ms, SeqStore_t* seqStore,
                         U32 rep[ZSTD_REP_NUM],
                         const void* src, size_t srcSize,
                         const searchMethod_e searchMethod, const U32 depth,
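
The hunk above replaces the old vtable of function pointers with macro-generated specializations plus nested switches, citing the cost of indirect calls under Spectre/Meltdown mitigations. A self-contained sketch of that pattern, with invented names:

    #include <stdio.h>
    #include <stddef.h>

    #define SEARCH_FN(mls) search_mls_##mls   /* token-pasted specialization name */

    #define GEN_SEARCH_FN(mls)                          \
        static size_t SEARCH_FN(mls)(const char* ip)    \
        {   /* mls is a compile-time constant here */   \
            return (size_t)mls + (size_t)(ip[0] != 0);  \
        }

    GEN_SEARCH_FN(4)
    GEN_SEARCH_FN(5)
    GEN_SEARCH_FN(6)

    /* Direct calls via switch: no function pointer, no indirect branch. */
    static size_t search_dispatch(unsigned mls, const char* ip)
    {
        switch (mls) {
            case 4: return SEARCH_FN(4)(ip);
            case 5: return SEARCH_FN(5)(ip);
            default: return SEARCH_FN(6)(ip);
        }
    }

    int main(void)
    {
        printf("%zu\n", search_dispatch(5, "abc"));
        return 0;
    }

When the caller is itself specialized (as with FORCE_INLINE_TEMPLATE above), a constant `mls` lets the compiler fold the switch away entirely.
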
@@ -1488,14 +1528,16 @@ ZSTD_compressBlock_lazy_generic(
     const BYTE* const base = ms->window.base;
     const U32 prefixLowestIndex = ms->window.dictLimit;
     const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
+    const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);

-
-    U32
+    U32 offset_1 = rep[0], offset_2 = rep[1];
+    U32 offsetSaved1 = 0, offsetSaved2 = 0;

     const int isDMS = dictMode == ZSTD_dictMatchState;
     const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
     const int isDxS = isDMS || isDDS;
-    const
+    const ZSTD_MatchState_t* const dms = ms->dictMatchState;
     const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
     const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
     const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
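
`mls` and `rowLog` are now clamped once at function entry. The asserts in the generated search functions (`MAX(4, MIN(6, ms->cParams.minMatch)) == mls`) indicate that `BOUNDED(lo, v, hi)` is a max-of-min clamp; a tiny sketch with stand-in macro names:

    #include <stdio.h>

    #define DEMO_MIN(a, b) ((a) < (b) ? (a) : (b))
    #define DEMO_MAX(a, b) ((a) > (b) ? (a) : (b))
    #define DEMO_BOUNDED(lo, v, hi) DEMO_MAX((lo), DEMO_MIN((v), (hi)))

    int main(void)
    {
        printf("%d %d %d\n",
               DEMO_BOUNDED(4, 3, 6),    /* 4: clamped up */
               DEMO_BOUNDED(4, 5, 6),    /* 5: already in range */
               DEMO_BOUNDED(4, 9, 6));   /* 6: clamped down */
        return 0;
    }
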
@@ -1505,16 +1547,14 @@ ZSTD_compressBlock_lazy_generic(
                      0;
     const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));

-    assert(searchMax != NULL);
-
     DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
     ip += (dictAndPrefixLength == 0);
     if (dictMode == ZSTD_noDict) {
         U32 const curr = (U32)(ip - base);
         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
         U32 const maxRep = curr - windowLow;
-        if (offset_2 > maxRep)
-        if (offset_1 > maxRep)
+        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
     }
     if (isDxS) {
         /* dictMatchState repCode checks don't currently handle repCode == 0
@@ -1523,11 +1563,11 @@ ZSTD_compressBlock_lazy_generic(
         assert(offset_2 <= dictAndPrefixLength);
     }

+    /* Reset the lazy skipping state */
+    ms->lazySkipping = 0;
+
     if (searchMethod == search_rowHash) {
-
-        ZSTD_row_fillHashCache(ms, base, rowLog,
-                            MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
-                            ms->nextToUpdate, ilimit);
+        ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
     }

     /* Match Loop */
@@ -1539,7 +1579,7 @@ ZSTD_compressBlock_lazy_generic(
 #endif
     while (ip < ilimit) {
         size_t matchLength=0;
-        size_t
+        size_t offBase = REPCODE1_TO_OFFBASE;
         const BYTE* start=ip+1;
         DEBUGLOG(7, "search baseline (depth 0)");

@@ -1550,7 +1590,7 @@ ZSTD_compressBlock_lazy_generic(
                                 && repIndex < prefixLowestIndex) ?
                                    dictBase + (repIndex - dictIndexDelta) :
                                    base + repIndex;
-            if (((
+            if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
                 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
                 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
@@ -1564,14 +1604,23 @@ ZSTD_compressBlock_lazy_generic(
         }

         /* first search (depth 0) */
-        {   size_t
-            size_t const ml2 =
+        {   size_t offbaseFound = 999999999;
+            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode);
             if (ml2 > matchLength)
-                matchLength = ml2, start = ip,
+                matchLength = ml2, start = ip, offBase = offbaseFound;
         }

         if (matchLength < 4) {
-
+            size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */;
+            ip += step;
+            /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+             * In this mode we stop inserting every position into our tables, and only insert
+             * positions that we search, which is one in step positions.
+             * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+             * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+             * triggered once we've gone 2KB without finding any matches.
+             */
+            ms->lazySkipping = step > kLazySkippingStep;
             continue;
         }
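
When no match of length >= 4 is found, the parser now strides ahead and may enter the lazy-skipping mode described in the comment above. A sketch of the stride computation, with assumed stand-in constants (zstd's actual `kSearchStrength`/`kLazySkippingStep` values may differ):

    #include <stdio.h>
    #include <stddef.h>

    #define DEMO_SEARCH_STRENGTH    8   /* assumed shift amount */
    #define DEMO_LAZY_SKIPPING_STEP 8   /* assumed cutoff */

    int main(void)
    {
        size_t const gaps[] = { 0, 100, 2048, 8192 };   /* bytes since last match */
        for (int i = 0; i < 4; i++) {
            size_t const step = (gaps[i] >> DEMO_SEARCH_STRENGTH) + 1;
            printf("gap %5zu -> step %3zu, lazySkipping=%d\n",
                   gaps[i], step, step > DEMO_LAZY_SKIPPING_STEP);
        }
        return 0;
    }

With these constants the mode engages once roughly 2KB pass without a match, matching the comment in the diff.
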
@@ -1581,34 +1630,34 @@ ZSTD_compressBlock_lazy_generic(
             DEBUGLOG(7, "search depth 1");
             ip ++;
             if ( (dictMode == ZSTD_noDict)
-              && (
+              && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                 int const gain2 = (int)(mlRep * 3);
-                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)
+                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
                 if ((mlRep >= 4) && (gain2 > gain1))
-                    matchLength = mlRep,
+                    matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
             }
             if (isDxS) {
                 const U32 repIndex = (U32)(ip - base) - offset_1;
                 const BYTE* repMatch = repIndex < prefixLowestIndex ?
                                dictBase + (repIndex - dictIndexDelta) :
                                base + repIndex;
-                if (((
+                if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
                     && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                     const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                     size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                     int const gain2 = (int)(mlRep * 3);
-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
                     if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep,
+                        matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
                 }
             }
-            {   size_t
-                size_t const ml2 =
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+            {   size_t ofbCandidate=999999999;
+                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
-                    matchLength = ml2,
+                    matchLength = ml2, offBase = ofbCandidate, start = ip;
                     continue; /* search a better one */
             }   }

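
The depth-1 arbitration above scores each candidate as roughly length x weight minus the bit-cost of its offset (`ZSTD_highbit32` behaves like floor(log2)), so a longer match only wins if it pays for a more distant offset. A standalone illustration with made-up numbers:

    #include <stdio.h>

    /* Index of the highest set bit (v must be non-zero) ~ floor(log2(v)). */
    static unsigned highbit32(unsigned v)
    {
        unsigned r = 0;
        while (v >>= 1) r++;
        return r;
    }

    int main(void)
    {
        unsigned const curLen = 6,  curOff = 16;      /* current best match */
        unsigned const newLen = 7,  newOff = 65536;   /* longer but far away */
        int const gainCur = (int)(curLen * 4 - highbit32(curOff) + 4);   /* +4: incumbent bonus */
        int const gainNew = (int)(newLen * 4 - highbit32(newOff));
        printf("current %d vs candidate %d -> %s\n",
               gainCur, gainNew, (gainNew > gainCur) ? "take candidate" : "keep current");
        return 0;
    }
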
@@ -1617,34 +1666,34 @@ ZSTD_compressBlock_lazy_generic(
             DEBUGLOG(7, "search depth 2");
             ip ++;
             if ( (dictMode == ZSTD_noDict)
-              && (
+              && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                 int const gain2 = (int)(mlRep * 4);
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
                 if ((mlRep >= 4) && (gain2 > gain1))
-                    matchLength = mlRep,
+                    matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
             }
             if (isDxS) {
                 const U32 repIndex = (U32)(ip - base) - offset_1;
                 const BYTE* repMatch = repIndex < prefixLowestIndex ?
                                dictBase + (repIndex - dictIndexDelta) :
                                base + repIndex;
-                if (((
+                if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
                     && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                     const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                     size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                     int const gain2 = (int)(mlRep * 4);
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
                     if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep,
+                        matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
                 }
             }
-            {   size_t
-                size_t const ml2 =
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+            {   size_t ofbCandidate=999999999;
+                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
-                    matchLength = ml2,
+                    matchLength = ml2, offBase = ofbCandidate, start = ip;
                     continue;
             }   }   }
         break;  /* nothing found : store previous solution */
@@ -1655,26 +1704,33 @@ ZSTD_compressBlock_lazy_generic(
          * notably if `value` is unsigned, resulting in a large positive `-value`.
          */
         /* catch up */
-        if (
+        if (OFFBASE_IS_OFFSET(offBase)) {
             if (dictMode == ZSTD_noDict) {
-                while ( ((start > anchor) & (start -
-                     && (start[-1] == (start-
+                while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest))
+                     && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) )  /* only search for offset within prefix */
                     { start--; matchLength++; }
             }
             if (isDxS) {
-                U32 const matchIndex = (U32)((size_t)(start-base) -
+                U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
                 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
                 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
                 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
             }
-            offset_2 = offset_1; offset_1 = (U32)
+            offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
         }
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = (size_t)(start - anchor);
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
             anchor = ip = start + matchLength;
         }
+        if (ms->lazySkipping) {
+            /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
+            if (searchMethod == search_rowHash) {
+                ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+            }
+            ms->lazySkipping = 0;
+        }

         /* check immediate repcode */
         if (isDxS) {
@@ -1684,12 +1740,12 @@ _storeSequence:
                 const BYTE* repMatch = repIndex < prefixLowestIndex ?
                         dictBase - dictIndexDelta + repIndex :
                         base + repIndex;
-                if ( ((
+                if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
-
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend,
+                    offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
                     ip += matchLength;
                     anchor = ip;
                     continue;
@@ -1703,168 +1759,183 @@ _storeSequence:
           && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
             /* store sequence */
             matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
-
-            ZSTD_storeSeq(seqStore, 0, anchor, iend,
+            offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;   /* swap repcodes */
+            ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
             ip += matchLength;
             anchor = ip;
             continue;   /* faster when present ... (?) */
     }   }   }

-    /*
-
-
+    /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
+     * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
+    offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved1;
+    rep[1] = offset_2 ? offset_2 : offsetSaved2;

     /* Return the last literals size */
     return (size_t)(iend - anchor);
 }
+#endif /* build exclusions */


-
-
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
 }

-size_t
-
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
 }

-size_t
-
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
 }

-size_t
-
+size_t ZSTD_compressBlock_greedy_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
 }

-size_t
-
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
 }

-size_t
-
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
 }
+#endif

-
-
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
 }

-size_t
-
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
 }

-
-
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
 }

-size_t
-
+size_t ZSTD_compressBlock_lazy_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
 }

-size_t
-
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
 }

-
-
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
 }
+#endif

-
-
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
 }

-size_t
-
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
 }

-size_t
-
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
 }

-size_t
-
+size_t ZSTD_compressBlock_lazy2_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
 }

-size_t
-
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
 }

-
 size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
-
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
 }
+#endif

-
-
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
 }

-size_t
-
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
 }
+#endif

+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_lazy_extDict_generic(
-
+                        ZSTD_MatchState_t* ms, SeqStore_t* seqStore,
                         U32 rep[ZSTD_REP_NUM],
                         const void* src, size_t srcSize,
                         const searchMethod_e searchMethod, const U32 depth)
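
The hunk above wraps each strategy family in `#ifndef ZSTD_EXCLUDE_*_BLOCK_COMPRESSOR` guards so individual block compressors can be compiled out. A minimal sketch of the pattern (the demo macro and function are invented):

    #include <stdio.h>

    /* #define DEMO_EXCLUDE_GREEDY */   /* uncomment to compile the greedy path out */

    #ifndef DEMO_EXCLUDE_GREEDY
    static const char* demo_greedy(void) { return "greedy compiled in"; }
    #endif

    int main(void)
    {
    #ifndef DEMO_EXCLUDE_GREEDY
        puts(demo_greedy());
    #else
        puts("greedy excluded at build time");
    #endif
        return 0;
    }
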
@@ -1881,19 +1952,20 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const dictStart = dictBase + ms->window.lowLimit;
     const U32 windowLog = ms->cParams.windowLog;
-    const U32
+    const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
+    const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);

-    searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, ZSTD_extDict)->searchMax;
     U32 offset_1 = rep[0], offset_2 = rep[1];

     DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);

+    /* Reset the lazy skipping state */
+    ms->lazySkipping = 0;
+
     /* init */
     ip += (ip == prefixStart);
     if (searchMethod == search_rowHash) {
-        ZSTD_row_fillHashCache(ms, base, rowLog,
-                            MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
-                            ms->nextToUpdate, ilimit);
+        ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
     }

     /* Match Loop */
@@ -1905,7 +1977,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 #endif
     while (ip < ilimit) {
         size_t matchLength=0;
-        size_t
+        size_t offBase = REPCODE1_TO_OFFBASE;
         const BYTE* start=ip+1;
         U32 curr = (U32)(ip-base);

@@ -1914,7 +1986,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             const U32 repIndex = (U32)(curr+1 - offset_1);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if ( ((
+            if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
               & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */
             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
@@ -1924,14 +1996,23 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
         }   }

         /* first search (depth 0) */
-        {   size_t
-            size_t const ml2 =
+        {   size_t ofbCandidate = 999999999;
+            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
             if (ml2 > matchLength)
-                matchLength = ml2, start = ip,
+                matchLength = ml2, start = ip, offBase = ofbCandidate;
         }

         if (matchLength < 4) {
-
+            size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
+            ip += step + 1;   /* jump faster over incompressible sections */
+            /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+             * In this mode we stop inserting every position into our tables, and only insert
+             * positions that we search, which is one in step positions.
+             * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+             * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+             * triggered once we've gone 2KB without finding any matches.
+             */
+            ms->lazySkipping = step > kLazySkippingStep;
             continue;
         }
@@ -1941,30 +2022,30 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             ip ++;
             curr++;
             /* check repCode */
-            if (
+            if (offBase) {
                 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
                 const U32 repIndex = (U32)(curr - offset_1);
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
-                if ( ((
+                if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
                   & (offset_1 <= curr - windowLow) )  /* equivalent to `curr > repIndex >= windowLow` */
                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                     size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                     int const gain2 = (int)(repLength * 3);
-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
                     if ((repLength >= 4) && (gain2 > gain1))
-                        matchLength = repLength,
+                        matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
             }   }

             /* search match, depth 1 */
-            {   size_t
-                size_t const ml2 =
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+            {   size_t ofbCandidate = 999999999;
+                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
-                    matchLength = ml2,
+                    matchLength = ml2, offBase = ofbCandidate, start = ip;
                     continue;   /* search a better one */
             }   }

@@ -1973,50 +2054,57 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             ip ++;
             curr++;
             /* check repCode */
-            if (
+            if (offBase) {
                 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
                 const U32 repIndex = (U32)(curr - offset_1);
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
-                if ( ((
+                if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
                   & (offset_1 <= curr - windowLow) )  /* equivalent to `curr > repIndex >= windowLow` */
                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                     size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                     int const gain2 = (int)(repLength * 4);
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
                     if ((repLength >= 4) && (gain2 > gain1))
-                        matchLength = repLength,
+                        matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
             }   }

             /* search match, depth 2 */
-            {   size_t
-                size_t const ml2 =
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+            {   size_t ofbCandidate = 999999999;
+                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
-                    matchLength = ml2,
+                    matchLength = ml2, offBase = ofbCandidate, start = ip;
                     continue;
             }   }   }
         break;  /* nothing found : store previous solution */
         }

         /* catch up */
-        if (
-            U32 const matchIndex = (U32)((size_t)(start-base) -
+        if (OFFBASE_IS_OFFSET(offBase)) {
+            U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
-            offset_2 = offset_1; offset_1 = (U32)
+            offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
         }

         /* store sequence */
 _storeSequence:
         {   size_t const litLength = (size_t)(start - anchor);
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
             anchor = ip = start + matchLength;
         }
+        if (ms->lazySkipping) {
+            /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
+            if (searchMethod == search_rowHash) {
+                ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+            }
+            ms->lazySkipping = 0;
+        }

         /* check immediate repcode */
         while (ip <= ilimit) {
@@ -2025,14 +2113,14 @@ _storeSequence:
             const U32 repIndex = repCurrent - offset_2;
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if ( ((
+            if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
              & (offset_2 <= repCurrent - windowLow) )  /* equivalent to `curr > repIndex >= windowLow` */
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
-
-                ZSTD_storeSeq(seqStore, 0, anchor, iend,
+                offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;   /* swap offset history */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
                 ip += matchLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
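
The repcode path above promotes the matching second repcode to the front of the offset history via a three-assignment swap. Standalone illustration, following the diff's variable naming:

    #include <stdio.h>

    typedef unsigned U32;

    int main(void)
    {
        U32 offset_1 = 100, offset_2 = 250;   /* most recent offset first */
        /* Repcode 2 just matched: promote offset_2 to the front. */
        U32 const offBase = offset_2;
        offset_2 = offset_1;
        offset_1 = offBase;
        printf("offset_1=%u offset_2=%u\n", offset_1, offset_2);   /* 250 100 */
        return 0;
    }
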
@@ -2047,58 +2135,65 @@ _storeSequence:
     /* Return the last literals size */
     return (size_t)(iend - anchor);
 }
+#endif /* build exclusions */

-
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
 size_t ZSTD_compressBlock_greedy_extDict(
-
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
 }

-size_t
-
+size_t ZSTD_compressBlock_greedy_extDict_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
-
 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
 }
+#endif

-
-
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)

 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain,
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
 }

-size_t
-
+size_t ZSTD_compressBlock_lazy_extDict_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)

 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
 }
+#endif

-
-
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
+
 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
 }

-size_t
-
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
-
 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash,
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
 }
+#endif

-
-
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)

 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
 }
+#endif
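
Taken together, the wrappers in this final hunk imply a fixed (searchMethod, depth) pairing per strategy: greedy/lazy/lazy2 run at depth 0/1/2 over hash-chain or row-hash search, and btlazy2 at depth 2 over the binary tree. A sketch encoding that mapping (names invented):

    #include <stdio.h>

    typedef enum { demo_hashChain, demo_binaryTree, demo_rowHash } demo_method_e;

    struct demo_cfg { const char* name; demo_method_e method; unsigned depth; };

    int main(void)
    {
        struct demo_cfg const table[] = {
            { "greedy",     demo_hashChain,  0 },
            { "greedy_row", demo_rowHash,    0 },
            { "lazy",       demo_hashChain,  1 },
            { "lazy2",      demo_hashChain,  2 },
            { "btlazy2",    demo_binaryTree, 2 },
        };
        for (unsigned i = 0; i < sizeof(table)/sizeof(table[0]); i++)
            printf("%-10s method=%d depth=%u\n",
                   table[i].name, table[i].method, table[i].depth);
        return 0;
    }
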