multi_compress 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -3
- data/GET_STARTED.md +3 -3
- data/README.md +75 -66
- data/THIRD_PARTY_NOTICES.md +24 -0
- data/ext/multi_compress/brotli_dec_static_init.c +3 -0
- data/ext/multi_compress/brotli_enc_static_init.c +3 -0
- data/ext/multi_compress/extconf.rb +22 -1
- data/ext/multi_compress/vendor/.vendored +2 -2
- data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
- data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
- data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
- data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
- data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
- data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
- data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
- data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
- data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
- data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
- data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
- data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
- data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
- data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
- data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
- data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
- data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
- data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
- data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
- data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
- data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
- data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
- data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
- data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
- data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
- data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
- data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
- data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
- data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
- data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
- data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
- data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
- data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
- data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
- data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
- data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
- data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
- data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
- data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
- data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
- data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
- data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
- data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
- data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
- data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
- data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
- data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
- data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
- data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
- data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
- data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
- data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
- data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
- data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
- data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
- data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
- data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
- data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
- data/ext/multi_compress/vendor/zstd/COPYING +339 -0
- data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
- data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
- data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
- data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
- data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
- data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
- data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
- data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
- data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
- data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
- data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
- data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
- data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
- data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
- data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
- data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
- data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
- data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
- data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
- data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
- data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
- data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
- data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
- data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
- data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
- data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
- data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
- data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
- data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
- data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
- data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
- data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
- data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
- data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
- data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
- data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
- data/lib/multi_compress/version.rb +1 -1
- metadata +29 -2
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -11,8 +11,46 @@
|
|
|
11
11
|
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
|
|
12
12
|
#include "zstd_fast.h"
|
|
13
13
|
|
|
14
|
+
static
|
|
15
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
|
16
|
+
void ZSTD_fillHashTableForCDict(ZSTD_MatchState_t* ms,
|
|
17
|
+
const void* const end,
|
|
18
|
+
ZSTD_dictTableLoadMethod_e dtlm)
|
|
19
|
+
{
|
|
20
|
+
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
21
|
+
U32* const hashTable = ms->hashTable;
|
|
22
|
+
U32 const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
|
23
|
+
U32 const mls = cParams->minMatch;
|
|
24
|
+
const BYTE* const base = ms->window.base;
|
|
25
|
+
const BYTE* ip = base + ms->nextToUpdate;
|
|
26
|
+
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
|
|
27
|
+
const U32 fastHashFillStep = 3;
|
|
28
|
+
|
|
29
|
+
/* Currently, we always use ZSTD_dtlm_full for filling CDict tables.
|
|
30
|
+
* Feel free to remove this assert if there's a good reason! */
|
|
31
|
+
assert(dtlm == ZSTD_dtlm_full);
|
|
32
|
+
|
|
33
|
+
/* Always insert every fastHashFillStep position into the hash table.
|
|
34
|
+
* Insert the other positions if their hash entry is empty.
|
|
35
|
+
*/
|
|
36
|
+
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
|
|
37
|
+
U32 const curr = (U32)(ip - base);
|
|
38
|
+
{ size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls);
|
|
39
|
+
ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr); }
|
|
40
|
+
|
|
41
|
+
if (dtlm == ZSTD_dtlm_fast) continue;
|
|
42
|
+
/* Only load extra positions for ZSTD_dtlm_full */
|
|
43
|
+
{ U32 p;
|
|
44
|
+
for (p = 1; p < fastHashFillStep; ++p) {
|
|
45
|
+
size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
|
|
46
|
+
if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */
|
|
47
|
+
ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
|
|
48
|
+
} } } }
|
|
49
|
+
}
|
|
14
50
|
|
|
15
|
-
|
|
51
|
+
static
|
|
52
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
|
53
|
+
void ZSTD_fillHashTableForCCtx(ZSTD_MatchState_t* ms,
|
|
16
54
|
const void* const end,
|
|
17
55
|
ZSTD_dictTableLoadMethod_e dtlm)
|
|
18
56
|
{
|
|
@@ -25,6 +63,10 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
|
|
25
63
|
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
|
|
26
64
|
const U32 fastHashFillStep = 3;
|
|
27
65
|
|
|
66
|
+
/* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables.
|
|
67
|
+
* Feel free to remove this assert if there's a good reason! */
|
|
68
|
+
assert(dtlm == ZSTD_dtlm_fast);
|
|
69
|
+
|
|
28
70
|
/* Always insert every fastHashFillStep position into the hash table.
|
|
29
71
|
* Insert the other positions if their hash entry is empty.
|
|
30
72
|
*/
|
|
@@ -42,6 +84,62 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
|
|
42
84
|
} } } }
|
|
43
85
|
}
|
|
44
86
|
|
|
87
|
+
void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
|
|
88
|
+
const void* const end,
|
|
89
|
+
ZSTD_dictTableLoadMethod_e dtlm,
|
|
90
|
+
ZSTD_tableFillPurpose_e tfp)
|
|
91
|
+
{
|
|
92
|
+
if (tfp == ZSTD_tfp_forCDict) {
|
|
93
|
+
ZSTD_fillHashTableForCDict(ms, end, dtlm);
|
|
94
|
+
} else {
|
|
95
|
+
ZSTD_fillHashTableForCCtx(ms, end, dtlm);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
typedef int (*ZSTD_match4Found) (const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit);
|
|
101
|
+
|
|
102
|
+
static int
|
|
103
|
+
ZSTD_match4Found_cmov(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
|
|
104
|
+
{
|
|
105
|
+
/* Array of ~random data, should have low probability of matching data.
|
|
106
|
+
* Load from here if the index is invalid.
|
|
107
|
+
* Used to avoid unpredictable branches. */
|
|
108
|
+
static const BYTE dummy[] = {0x12,0x34,0x56,0x78};
|
|
109
|
+
|
|
110
|
+
/* currentIdx >= lowLimit is a (somewhat) unpredictable branch.
|
|
111
|
+
* However expression below compiles into conditional move.
|
|
112
|
+
*/
|
|
113
|
+
const BYTE* mvalAddr = ZSTD_selectAddr(matchIdx, idxLowLimit, matchAddress, dummy);
|
|
114
|
+
/* Note: this used to be written as : return test1 && test2;
|
|
115
|
+
* Unfortunately, once inlined, these tests become branches,
|
|
116
|
+
* in which case it becomes critical that they are executed in the right order (test1 then test2).
|
|
117
|
+
* So we have to write these tests in a specific manner to ensure their ordering.
|
|
118
|
+
*/
|
|
119
|
+
if (MEM_read32(currentPtr) != MEM_read32(mvalAddr)) return 0;
|
|
120
|
+
/* force ordering of these tests, which matters once the function is inlined, as they become branches */
|
|
121
|
+
#if defined(__GNUC__)
|
|
122
|
+
__asm__("");
|
|
123
|
+
#endif
|
|
124
|
+
return matchIdx >= idxLowLimit;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
static int
|
|
128
|
+
ZSTD_match4Found_branch(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
|
|
129
|
+
{
|
|
130
|
+
/* using a branch instead of a cmov,
|
|
131
|
+
* because it's faster in scenarios where matchIdx >= idxLowLimit is generally true,
|
|
132
|
+
* aka almost all candidates are within range */
|
|
133
|
+
U32 mval;
|
|
134
|
+
if (matchIdx >= idxLowLimit) {
|
|
135
|
+
mval = MEM_read32(matchAddress);
|
|
136
|
+
} else {
|
|
137
|
+
mval = MEM_read32(currentPtr) ^ 1; /* guaranteed to not match. */
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
return (MEM_read32(currentPtr) == mval);
|
|
141
|
+
}
|
|
142
|
+
|
|
45
143
|
|
|
46
144
|
/**
|
|
47
145
|
* If you squint hard enough (and ignore repcodes), the search operation at any
|
|
@@ -89,17 +187,17 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
|
|
89
187
|
*
|
|
90
188
|
* This is also the work we do at the beginning to enter the loop initially.
|
|
91
189
|
*/
|
|
92
|
-
FORCE_INLINE_TEMPLATE
|
|
93
|
-
|
|
94
|
-
|
|
190
|
+
FORCE_INLINE_TEMPLATE
|
|
191
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
|
192
|
+
size_t ZSTD_compressBlock_fast_noDict_generic(
|
|
193
|
+
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
95
194
|
void const* src, size_t srcSize,
|
|
96
|
-
U32 const mls,
|
|
195
|
+
U32 const mls, int useCmov)
|
|
97
196
|
{
|
|
98
197
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
99
198
|
U32* const hashTable = ms->hashTable;
|
|
100
199
|
U32 const hlog = cParams->hashLog;
|
|
101
|
-
|
|
102
|
-
size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
|
|
200
|
+
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; /* min 2 */
|
|
103
201
|
const BYTE* const base = ms->window.base;
|
|
104
202
|
const BYTE* const istart = (const BYTE*)src;
|
|
105
203
|
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
|
@@ -117,12 +215,11 @@ ZSTD_compressBlock_fast_noDict_generic(
|
|
|
117
215
|
|
|
118
216
|
U32 rep_offset1 = rep[0];
|
|
119
217
|
U32 rep_offset2 = rep[1];
|
|
120
|
-
U32
|
|
218
|
+
U32 offsetSaved1 = 0, offsetSaved2 = 0;
|
|
121
219
|
|
|
122
220
|
size_t hash0; /* hash for ip0 */
|
|
123
221
|
size_t hash1; /* hash for ip1 */
|
|
124
|
-
U32
|
|
125
|
-
U32 mval; /* src value at match idx */
|
|
222
|
+
U32 matchIdx; /* match idx for ip0 */
|
|
126
223
|
|
|
127
224
|
U32 offcode;
|
|
128
225
|
const BYTE* match0;
|
|
@@ -135,14 +232,15 @@ ZSTD_compressBlock_fast_noDict_generic(
|
|
|
135
232
|
size_t step;
|
|
136
233
|
const BYTE* nextStep;
|
|
137
234
|
const size_t kStepIncr = (1 << (kSearchStrength - 1));
|
|
235
|
+
const ZSTD_match4Found matchFound = useCmov ? ZSTD_match4Found_cmov : ZSTD_match4Found_branch;
|
|
138
236
|
|
|
139
237
|
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
|
|
140
238
|
ip0 += (ip0 == prefixStart);
|
|
141
239
|
{ U32 const curr = (U32)(ip0 - base);
|
|
142
240
|
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
|
143
241
|
U32 const maxRep = curr - windowLow;
|
|
144
|
-
if (rep_offset2 > maxRep)
|
|
145
|
-
if (rep_offset1 > maxRep)
|
|
242
|
+
if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0;
|
|
243
|
+
if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0;
|
|
146
244
|
}
|
|
147
245
|
|
|
148
246
|
/* start each op */
|
|
@@ -163,7 +261,7 @@ _start: /* Requires: ip0 */
|
|
|
163
261
|
hash0 = ZSTD_hashPtr(ip0, hlog, mls);
|
|
164
262
|
hash1 = ZSTD_hashPtr(ip1, hlog, mls);
|
|
165
263
|
|
|
166
|
-
|
|
264
|
+
matchIdx = hashTable[hash0];
|
|
167
265
|
|
|
168
266
|
do {
|
|
169
267
|
/* load repcode match for ip[2]*/
|
|
@@ -180,26 +278,28 @@ _start: /* Requires: ip0 */
|
|
|
180
278
|
mLength = ip0[-1] == match0[-1];
|
|
181
279
|
ip0 -= mLength;
|
|
182
280
|
match0 -= mLength;
|
|
183
|
-
offcode =
|
|
281
|
+
offcode = REPCODE1_TO_OFFBASE;
|
|
184
282
|
mLength += 4;
|
|
283
|
+
|
|
284
|
+
/* Write next hash table entry: it's already calculated.
|
|
285
|
+
* This write is known to be safe because ip1 is before the
|
|
286
|
+
* repcode (ip2). */
|
|
287
|
+
hashTable[hash1] = (U32)(ip1 - base);
|
|
288
|
+
|
|
185
289
|
goto _match;
|
|
186
290
|
}
|
|
187
291
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
}
|
|
292
|
+
if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
|
|
293
|
+
/* Write next hash table entry (it's already calculated).
|
|
294
|
+
* This write is known to be safe because the ip1 == ip0 + 1,
|
|
295
|
+
* so searching will resume after ip1 */
|
|
296
|
+
hashTable[hash1] = (U32)(ip1 - base);
|
|
194
297
|
|
|
195
|
-
/* check match at ip[0] */
|
|
196
|
-
if (MEM_read32(ip0) == mval) {
|
|
197
|
-
/* found a match! */
|
|
198
298
|
goto _offset;
|
|
199
299
|
}
|
|
200
300
|
|
|
201
301
|
/* lookup ip[1] */
|
|
202
|
-
|
|
302
|
+
matchIdx = hashTable[hash1];
|
|
203
303
|
|
|
204
304
|
/* hash ip[2] */
|
|
205
305
|
hash0 = hash1;
|
|
@@ -214,21 +314,19 @@ _start: /* Requires: ip0 */
|
|
|
214
314
|
current0 = (U32)(ip0 - base);
|
|
215
315
|
hashTable[hash0] = current0;
|
|
216
316
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
if (MEM_read32(ip0) == mval) {
|
|
226
|
-
/* found a match! */
|
|
317
|
+
if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
|
|
318
|
+
/* Write next hash table entry, since it's already calculated */
|
|
319
|
+
if (step <= 4) {
|
|
320
|
+
/* Avoid writing an index if it's >= position where search will resume.
|
|
321
|
+
* The minimum possible match has length 4, so search can resume at ip0 + 4.
|
|
322
|
+
*/
|
|
323
|
+
hashTable[hash1] = (U32)(ip1 - base);
|
|
324
|
+
}
|
|
227
325
|
goto _offset;
|
|
228
326
|
}
|
|
229
327
|
|
|
230
328
|
/* lookup ip[1] */
|
|
231
|
-
|
|
329
|
+
matchIdx = hashTable[hash1];
|
|
232
330
|
|
|
233
331
|
/* hash ip[2] */
|
|
234
332
|
hash0 = hash1;
|
|
@@ -250,13 +348,28 @@ _start: /* Requires: ip0 */
|
|
|
250
348
|
} while (ip3 < ilimit);
|
|
251
349
|
|
|
252
350
|
_cleanup:
|
|
253
|
-
/* Note that there are probably still a couple positions
|
|
351
|
+
/* Note that there are probably still a couple positions one could search.
|
|
254
352
|
* However, it seems to be a meaningful performance hit to try to search
|
|
255
353
|
* them. So let's not. */
|
|
256
354
|
|
|
355
|
+
/* When the repcodes are outside of the prefix, we set them to zero before the loop.
|
|
356
|
+
* When the offsets are still zero, we need to restore them after the block to have a correct
|
|
357
|
+
* repcode history. If only one offset was invalid, it is easy. The tricky case is when both
|
|
358
|
+
* offsets were invalid. We need to figure out which offset to refill with.
|
|
359
|
+
* - If both offsets are zero they are in the same order.
|
|
360
|
+
* - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`.
|
|
361
|
+
* - If only one is zero, we need to decide which offset to restore.
|
|
362
|
+
* - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1.
|
|
363
|
+
* - It is impossible for rep_offset2 to be non-zero.
|
|
364
|
+
*
|
|
365
|
+
* So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then
|
|
366
|
+
* set rep[0] = rep_offset1 and rep[1] = offsetSaved1.
|
|
367
|
+
*/
|
|
368
|
+
offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;
|
|
369
|
+
|
|
257
370
|
/* save reps for next block */
|
|
258
|
-
rep[0] = rep_offset1 ? rep_offset1 :
|
|
259
|
-
rep[1] = rep_offset2 ? rep_offset2 :
|
|
371
|
+
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1;
|
|
372
|
+
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2;
|
|
260
373
|
|
|
261
374
|
/* Return the last literals size */
|
|
262
375
|
return (size_t)(iend - anchor);
|
|
@@ -264,10 +377,10 @@ _cleanup:
|
|
|
264
377
|
_offset: /* Requires: ip0, idx */
|
|
265
378
|
|
|
266
379
|
/* Compute the offset code. */
|
|
267
|
-
match0 = base +
|
|
380
|
+
match0 = base + matchIdx;
|
|
268
381
|
rep_offset2 = rep_offset1;
|
|
269
382
|
rep_offset1 = (U32)(ip0-match0);
|
|
270
|
-
offcode =
|
|
383
|
+
offcode = OFFSET_TO_OFFBASE(rep_offset1);
|
|
271
384
|
mLength = 4;
|
|
272
385
|
|
|
273
386
|
/* Count the backwards match length. */
|
|
@@ -287,11 +400,6 @@ _match: /* Requires: ip0, match0, offcode */
|
|
|
287
400
|
ip0 += mLength;
|
|
288
401
|
anchor = ip0;
|
|
289
402
|
|
|
290
|
-
/* write next hash table entry */
|
|
291
|
-
if (ip1 < ip0) {
|
|
292
|
-
hashTable[hash1] = (U32)(ip1 - base);
|
|
293
|
-
}
|
|
294
|
-
|
|
295
403
|
/* Fill table and check for immediate repcode. */
|
|
296
404
|
if (ip0 <= ilimit) {
|
|
297
405
|
/* Fill Table */
|
|
@@ -306,7 +414,7 @@ _match: /* Requires: ip0, match0, offcode */
|
|
|
306
414
|
{ U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
|
|
307
415
|
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
|
|
308
416
|
ip0 += rLength;
|
|
309
|
-
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend,
|
|
417
|
+
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
|
|
310
418
|
anchor = ip0;
|
|
311
419
|
continue; /* faster when present (confirmed on gcc-8) ... (?) */
|
|
312
420
|
} } }
|
|
@@ -314,12 +422,12 @@ _match: /* Requires: ip0, match0, offcode */
|
|
|
314
422
|
goto _start;
|
|
315
423
|
}
|
|
316
424
|
|
|
317
|
-
#define ZSTD_GEN_FAST_FN(dictMode,
|
|
318
|
-
static size_t ZSTD_compressBlock_fast_##dictMode##_##
|
|
319
|
-
|
|
425
|
+
#define ZSTD_GEN_FAST_FN(dictMode, mml, cmov) \
|
|
426
|
+
static size_t ZSTD_compressBlock_fast_##dictMode##_##mml##_##cmov( \
|
|
427
|
+
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
|
|
320
428
|
void const* src, size_t srcSize) \
|
|
321
429
|
{ \
|
|
322
|
-
return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize,
|
|
430
|
+
return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mml, cmov); \
|
|
323
431
|
}
|
|
324
432
|
|
|
325
433
|
ZSTD_GEN_FAST_FN(noDict, 4, 1)
|
|
@@ -333,13 +441,15 @@ ZSTD_GEN_FAST_FN(noDict, 6, 0)
|
|
|
333
441
|
ZSTD_GEN_FAST_FN(noDict, 7, 0)
|
|
334
442
|
|
|
335
443
|
size_t ZSTD_compressBlock_fast(
|
|
336
|
-
|
|
444
|
+
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
337
445
|
void const* src, size_t srcSize)
|
|
338
446
|
{
|
|
339
|
-
U32 const
|
|
447
|
+
U32 const mml = ms->cParams.minMatch;
|
|
448
|
+
/* use cmov when "candidate in range" branch is likely unpredictable */
|
|
449
|
+
int const useCmov = ms->cParams.windowLog < 19;
|
|
340
450
|
assert(ms->dictMatchState == NULL);
|
|
341
|
-
if (
|
|
342
|
-
switch(
|
|
451
|
+
if (useCmov) {
|
|
452
|
+
switch(mml)
|
|
343
453
|
{
|
|
344
454
|
default: /* includes case 3 */
|
|
345
455
|
case 4 :
|
|
@@ -352,7 +462,8 @@ size_t ZSTD_compressBlock_fast(
|
|
|
352
462
|
return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
|
|
353
463
|
}
|
|
354
464
|
} else {
|
|
355
|
-
|
|
465
|
+
/* use a branch instead */
|
|
466
|
+
switch(mml)
|
|
356
467
|
{
|
|
357
468
|
default: /* includes case 3 */
|
|
358
469
|
case 4 :
|
|
@@ -364,13 +475,13 @@ size_t ZSTD_compressBlock_fast(
|
|
|
364
475
|
case 7 :
|
|
365
476
|
return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
|
|
366
477
|
}
|
|
367
|
-
|
|
368
478
|
}
|
|
369
479
|
}
|
|
370
480
|
|
|
371
481
|
FORCE_INLINE_TEMPLATE
|
|
482
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
|
372
483
|
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
373
|
-
|
|
484
|
+
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
374
485
|
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
|
|
375
486
|
{
|
|
376
487
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
@@ -380,16 +491,16 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
380
491
|
U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
|
|
381
492
|
const BYTE* const base = ms->window.base;
|
|
382
493
|
const BYTE* const istart = (const BYTE*)src;
|
|
383
|
-
const BYTE*
|
|
494
|
+
const BYTE* ip0 = istart;
|
|
495
|
+
const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */
|
|
384
496
|
const BYTE* anchor = istart;
|
|
385
497
|
const U32 prefixStartIndex = ms->window.dictLimit;
|
|
386
498
|
const BYTE* const prefixStart = base + prefixStartIndex;
|
|
387
499
|
const BYTE* const iend = istart + srcSize;
|
|
388
500
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
|
389
501
|
U32 offset_1=rep[0], offset_2=rep[1];
|
|
390
|
-
U32 offsetSaved = 0;
|
|
391
502
|
|
|
392
|
-
const
|
|
503
|
+
const ZSTD_MatchState_t* const dms = ms->dictMatchState;
|
|
393
504
|
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
|
|
394
505
|
const U32* const dictHashTable = dms->hashTable;
|
|
395
506
|
const U32 dictStartIndex = dms->window.dictLimit;
|
|
@@ -397,13 +508,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
397
508
|
const BYTE* const dictStart = dictBase + dictStartIndex;
|
|
398
509
|
const BYTE* const dictEnd = dms->window.nextSrc;
|
|
399
510
|
const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
|
|
400
|
-
const U32 dictAndPrefixLength = (U32)(
|
|
401
|
-
const U32
|
|
511
|
+
const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart);
|
|
512
|
+
const U32 dictHBits = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
|
|
402
513
|
|
|
403
514
|
/* if a dictionary is still attached, it necessarily means that
|
|
404
515
|
* it is within window size. So we just check it. */
|
|
405
516
|
const U32 maxDistance = 1U << cParams->windowLog;
|
|
406
|
-
const U32 endIndex = (U32)((size_t)(
|
|
517
|
+
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
|
407
518
|
assert(endIndex - prefixStartIndex <= maxDistance);
|
|
408
519
|
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
|
|
409
520
|
|
|
@@ -413,106 +524,154 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|
|
413
524
|
* when translating a dict index into a local index */
|
|
414
525
|
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
|
|
415
526
|
|
|
527
|
+
if (ms->prefetchCDictTables) {
|
|
528
|
+
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
|
|
529
|
+
PREFETCH_AREA(dictHashTable, hashTableBytes);
|
|
530
|
+
}
|
|
531
|
+
|
|
416
532
|
/* init */
|
|
417
533
|
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
|
|
418
|
-
|
|
534
|
+
ip0 += (dictAndPrefixLength == 0);
|
|
419
535
|
/* dictMatchState repCode checks don't currently handle repCode == 0
|
|
420
536
|
* disabling. */
|
|
421
537
|
assert(offset_1 <= dictAndPrefixLength);
|
|
422
538
|
assert(offset_2 <= dictAndPrefixLength);
|
|
423
539
|
|
|
424
|
-
/*
|
|
425
|
-
|
|
540
|
+
/* Outer search loop */
|
|
541
|
+
assert(stepSize >= 1);
|
|
542
|
+
while (ip1 <= ilimit) { /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */
|
|
426
543
|
size_t mLength;
|
|
427
|
-
size_t
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
const
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
544
|
+
size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls);
|
|
545
|
+
|
|
546
|
+
size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls);
|
|
547
|
+
U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS];
|
|
548
|
+
int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0);
|
|
549
|
+
|
|
550
|
+
U32 matchIndex = hashTable[hash0];
|
|
551
|
+
U32 curr = (U32)(ip0 - base);
|
|
552
|
+
size_t step = stepSize;
|
|
553
|
+
const size_t kStepIncr = 1 << kSearchStrength;
|
|
554
|
+
const BYTE* nextStep = ip0 + kStepIncr;
|
|
555
|
+
|
|
556
|
+
/* Inner search loop */
|
|
557
|
+
while (1) {
|
|
558
|
+
const BYTE* match = base + matchIndex;
|
|
559
|
+
const U32 repIndex = curr + 1 - offset_1;
|
|
560
|
+
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
|
|
561
|
+
dictBase + (repIndex - dictIndexDelta) :
|
|
562
|
+
base + repIndex;
|
|
563
|
+
const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls);
|
|
564
|
+
size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
|
|
565
|
+
hashTable[hash0] = curr; /* update hash table */
|
|
566
|
+
|
|
567
|
+
if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
|
|
568
|
+
&& (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
|
|
569
|
+
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
|
570
|
+
mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
|
|
571
|
+
ip0++;
|
|
572
|
+
ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
|
|
573
|
+
break;
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
if (dictTagsMatch) {
|
|
577
|
+
/* Found a possible dict match */
|
|
578
|
+
const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
|
|
579
|
+
const BYTE* dictMatch = dictBase + dictMatchIndex;
|
|
580
|
+
if (dictMatchIndex > dictStartIndex &&
|
|
581
|
+
MEM_read32(dictMatch) == MEM_read32(ip0)) {
|
|
582
|
+
/* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */
|
|
583
|
+
if (matchIndex <= prefixStartIndex) {
|
|
584
|
+
U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta);
|
|
585
|
+
mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4;
|
|
586
|
+
while (((ip0 > anchor) & (dictMatch > dictStart))
|
|
587
|
+
&& (ip0[-1] == dictMatch[-1])) {
|
|
588
|
+
ip0--;
|
|
589
|
+
dictMatch--;
|
|
590
|
+
mLength++;
|
|
591
|
+
} /* catch up */
|
|
592
|
+
offset_2 = offset_1;
|
|
593
|
+
offset_1 = offset;
|
|
594
|
+
ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
|
595
|
+
break;
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
if (ZSTD_match4Found_cmov(ip0, match, matchIndex, prefixStartIndex)) {
|
|
601
|
+
/* found a regular match of size >= 4 */
|
|
602
|
+
U32 const offset = (U32) (ip0 - match);
|
|
603
|
+
mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
|
|
604
|
+
while (((ip0 > anchor) & (match > prefixStart))
|
|
605
|
+
&& (ip0[-1] == match[-1])) {
|
|
606
|
+
ip0--;
|
|
607
|
+
match--;
|
|
608
|
+
mLength++;
|
|
459
609
|
} /* catch up */
|
|
460
610
|
offset_2 = offset_1;
|
|
461
611
|
offset_1 = offset;
|
|
462
|
-
ZSTD_storeSeq(seqStore, (size_t)(
|
|
612
|
+
ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
|
|
613
|
+
break;
|
|
463
614
|
}
|
|
464
|
-
|
|
465
|
-
/*
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
615
|
+
|
|
616
|
+
/* Prepare for next iteration */
|
|
617
|
+
dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS];
|
|
618
|
+
dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1);
|
|
619
|
+
matchIndex = hashTable[hash1];
|
|
620
|
+
|
|
621
|
+
if (ip1 >= nextStep) {
|
|
622
|
+
step++;
|
|
623
|
+
nextStep += kStepIncr;
|
|
624
|
+
}
|
|
625
|
+
ip0 = ip1;
|
|
626
|
+
ip1 = ip1 + step;
|
|
627
|
+
if (ip1 > ilimit) goto _cleanup;
|
|
628
|
+
|
|
629
|
+
curr = (U32)(ip0 - base);
|
|
630
|
+
hash0 = hash1;
|
|
631
|
+
} /* end inner search loop */
|
|
479
632
|
|
|
480
633
|
/* match found */
|
|
481
|
-
|
|
482
|
-
|
|
634
|
+
assert(mLength);
|
|
635
|
+
ip0 += mLength;
|
|
636
|
+
anchor = ip0;
|
|
483
637
|
|
|
484
|
-
if (
|
|
638
|
+
if (ip0 <= ilimit) {
|
|
485
639
|
/* Fill Table */
|
|
486
640
|
assert(base+curr+2 > istart); /* check base overflow */
|
|
487
641
|
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
|
|
488
|
-
hashTable[ZSTD_hashPtr(
|
|
642
|
+
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
|
|
489
643
|
|
|
490
644
|
/* check immediate repcode */
|
|
491
|
-
while (
|
|
492
|
-
U32 const current2 = (U32)(
|
|
645
|
+
while (ip0 <= ilimit) {
|
|
646
|
+
U32 const current2 = (U32)(ip0-base);
|
|
493
647
|
U32 const repIndex2 = current2 - offset_2;
|
|
494
648
|
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
|
|
495
649
|
dictBase - dictIndexDelta + repIndex2 :
|
|
496
650
|
base + repIndex2;
|
|
497
|
-
if ( ((
|
|
498
|
-
&& (MEM_read32(repMatch2) == MEM_read32(
|
|
651
|
+
if ( (ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
|
|
652
|
+
&& (MEM_read32(repMatch2) == MEM_read32(ip0))) {
|
|
499
653
|
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
|
500
|
-
size_t const repLength2 = ZSTD_count_2segments(
|
|
654
|
+
size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
|
501
655
|
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
|
502
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend,
|
|
503
|
-
hashTable[ZSTD_hashPtr(
|
|
504
|
-
|
|
505
|
-
anchor =
|
|
656
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
|
657
|
+
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2;
|
|
658
|
+
ip0 += repLength2;
|
|
659
|
+
anchor = ip0;
|
|
506
660
|
continue;
|
|
507
661
|
}
|
|
508
662
|
break;
|
|
509
663
|
}
|
|
510
664
|
}
|
|
665
|
+
|
|
666
|
+
/* Prepare for next iteration */
|
|
667
|
+
assert(ip0 == anchor);
|
|
668
|
+
ip1 = ip0 + stepSize;
|
|
511
669
|
}
|
|
512
670
|
|
|
671
|
+
_cleanup:
|
|
513
672
|
/* save reps for next block */
|
|
514
|
-
rep[0] = offset_1
|
|
515
|
-
rep[1] = offset_2
|
|
673
|
+
rep[0] = offset_1;
|
|
674
|
+
rep[1] = offset_2;
|
|
516
675
|
|
|
517
676
|
/* Return the last literals size */
|
|
518
677
|
return (size_t)(iend - anchor);
|
|
@@ -525,7 +684,7 @@ ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
|
|
|
525
684
|
ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
|
|
526
685
|
|
|
527
686
|
size_t ZSTD_compressBlock_fast_dictMatchState(
|
|
528
|
-
|
|
687
|
+
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
529
688
|
void const* src, size_t srcSize)
|
|
530
689
|
{
|
|
531
690
|
U32 const mls = ms->cParams.minMatch;
|
|
@@ -545,19 +704,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
|
|
|
545
704
|
}
|
|
546
705
|
|
|
547
706
|
|
|
548
|
-
static
|
|
549
|
-
|
|
707
|
+
static
|
|
708
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
|
709
|
+
size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
710
|
+
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
550
711
|
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
|
|
551
712
|
{
|
|
552
713
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
553
714
|
U32* const hashTable = ms->hashTable;
|
|
554
715
|
U32 const hlog = cParams->hashLog;
|
|
555
716
|
/* support stepSize of 0 */
|
|
556
|
-
|
|
717
|
+
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
|
|
557
718
|
const BYTE* const base = ms->window.base;
|
|
558
719
|
const BYTE* const dictBase = ms->window.dictBase;
|
|
559
720
|
const BYTE* const istart = (const BYTE*)src;
|
|
560
|
-
const BYTE* ip = istart;
|
|
561
721
|
const BYTE* anchor = istart;
|
|
562
722
|
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
|
563
723
|
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
|
|
@@ -570,6 +730,28 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
|
570
730
|
const BYTE* const iend = istart + srcSize;
|
|
571
731
|
const BYTE* const ilimit = iend - 8;
|
|
572
732
|
U32 offset_1=rep[0], offset_2=rep[1];
|
|
733
|
+
U32 offsetSaved1 = 0, offsetSaved2 = 0;
|
|
734
|
+
|
|
735
|
+
const BYTE* ip0 = istart;
|
|
736
|
+
const BYTE* ip1;
|
|
737
|
+
const BYTE* ip2;
|
|
738
|
+
const BYTE* ip3;
|
|
739
|
+
U32 current0;
|
|
740
|
+
|
|
741
|
+
|
|
742
|
+
size_t hash0; /* hash for ip0 */
|
|
743
|
+
size_t hash1; /* hash for ip1 */
|
|
744
|
+
U32 idx; /* match idx for ip0 */
|
|
745
|
+
const BYTE* idxBase; /* base pointer for idx */
|
|
746
|
+
|
|
747
|
+
U32 offcode;
|
|
748
|
+
const BYTE* match0;
|
|
749
|
+
size_t mLength;
|
|
750
|
+
const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */
|
|
751
|
+
|
|
752
|
+
size_t step;
|
|
753
|
+
const BYTE* nextStep;
|
|
754
|
+
const size_t kStepIncr = (1 << (kSearchStrength - 1));
|
|
573
755
|
|
|
574
756
|
(void)hasStep; /* not currently specialized on whether it's accelerated */
|
|
575
757
|
|
|
@@ -579,75 +761,202 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
|
|
579
761
|
if (prefixStartIndex == dictStartIndex)
|
|
580
762
|
return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
|
|
581
763
|
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
764
|
+
{ U32 const curr = (U32)(ip0 - base);
|
|
765
|
+
U32 const maxRep = curr - dictStartIndex;
|
|
766
|
+
if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0;
|
|
767
|
+
if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0;
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
/* start each op */
|
|
771
|
+
_start: /* Requires: ip0 */
|
|
772
|
+
|
|
773
|
+
step = stepSize;
|
|
774
|
+
nextStep = ip0 + kStepIncr;
|
|
775
|
+
|
|
776
|
+
/* calculate positions, ip0 - anchor == 0, so we skip step calc */
|
|
777
|
+
ip1 = ip0 + 1;
|
|
778
|
+
ip2 = ip0 + step;
|
|
779
|
+
ip3 = ip2 + 1;
|
|
780
|
+
|
|
781
|
+
if (ip3 >= ilimit) {
|
|
782
|
+
goto _cleanup;
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
hash0 = ZSTD_hashPtr(ip0, hlog, mls);
|
|
786
|
+
hash1 = ZSTD_hashPtr(ip1, hlog, mls);
|
|
787
|
+
|
|
788
|
+
idx = hashTable[hash0];
|
|
789
|
+
idxBase = idx < prefixStartIndex ? dictBase : base;
|
|
790
|
+
|
|
791
|
+
do {
|
|
792
|
+
{ /* load repcode match for ip[2] */
|
|
793
|
+
U32 const current2 = (U32)(ip2 - base);
|
|
794
|
+
U32 const repIndex = current2 - offset_1;
|
|
795
|
+
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
|
|
796
|
+
U32 rval;
|
|
797
|
+
if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */
|
|
798
|
+
& (offset_1 > 0) ) {
|
|
799
|
+
rval = MEM_read32(repBase + repIndex);
|
|
800
|
+
} else {
|
|
801
|
+
rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */
|
|
610
802
|
}
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
803
|
+
|
|
804
|
+
/* write back hash table entry */
|
|
805
|
+
current0 = (U32)(ip0 - base);
|
|
806
|
+
hashTable[hash0] = current0;
|
|
807
|
+
|
|
808
|
+
/* check repcode at ip[2] */
|
|
809
|
+
if (MEM_read32(ip2) == rval) {
|
|
810
|
+
ip0 = ip2;
|
|
811
|
+
match0 = repBase + repIndex;
|
|
812
|
+
matchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
|
813
|
+
assert((match0 != prefixStart) & (match0 != dictStart));
|
|
814
|
+
mLength = ip0[-1] == match0[-1];
|
|
815
|
+
ip0 -= mLength;
|
|
816
|
+
match0 -= mLength;
|
|
817
|
+
offcode = REPCODE1_TO_OFFBASE;
|
|
818
|
+
mLength += 4;
|
|
819
|
+
goto _match;
|
|
620
820
|
} }
|
|
621
821
|
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
822
|
+
{ /* load match for ip[0] */
|
|
823
|
+
U32 const mval = idx >= dictStartIndex ?
|
|
824
|
+
MEM_read32(idxBase + idx) :
|
|
825
|
+
MEM_read32(ip0) ^ 1; /* guaranteed not to match */
|
|
826
|
+
|
|
827
|
+
/* check match at ip[0] */
|
|
828
|
+
if (MEM_read32(ip0) == mval) {
|
|
829
|
+
/* found a match! */
|
|
830
|
+
goto _offset;
|
|
831
|
+
} }
|
|
832
|
+
|
|
833
|
+
/* lookup ip[1] */
|
|
834
|
+
idx = hashTable[hash1];
|
|
835
|
+
idxBase = idx < prefixStartIndex ? dictBase : base;
|
|
836
|
+
|
|
837
|
+
/* hash ip[2] */
|
|
838
|
+
hash0 = hash1;
|
|
839
|
+
hash1 = ZSTD_hashPtr(ip2, hlog, mls);
|
|
840
|
+
|
|
841
|
+
/* advance to next positions */
|
|
842
|
+
ip0 = ip1;
|
|
843
|
+
ip1 = ip2;
|
|
844
|
+
ip2 = ip3;
|
|
845
|
+
|
|
846
|
+
/* write back hash table entry */
|
|
847
|
+
current0 = (U32)(ip0 - base);
|
|
848
|
+
hashTable[hash0] = current0;
|
|
849
|
+
|
|
850
|
+
{ /* load match for ip[0] */
|
|
851
|
+
U32 const mval = idx >= dictStartIndex ?
|
|
852
|
+
MEM_read32(idxBase + idx) :
|
|
853
|
+
MEM_read32(ip0) ^ 1; /* guaranteed not to match */
|
|
854
|
+
|
|
855
|
+
/* check match at ip[0] */
|
|
856
|
+
if (MEM_read32(ip0) == mval) {
|
|
857
|
+
/* found a match! */
|
|
858
|
+
goto _offset;
|
|
859
|
+
} }
|
|
860
|
+
|
|
861
|
+
/* lookup ip[1] */
|
|
862
|
+
idx = hashTable[hash1];
|
|
863
|
+
idxBase = idx < prefixStartIndex ? dictBase : base;
|
|
864
|
+
|
|
865
|
+
/* hash ip[2] */
|
|
866
|
+
hash0 = hash1;
|
|
867
|
+
hash1 = ZSTD_hashPtr(ip2, hlog, mls);
|
|
868
|
+
|
|
869
|
+
/* advance to next positions */
|
|
870
|
+
ip0 = ip1;
|
|
871
|
+
ip1 = ip2;
|
|
872
|
+
ip2 = ip0 + step;
|
|
873
|
+
ip3 = ip1 + step;
|
|
874
|
+
|
|
875
|
+
/* calculate step */
|
|
876
|
+
if (ip2 >= nextStep) {
|
|
877
|
+
step++;
|
|
878
|
+
PREFETCH_L1(ip1 + 64);
|
|
879
|
+
PREFETCH_L1(ip1 + 128);
|
|
880
|
+
nextStep += kStepIncr;
|
|
881
|
+
}
|
|
882
|
+
} while (ip3 < ilimit);
|
|
883
|
+
|
|
884
|
+
_cleanup:
|
|
885
|
+
/* Note that there are probably still a couple positions we could search.
|
|
886
|
+
* However, it seems to be a meaningful performance hit to try to search
|
|
887
|
+
* them. So let's not. */
|
|
888
|
+
|
|
889
|
+
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
|
|
890
|
+
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
|
|
891
|
+
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
|
|
644
892
|
|
|
645
893
|
/* save reps for next block */
|
|
646
|
-
rep[0] = offset_1;
|
|
647
|
-
rep[1] = offset_2;
|
|
894
|
+
rep[0] = offset_1 ? offset_1 : offsetSaved1;
|
|
895
|
+
rep[1] = offset_2 ? offset_2 : offsetSaved2;
|
|
648
896
|
|
|
649
897
|
/* Return the last literals size */
|
|
650
898
|
return (size_t)(iend - anchor);
|
|
899
|
+
|
|
900
|
+
_offset: /* Requires: ip0, idx, idxBase */
|
|
901
|
+
|
|
902
|
+
/* Compute the offset code. */
|
|
903
|
+
{ U32 const offset = current0 - idx;
|
|
904
|
+
const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart;
|
|
905
|
+
matchEnd = idx < prefixStartIndex ? dictEnd : iend;
|
|
906
|
+
match0 = idxBase + idx;
|
|
907
|
+
offset_2 = offset_1;
|
|
908
|
+
offset_1 = offset;
|
|
909
|
+
offcode = OFFSET_TO_OFFBASE(offset);
|
|
910
|
+
mLength = 4;
|
|
911
|
+
|
|
912
|
+
/* Count the backwards match length. */
|
|
913
|
+
while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) {
|
|
914
|
+
ip0--;
|
|
915
|
+
match0--;
|
|
916
|
+
mLength++;
|
|
917
|
+
} }
|
|
918
|
+
|
|
919
|
+
_match: /* Requires: ip0, match0, offcode, matchEnd */
|
|
920
|
+
|
|
921
|
+
/* Count the forward length. */
|
|
922
|
+
assert(matchEnd != 0);
|
|
923
|
+
mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart);
|
|
924
|
+
|
|
925
|
+
ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
|
|
926
|
+
|
|
927
|
+
ip0 += mLength;
|
|
928
|
+
anchor = ip0;
|
|
929
|
+
|
|
930
|
+
/* write next hash table entry */
|
|
931
|
+
if (ip1 < ip0) {
|
|
932
|
+
hashTable[hash1] = (U32)(ip1 - base);
|
|
933
|
+
}
|
|
934
|
+
|
|
935
|
+
/* Fill table and check for immediate repcode. */
|
|
936
|
+
if (ip0 <= ilimit) {
|
|
937
|
+
/* Fill Table */
|
|
938
|
+
assert(base+current0+2 > istart); /* check base overflow */
|
|
939
|
+
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
|
|
940
|
+
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
|
|
941
|
+
|
|
942
|
+
while (ip0 <= ilimit) {
|
|
943
|
+
U32 const repIndex2 = (U32)(ip0-base) - offset_2;
|
|
944
|
+
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
|
945
|
+
if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) & (offset_2 > 0))
|
|
946
|
+
&& (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
|
|
947
|
+
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
|
948
|
+
size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
|
949
|
+
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
|
|
950
|
+
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
|
|
951
|
+
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
|
|
952
|
+
ip0 += repLength2;
|
|
953
|
+
anchor = ip0;
|
|
954
|
+
continue;
|
|
955
|
+
}
|
|
956
|
+
break;
|
|
957
|
+
} }
|
|
958
|
+
|
|
959
|
+
goto _start;
|
|
651
960
|
}
|
|
652
961
|
|
|
653
962
|
ZSTD_GEN_FAST_FN(extDict, 4, 0)
|
|
@@ -656,10 +965,11 @@ ZSTD_GEN_FAST_FN(extDict, 6, 0)
|
|
|
656
965
|
ZSTD_GEN_FAST_FN(extDict, 7, 0)
|
|
657
966
|
|
|
658
967
|
size_t ZSTD_compressBlock_fast_extDict(
|
|
659
|
-
|
|
968
|
+
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
660
969
|
void const* src, size_t srcSize)
|
|
661
970
|
{
|
|
662
971
|
U32 const mls = ms->cParams.minMatch;
|
|
972
|
+
assert(ms->dictMatchState == NULL);
|
|
663
973
|
switch(mls)
|
|
664
974
|
{
|
|
665
975
|
default: /* includes case 3 */
|