zstdlib 0.14.0-x86-mingw32 → 0.15.0-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in those public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +5 -0
- data/ext/zstdlib_c/extconf.rb +1 -1
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/bits.h +92 -87
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/bitstream.h +26 -29
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/compiler.h +36 -22
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/cpu.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/debug.h +0 -9
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/error_private.c +1 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/error_private.h +0 -10
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/fse.h +2 -17
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/fse_decompress.c +2 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/huf.h +0 -9
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/mem.h +7 -11
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/pool.h +0 -9
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/portability_macros.h +22 -9
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/threading.h +0 -8
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/xxhash.h +93 -19
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_deps.h +12 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_internal.h +1 -69
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_trace.h +5 -12
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/hist.c +10 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/hist.h +7 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress.c +1057 -367
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_internal.h +227 -125
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_literals.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_sequences.c +7 -7
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_sequences.h +7 -6
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_superblock.c +17 -17
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_cwksp.h +41 -24
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_double_fast.c +58 -50
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_double_fast.h +4 -12
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.c +91 -74
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.h +4 -12
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_lazy.c +64 -64
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_lazy.h +30 -39
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_ldm.c +48 -33
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_ldm.h +6 -14
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_opt.c +55 -51
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_opt.h +8 -16
- data/ext/zstdlib_c/zstd-1.5.7/lib/compress/zstd_preSplit.c +238 -0
- data/ext/zstdlib_c/zstd-1.5.7/lib/compress/zstd_preSplit.h +33 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstdmt_compress.c +134 -93
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstdmt_compress.h +4 -15
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/huf_decompress_amd64.S +10 -3
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress.c +14 -11
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress_block.c +6 -12
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress_internal.h +5 -5
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/zdict.h +15 -8
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/zstd.h +241 -132
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/zstd_errors.h +1 -8
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzwrite.c +2 -1
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- data/lib/3.1/zstdlib_c.so +0 -0
- data/lib/3.2/zstdlib_c.so +0 -0
- data/lib/3.3/zstdlib_c.so +0 -0
- data/lib/3.4/zstdlib_c.so +0 -0
- metadata +75 -73
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/allocations.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/debug.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/entropy_common.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/pool.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/threading.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/xxhash.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_common.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/clevels.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/fse_compress.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/huf_compress.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_literals.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_superblock.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_ldm_geartab.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/huf_decompress.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_ddict.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_ddict.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress_block.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzclose.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzcompatibility.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzguts.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzlib.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzread.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/zstd_zlibwrapper.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/zstd_zlibwrapper.h +5 -5
data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_double_fast.h

@@ -11,27 +11,23 @@
 #ifndef ZSTD_DOUBLE_FAST_H
 #define ZSTD_DOUBLE_FAST_H
 
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
 #include "../common/mem.h"      /* U32 */
 #include "zstd_compress_internal.h"     /* ZSTD_CCtx, size_t */
 
 #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
 
-void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
                               void const* end, ZSTD_dictTableLoadMethod_e dtlm,
                               ZSTD_tableFillPurpose_e tfp);
 
 size_t ZSTD_compressBlock_doubleFast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_doubleFast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 
 #define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
@@ -43,8 +39,4 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
 #define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
 #endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
 
-#if defined (__cplusplus)
-}
-#endif
-
 #endif /* ZSTD_DOUBLE_FAST_H */
data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.c

@@ -13,7 +13,7 @@
 
 static
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTableForCDict(ZSTD_MatchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -45,12 +45,12 @@ void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
                 size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
                 if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {  /* not yet filled */
                     ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
-
+    }   }   }   }
 }
 
 static
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTableForCCtx(ZSTD_MatchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -84,7 +84,7 @@ void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
     }   }   }   }
 }
 
-void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm,
                         ZSTD_tableFillPurpose_e tfp)
@@ -97,6 +97,50 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 }
 
 
+typedef int (*ZSTD_match4Found) (const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit);
+
+static int
+ZSTD_match4Found_cmov(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
+{
+    /* Array of ~random data, should have low probability of matching data.
+     * Load from here if the index is invalid.
+     * Used to avoid unpredictable branches. */
+    static const BYTE dummy[] = {0x12,0x34,0x56,0x78};
+
+    /* currentIdx >= lowLimit is a (somewhat) unpredictable branch.
+     * However expression below compiles into conditional move.
+     */
+    const BYTE* mvalAddr = ZSTD_selectAddr(matchIdx, idxLowLimit, matchAddress, dummy);
+    /* Note: this used to be written as : return test1 && test2;
+     * Unfortunately, once inlined, these tests become branches,
+     * in which case it becomes critical that they are executed in the right order (test1 then test2).
+     * So we have to write these tests in a specific manner to ensure their ordering.
+     */
+    if (MEM_read32(currentPtr) != MEM_read32(mvalAddr)) return 0;
+    /* force ordering of these tests, which matters once the function is inlined, as they become branches */
+#if defined(__GNUC__)
+    __asm__("");
+#endif
+    return matchIdx >= idxLowLimit;
+}
+
+static int
+ZSTD_match4Found_branch(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
+{
+    /* using a branch instead of a cmov,
+     * because it's faster in scenarios where matchIdx >= idxLowLimit is generally true,
+     * aka almost all candidates are within range */
+    U32 mval;
+    if (matchIdx >= idxLowLimit) {
+        mval = MEM_read32(matchAddress);
+    } else {
+        mval = MEM_read32(currentPtr) ^ 1; /* guaranteed to not match. */
+    }
+
+    return (MEM_read32(currentPtr) == mval);
+}
+
+
 /**
  * If you squint hard enough (and ignore repcodes), the search operation at any
  * given position is broken into 4 stages:
@@ -146,15 +190,14 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 FORCE_INLINE_TEMPLATE
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_noDict_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
-        U32 const mls, U32 const hasStep)
+        U32 const mls, int useCmov)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32* const hashTable = ms->hashTable;
     U32 const hlog = cParams->hashLog;
-    /* support stepSize of 0 */
-    size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; /* min 2 */
     const BYTE* const base = ms->window.base;
     const BYTE* const istart = (const BYTE*)src;
     const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
@@ -176,8 +219,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
 
     size_t hash0; /* hash for ip0 */
     size_t hash1; /* hash for ip1 */
-    U32 idx; /* match idx for ip0 */
-    U32 mval; /* src value at match idx */
+    U32 matchIdx; /* match idx for ip0 */
 
     U32 offcode;
     const BYTE* match0;
@@ -190,6 +232,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
     size_t step;
     const BYTE* nextStep;
     const size_t kStepIncr = (1 << (kSearchStrength - 1));
+    const ZSTD_match4Found matchFound = useCmov ? ZSTD_match4Found_cmov : ZSTD_match4Found_branch;
 
     DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
     ip0 += (ip0 == prefixStart);
@@ -218,7 +261,7 @@ _start: /* Requires: ip0 */
     hash0 = ZSTD_hashPtr(ip0, hlog, mls);
     hash1 = ZSTD_hashPtr(ip1, hlog, mls);
 
-    idx = hashTable[hash0];
+    matchIdx = hashTable[hash0];
 
     do {
         /* load repcode match for ip[2]*/
@@ -238,35 +281,25 @@ _start: /* Requires: ip0 */
             offcode = REPCODE1_TO_OFFBASE;
             mLength += 4;
 
-            /* First write next hash table entry; we've already calculated it.
-             * This write is known to be safe because the ip1 is before the
+            /* Write next hash table entry: it's already calculated.
+             * This write is known to be safe because ip1 is before the
              * repcode (ip2). */
             hashTable[hash1] = (U32)(ip1 - base);
 
             goto _match;
         }
 
-        /* load match for ip[0] */
-        if (idx >= prefixStartIndex) {
-            mval = MEM_read32(base + idx);
-        } else {
-            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
-        }
-
-        /* check match at ip[0] */
-        if (MEM_read32(ip0) == mval) {
-            /* found a match! */
-
-            /* First write next hash table entry; we've already calculated it.
-             * This write is known to be safe because the ip1 == ip0 + 1, so
-             * we know we will resume searching after ip1 */
+        if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
+            /* Write next hash table entry (it's already calculated).
+            * This write is known to be safe because the ip1 == ip0 + 1,
+            * so searching will resume after ip1 */
             hashTable[hash1] = (U32)(ip1 - base);
 
             goto _offset;
         }
 
         /* lookup ip[1] */
-        idx = hashTable[hash1];
+        matchIdx = hashTable[hash1];
 
         /* hash ip[2] */
        hash0 = hash1;
@@ -281,36 +314,19 @@ _start: /* Requires: ip0 */
         current0 = (U32)(ip0 - base);
         hashTable[hash0] = current0;
 
-        /* load match for ip[0] */
-        if (idx >= prefixStartIndex) {
-            mval = MEM_read32(base + idx);
-        } else {
-            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
-        }
-
-        /* check match at ip[0] */
-        if (MEM_read32(ip0) == mval) {
-            /* found a match! */
-
-            /* first write next hash table entry; we've already calculated it */
+        if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
+            /* Write next hash table entry, since it's already calculated */
             if (step <= 4) {
-                /*
-
-
-                 *
-                 * The minimum possible match has length 4, so the earliest ip0
-                 * can be after we take this match will be the current ip0 + 4.
-                 * ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
-                 * write this position.
-                 */
+                /* Avoid writing an index if it's >= position where search will resume.
+                * The minimum possible match has length 4, so search can resume at ip0 + 4.
+                */
                 hashTable[hash1] = (U32)(ip1 - base);
             }
-
             goto _offset;
         }
 
         /* lookup ip[1] */
-        idx = hashTable[hash1];
+        matchIdx = hashTable[hash1];
 
         /* hash ip[2] */
         hash0 = hash1;
@@ -332,7 +348,7 @@ _start: /* Requires: ip0 */
     } while (ip3 < ilimit);
 
 _cleanup:
-    /* Note that there are probably still a couple positions we could search.
+    /* Note that there are probably still a couple positions one could search.
      * However, it seems to be a meaningful performance hit to try to search
      * them. So let's not. */
 
@@ -361,7 +377,7 @@ _cleanup:
 _offset: /* Requires: ip0, idx */
 
     /* Compute the offset code. */
-    match0 = base + idx;
+    match0 = base + matchIdx;
     rep_offset2 = rep_offset1;
     rep_offset1 = (U32)(ip0-match0);
     offcode = OFFSET_TO_OFFBASE(rep_offset1);
@@ -406,12 +422,12 @@ _match: /* Requires: ip0, match0, offcode */
     goto _start;
 }
 
-#define ZSTD_GEN_FAST_FN(dictMode, mls, step)                                                      \
-    static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step(                             \
-            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                    \
+#define ZSTD_GEN_FAST_FN(dictMode, mml, cmov)                                                       \
+    static size_t ZSTD_compressBlock_fast_##dictMode##_##mml##_##cmov(                              \
+            ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                     \
             void const* src, size_t srcSize)                                                       \
     {                                                                                              \
-        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
+        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mml, cmov); \
     }
 
 ZSTD_GEN_FAST_FN(noDict, 4, 1)
@@ -425,13 +441,15 @@ ZSTD_GEN_FAST_FN(noDict, 6, 0)
 ZSTD_GEN_FAST_FN(noDict, 7, 0)
 
 size_t ZSTD_compressBlock_fast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    U32 const mls = ms->cParams.minMatch;
+    U32 const mml = ms->cParams.minMatch;
+    /* use cmov when "candidate in range" branch is likely unpredictable */
+    int const useCmov = ms->cParams.windowLog < 19;
     assert(ms->dictMatchState == NULL);
-    if (ms->cParams.targetLength > 1) {
-        switch(mls)
+    if (useCmov) {
+        switch(mml)
     {
     default: /* includes case 3 */
     case 4 :
@@ -444,7 +462,8 @@ size_t ZSTD_compressBlock_fast(
         return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
     }
     } else {
-        switch(mls)
+        /* use a branch instead */
+        switch(mml)
     {
     default: /* includes case 3 */
     case 4 :
@@ -456,14 +475,13 @@ size_t ZSTD_compressBlock_fast(
     case 7 :
         return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
     }
-
     }
 }
 
 FORCE_INLINE_TEMPLATE
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_dictMatchState_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -482,7 +500,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
     U32 offset_1=rep[0], offset_2=rep[1];
 
-    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_MatchState_t* const dms = ms->dictMatchState;
     const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
     const U32* const dictHashTable = dms->hashTable;
     const U32 dictStartIndex       = dms->window.dictLimit;
@@ -546,8 +564,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
         hashTable[hash0] = curr;   /* update hash table */
 
-        if (((U32) ((prefixStartIndex - 1) - repIndex) >=
-             3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
+        if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
             && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
@@ -580,8 +597,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             }
         }
 
-        if (
-            /* found a regular match */
+        if (ZSTD_match4Found_cmov(ip0, match, matchIndex, prefixStartIndex)) {
+            /* found a regular match of size >= 4 */
             U32 const offset = (U32) (ip0 - match);
             mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
             while (((ip0 > anchor) & (match > prefixStart))
@@ -631,7 +648,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
                     dictBase - dictIndexDelta + repIndex2 :
                     base + repIndex2;
-            if ( ((U32)((prefixStartIndex-1) - repIndex2) >= 3 /* intentional overflow */)
+            if ( (ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
               && (MEM_read32(repMatch2) == MEM_read32(ip0))) {
                 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -667,7 +684,7 @@ ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
 ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
 
 size_t ZSTD_compressBlock_fast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
     U32 const mls = ms->cParams.minMatch;
@@ -690,7 +707,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
 static
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_extDict_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -925,7 +942,7 @@ _match: /* Requires: ip0, match0, offcode, matchEnd */
         while (ip0 <= ilimit) {
             U32 const repIndex2 = (U32)(ip0-base) - offset_2;
             const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0))
+            if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) & (offset_2 > 0))
                  && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
                 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -948,7 +965,7 @@ ZSTD_GEN_FAST_FN(extDict, 6, 0)
 ZSTD_GEN_FAST_FN(extDict, 7, 0)
 
 size_t ZSTD_compressBlock_fast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
     U32 const mls = ms->cParams.minMatch;
data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.h

@@ -11,28 +11,20 @@
 #ifndef ZSTD_FAST_H
 #define ZSTD_FAST_H
 
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
 #include "../common/mem.h"      /* U32 */
 #include "zstd_compress_internal.h"
 
-void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
                         void const* end, ZSTD_dictTableLoadMethod_e dtlm,
                         ZSTD_tableFillPurpose_e tfp);
 size_t ZSTD_compressBlock_fast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_fast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_fast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 
-#if defined (__cplusplus)
-}
-#endif
-
 #endif /* ZSTD_FAST_H */
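
The most notable change in zstd_fast.c above is the split of the 4-byte candidate check into ZSTD_match4Found_cmov and ZSTD_match4Found_branch, selected per block via useCmov = (windowLog < 19). As context only (not part of the package diff), the following is a minimal standalone C sketch of the cmov-style check; select_addr and match4_cmov are illustrative names standing in for zstd's internal ZSTD_selectAddr and ZSTD_match4Found_cmov, and memcpy stands in for zstd's MEM_read32 unaligned loads.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Stand-in for zstd's internal ZSTD_selectAddr(): picks addrA when
 * idx >= lowLimit, else addrB; simple enough for compilers to turn
 * into a conditional move rather than a branch. */
static const uint8_t* select_addr(uint32_t idx, uint32_t lowLimit,
                                  const uint8_t* addrA, const uint8_t* addrB)
{
    return (idx >= lowLimit) ? addrA : addrB;
}

/* cmov-style 4-byte candidate check: when the candidate index is out of
 * range, compare against a dummy buffer instead of branching, so the
 * "is the candidate in range?" test never becomes a mispredicted branch. */
static int match4_cmov(const uint8_t* current, const uint8_t* match,
                       uint32_t matchIdx, uint32_t lowLimit)
{
    static const uint8_t dummy[4] = { 0x12, 0x34, 0x56, 0x78 };
    const uint8_t* cand = select_addr(matchIdx, lowLimit, match, dummy);
    uint32_t a, b;
    memcpy(&a, current, 4);   /* unaligned-safe 32-bit loads */
    memcpy(&b, cand, 4);
    if (a != b) return 0;            /* bytes differ: no match */
    return matchIdx >= lowLimit;     /* bytes equal: valid only if in range */
}

int main(void)
{
    const uint8_t data[] = "abcdabcd";
    printf("%d\n", match4_cmov(data + 4, data, 0, 0)); /* in range, matches -> 1 */
    printf("%d\n", match4_cmov(data + 4, data, 0, 1)); /* below low limit   -> 0 */
    return 0;
}

The branch-based variant shown in the diff is preferred for larger windows, where almost every candidate index is in range and the branch is therefore well predicted; the cmov variant avoids mispredictions when that test is less predictable.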