zstdlib 0.6.0-x64-mingw32 → 0.9.0-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +20 -0
- data/README.md +7 -1
- data/Rakefile +38 -8
- data/ext/{zstdlib → zstdlib_c}/extconf.rb +10 -5
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
- data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/adler32.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/compress.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzclose.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzguts.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzlib.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzread.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzwrite.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/infback.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffixed.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/uncompr.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zconf.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zlib.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +75 -57
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/compiler.h +335 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +1 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/debug.c +24 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +22 -49
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/entropy_common.c +368 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +3 -1
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/error_private.h +159 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +51 -42
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +149 -57
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +60 -54
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +87 -98
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +34 -23
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +5 -5
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +10 -8
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +4 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +10 -10
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_deps.h +111 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_internal.h +493 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_trace.h +163 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +105 -85
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +41 -63
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +13 -33
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/huf_compress.c +1370 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress.c +6327 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +537 -82
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +21 -16
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +4 -2
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +61 -34
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_superblock.c +573 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +236 -95
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +321 -143
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +2 -2
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +328 -137
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +2 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.c +2104 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.h +125 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +336 -209
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +15 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_ldm_geartab.h +106 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +439 -239
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +1 -1
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +205 -462
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstdmt_compress.h +113 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress.c +1889 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +20 -16
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +3 -3
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +691 -230
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/zstd_decompress_block.c +2072 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +16 -7
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +71 -10
- data/ext/zstdlib_c/zstd-1.5.2/lib/zdict.h +452 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +760 -234
- data/ext/{zstdlib/zstd-1.4.4/lib/common → zstdlib_c/zstd-1.5.2/lib}/zstd_errors.h +3 -1
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +133 -44
- data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/ext/zstdlib_c/zstd.mk +15 -0
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- data/lib/zstdlib.rb +2 -2
- metadata +124 -114
- data/ext/zstdlib/zstd-1.4.4/lib/common/compiler.h +0 -159
- data/ext/zstdlib/zstd-1.4.4/lib/common/debug.c +0 -44
- data/ext/zstdlib/zstd-1.4.4/lib/common/entropy_common.c +0 -236
- data/ext/zstdlib/zstd-1.4.4/lib/common/error_private.h +0 -76
- data/ext/zstdlib/zstd-1.4.4/lib/common/xxhash.c +0 -882
- data/ext/zstdlib/zstd-1.4.4/lib/common/xxhash.h +0 -305
- data/ext/zstdlib/zstd-1.4.4/lib/common/zstd_internal.h +0 -350
- data/ext/zstdlib/zstd-1.4.4/lib/compress/huf_compress.c +0 -798
- data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_compress.c +0 -4103
- data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_lazy.c +0 -1115
- data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_lazy.h +0 -67
- data/ext/zstdlib/zstd-1.4.4/lib/compress/zstdmt_compress.h +0 -192
- data/ext/zstdlib/zstd-1.4.4/lib/decompress/huf_decompress.c +0 -1234
- data/ext/zstdlib/zstd-1.4.4/lib/decompress/zstd_decompress_block.c +0 -1323
- data/ext/zstdlib/zstd.mk +0 -14
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
| @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            /*
         | 
| 2 | 
            -
             * Copyright (c)  | 
| 2 | 
            +
             * Copyright (c) Yann Collet, Facebook, Inc.
         | 
| 3 3 | 
             
             * All rights reserved.
         | 
| 4 4 | 
             
             *
         | 
| 5 5 | 
             
             * This source code is licensed under both the BSD-style license (found in the
         | 
| @@ -29,148 +29,308 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, | |
| 29 29 | 
             
                 * Insert the other positions if their hash entry is empty.
         | 
| 30 30 | 
             
                 */
         | 
| 31 31 | 
             
                for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
         | 
| 32 | 
            -
                    U32 const  | 
| 32 | 
            +
                    U32 const curr = (U32)(ip - base);
         | 
| 33 33 | 
             
                    size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
         | 
| 34 | 
            -
                    hashTable[hash0] =  | 
| 34 | 
            +
                    hashTable[hash0] = curr;
         | 
| 35 35 | 
             
                    if (dtlm == ZSTD_dtlm_fast) continue;
         | 
| 36 36 | 
             
                    /* Only load extra positions for ZSTD_dtlm_full */
         | 
| 37 37 | 
             
                    {   U32 p;
         | 
| 38 38 | 
             
                        for (p = 1; p < fastHashFillStep; ++p) {
         | 
| 39 39 | 
             
                            size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
         | 
| 40 40 | 
             
                            if (hashTable[hash] == 0) {  /* not yet filled */
         | 
| 41 | 
            -
                                hashTable[hash] =  | 
| 41 | 
            +
                                hashTable[hash] = curr + p;
         | 
| 42 42 | 
             
                }   }   }   }
         | 
| 43 43 | 
             
            }
         | 
| 44 44 |  | 
| 45 45 |  | 
| 46 | 
            +
            /**
         | 
| 47 | 
            +
             * If you squint hard enough (and ignore repcodes), the search operation at any
         | 
| 48 | 
            +
             * given position is broken into 4 stages:
         | 
| 49 | 
            +
             *
         | 
| 50 | 
            +
             * 1. Hash   (map position to hash value via input read)
         | 
| 51 | 
            +
             * 2. Lookup (map hash val to index via hashtable read)
         | 
| 52 | 
            +
             * 3. Load   (map index to value at that position via input read)
         | 
| 53 | 
            +
             * 4. Compare
         | 
| 54 | 
            +
             *
         | 
| 55 | 
            +
             * Each of these steps involves a memory read at an address which is computed
         | 
| 56 | 
            +
             * from the previous step. This means these steps must be sequenced and their
         | 
| 57 | 
            +
             * latencies are cumulative.
         | 
| 58 | 
            +
             *
         | 
| 59 | 
            +
             * Rather than do 1->2->3->4 sequentially for a single position before moving
         | 
| 60 | 
            +
             * onto the next, this implementation interleaves these operations across the
         | 
| 61 | 
            +
             * next few positions:
         | 
| 62 | 
            +
             *
         | 
| 63 | 
            +
             * R = Repcode Read & Compare
         | 
| 64 | 
            +
             * H = Hash
         | 
| 65 | 
            +
             * T = Table Lookup
         | 
| 66 | 
            +
             * M = Match Read & Compare
         | 
| 67 | 
            +
             *
         | 
| 68 | 
            +
             * Pos | Time -->
         | 
| 69 | 
            +
             * ----+-------------------
         | 
| 70 | 
            +
             * N   | ... M
         | 
| 71 | 
            +
             * N+1 | ...   TM
         | 
| 72 | 
            +
             * N+2 |    R H   T M
         | 
| 73 | 
            +
             * N+3 |         H    TM
         | 
| 74 | 
            +
             * N+4 |           R H   T M
         | 
| 75 | 
            +
             * N+5 |                H   ...
         | 
| 76 | 
            +
             * N+6 |                  R ...
         | 
| 77 | 
            +
             *
         | 
| 78 | 
            +
             * This is very much analogous to the pipelining of execution in a CPU. And just
         | 
| 79 | 
            +
             * like a CPU, we have to dump the pipeline when we find a match (i.e., take a
         | 
| 80 | 
            +
             * branch).
         | 
| 81 | 
            +
             *
         | 
| 82 | 
            +
             * When this happens, we throw away our current state, and do the following prep
         | 
| 83 | 
            +
             * to re-enter the loop:
         | 
| 84 | 
            +
             *
         | 
| 85 | 
            +
             * Pos | Time -->
         | 
| 86 | 
            +
             * ----+-------------------
         | 
| 87 | 
            +
             * N   | H T
         | 
| 88 | 
            +
             * N+1 |  H
         | 
| 89 | 
            +
             *
         | 
| 90 | 
            +
             * This is also the work we do at the beginning to enter the loop initially.
         | 
| 91 | 
            +
             */
         | 
| 46 92 | 
             
            FORCE_INLINE_TEMPLATE size_t
         | 
| 47 | 
            -
             | 
| 93 | 
            +
            ZSTD_compressBlock_fast_noDict_generic(
         | 
| 48 94 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| 49 95 | 
             
                    void const* src, size_t srcSize,
         | 
| 50 | 
            -
                    U32 const mls)
         | 
| 96 | 
            +
                    U32 const mls, U32 const hasStep)
         | 
| 51 97 | 
             
            {
         | 
| 52 98 | 
             
                const ZSTD_compressionParameters* const cParams = &ms->cParams;
         | 
| 53 99 | 
             
                U32* const hashTable = ms->hashTable;
         | 
| 54 100 | 
             
                U32 const hlog = cParams->hashLog;
         | 
| 55 101 | 
             
                /* support stepSize of 0 */
         | 
| 56 | 
            -
                size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
         | 
| 102 | 
            +
                size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
         | 
| 57 103 | 
             
                const BYTE* const base = ms->window.base;
         | 
| 58 104 | 
             
                const BYTE* const istart = (const BYTE*)src;
         | 
| 59 | 
            -
                /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
         | 
| 60 | 
            -
                const BYTE* ip0 = istart;
         | 
| 61 | 
            -
                const BYTE* ip1;
         | 
| 62 | 
            -
                const BYTE* anchor = istart;
         | 
| 63 105 | 
             
                const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
         | 
| 64 | 
            -
                const U32    | 
| 65 | 
            -
                const U32   validStartIndex = ms->window.dictLimit;
         | 
| 66 | 
            -
                const U32   prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
         | 
| 106 | 
            +
                const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
         | 
| 67 107 | 
             
                const BYTE* const prefixStart = base + prefixStartIndex;
         | 
| 68 108 | 
             
                const BYTE* const iend = istart + srcSize;
         | 
| 69 109 | 
             
                const BYTE* const ilimit = iend - HASH_READ_SIZE;
         | 
| 70 | 
            -
             | 
| 110 | 
            +
             | 
| 111 | 
            +
                const BYTE* anchor = istart;
         | 
| 112 | 
            +
                const BYTE* ip0 = istart;
         | 
| 113 | 
            +
                const BYTE* ip1;
         | 
| 114 | 
            +
                const BYTE* ip2;
         | 
| 115 | 
            +
                const BYTE* ip3;
         | 
| 116 | 
            +
                U32 current0;
         | 
| 117 | 
            +
             | 
| 118 | 
            +
                U32 rep_offset1 = rep[0];
         | 
| 119 | 
            +
                U32 rep_offset2 = rep[1];
         | 
| 71 120 | 
             
                U32 offsetSaved = 0;
         | 
| 72 121 |  | 
| 73 | 
            -
                /*  | 
| 122 | 
            +
                size_t hash0; /* hash for ip0 */
         | 
| 123 | 
            +
                size_t hash1; /* hash for ip1 */
         | 
| 124 | 
            +
                U32 idx; /* match idx for ip0 */
         | 
| 125 | 
            +
                U32 mval; /* src value at match idx */
         | 
| 126 | 
            +
             | 
| 127 | 
            +
                U32 offcode;
         | 
| 128 | 
            +
                const BYTE* match0;
         | 
| 129 | 
            +
                size_t mLength;
         | 
| 130 | 
            +
             | 
| 131 | 
            +
                /* ip0 and ip1 are always adjacent. The targetLength skipping and
         | 
| 132 | 
            +
                 * uncompressibility acceleration is applied to every other position,
         | 
| 133 | 
            +
                 * matching the behavior of #1562. step therefore represents the gap
         | 
| 134 | 
            +
                 * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
         | 
| 135 | 
            +
                size_t step;
         | 
| 136 | 
            +
                const BYTE* nextStep;
         | 
| 137 | 
            +
                const size_t kStepIncr = (1 << (kSearchStrength - 1));
         | 
| 138 | 
            +
             | 
| 74 139 | 
             
                DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
         | 
| 75 140 | 
             
                ip0 += (ip0 == prefixStart);
         | 
| 141 | 
            +
                {   U32 const curr = (U32)(ip0 - base);
         | 
| 142 | 
            +
                    U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
         | 
| 143 | 
            +
                    U32 const maxRep = curr - windowLow;
         | 
| 144 | 
            +
                    if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
         | 
| 145 | 
            +
                    if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
         | 
| 146 | 
            +
                }
         | 
| 147 | 
            +
             | 
| 148 | 
            +
                /* start each op */
         | 
| 149 | 
            +
            _start: /* Requires: ip0 */
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                step = stepSize;
         | 
| 152 | 
            +
                nextStep = ip0 + kStepIncr;
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                /* calculate positions, ip0 - anchor == 0, so we skip step calc */
         | 
| 76 155 | 
             
                ip1 = ip0 + 1;
         | 
| 77 | 
            -
                 | 
| 78 | 
            -
             | 
| 79 | 
            -
             | 
| 156 | 
            +
                ip2 = ip0 + step;
         | 
| 157 | 
            +
                ip3 = ip2 + 1;
         | 
| 158 | 
            +
             | 
| 159 | 
            +
                if (ip3 >= ilimit) {
         | 
| 160 | 
            +
                    goto _cleanup;
         | 
| 80 161 | 
             
                }
         | 
| 81 162 |  | 
| 82 | 
            -
                 | 
| 83 | 
            -
                 | 
| 84 | 
            -
             | 
| 85 | 
            -
             | 
| 86 | 
            -
             | 
| 87 | 
            -
             | 
| 88 | 
            -
                     | 
| 89 | 
            -
                    U32  | 
| 90 | 
            -
             | 
| 91 | 
            -
                     | 
| 92 | 
            -
                     | 
| 93 | 
            -
                     | 
| 94 | 
            -
             | 
| 95 | 
            -
                     | 
| 96 | 
            -
                     | 
| 97 | 
            -
             | 
| 98 | 
            -
             | 
| 99 | 
            -
             | 
| 100 | 
            -
             | 
| 101 | 
            -
             | 
| 102 | 
            -
             | 
| 103 | 
            -
             | 
| 104 | 
            -
                        mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
         | 
| 105 | 
            -
                        ip0 = ip2 - mLength;
         | 
| 106 | 
            -
                        match0 = repMatch - mLength;
         | 
| 107 | 
            -
                        offcode = 0;
         | 
| 163 | 
            +
                hash0 = ZSTD_hashPtr(ip0, hlog, mls);
         | 
| 164 | 
            +
                hash1 = ZSTD_hashPtr(ip1, hlog, mls);
         | 
| 165 | 
            +
             | 
| 166 | 
            +
                idx = hashTable[hash0];
         | 
| 167 | 
            +
             | 
| 168 | 
            +
                do {
         | 
| 169 | 
            +
                    /* load repcode match for ip[2]*/
         | 
| 170 | 
            +
                    const U32 rval = MEM_read32(ip2 - rep_offset1);
         | 
| 171 | 
            +
             | 
| 172 | 
            +
                    /* write back hash table entry */
         | 
| 173 | 
            +
                    current0 = (U32)(ip0 - base);
         | 
| 174 | 
            +
                    hashTable[hash0] = current0;
         | 
| 175 | 
            +
             | 
| 176 | 
            +
                    /* check repcode at ip[2] */
         | 
| 177 | 
            +
                    if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
         | 
| 178 | 
            +
                        ip0 = ip2;
         | 
| 179 | 
            +
                        match0 = ip0 - rep_offset1;
         | 
| 180 | 
            +
                        mLength = ip0[-1] == match0[-1];
         | 
| 181 | 
            +
                        ip0 -= mLength;
         | 
| 182 | 
            +
                        match0 -= mLength;
         | 
| 183 | 
            +
                        offcode = STORE_REPCODE_1;
         | 
| 184 | 
            +
                        mLength += 4;
         | 
| 108 185 | 
             
                        goto _match;
         | 
| 109 186 | 
             
                    }
         | 
| 110 | 
            -
             | 
| 111 | 
            -
             | 
| 187 | 
            +
             | 
| 188 | 
            +
                    /* load match for ip[0] */
         | 
| 189 | 
            +
                    if (idx >= prefixStartIndex) {
         | 
| 190 | 
            +
                        mval = MEM_read32(base + idx);
         | 
| 191 | 
            +
                    } else {
         | 
| 192 | 
            +
                        mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
         | 
| 193 | 
            +
                    }
         | 
| 194 | 
            +
             | 
| 195 | 
            +
                    /* check match at ip[0] */
         | 
| 196 | 
            +
                    if (MEM_read32(ip0) == mval) {
         | 
| 197 | 
            +
                        /* found a match! */
         | 
| 112 198 | 
             
                        goto _offset;
         | 
| 113 199 | 
             
                    }
         | 
| 114 | 
            -
             | 
| 115 | 
            -
             | 
| 116 | 
            -
             | 
| 117 | 
            -
             | 
| 200 | 
            +
             | 
| 201 | 
            +
                    /* lookup ip[1] */
         | 
| 202 | 
            +
                    idx = hashTable[hash1];
         | 
| 203 | 
            +
             | 
| 204 | 
            +
                    /* hash ip[2] */
         | 
| 205 | 
            +
                    hash0 = hash1;
         | 
| 206 | 
            +
                    hash1 = ZSTD_hashPtr(ip2, hlog, mls);
         | 
| 207 | 
            +
             | 
| 208 | 
            +
                    /* advance to next positions */
         | 
| 209 | 
            +
                    ip0 = ip1;
         | 
| 210 | 
            +
                    ip1 = ip2;
         | 
| 211 | 
            +
                    ip2 = ip3;
         | 
| 212 | 
            +
             | 
| 213 | 
            +
                    /* write back hash table entry */
         | 
| 214 | 
            +
                    current0 = (U32)(ip0 - base);
         | 
| 215 | 
            +
                    hashTable[hash0] = current0;
         | 
| 216 | 
            +
             | 
| 217 | 
            +
                    /* load match for ip[0] */
         | 
| 218 | 
            +
                    if (idx >= prefixStartIndex) {
         | 
| 219 | 
            +
                        mval = MEM_read32(base + idx);
         | 
| 220 | 
            +
                    } else {
         | 
| 221 | 
            +
                        mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
         | 
| 222 | 
            +
                    }
         | 
| 223 | 
            +
             | 
| 224 | 
            +
                    /* check match at ip[0] */
         | 
| 225 | 
            +
                    if (MEM_read32(ip0) == mval) {
         | 
| 226 | 
            +
                        /* found a match! */
         | 
| 118 227 | 
             
                        goto _offset;
         | 
| 119 228 | 
             
                    }
         | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 | 
            -
             | 
| 229 | 
            +
             | 
| 230 | 
            +
                    /* lookup ip[1] */
         | 
| 231 | 
            +
                    idx = hashTable[hash1];
         | 
| 232 | 
            +
             | 
| 233 | 
            +
                    /* hash ip[2] */
         | 
| 234 | 
            +
                    hash0 = hash1;
         | 
| 235 | 
            +
                    hash1 = ZSTD_hashPtr(ip2, hlog, mls);
         | 
| 236 | 
            +
             | 
| 237 | 
            +
                    /* advance to next positions */
         | 
| 238 | 
            +
                    ip0 = ip1;
         | 
| 239 | 
            +
                    ip1 = ip2;
         | 
| 240 | 
            +
                    ip2 = ip0 + step;
         | 
| 241 | 
            +
                    ip3 = ip1 + step;
         | 
| 242 | 
            +
             | 
| 243 | 
            +
                    /* calculate step */
         | 
| 244 | 
            +
                    if (ip2 >= nextStep) {
         | 
| 245 | 
            +
                        step++;
         | 
| 246 | 
            +
                        PREFETCH_L1(ip1 + 64);
         | 
| 247 | 
            +
                        PREFETCH_L1(ip1 + 128);
         | 
| 248 | 
            +
                        nextStep += kStepIncr;
         | 
| 125 249 | 
             
                    }
         | 
| 126 | 
            -
             | 
| 127 | 
            -
             | 
| 128 | 
            -
             | 
| 129 | 
            -
             | 
| 130 | 
            -
             | 
| 131 | 
            -
             | 
| 132 | 
            -
             | 
| 133 | 
            -
             | 
| 134 | 
            -
             | 
| 250 | 
            +
                } while (ip3 < ilimit);
         | 
| 251 | 
            +
             | 
| 252 | 
            +
            _cleanup:
         | 
| 253 | 
            +
                /* Note that there are probably still a couple positions we could search.
         | 
| 254 | 
            +
                 * However, it seems to be a meaningful performance hit to try to search
         | 
| 255 | 
            +
                 * them. So let's not. */
         | 
| 256 | 
            +
             | 
| 257 | 
            +
                /* save reps for next block */
         | 
| 258 | 
            +
                rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
         | 
| 259 | 
            +
                rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
         | 
| 260 | 
            +
             | 
| 261 | 
            +
                /* Return the last literals size */
         | 
| 262 | 
            +
                return (size_t)(iend - anchor);
         | 
| 263 | 
            +
             | 
| 264 | 
            +
            _offset: /* Requires: ip0, idx */
         | 
| 265 | 
            +
             | 
| 266 | 
            +
                /* Compute the offset code. */
         | 
| 267 | 
            +
                match0 = base + idx;
         | 
| 268 | 
            +
                rep_offset2 = rep_offset1;
         | 
| 269 | 
            +
                rep_offset1 = (U32)(ip0-match0);
         | 
| 270 | 
            +
                offcode = STORE_OFFSET(rep_offset1);
         | 
| 271 | 
            +
                mLength = 4;
         | 
| 272 | 
            +
             | 
| 273 | 
            +
                /* Count the backwards match length. */
         | 
| 274 | 
            +
                while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
         | 
| 275 | 
            +
                    ip0--;
         | 
| 276 | 
            +
                    match0--;
         | 
| 277 | 
            +
                    mLength++;
         | 
| 278 | 
            +
                }
         | 
| 135 279 |  | 
| 136 280 | 
             
            _match: /* Requires: ip0, match0, offcode */
         | 
| 137 | 
            -
                    /* Count the forward length */
         | 
| 138 | 
            -
                    mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
         | 
| 139 | 
            -
                    ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
         | 
| 140 | 
            -
                    /* match found */
         | 
| 141 | 
            -
                    ip0 += mLength;
         | 
| 142 | 
            -
                    anchor = ip0;
         | 
| 143 | 
            -
                    ip1 = ip0 + 1;
         | 
| 144 281 |  | 
| 145 | 
            -
             | 
| 146 | 
            -
             | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 282 | 
            +
                /* Count the forward length. */
         | 
| 283 | 
            +
                mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
         | 
| 284 | 
            +
             | 
| 285 | 
            +
                ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
         | 
| 286 | 
            +
             | 
| 287 | 
            +
                ip0 += mLength;
         | 
| 288 | 
            +
                anchor = ip0;
         | 
| 289 | 
            +
             | 
| 290 | 
            +
                /* write next hash table entry */
         | 
| 291 | 
            +
                if (ip1 < ip0) {
         | 
| 292 | 
            +
                    hashTable[hash1] = (U32)(ip1 - base);
         | 
| 293 | 
            +
                }
         | 
| 150 294 |  | 
| 151 | 
            -
             | 
| 152 | 
            -
             | 
| 295 | 
            +
                /* Fill table and check for immediate repcode. */
         | 
| 296 | 
            +
                if (ip0 <= ilimit) {
         | 
| 297 | 
            +
                    /* Fill Table */
         | 
| 298 | 
            +
                    assert(base+current0+2 > istart);  /* check base overflow */
         | 
| 299 | 
            +
                    hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
         | 
| 300 | 
            +
                    hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
         | 
| 301 | 
            +
             | 
| 302 | 
            +
                    if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
         | 
| 303 | 
            +
                        while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
         | 
| 153 304 | 
             
                            /* store sequence */
         | 
| 154 | 
            -
                            size_t const rLength = ZSTD_count(ip0+4, ip0+4- | 
| 155 | 
            -
                            { U32 const tmpOff =  | 
| 305 | 
            +
                            size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
         | 
| 306 | 
            +
                            { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
         | 
| 156 307 | 
             
                            hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
         | 
| 157 308 | 
             
                            ip0 += rLength;
         | 
| 158 | 
            -
                             | 
| 159 | 
            -
                            ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
         | 
| 309 | 
            +
                            ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
         | 
| 160 310 | 
             
                            anchor = ip0;
         | 
| 161 311 | 
             
                            continue;   /* faster when present (confirmed on gcc-8) ... (?) */
         | 
| 162 | 
            -
             | 
| 163 | 
            -
                    }
         | 
| 164 | 
            -
                }
         | 
| 165 | 
            -
             | 
| 166 | 
            -
                /* save reps for next block */
         | 
| 167 | 
            -
                rep[0] = offset_1 ? offset_1 : offsetSaved;
         | 
| 168 | 
            -
                rep[1] = offset_2 ? offset_2 : offsetSaved;
         | 
| 312 | 
            +
                }   }   }
         | 
| 169 313 |  | 
| 170 | 
            -
                 | 
| 171 | 
            -
                return (size_t)(iend - anchor);
         | 
| 314 | 
            +
                goto _start;
         | 
| 172 315 | 
             
            }
         | 
| 173 316 |  | 
| 317 | 
            +
            #define ZSTD_GEN_FAST_FN(dictMode, mls, step)                                                            \
         | 
| 318 | 
            +
                static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step(                                      \
         | 
| 319 | 
            +
                        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                    \
         | 
| 320 | 
            +
                        void const* src, size_t srcSize)                                                       \
         | 
| 321 | 
            +
                {                                                                                              \
         | 
| 322 | 
            +
                    return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
         | 
| 323 | 
            +
                }
         | 
| 324 | 
            +
             | 
| 325 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 4, 1)
         | 
| 326 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 5, 1)
         | 
| 327 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 6, 1)
         | 
| 328 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 7, 1)
         | 
| 329 | 
            +
             | 
| 330 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 4, 0)
         | 
| 331 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 5, 0)
         | 
| 332 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 6, 0)
         | 
| 333 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 7, 0)
         | 
| 174 334 |  | 
| 175 335 | 
             
            size_t ZSTD_compressBlock_fast(
         | 
| 176 336 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| @@ -178,24 +338,40 @@ size_t ZSTD_compressBlock_fast( | |
| 178 338 | 
             
            {
         | 
| 179 339 | 
             
                U32 const mls = ms->cParams.minMatch;
         | 
| 180 340 | 
             
                assert(ms->dictMatchState == NULL);
         | 
| 181 | 
            -
                 | 
| 182 | 
            -
             | 
| 183 | 
            -
             | 
| 184 | 
            -
             | 
| 185 | 
            -
                     | 
| 186 | 
            -
             | 
| 187 | 
            -
                     | 
| 188 | 
            -
             | 
| 189 | 
            -
                     | 
| 190 | 
            -
             | 
| 191 | 
            -
                     | 
| 341 | 
            +
                if (ms->cParams.targetLength > 1) {
         | 
| 342 | 
            +
                    switch(mls)
         | 
| 343 | 
            +
                    {
         | 
| 344 | 
            +
                    default: /* includes case 3 */
         | 
| 345 | 
            +
                    case 4 :
         | 
| 346 | 
            +
                        return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
         | 
| 347 | 
            +
                    case 5 :
         | 
| 348 | 
            +
                        return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
         | 
| 349 | 
            +
                    case 6 :
         | 
| 350 | 
            +
                        return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
         | 
| 351 | 
            +
                    case 7 :
         | 
| 352 | 
            +
                        return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
         | 
| 353 | 
            +
                    }
         | 
| 354 | 
            +
                } else {
         | 
| 355 | 
            +
                    switch(mls)
         | 
| 356 | 
            +
                    {
         | 
| 357 | 
            +
                    default: /* includes case 3 */
         | 
| 358 | 
            +
                    case 4 :
         | 
| 359 | 
            +
                        return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
         | 
| 360 | 
            +
                    case 5 :
         | 
| 361 | 
            +
                        return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
         | 
| 362 | 
            +
                    case 6 :
         | 
| 363 | 
            +
                        return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
         | 
| 364 | 
            +
                    case 7 :
         | 
| 365 | 
            +
                        return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
         | 
| 366 | 
            +
                    }
         | 
| 367 | 
            +
             | 
| 192 368 | 
             
                }
         | 
| 193 369 | 
             
            }
         | 
| 194 370 |  | 
| 195 371 | 
             
            FORCE_INLINE_TEMPLATE
         | 
| 196 372 | 
             
            size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         | 
| 197 373 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| 198 | 
            -
                    void const* src, size_t srcSize, U32 const mls)
         | 
| 374 | 
            +
                    void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
         | 
| 199 375 | 
             
            {
         | 
| 200 376 | 
             
                const ZSTD_compressionParameters* const cParams = &ms->cParams;
         | 
| 201 377 | 
             
                U32* const hashTable = ms->hashTable;
         | 
| @@ -231,7 +407,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 231 407 | 
             
                assert(endIndex - prefixStartIndex <= maxDistance);
         | 
| 232 408 | 
             
                (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
         | 
| 233 409 |  | 
| 234 | 
            -
                /*  | 
| 410 | 
            +
                (void)hasStep; /* not currently specialized on whether it's accelerated */
         | 
| 411 | 
            +
             | 
| 412 | 
            +
                /* ensure there will be no underflow
         | 
| 235 413 | 
             
                 * when translating a dict index into a local index */
         | 
| 236 414 | 
             
                assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
         | 
| 237 415 |  | 
| @@ -247,21 +425,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 247 425 | 
             
                while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
         | 
| 248 426 | 
             
                    size_t mLength;
         | 
| 249 427 | 
             
                    size_t const h = ZSTD_hashPtr(ip, hlog, mls);
         | 
| 250 | 
            -
                    U32 const  | 
| 428 | 
            +
                    U32 const curr = (U32)(ip-base);
         | 
| 251 429 | 
             
                    U32 const matchIndex = hashTable[h];
         | 
| 252 430 | 
             
                    const BYTE* match = base + matchIndex;
         | 
| 253 | 
            -
                    const U32 repIndex =  | 
| 431 | 
            +
                    const U32 repIndex = curr + 1 - offset_1;
         | 
| 254 432 | 
             
                    const BYTE* repMatch = (repIndex < prefixStartIndex) ?
         | 
| 255 433 | 
             
                                           dictBase + (repIndex - dictIndexDelta) :
         | 
| 256 434 | 
             
                                           base + repIndex;
         | 
| 257 | 
            -
                    hashTable[h] =  | 
| 435 | 
            +
                    hashTable[h] = curr;   /* update hash table */
         | 
| 258 436 |  | 
| 259 437 | 
             
                    if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
         | 
| 260 438 | 
             
                      && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
         | 
| 261 439 | 
             
                        const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
         | 
| 262 440 | 
             
                        mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
         | 
| 263 441 | 
             
                        ip++;
         | 
| 264 | 
            -
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,  | 
| 442 | 
            +
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
         | 
| 265 443 | 
             
                    } else if ( (matchIndex <= prefixStartIndex) ) {
         | 
| 266 444 | 
             
                        size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
         | 
| 267 445 | 
             
                        U32 const dictMatchIndex = dictHashTable[dictHash];
         | 
| @@ -273,7 +451,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 273 451 | 
             
                            continue;
         | 
| 274 452 | 
             
                        } else {
         | 
| 275 453 | 
             
                            /* found a dict match */
         | 
| 276 | 
            -
                            U32 const offset = (U32)( | 
| 454 | 
            +
                            U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
         | 
| 277 455 | 
             
                            mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
         | 
| 278 456 | 
             
                            while (((ip>anchor) & (dictMatch>dictStart))
         | 
| 279 457 | 
             
                                 && (ip[-1] == dictMatch[-1])) {
         | 
| @@ -281,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 281 459 | 
             
                            } /* catch up */
         | 
| 282 460 | 
             
                            offset_2 = offset_1;
         | 
| 283 461 | 
             
                            offset_1 = offset;
         | 
| 284 | 
            -
                            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset | 
| 462 | 
            +
                            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
         | 
| 285 463 | 
             
                        }
         | 
| 286 464 | 
             
                    } else if (MEM_read32(match) != MEM_read32(ip)) {
         | 
| 287 465 | 
             
                        /* it's not a match, and we're not going to check the dictionary */
         | 
| @@ -296,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 296 474 | 
             
                             && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
         | 
| 297 475 | 
             
                        offset_2 = offset_1;
         | 
| 298 476 | 
             
                        offset_1 = offset;
         | 
| 299 | 
            -
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset | 
| 477 | 
            +
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
         | 
| 300 478 | 
             
                    }
         | 
| 301 479 |  | 
| 302 480 | 
             
                    /* match found */
         | 
| @@ -305,8 +483,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 305 483 |  | 
| 306 484 | 
             
                    if (ip <= ilimit) {
         | 
| 307 485 | 
             
                        /* Fill Table */
         | 
| 308 | 
            -
                        assert(base+ | 
| 309 | 
            -
                        hashTable[ZSTD_hashPtr(base+ | 
| 486 | 
            +
                        assert(base+curr+2 > istart);  /* check base overflow */
         | 
| 487 | 
            +
                        hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
         | 
| 310 488 | 
             
                        hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
         | 
| 311 489 |  | 
| 312 490 | 
             
                        /* check immediate repcode */
         | 
| @@ -321,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 321 499 | 
             
                                const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
         | 
| 322 500 | 
             
                                size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
         | 
| 323 501 | 
             
                                U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
         | 
| 324 | 
            -
                                ZSTD_storeSeq(seqStore, 0, anchor, iend,  | 
| 502 | 
            +
                                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
         | 
| 325 503 | 
             
                                hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
         | 
| 326 504 | 
             
                                ip += repLength2;
         | 
| 327 505 | 
             
                                anchor = ip;
         | 
| @@ -340,6 +518,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 340 518 | 
             
                return (size_t)(iend - anchor);
         | 
| 341 519 | 
             
            }
         | 
| 342 520 |  | 
| 521 | 
            +
             | 
| 522 | 
            +
            ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
         | 
| 523 | 
            +
            ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
         | 
| 524 | 
            +
            ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
         | 
| 525 | 
            +
            ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
         | 
| 526 | 
            +
             | 
| 343 527 | 
             
            size_t ZSTD_compressBlock_fast_dictMatchState(
         | 
| 344 528 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| 345 529 | 
             
                    void const* src, size_t srcSize)
         | 
| @@ -350,20 +534,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState( | |
| 350 534 | 
             
                {
         | 
| 351 535 | 
             
                default: /* includes case 3 */
         | 
| 352 536 | 
             
                case 4 :
         | 
| 353 | 
            -
                    return  | 
| 537 | 
            +
                    return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
         | 
| 354 538 | 
             
                case 5 :
         | 
| 355 | 
            -
                    return  | 
| 539 | 
            +
                    return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
         | 
| 356 540 | 
             
                case 6 :
         | 
| 357 | 
            -
                    return  | 
| 541 | 
            +
                    return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
         | 
| 358 542 | 
             
                case 7 :
         | 
| 359 | 
            -
                    return  | 
| 543 | 
            +
                    return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
         | 
| 360 544 | 
             
                }
         | 
| 361 545 | 
             
            }
         | 
| 362 546 |  | 
| 363 547 |  | 
| 364 548 | 
             
            static size_t ZSTD_compressBlock_fast_extDict_generic(
         | 
| 365 549 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| 366 | 
            -
                    void const* src, size_t srcSize, U32 const mls)
         | 
| 550 | 
            +
                    void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
         | 
| 367 551 | 
             
            {
         | 
| 368 552 | 
             
                const ZSTD_compressionParameters* const cParams = &ms->cParams;
         | 
| 369 553 | 
             
                U32* const hashTable = ms->hashTable;
         | 
| @@ -387,11 +571,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( | |
| 387 571 | 
             
                const BYTE* const ilimit = iend - 8;
         | 
| 388 572 | 
             
                U32 offset_1=rep[0], offset_2=rep[1];
         | 
| 389 573 |  | 
| 390 | 
            -
                 | 
| 574 | 
            +
                (void)hasStep; /* not currently specialized on whether it's accelerated */
         | 
| 575 | 
            +
             | 
| 576 | 
            +
                DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
         | 
| 391 577 |  | 
| 392 578 | 
             
                /* switch to "regular" variant if extDict is invalidated due to maxDistance */
         | 
| 393 579 | 
             
                if (prefixStartIndex == dictStartIndex)
         | 
| 394 | 
            -
                    return  | 
| 580 | 
            +
                    return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
         | 
| 395 581 |  | 
| 396 582 | 
             
                /* Search Loop */
         | 
| 397 583 | 
             
                while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
         | 
| @@ -399,19 +585,20 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( | |
| 399 585 | 
             
                    const U32    matchIndex = hashTable[h];
         | 
| 400 586 | 
             
                    const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
         | 
| 401 587 | 
             
                    const BYTE*  match = matchBase + matchIndex;
         | 
| 402 | 
            -
                    const U32     | 
| 403 | 
            -
                    const U32    repIndex =  | 
| 588 | 
            +
                    const U32    curr = (U32)(ip-base);
         | 
| 589 | 
            +
                    const U32    repIndex = curr + 1 - offset_1;
         | 
| 404 590 | 
             
                    const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
         | 
| 405 591 | 
             
                    const BYTE* const repMatch = repBase + repIndex;
         | 
| 406 | 
            -
                    hashTable[h] =  | 
| 407 | 
            -
                     | 
| 592 | 
            +
                    hashTable[h] = curr;   /* update hash table */
         | 
| 593 | 
            +
                    DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
         | 
| 408 594 |  | 
| 409 | 
            -
                    if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ | 
| 595 | 
            +
                    if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
         | 
| 596 | 
            +
                         & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
         | 
| 410 597 | 
             
                       && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
         | 
| 411 598 | 
             
                        const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
         | 
| 412 599 | 
             
                        size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
         | 
| 413 600 | 
             
                        ip++;
         | 
| 414 | 
            -
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,  | 
| 601 | 
            +
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
         | 
| 415 602 | 
             
                        ip += rLength;
         | 
| 416 603 | 
             
                        anchor = ip;
         | 
| 417 604 | 
             
                    } else {
         | 
| @@ -423,30 +610,30 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( | |
| 423 610 | 
             
                        }
         | 
| 424 611 | 
             
                        {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
         | 
| 425 612 | 
             
                            const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
         | 
| 426 | 
            -
                            U32 const offset =  | 
| 613 | 
            +
                            U32 const offset = curr - matchIndex;
         | 
| 427 614 | 
             
                            size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
         | 
| 428 615 | 
             
                            while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
         | 
| 429 616 | 
             
                            offset_2 = offset_1; offset_1 = offset;  /* update offset history */
         | 
| 430 | 
            -
                            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset | 
| 617 | 
            +
                            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
         | 
| 431 618 | 
             
                            ip += mLength;
         | 
| 432 619 | 
             
                            anchor = ip;
         | 
| 433 620 | 
             
                    }   }
         | 
| 434 621 |  | 
| 435 622 | 
             
                    if (ip <= ilimit) {
         | 
| 436 623 | 
             
                        /* Fill Table */
         | 
| 437 | 
            -
                        hashTable[ZSTD_hashPtr(base+ | 
| 624 | 
            +
                        hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
         | 
| 438 625 | 
             
                        hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
         | 
| 439 626 | 
             
                        /* check immediate repcode */
         | 
| 440 627 | 
             
                        while (ip <= ilimit) {
         | 
| 441 628 | 
             
                            U32 const current2 = (U32)(ip-base);
         | 
| 442 629 | 
             
                            U32 const repIndex2 = current2 - offset_2;
         | 
| 443 630 | 
             
                            const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
         | 
| 444 | 
            -
                            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & ( | 
| 631 | 
            +
                            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
         | 
| 445 632 | 
             
                               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
         | 
| 446 633 | 
             
                                const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
         | 
| 447 634 | 
             
                                size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
         | 
| 448 635 | 
             
                                { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
         | 
| 449 | 
            -
                                ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend,  | 
| 636 | 
            +
                                ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
         | 
| 450 637 | 
             
                                hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
         | 
| 451 638 | 
             
                                ip += repLength2;
         | 
| 452 639 | 
             
                                anchor = ip;
         | 
| @@ -463,6 +650,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( | |
| 463 650 | 
             
                return (size_t)(iend - anchor);
         | 
| 464 651 | 
             
            }
         | 
| 465 652 |  | 
| 653 | 
            +
            ZSTD_GEN_FAST_FN(extDict, 4, 0)
         | 
| 654 | 
            +
            ZSTD_GEN_FAST_FN(extDict, 5, 0)
         | 
| 655 | 
            +
            ZSTD_GEN_FAST_FN(extDict, 6, 0)
         | 
| 656 | 
            +
            ZSTD_GEN_FAST_FN(extDict, 7, 0)
         | 
| 466 657 |  | 
| 467 658 | 
             
            size_t ZSTD_compressBlock_fast_extDict(
         | 
| 468 659 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| @@ -473,12 +664,12 @@ size_t ZSTD_compressBlock_fast_extDict( | |
| 473 664 | 
             
                {
         | 
| 474 665 | 
             
                default: /* includes case 3 */
         | 
| 475 666 | 
             
                case 4 :
         | 
| 476 | 
            -
                    return  | 
| 667 | 
            +
                    return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
         | 
| 477 668 | 
             
                case 5 :
         | 
| 478 | 
            -
                    return  | 
| 669 | 
            +
                    return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
         | 
| 479 670 | 
             
                case 6 :
         | 
| 480 | 
            -
                    return  | 
| 671 | 
            +
                    return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
         | 
| 481 672 | 
             
                case 7 :
         | 
| 482 | 
            -
                    return  | 
| 673 | 
            +
                    return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
         | 
| 483 674 | 
             
                }
         | 
| 484 675 | 
             
            }
         |