zstdlib 0.8.0-x64-mingw32 → 0.9.0-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +10 -0
- data/README.md +7 -1
- data/Rakefile +38 -8
- data/ext/{zstdlib → zstdlib_c}/extconf.rb +10 -5
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-3.0/zstdlib.c +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/adler32.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/compress.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzclose.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzguts.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzlib.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzread.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzwrite.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/infback.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffixed.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/uncompr.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zconf.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zlib.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +24 -9
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/compiler.h +89 -43
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/debug.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/entropy_common.c +11 -5
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.h +79 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +2 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +1 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +24 -22
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +18 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +11 -6
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +2 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +0 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_deps.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_internal.h +95 -92
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_trace.h +12 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +63 -27
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/huf_compress.c +537 -104
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress.c +307 -373
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +174 -83
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +4 -3
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +3 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +15 -14
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.c +4 -3
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +41 -27
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +295 -120
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +309 -130
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_lazy.c +482 -562
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_lazy.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +9 -7
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +1 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm_geartab.h +4 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +249 -148
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +76 -38
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.h +4 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/huf_decompress.c +727 -189
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +85 -22
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.c +744 -220
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +8 -2
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +34 -3
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/zdict.h +4 -4
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +179 -136
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/zstd_errors.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +7 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +0 -0
- data/ext/zstdlib_c/zstd.mk +15 -0
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- data/lib/zstdlib.rb +2 -2
- metadata +124 -121
- data/ext/zstdlib/zstd-1.5.0/lib/common/xxhash.c +0 -824
- data/ext/zstdlib/zstd-1.5.0/lib/common/xxhash.h +0 -285
- data/ext/zstdlib/zstd.mk +0 -14
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
| @@ -43,145 +43,294 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, | |
| 43 43 | 
             
            }
         | 
| 44 44 |  | 
| 45 45 |  | 
| 46 | 
            +
            /**
         | 
| 47 | 
            +
             * If you squint hard enough (and ignore repcodes), the search operation at any
         | 
| 48 | 
            +
             * given position is broken into 4 stages:
         | 
| 49 | 
            +
             *
         | 
| 50 | 
            +
             * 1. Hash   (map position to hash value via input read)
         | 
| 51 | 
            +
             * 2. Lookup (map hash val to index via hashtable read)
         | 
| 52 | 
            +
             * 3. Load   (map index to value at that position via input read)
         | 
| 53 | 
            +
             * 4. Compare
         | 
| 54 | 
            +
             *
         | 
| 55 | 
            +
             * Each of these steps involves a memory read at an address which is computed
         | 
| 56 | 
            +
             * from the previous step. This means these steps must be sequenced and their
         | 
| 57 | 
            +
             * latencies are cumulative.
         | 
| 58 | 
            +
             *
         | 
| 59 | 
            +
             * Rather than do 1->2->3->4 sequentially for a single position before moving
         | 
| 60 | 
            +
             * onto the next, this implementation interleaves these operations across the
         | 
| 61 | 
            +
             * next few positions:
         | 
| 62 | 
            +
             *
         | 
| 63 | 
            +
             * R = Repcode Read & Compare
         | 
| 64 | 
            +
             * H = Hash
         | 
| 65 | 
            +
             * T = Table Lookup
         | 
| 66 | 
            +
             * M = Match Read & Compare
         | 
| 67 | 
            +
             *
         | 
| 68 | 
            +
             * Pos | Time -->
         | 
| 69 | 
            +
             * ----+-------------------
         | 
| 70 | 
            +
             * N   | ... M
         | 
| 71 | 
            +
             * N+1 | ...   TM
         | 
| 72 | 
            +
             * N+2 |    R H   T M
         | 
| 73 | 
            +
             * N+3 |         H    TM
         | 
| 74 | 
            +
             * N+4 |           R H   T M
         | 
| 75 | 
            +
             * N+5 |                H   ...
         | 
| 76 | 
            +
             * N+6 |                  R ...
         | 
| 77 | 
            +
             *
         | 
| 78 | 
            +
             * This is very much analogous to the pipelining of execution in a CPU. And just
         | 
| 79 | 
            +
             * like a CPU, we have to dump the pipeline when we find a match (i.e., take a
         | 
| 80 | 
            +
             * branch).
         | 
| 81 | 
            +
             *
         | 
| 82 | 
            +
             * When this happens, we throw away our current state, and do the following prep
         | 
| 83 | 
            +
             * to re-enter the loop:
         | 
| 84 | 
            +
             *
         | 
| 85 | 
            +
             * Pos | Time -->
         | 
| 86 | 
            +
             * ----+-------------------
         | 
| 87 | 
            +
             * N   | H T
         | 
| 88 | 
            +
             * N+1 |  H
         | 
| 89 | 
            +
             *
         | 
| 90 | 
            +
             * This is also the work we do at the beginning to enter the loop initially.
         | 
| 91 | 
            +
             */
         | 
| 46 92 | 
             
            FORCE_INLINE_TEMPLATE size_t
         | 
| 47 | 
            -
             | 
| 93 | 
            +
            ZSTD_compressBlock_fast_noDict_generic(
         | 
| 48 94 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| 49 95 | 
             
                    void const* src, size_t srcSize,
         | 
| 50 | 
            -
                    U32 const mls)
         | 
| 96 | 
            +
                    U32 const mls, U32 const hasStep)
         | 
| 51 97 | 
             
            {
         | 
| 52 98 | 
             
                const ZSTD_compressionParameters* const cParams = &ms->cParams;
         | 
| 53 99 | 
             
                U32* const hashTable = ms->hashTable;
         | 
| 54 100 | 
             
                U32 const hlog = cParams->hashLog;
         | 
| 55 101 | 
             
                /* support stepSize of 0 */
         | 
| 56 | 
            -
                size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
         | 
| 102 | 
            +
                size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
         | 
| 57 103 | 
             
                const BYTE* const base = ms->window.base;
         | 
| 58 104 | 
             
                const BYTE* const istart = (const BYTE*)src;
         | 
| 59 | 
            -
                /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
         | 
| 60 | 
            -
                const BYTE* ip0 = istart;
         | 
| 61 | 
            -
                const BYTE* ip1;
         | 
| 62 | 
            -
                const BYTE* anchor = istart;
         | 
| 63 105 | 
             
                const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
         | 
| 64 106 | 
             
                const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
         | 
| 65 107 | 
             
                const BYTE* const prefixStart = base + prefixStartIndex;
         | 
| 66 108 | 
             
                const BYTE* const iend = istart + srcSize;
         | 
| 67 109 | 
             
                const BYTE* const ilimit = iend - HASH_READ_SIZE;
         | 
| 68 | 
            -
             | 
| 110 | 
            +
             | 
| 111 | 
            +
                const BYTE* anchor = istart;
         | 
| 112 | 
            +
                const BYTE* ip0 = istart;
         | 
| 113 | 
            +
                const BYTE* ip1;
         | 
| 114 | 
            +
                const BYTE* ip2;
         | 
| 115 | 
            +
                const BYTE* ip3;
         | 
| 116 | 
            +
                U32 current0;
         | 
| 117 | 
            +
             | 
| 118 | 
            +
                U32 rep_offset1 = rep[0];
         | 
| 119 | 
            +
                U32 rep_offset2 = rep[1];
         | 
| 69 120 | 
             
                U32 offsetSaved = 0;
         | 
| 70 121 |  | 
| 71 | 
            -
                /*  | 
| 122 | 
            +
                size_t hash0; /* hash for ip0 */
         | 
| 123 | 
            +
                size_t hash1; /* hash for ip1 */
         | 
| 124 | 
            +
                U32 idx; /* match idx for ip0 */
         | 
| 125 | 
            +
                U32 mval; /* src value at match idx */
         | 
| 126 | 
            +
             | 
| 127 | 
            +
                U32 offcode;
         | 
| 128 | 
            +
                const BYTE* match0;
         | 
| 129 | 
            +
                size_t mLength;
         | 
| 130 | 
            +
             | 
| 131 | 
            +
                /* ip0 and ip1 are always adjacent. The targetLength skipping and
         | 
| 132 | 
            +
                 * uncompressibility acceleration is applied to every other position,
         | 
| 133 | 
            +
                 * matching the behavior of #1562. step therefore represents the gap
         | 
| 134 | 
            +
                 * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
         | 
| 135 | 
            +
                size_t step;
         | 
| 136 | 
            +
                const BYTE* nextStep;
         | 
| 137 | 
            +
                const size_t kStepIncr = (1 << (kSearchStrength - 1));
         | 
| 138 | 
            +
             | 
| 72 139 | 
             
                DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
         | 
| 73 140 | 
             
                ip0 += (ip0 == prefixStart);
         | 
| 74 | 
            -
                ip1 = ip0 + 1;
         | 
| 75 141 | 
             
                {   U32 const curr = (U32)(ip0 - base);
         | 
| 76 142 | 
             
                    U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
         | 
| 77 143 | 
             
                    U32 const maxRep = curr - windowLow;
         | 
| 78 | 
            -
                    if ( | 
| 79 | 
            -
                    if ( | 
| 144 | 
            +
                    if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
         | 
| 145 | 
            +
                    if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
         | 
| 80 146 | 
             
                }
         | 
| 81 147 |  | 
| 82 | 
            -
                /*  | 
| 83 | 
            -
             | 
| 84 | 
            -
             | 
| 85 | 
            -
             | 
| 86 | 
            -
             | 
| 87 | 
            -
             | 
| 88 | 
            -
                 | 
| 89 | 
            -
             | 
| 90 | 
            -
                 | 
| 91 | 
            -
             | 
| 92 | 
            -
             | 
| 93 | 
            -
             | 
| 94 | 
            -
                     | 
| 95 | 
            -
             | 
| 96 | 
            -
             | 
| 97 | 
            -
             | 
| 98 | 
            -
             | 
| 99 | 
            -
             | 
| 100 | 
            -
             | 
| 101 | 
            -
             | 
| 102 | 
            -
             | 
| 103 | 
            -
                     | 
| 104 | 
            -
                    U32  | 
| 105 | 
            -
             | 
| 106 | 
            -
             | 
| 107 | 
            -
                     | 
| 108 | 
            -
             | 
| 109 | 
            -
             | 
| 110 | 
            -
                     | 
| 111 | 
            -
                     | 
| 112 | 
            -
             | 
| 113 | 
            -
             | 
| 114 | 
            -
             | 
| 115 | 
            -
             | 
| 116 | 
            -
                         | 
| 117 | 
            -
                         | 
| 118 | 
            -
                        match0 = repMatch - mLength;
         | 
| 148 | 
            +
                /* start each op */
         | 
| 149 | 
            +
            _start: /* Requires: ip0 */
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                step = stepSize;
         | 
| 152 | 
            +
                nextStep = ip0 + kStepIncr;
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                /* calculate positions, ip0 - anchor == 0, so we skip step calc */
         | 
| 155 | 
            +
                ip1 = ip0 + 1;
         | 
| 156 | 
            +
                ip2 = ip0 + step;
         | 
| 157 | 
            +
                ip3 = ip2 + 1;
         | 
| 158 | 
            +
             | 
| 159 | 
            +
                if (ip3 >= ilimit) {
         | 
| 160 | 
            +
                    goto _cleanup;
         | 
| 161 | 
            +
                }
         | 
| 162 | 
            +
             | 
| 163 | 
            +
                hash0 = ZSTD_hashPtr(ip0, hlog, mls);
         | 
| 164 | 
            +
                hash1 = ZSTD_hashPtr(ip1, hlog, mls);
         | 
| 165 | 
            +
             | 
| 166 | 
            +
                idx = hashTable[hash0];
         | 
| 167 | 
            +
             | 
| 168 | 
            +
                do {
         | 
| 169 | 
            +
                    /* load repcode match for ip[2]*/
         | 
| 170 | 
            +
                    const U32 rval = MEM_read32(ip2 - rep_offset1);
         | 
| 171 | 
            +
             | 
| 172 | 
            +
                    /* write back hash table entry */
         | 
| 173 | 
            +
                    current0 = (U32)(ip0 - base);
         | 
| 174 | 
            +
                    hashTable[hash0] = current0;
         | 
| 175 | 
            +
             | 
| 176 | 
            +
                    /* check repcode at ip[2] */
         | 
| 177 | 
            +
                    if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
         | 
| 178 | 
            +
                        ip0 = ip2;
         | 
| 179 | 
            +
                        match0 = ip0 - rep_offset1;
         | 
| 180 | 
            +
                        mLength = ip0[-1] == match0[-1];
         | 
| 181 | 
            +
                        ip0 -= mLength;
         | 
| 182 | 
            +
                        match0 -= mLength;
         | 
| 183 | 
            +
                        offcode = STORE_REPCODE_1;
         | 
| 119 184 | 
             
                        mLength += 4;
         | 
| 120 | 
            -
                        offcode = 0;
         | 
| 121 185 | 
             
                        goto _match;
         | 
| 122 186 | 
             
                    }
         | 
| 123 | 
            -
             | 
| 124 | 
            -
             | 
| 125 | 
            -
             | 
| 187 | 
            +
             | 
| 188 | 
            +
                    /* load match for ip[0] */
         | 
| 189 | 
            +
                    if (idx >= prefixStartIndex) {
         | 
| 190 | 
            +
                        mval = MEM_read32(base + idx);
         | 
| 191 | 
            +
                    } else {
         | 
| 192 | 
            +
                        mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
         | 
| 126 193 | 
             
                    }
         | 
| 127 | 
            -
             | 
| 128 | 
            -
             | 
| 129 | 
            -
             | 
| 130 | 
            -
                         | 
| 194 | 
            +
             | 
| 195 | 
            +
                    /* check match at ip[0] */
         | 
| 196 | 
            +
                    if (MEM_read32(ip0) == mval) {
         | 
| 197 | 
            +
                        /* found a match! */
         | 
| 131 198 | 
             
                        goto _offset;
         | 
| 132 199 | 
             
                    }
         | 
| 133 | 
            -
             | 
| 134 | 
            -
             | 
| 135 | 
            -
             | 
| 136 | 
            -
             | 
| 137 | 
            -
             | 
| 200 | 
            +
             | 
| 201 | 
            +
                    /* lookup ip[1] */
         | 
| 202 | 
            +
                    idx = hashTable[hash1];
         | 
| 203 | 
            +
             | 
| 204 | 
            +
                    /* hash ip[2] */
         | 
| 205 | 
            +
                    hash0 = hash1;
         | 
| 206 | 
            +
                    hash1 = ZSTD_hashPtr(ip2, hlog, mls);
         | 
| 207 | 
            +
             | 
| 208 | 
            +
                    /* advance to next positions */
         | 
| 209 | 
            +
                    ip0 = ip1;
         | 
| 210 | 
            +
                    ip1 = ip2;
         | 
| 211 | 
            +
                    ip2 = ip3;
         | 
| 212 | 
            +
             | 
| 213 | 
            +
                    /* write back hash table entry */
         | 
| 214 | 
            +
                    current0 = (U32)(ip0 - base);
         | 
| 215 | 
            +
                    hashTable[hash0] = current0;
         | 
| 216 | 
            +
             | 
| 217 | 
            +
                    /* load match for ip[0] */
         | 
| 218 | 
            +
                    if (idx >= prefixStartIndex) {
         | 
| 219 | 
            +
                        mval = MEM_read32(base + idx);
         | 
| 220 | 
            +
                    } else {
         | 
| 221 | 
            +
                        mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
         | 
| 138 222 | 
             
                    }
         | 
| 139 | 
            -
            _offset: /* Requires: ip0, match0 */
         | 
| 140 | 
            -
                    /* Compute the offset code */
         | 
| 141 | 
            -
                    offset_2 = offset_1;
         | 
| 142 | 
            -
                    offset_1 = (U32)(ip0-match0);
         | 
| 143 | 
            -
                    offcode = offset_1 + ZSTD_REP_MOVE;
         | 
| 144 | 
            -
                    mLength = 4;
         | 
| 145 | 
            -
                    /* Count the backwards match length */
         | 
| 146 | 
            -
                    while (((ip0>anchor) & (match0>prefixStart))
         | 
| 147 | 
            -
                         && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
         | 
| 148 223 |  | 
| 149 | 
            -
             | 
| 150 | 
            -
                     | 
| 151 | 
            -
             | 
| 152 | 
            -
             | 
| 153 | 
            -
                     | 
| 154 | 
            -
                    ip0 += mLength;
         | 
| 155 | 
            -
                    anchor = ip0;
         | 
| 224 | 
            +
                    /* check match at ip[0] */
         | 
| 225 | 
            +
                    if (MEM_read32(ip0) == mval) {
         | 
| 226 | 
            +
                        /* found a match! */
         | 
| 227 | 
            +
                        goto _offset;
         | 
| 228 | 
            +
                    }
         | 
| 156 229 |  | 
| 157 | 
            -
                     | 
| 158 | 
            -
             | 
| 159 | 
            -
             | 
| 160 | 
            -
             | 
| 161 | 
            -
             | 
| 162 | 
            -
             | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
             | 
| 167 | 
            -
             | 
| 168 | 
            -
             | 
| 169 | 
            -
             | 
| 170 | 
            -
             | 
| 171 | 
            -
             | 
| 172 | 
            -
             | 
| 173 | 
            -
             | 
| 174 | 
            -
             | 
| 175 | 
            -
             | 
| 230 | 
            +
                    /* lookup ip[1] */
         | 
| 231 | 
            +
                    idx = hashTable[hash1];
         | 
| 232 | 
            +
             | 
| 233 | 
            +
                    /* hash ip[2] */
         | 
| 234 | 
            +
                    hash0 = hash1;
         | 
| 235 | 
            +
                    hash1 = ZSTD_hashPtr(ip2, hlog, mls);
         | 
| 236 | 
            +
             | 
| 237 | 
            +
                    /* advance to next positions */
         | 
| 238 | 
            +
                    ip0 = ip1;
         | 
| 239 | 
            +
                    ip1 = ip2;
         | 
| 240 | 
            +
                    ip2 = ip0 + step;
         | 
| 241 | 
            +
                    ip3 = ip1 + step;
         | 
| 242 | 
            +
             | 
| 243 | 
            +
                    /* calculate step */
         | 
| 244 | 
            +
                    if (ip2 >= nextStep) {
         | 
| 245 | 
            +
                        step++;
         | 
| 246 | 
            +
                        PREFETCH_L1(ip1 + 64);
         | 
| 247 | 
            +
                        PREFETCH_L1(ip1 + 128);
         | 
| 248 | 
            +
                        nextStep += kStepIncr;
         | 
| 249 | 
            +
                    }
         | 
| 250 | 
            +
                } while (ip3 < ilimit);
         | 
| 251 | 
            +
             | 
| 252 | 
            +
            _cleanup:
         | 
| 253 | 
            +
                /* Note that there are probably still a couple positions we could search.
         | 
| 254 | 
            +
                 * However, it seems to be a meaningful performance hit to try to search
         | 
| 255 | 
            +
                 * them. So let's not. */
         | 
| 176 256 |  | 
| 177 257 | 
             
                /* save reps for next block */
         | 
| 178 | 
            -
                rep[0] =  | 
| 179 | 
            -
                rep[1] =  | 
| 258 | 
            +
                rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
         | 
| 259 | 
            +
                rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
         | 
| 180 260 |  | 
| 181 261 | 
             
                /* Return the last literals size */
         | 
| 182 262 | 
             
                return (size_t)(iend - anchor);
         | 
| 263 | 
            +
             | 
| 264 | 
            +
            _offset: /* Requires: ip0, idx */
         | 
| 265 | 
            +
             | 
| 266 | 
            +
                /* Compute the offset code. */
         | 
| 267 | 
            +
                match0 = base + idx;
         | 
| 268 | 
            +
                rep_offset2 = rep_offset1;
         | 
| 269 | 
            +
                rep_offset1 = (U32)(ip0-match0);
         | 
| 270 | 
            +
                offcode = STORE_OFFSET(rep_offset1);
         | 
| 271 | 
            +
                mLength = 4;
         | 
| 272 | 
            +
             | 
| 273 | 
            +
                /* Count the backwards match length. */
         | 
| 274 | 
            +
                while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
         | 
| 275 | 
            +
                    ip0--;
         | 
| 276 | 
            +
                    match0--;
         | 
| 277 | 
            +
                    mLength++;
         | 
| 278 | 
            +
                }
         | 
| 279 | 
            +
             | 
| 280 | 
            +
            _match: /* Requires: ip0, match0, offcode */
         | 
| 281 | 
            +
             | 
| 282 | 
            +
                /* Count the forward length. */
         | 
| 283 | 
            +
                mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
         | 
| 284 | 
            +
             | 
| 285 | 
            +
                ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
         | 
| 286 | 
            +
             | 
| 287 | 
            +
                ip0 += mLength;
         | 
| 288 | 
            +
                anchor = ip0;
         | 
| 289 | 
            +
             | 
| 290 | 
            +
                /* write next hash table entry */
         | 
| 291 | 
            +
                if (ip1 < ip0) {
         | 
| 292 | 
            +
                    hashTable[hash1] = (U32)(ip1 - base);
         | 
| 293 | 
            +
                }
         | 
| 294 | 
            +
             | 
| 295 | 
            +
                /* Fill table and check for immediate repcode. */
         | 
| 296 | 
            +
                if (ip0 <= ilimit) {
         | 
| 297 | 
            +
                    /* Fill Table */
         | 
| 298 | 
            +
                    assert(base+current0+2 > istart);  /* check base overflow */
         | 
| 299 | 
            +
                    hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
         | 
| 300 | 
            +
                    hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
         | 
| 301 | 
            +
             | 
| 302 | 
            +
                    if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
         | 
| 303 | 
            +
                        while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
         | 
| 304 | 
            +
                            /* store sequence */
         | 
| 305 | 
            +
                            size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
         | 
| 306 | 
            +
                            { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
         | 
| 307 | 
            +
                            hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
         | 
| 308 | 
            +
                            ip0 += rLength;
         | 
| 309 | 
            +
                            ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
         | 
| 310 | 
            +
                            anchor = ip0;
         | 
| 311 | 
            +
                            continue;   /* faster when present (confirmed on gcc-8) ... (?) */
         | 
| 312 | 
            +
                }   }   }
         | 
| 313 | 
            +
             | 
| 314 | 
            +
                goto _start;
         | 
| 183 315 | 
             
            }
         | 
| 184 316 |  | 
| 317 | 
            +
            #define ZSTD_GEN_FAST_FN(dictMode, mls, step)                                                            \
         | 
| 318 | 
            +
                static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step(                                      \
         | 
| 319 | 
            +
                        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                    \
         | 
| 320 | 
            +
                        void const* src, size_t srcSize)                                                       \
         | 
| 321 | 
            +
                {                                                                                              \
         | 
| 322 | 
            +
                    return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
         | 
| 323 | 
            +
                }
         | 
| 324 | 
            +
             | 
| 325 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 4, 1)
         | 
| 326 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 5, 1)
         | 
| 327 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 6, 1)
         | 
| 328 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 7, 1)
         | 
| 329 | 
            +
             | 
| 330 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 4, 0)
         | 
| 331 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 5, 0)
         | 
| 332 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 6, 0)
         | 
| 333 | 
            +
            ZSTD_GEN_FAST_FN(noDict, 7, 0)
         | 
| 185 334 |  | 
| 186 335 | 
             
            size_t ZSTD_compressBlock_fast(
         | 
| 187 336 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| @@ -189,24 +338,40 @@ size_t ZSTD_compressBlock_fast( | |
| 189 338 | 
             
            {
         | 
| 190 339 | 
             
                U32 const mls = ms->cParams.minMatch;
         | 
| 191 340 | 
             
                assert(ms->dictMatchState == NULL);
         | 
| 192 | 
            -
                 | 
| 193 | 
            -
             | 
| 194 | 
            -
             | 
| 195 | 
            -
             | 
| 196 | 
            -
                     | 
| 197 | 
            -
             | 
| 198 | 
            -
                     | 
| 199 | 
            -
             | 
| 200 | 
            -
                     | 
| 201 | 
            -
             | 
| 202 | 
            -
                     | 
| 341 | 
            +
                if (ms->cParams.targetLength > 1) {
         | 
| 342 | 
            +
                    switch(mls)
         | 
| 343 | 
            +
                    {
         | 
| 344 | 
            +
                    default: /* includes case 3 */
         | 
| 345 | 
            +
                    case 4 :
         | 
| 346 | 
            +
                        return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
         | 
| 347 | 
            +
                    case 5 :
         | 
| 348 | 
            +
                        return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
         | 
| 349 | 
            +
                    case 6 :
         | 
| 350 | 
            +
                        return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
         | 
| 351 | 
            +
                    case 7 :
         | 
| 352 | 
            +
                        return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
         | 
| 353 | 
            +
                    }
         | 
| 354 | 
            +
                } else {
         | 
| 355 | 
            +
                    switch(mls)
         | 
| 356 | 
            +
                    {
         | 
| 357 | 
            +
                    default: /* includes case 3 */
         | 
| 358 | 
            +
                    case 4 :
         | 
| 359 | 
            +
                        return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
         | 
| 360 | 
            +
                    case 5 :
         | 
| 361 | 
            +
                        return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
         | 
| 362 | 
            +
                    case 6 :
         | 
| 363 | 
            +
                        return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
         | 
| 364 | 
            +
                    case 7 :
         | 
| 365 | 
            +
                        return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
         | 
| 366 | 
            +
                    }
         | 
| 367 | 
            +
             | 
| 203 368 | 
             
                }
         | 
| 204 369 | 
             
            }
         | 
| 205 370 |  | 
| 206 371 | 
             
            FORCE_INLINE_TEMPLATE
         | 
| 207 372 | 
             
            size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         | 
| 208 373 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| 209 | 
            -
                    void const* src, size_t srcSize, U32 const mls)
         | 
| 374 | 
            +
                    void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
         | 
| 210 375 | 
             
            {
         | 
| 211 376 | 
             
                const ZSTD_compressionParameters* const cParams = &ms->cParams;
         | 
| 212 377 | 
             
                U32* const hashTable = ms->hashTable;
         | 
| @@ -242,6 +407,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 242 407 | 
             
                assert(endIndex - prefixStartIndex <= maxDistance);
         | 
| 243 408 | 
             
                (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
         | 
| 244 409 |  | 
| 410 | 
            +
                (void)hasStep; /* not currently specialized on whether it's accelerated */
         | 
| 411 | 
            +
             | 
| 245 412 | 
             
                /* ensure there will be no underflow
         | 
| 246 413 | 
             
                 * when translating a dict index into a local index */
         | 
| 247 414 | 
             
                assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
         | 
| @@ -272,7 +439,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 272 439 | 
             
                        const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
         | 
| 273 440 | 
             
                        mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
         | 
| 274 441 | 
             
                        ip++;
         | 
| 275 | 
            -
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,  | 
| 442 | 
            +
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
         | 
| 276 443 | 
             
                    } else if ( (matchIndex <= prefixStartIndex) ) {
         | 
| 277 444 | 
             
                        size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
         | 
| 278 445 | 
             
                        U32 const dictMatchIndex = dictHashTable[dictHash];
         | 
| @@ -292,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 292 459 | 
             
                            } /* catch up */
         | 
| 293 460 | 
             
                            offset_2 = offset_1;
         | 
| 294 461 | 
             
                            offset_1 = offset;
         | 
| 295 | 
            -
                            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset | 
| 462 | 
            +
                            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
         | 
| 296 463 | 
             
                        }
         | 
| 297 464 | 
             
                    } else if (MEM_read32(match) != MEM_read32(ip)) {
         | 
| 298 465 | 
             
                        /* it's not a match, and we're not going to check the dictionary */
         | 
| @@ -307,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 307 474 | 
             
                             && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
         | 
| 308 475 | 
             
                        offset_2 = offset_1;
         | 
| 309 476 | 
             
                        offset_1 = offset;
         | 
| 310 | 
            -
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset | 
| 477 | 
            +
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
         | 
| 311 478 | 
             
                    }
         | 
| 312 479 |  | 
| 313 480 | 
             
                    /* match found */
         | 
| @@ -332,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 332 499 | 
             
                                const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
         | 
| 333 500 | 
             
                                size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
         | 
| 334 501 | 
             
                                U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
         | 
| 335 | 
            -
                                ZSTD_storeSeq(seqStore, 0, anchor, iend,  | 
| 502 | 
            +
                                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
         | 
| 336 503 | 
             
                                hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
         | 
| 337 504 | 
             
                                ip += repLength2;
         | 
| 338 505 | 
             
                                anchor = ip;
         | 
| @@ -351,6 +518,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
| 351 518 | 
             
                return (size_t)(iend - anchor);
         | 
| 352 519 | 
             
            }
         | 
| 353 520 |  | 
| 521 | 
            +
             | 
| 522 | 
            +
            ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
         | 
| 523 | 
            +
            ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
         | 
| 524 | 
            +
            ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
         | 
| 525 | 
            +
            ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
         | 
| 526 | 
            +
             | 
| 354 527 | 
             
            size_t ZSTD_compressBlock_fast_dictMatchState(
         | 
| 355 528 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| 356 529 | 
             
                    void const* src, size_t srcSize)
         | 
| @@ -361,20 +534,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState( | |
| 361 534 | 
             
                {
         | 
| 362 535 | 
             
                default: /* includes case 3 */
         | 
| 363 536 | 
             
                case 4 :
         | 
| 364 | 
            -
                    return  | 
| 537 | 
            +
                    return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
         | 
| 365 538 | 
             
                case 5 :
         | 
| 366 | 
            -
                    return  | 
| 539 | 
            +
                    return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
         | 
| 367 540 | 
             
                case 6 :
         | 
| 368 | 
            -
                    return  | 
| 541 | 
            +
                    return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
         | 
| 369 542 | 
             
                case 7 :
         | 
| 370 | 
            -
                    return  | 
| 543 | 
            +
                    return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
         | 
| 371 544 | 
             
                }
         | 
| 372 545 | 
             
            }
         | 
| 373 546 |  | 
| 374 547 |  | 
| 375 548 | 
             
            static size_t ZSTD_compressBlock_fast_extDict_generic(
         | 
| 376 549 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| 377 | 
            -
                    void const* src, size_t srcSize, U32 const mls)
         | 
| 550 | 
            +
                    void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
         | 
| 378 551 | 
             
            {
         | 
| 379 552 | 
             
                const ZSTD_compressionParameters* const cParams = &ms->cParams;
         | 
| 380 553 | 
             
                U32* const hashTable = ms->hashTable;
         | 
| @@ -398,11 +571,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( | |
| 398 571 | 
             
                const BYTE* const ilimit = iend - 8;
         | 
| 399 572 | 
             
                U32 offset_1=rep[0], offset_2=rep[1];
         | 
| 400 573 |  | 
| 574 | 
            +
                (void)hasStep; /* not currently specialized on whether it's accelerated */
         | 
| 575 | 
            +
             | 
| 401 576 | 
             
                DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
         | 
| 402 577 |  | 
| 403 578 | 
             
                /* switch to "regular" variant if extDict is invalidated due to maxDistance */
         | 
| 404 579 | 
             
                if (prefixStartIndex == dictStartIndex)
         | 
| 405 | 
            -
                    return  | 
| 580 | 
            +
                    return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
         | 
| 406 581 |  | 
| 407 582 | 
             
                /* Search Loop */
         | 
| 408 583 | 
             
                while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
         | 
| @@ -418,12 +593,12 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( | |
| 418 593 | 
             
                    DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
         | 
| 419 594 |  | 
| 420 595 | 
             
                    if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
         | 
| 421 | 
            -
                         & (offset_1  | 
| 596 | 
            +
                         & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
         | 
| 422 597 | 
             
                       && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
         | 
| 423 598 | 
             
                        const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
         | 
| 424 599 | 
             
                        size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
         | 
| 425 600 | 
             
                        ip++;
         | 
| 426 | 
            -
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend,  | 
| 601 | 
            +
                        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
         | 
| 427 602 | 
             
                        ip += rLength;
         | 
| 428 603 | 
             
                        anchor = ip;
         | 
| 429 604 | 
             
                    } else {
         | 
| @@ -439,7 +614,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( | |
| 439 614 | 
             
                            size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
         | 
| 440 615 | 
             
                            while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
         | 
| 441 616 | 
             
                            offset_2 = offset_1; offset_1 = offset;  /* update offset history */
         | 
| 442 | 
            -
                            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset | 
| 617 | 
            +
                            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
         | 
| 443 618 | 
             
                            ip += mLength;
         | 
| 444 619 | 
             
                            anchor = ip;
         | 
| 445 620 | 
             
                    }   }
         | 
| @@ -453,12 +628,12 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( | |
| 453 628 | 
             
                            U32 const current2 = (U32)(ip-base);
         | 
| 454 629 | 
             
                            U32 const repIndex2 = current2 - offset_2;
         | 
| 455 630 | 
             
                            const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
         | 
| 456 | 
            -
                            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2  | 
| 631 | 
            +
                            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
         | 
| 457 632 | 
             
                               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
         | 
| 458 633 | 
             
                                const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
         | 
| 459 634 | 
             
                                size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
         | 
| 460 635 | 
             
                                { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
         | 
| 461 | 
            -
                                ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend,  | 
| 636 | 
            +
                                ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
         | 
| 462 637 | 
             
                                hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
         | 
| 463 638 | 
             
                                ip += repLength2;
         | 
| 464 639 | 
             
                                anchor = ip;
         | 
| @@ -475,6 +650,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( | |
| 475 650 | 
             
                return (size_t)(iend - anchor);
         | 
| 476 651 | 
             
            }
         | 
| 477 652 |  | 
| 653 | 
            +
            ZSTD_GEN_FAST_FN(extDict, 4, 0)
         | 
| 654 | 
            +
            ZSTD_GEN_FAST_FN(extDict, 5, 0)
         | 
| 655 | 
            +
            ZSTD_GEN_FAST_FN(extDict, 6, 0)
         | 
| 656 | 
            +
            ZSTD_GEN_FAST_FN(extDict, 7, 0)
         | 
| 478 657 |  | 
| 479 658 | 
             
            size_t ZSTD_compressBlock_fast_extDict(
         | 
| 480 659 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| @@ -485,12 +664,12 @@ size_t ZSTD_compressBlock_fast_extDict( | |
| 485 664 | 
             
                {
         | 
| 486 665 | 
             
                default: /* includes case 3 */
         | 
| 487 666 | 
             
                case 4 :
         | 
| 488 | 
            -
                    return  | 
| 667 | 
            +
                    return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
         | 
| 489 668 | 
             
                case 5 :
         | 
| 490 | 
            -
                    return  | 
| 669 | 
            +
                    return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
         | 
| 491 670 | 
             
                case 6 :
         | 
| 492 | 
            -
                    return  | 
| 671 | 
            +
                    return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
         | 
| 493 672 | 
             
                case 7 :
         | 
| 494 | 
            -
                    return  | 
| 673 | 
            +
                    return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
         | 
| 495 674 | 
             
                }
         | 
| 496 675 | 
             
            }
         | 
| 
            File without changes
         |