zstdlib 0.7.0-x64-mingw32 → 0.10.0-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +20 -0
- data/README.md +7 -1
- data/Rakefile +38 -8
- data/ext/{zstdlib → zstdlib_c}/extconf.rb +11 -6
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
- data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/adler32.c +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/compress.c +0 -0
- data/ext/zstdlib_c/zlib-1.2.12/crc32.c +1116 -0
- data/ext/zstdlib_c/zlib-1.2.12/crc32.h +9446 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/deflate.c +78 -30
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/deflate.h +12 -15
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzclose.c +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzguts.h +3 -2
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzlib.c +5 -3
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzread.c +5 -7
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzwrite.c +25 -13
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/infback.c +2 -1
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffast.c +14 -14
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffast.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffixed.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inflate.c +39 -8
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inflate.h +3 -2
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inftrees.c +3 -3
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inftrees.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/trees.c +27 -48
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/trees.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/uncompr.c +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zconf.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zlib.h +123 -100
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zutil.c +2 -2
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zutil.h +12 -9
- data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +46 -22
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/compiler.h +335 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +1 -3
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/debug.c +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +12 -19
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/entropy_common.c +368 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +2 -1
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/error_private.h +159 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +41 -12
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +139 -22
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +47 -23
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +87 -98
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +34 -23
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +4 -4
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +6 -5
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +0 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +10 -10
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_deps.h +111 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_internal.h +191 -145
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_trace.h +163 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +89 -46
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +27 -29
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +2 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/huf_compress.c +1370 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress.c +2917 -868
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +458 -125
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +12 -11
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +4 -2
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +41 -18
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.c +26 -298
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +234 -83
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +313 -138
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +329 -150
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +1 -1
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.c +2104 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.h +125 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +321 -216
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +9 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_ldm_geartab.h +106 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +412 -166
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +169 -453
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstdmt_compress.h +113 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/huf_decompress.c +1044 -403
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +9 -9
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +2 -2
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +450 -105
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.c +913 -273
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +14 -5
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +59 -12
- data/ext/zstdlib_c/zstd-1.5.2/lib/zdict.h +452 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +699 -214
- data/ext/{zstdlib/zstd-1.4.5/lib/common → zstdlib_c/zstd-1.5.2/lib}/zstd_errors.h +2 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +133 -44
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/ext/zstdlib_c/zstd.mk +15 -0
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- data/lib/zstdlib.rb +2 -2
- metadata +124 -116
- data/ext/zstdlib/zlib-1.2.11/crc32.c +0 -442
- data/ext/zstdlib/zlib-1.2.11/crc32.h +0 -441
- data/ext/zstdlib/zstd-1.4.5/lib/common/compiler.h +0 -175
- data/ext/zstdlib/zstd-1.4.5/lib/common/entropy_common.c +0 -216
- data/ext/zstdlib/zstd-1.4.5/lib/common/error_private.h +0 -80
- data/ext/zstdlib/zstd-1.4.5/lib/common/xxhash.c +0 -864
- data/ext/zstdlib/zstd-1.4.5/lib/common/xxhash.h +0 -285
- data/ext/zstdlib/zstd-1.4.5/lib/compress/huf_compress.c +0 -798
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.c +0 -1138
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.h +0 -67
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstdmt_compress.h +0 -192
- data/ext/zstdlib/zstd.mk +0 -14
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
| @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            /*
         | 
| 2 | 
            -
             * Copyright (c)  | 
| 2 | 
            +
             * Copyright (c) Yann Collet, Facebook, Inc.
         | 
| 3 3 | 
             
             * All rights reserved.
         | 
| 4 4 | 
             
             *
         | 
| 5 5 | 
             
             * This source code is licensed under both the BSD-style license (found in the
         | 
| @@ -14,7 +14,7 @@ | |
| 14 14 | 
             
            /*-*******************************************************
         | 
| 15 15 | 
             
            *  Dependencies
         | 
| 16 16 | 
             
            *********************************************************/
         | 
| 17 | 
            -
            #include  | 
| 17 | 
            +
            #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
         | 
| 18 18 | 
             
            #include "../common/compiler.h"    /* prefetch */
         | 
| 19 19 | 
             
            #include "../common/cpu.h"         /* bmi2 */
         | 
| 20 20 | 
             
            #include "../common/mem.h"         /* low level memory routines */
         | 
| @@ -44,7 +44,7 @@ | |
| 44 44 | 
             
            /*_*******************************************************
         | 
| 45 45 | 
             
            *  Memory operations
         | 
| 46 46 | 
             
            **********************************************************/
         | 
| 47 | 
            -
            static void ZSTD_copy4(void* dst, const void* src) {  | 
| 47 | 
            +
            static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
         | 
| 48 48 |  | 
| 49 49 |  | 
| 50 50 | 
             
            /*-*************************************************************
         | 
| @@ -69,15 +69,56 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, | |
| 69 69 | 
             
                }
         | 
| 70 70 | 
             
            }
         | 
| 71 71 |  | 
| 72 | 
            +
            /* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
         | 
| 73 | 
            +
            static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
         | 
| 74 | 
            +
                const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
         | 
| 75 | 
            +
            {
         | 
| 76 | 
            +
                if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
         | 
| 77 | 
            +
                {
         | 
| 78 | 
            +
                    /* room for litbuffer to fit without read faulting */
         | 
| 79 | 
            +
                    dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
         | 
| 80 | 
            +
                    dctx->litBufferEnd = dctx->litBuffer + litSize;
         | 
| 81 | 
            +
                    dctx->litBufferLocation = ZSTD_in_dst;
         | 
| 82 | 
            +
                }
         | 
| 83 | 
            +
                else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
         | 
| 84 | 
            +
                {
         | 
| 85 | 
            +
                    /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
         | 
| 86 | 
            +
                    if (splitImmediately) {
         | 
| 87 | 
            +
                        /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
         | 
| 88 | 
            +
                        dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
         | 
| 89 | 
            +
                        dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
         | 
| 90 | 
            +
                    }
         | 
| 91 | 
            +
                    else {
         | 
| 92 | 
            +
                        /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
         | 
| 93 | 
            +
                        dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
         | 
| 94 | 
            +
                        dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
         | 
| 95 | 
            +
                    }
         | 
| 96 | 
            +
                    dctx->litBufferLocation = ZSTD_split;
         | 
| 97 | 
            +
                }
         | 
| 98 | 
            +
                else
         | 
| 99 | 
            +
                {
         | 
| 100 | 
            +
                    /* fits entirely within litExtraBuffer, so no split is necessary */
         | 
| 101 | 
            +
                    dctx->litBuffer = dctx->litExtraBuffer;
         | 
| 102 | 
            +
                    dctx->litBufferEnd = dctx->litBuffer + litSize;
         | 
| 103 | 
            +
                    dctx->litBufferLocation = ZSTD_not_in_dst;
         | 
| 104 | 
            +
                }
         | 
| 105 | 
            +
            }
         | 
| 72 106 |  | 
| 73 107 | 
             
            /* Hidden declaration for fullbench */
         | 
| 74 108 | 
             
            size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         | 
| 75 | 
            -
                                      const void* src, size_t srcSize | 
| 109 | 
            +
                                      const void* src, size_t srcSize,
         | 
| 110 | 
            +
                                      void* dst, size_t dstCapacity, const streaming_operation streaming);
         | 
| 76 111 | 
             
            /*! ZSTD_decodeLiteralsBlock() :
         | 
| 112 | 
            +
             * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
         | 
| 113 | 
            +
             * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
         | 
| 114 | 
            +
             * block will be output.  Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
         | 
| 115 | 
            +
             * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
         | 
| 116 | 
            +
             *
         | 
| 77 117 | 
             
             * @return : nb of bytes read from src (< srcSize )
         | 
| 78 118 | 
             
             *  note : symbol not declared but exposed for fullbench */
         | 
| 79 119 | 
             
            size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         | 
| 80 | 
            -
                                      const void* src, size_t srcSize | 
| 120 | 
            +
                                      const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
         | 
| 121 | 
            +
                                      void* dst, size_t dstCapacity, const streaming_operation streaming)
         | 
| 81 122 | 
             
            {
         | 
| 82 123 | 
             
                DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
         | 
| 83 124 | 
             
                RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
         | 
| @@ -90,7 +131,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, | |
| 90 131 | 
             
                    case set_repeat:
         | 
| 91 132 | 
             
                        DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
         | 
| 92 133 | 
             
                        RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
         | 
| 93 | 
            -
                         | 
| 134 | 
            +
                        ZSTD_FALLTHROUGH;
         | 
| 94 135 |  | 
| 95 136 | 
             
                    case set_compressed:
         | 
| 96 137 | 
             
                        RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
         | 
| @@ -99,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, | |
| 99 140 | 
             
                            U32 const lhlCode = (istart[0] >> 2) & 3;
         | 
| 100 141 | 
             
                            U32 const lhc = MEM_readLE32(istart);
         | 
| 101 142 | 
             
                            size_t hufSuccess;
         | 
| 143 | 
            +
                            size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
         | 
| 102 144 | 
             
                            switch(lhlCode)
         | 
| 103 145 | 
             
                            {
         | 
| 104 146 | 
             
                            case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
         | 
| @@ -121,8 +163,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, | |
| 121 163 | 
             
                                litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
         | 
| 122 164 | 
             
                                break;
         | 
| 123 165 | 
             
                            }
         | 
| 166 | 
            +
                            RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
         | 
| 124 167 | 
             
                            RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
         | 
| 125 168 | 
             
                            RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
         | 
| 169 | 
            +
                            RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
         | 
| 170 | 
            +
                            ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
         | 
| 126 171 |  | 
| 127 172 | 
             
                            /* prefetch huffman table if cold */
         | 
| 128 173 | 
             
                            if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
         | 
| @@ -133,11 +178,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, | |
| 133 178 | 
             
                                if (singleStream) {
         | 
| 134 179 | 
             
                                    hufSuccess = HUF_decompress1X_usingDTable_bmi2(
         | 
| 135 180 | 
             
                                        dctx->litBuffer, litSize, istart+lhSize, litCSize,
         | 
| 136 | 
            -
                                        dctx->HUFptr, dctx | 
| 181 | 
            +
                                        dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
         | 
| 137 182 | 
             
                                } else {
         | 
| 138 183 | 
             
                                    hufSuccess = HUF_decompress4X_usingDTable_bmi2(
         | 
| 139 184 | 
             
                                        dctx->litBuffer, litSize, istart+lhSize, litCSize,
         | 
| 140 | 
            -
                                        dctx->HUFptr, dctx | 
| 185 | 
            +
                                        dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
         | 
| 141 186 | 
             
                                }
         | 
| 142 187 | 
             
                            } else {
         | 
| 143 188 | 
             
                                if (singleStream) {
         | 
| @@ -150,15 +195,22 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, | |
| 150 195 | 
             
                                    hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
         | 
| 151 196 | 
             
                                        dctx->entropy.hufTable, dctx->litBuffer, litSize,
         | 
| 152 197 | 
             
                                        istart+lhSize, litCSize, dctx->workspace,
         | 
| 153 | 
            -
                                        sizeof(dctx->workspace), dctx | 
| 198 | 
            +
                                        sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
         | 
| 154 199 | 
             
            #endif
         | 
| 155 200 | 
             
                                } else {
         | 
| 156 201 | 
             
                                    hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
         | 
| 157 202 | 
             
                                        dctx->entropy.hufTable, dctx->litBuffer, litSize,
         | 
| 158 203 | 
             
                                        istart+lhSize, litCSize, dctx->workspace,
         | 
| 159 | 
            -
                                        sizeof(dctx->workspace), dctx | 
| 204 | 
            +
                                        sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
         | 
| 160 205 | 
             
                                }
         | 
| 161 206 | 
             
                            }
         | 
| 207 | 
            +
                            if (dctx->litBufferLocation == ZSTD_split)
         | 
| 208 | 
            +
                            {
         | 
| 209 | 
            +
                                ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
         | 
| 210 | 
            +
                                ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
         | 
| 211 | 
            +
                                dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
         | 
| 212 | 
            +
                                dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
         | 
| 213 | 
            +
                            }
         | 
| 162 214 |  | 
| 163 215 | 
             
                            RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
         | 
| 164 216 |  | 
| @@ -166,13 +218,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, | |
| 166 218 | 
             
                            dctx->litSize = litSize;
         | 
| 167 219 | 
             
                            dctx->litEntropy = 1;
         | 
| 168 220 | 
             
                            if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
         | 
| 169 | 
            -
                            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
         | 
| 170 221 | 
             
                            return litCSize + lhSize;
         | 
| 171 222 | 
             
                        }
         | 
| 172 223 |  | 
| 173 224 | 
             
                    case set_basic:
         | 
| 174 225 | 
             
                        {   size_t litSize, lhSize;
         | 
| 175 226 | 
             
                            U32 const lhlCode = ((istart[0]) >> 2) & 3;
         | 
| 227 | 
            +
                            size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
         | 
| 176 228 | 
             
                            switch(lhlCode)
         | 
| 177 229 | 
             
                            {
         | 
| 178 230 | 
             
                            case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
         | 
| @@ -189,23 +241,36 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, | |
| 189 241 | 
             
                                break;
         | 
| 190 242 | 
             
                            }
         | 
| 191 243 |  | 
| 244 | 
            +
                            RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
         | 
| 245 | 
            +
                            RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
         | 
| 246 | 
            +
                            ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
         | 
| 192 247 | 
             
                            if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
         | 
| 193 248 | 
             
                                RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
         | 
| 194 | 
            -
                                 | 
| 249 | 
            +
                                if (dctx->litBufferLocation == ZSTD_split)
         | 
| 250 | 
            +
                                {
         | 
| 251 | 
            +
                                    ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
         | 
| 252 | 
            +
                                    ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
         | 
| 253 | 
            +
                                }
         | 
| 254 | 
            +
                                else
         | 
| 255 | 
            +
                                {
         | 
| 256 | 
            +
                                    ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
         | 
| 257 | 
            +
                                }
         | 
| 195 258 | 
             
                                dctx->litPtr = dctx->litBuffer;
         | 
| 196 259 | 
             
                                dctx->litSize = litSize;
         | 
| 197 | 
            -
                                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
         | 
| 198 260 | 
             
                                return lhSize+litSize;
         | 
| 199 261 | 
             
                            }
         | 
| 200 262 | 
             
                            /* direct reference into compressed stream */
         | 
| 201 263 | 
             
                            dctx->litPtr = istart+lhSize;
         | 
| 202 264 | 
             
                            dctx->litSize = litSize;
         | 
| 265 | 
            +
                            dctx->litBufferEnd = dctx->litPtr + litSize;
         | 
| 266 | 
            +
                            dctx->litBufferLocation = ZSTD_not_in_dst;
         | 
| 203 267 | 
             
                            return lhSize+litSize;
         | 
| 204 268 | 
             
                        }
         | 
| 205 269 |  | 
| 206 270 | 
             
                    case set_rle:
         | 
| 207 271 | 
             
                        {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
         | 
| 208 272 | 
             
                            size_t litSize, lhSize;
         | 
| 273 | 
            +
                            size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
         | 
| 209 274 | 
             
                            switch(lhlCode)
         | 
| 210 275 | 
             
                            {
         | 
| 211 276 | 
             
                            case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
         | 
| @@ -222,8 +287,19 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, | |
| 222 287 | 
             
                                RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
         | 
| 223 288 | 
             
                                break;
         | 
| 224 289 | 
             
                            }
         | 
| 290 | 
            +
                            RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
         | 
| 225 291 | 
             
                            RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
         | 
| 226 | 
            -
                             | 
| 292 | 
            +
                            RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
         | 
| 293 | 
            +
                            ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
         | 
| 294 | 
            +
                            if (dctx->litBufferLocation == ZSTD_split)
         | 
| 295 | 
            +
                            {
         | 
| 296 | 
            +
                                ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
         | 
| 297 | 
            +
                                ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
         | 
| 298 | 
            +
                            }
         | 
| 299 | 
            +
                            else
         | 
| 300 | 
            +
                            {
         | 
| 301 | 
            +
                                ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
         | 
| 302 | 
            +
                            }
         | 
| 227 303 | 
             
                            dctx->litPtr = dctx->litBuffer;
         | 
| 228 304 | 
             
                            dctx->litSize = litSize;
         | 
| 229 305 | 
             
                            return lhSize+1;
         | 
| @@ -236,7 +312,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, | |
| 236 312 |  | 
| 237 313 | 
             
            /* Default FSE distribution tables.
         | 
| 238 314 | 
             
             * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
         | 
| 239 | 
            -
             * https://github.com/facebook/zstd/blob/ | 
| 315 | 
            +
             * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
         | 
| 240 316 | 
             
             * They were generated programmatically with following method :
         | 
| 241 317 | 
             
             * - start from default distributions, present in /lib/common/zstd_internal.h
         | 
| 242 318 | 
             
             * - generate tables normally, using ZSTD_buildFSETable()
         | 
| @@ -343,7 +419,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = { | |
| 343 419 | 
             
            };   /* ML_defaultDTable */
         | 
| 344 420 |  | 
| 345 421 |  | 
| 346 | 
            -
            static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue,  | 
| 422 | 
            +
            static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
         | 
| 347 423 | 
             
            {
         | 
| 348 424 | 
             
                void* ptr = dt;
         | 
| 349 425 | 
             
                ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
         | 
| @@ -355,7 +431,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB | |
| 355 431 | 
             
                cell->nbBits = 0;
         | 
| 356 432 | 
             
                cell->nextState = 0;
         | 
| 357 433 | 
             
                assert(nbAddBits < 255);
         | 
| 358 | 
            -
                cell->nbAdditionalBits =  | 
| 434 | 
            +
                cell->nbAdditionalBits = nbAddBits;
         | 
| 359 435 | 
             
                cell->baseValue = baseValue;
         | 
| 360 436 | 
             
            }
         | 
| 361 437 |  | 
| @@ -364,23 +440,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB | |
| 364 440 | 
             
             * generate FSE decoding table for one symbol (ll, ml or off)
         | 
| 365 441 | 
             
             * cannot fail if input is valid =>
         | 
| 366 442 | 
             
             * all inputs are presumed validated at this stage */
         | 
| 367 | 
            -
             | 
| 368 | 
            -
             | 
| 443 | 
            +
            FORCE_INLINE_TEMPLATE
         | 
| 444 | 
            +
            void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
         | 
| 369 445 | 
             
                        const short* normalizedCounter, unsigned maxSymbolValue,
         | 
| 370 | 
            -
                        const U32* baseValue, const  | 
| 371 | 
            -
                        unsigned tableLog)
         | 
| 446 | 
            +
                        const U32* baseValue, const U8* nbAdditionalBits,
         | 
| 447 | 
            +
                        unsigned tableLog, void* wksp, size_t wkspSize)
         | 
| 372 448 | 
             
            {
         | 
| 373 449 | 
             
                ZSTD_seqSymbol* const tableDecode = dt+1;
         | 
| 374 | 
            -
                U16 symbolNext[MaxSeq+1];
         | 
| 375 | 
            -
             | 
| 376 450 | 
             
                U32 const maxSV1 = maxSymbolValue + 1;
         | 
| 377 451 | 
             
                U32 const tableSize = 1 << tableLog;
         | 
| 378 | 
            -
             | 
| 452 | 
            +
             | 
| 453 | 
            +
                U16* symbolNext = (U16*)wksp;
         | 
| 454 | 
            +
                BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
         | 
| 455 | 
            +
                U32 highThreshold = tableSize - 1;
         | 
| 456 | 
            +
             | 
| 379 457 |  | 
| 380 458 | 
             
                /* Sanity Checks */
         | 
| 381 459 | 
             
                assert(maxSymbolValue <= MaxSeq);
         | 
| 382 460 | 
             
                assert(tableLog <= MaxFSELog);
         | 
| 383 | 
            -
             | 
| 461 | 
            +
                assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
         | 
| 462 | 
            +
                (void)wkspSize;
         | 
| 384 463 | 
             
                /* Init, lay down lowprob symbols */
         | 
| 385 464 | 
             
                {   ZSTD_seqSymbol_header DTableH;
         | 
| 386 465 | 
             
                    DTableH.tableLog = tableLog;
         | 
| @@ -396,16 +475,69 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, | |
| 396 475 | 
             
                                assert(normalizedCounter[s]>=0);
         | 
| 397 476 | 
             
                                symbolNext[s] = (U16)normalizedCounter[s];
         | 
| 398 477 | 
             
                    }   }   }
         | 
| 399 | 
            -
                     | 
| 478 | 
            +
                    ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
         | 
| 400 479 | 
             
                }
         | 
| 401 480 |  | 
| 402 481 | 
             
                /* Spread symbols */
         | 
| 403 | 
            -
                 | 
| 482 | 
            +
                assert(tableSize <= 512);
         | 
| 483 | 
            +
                /* Specialized symbol spreading for the case when there are
         | 
| 484 | 
            +
                 * no low probability (-1 count) symbols. When compressing
         | 
| 485 | 
            +
                 * small blocks we avoid low probability symbols to hit this
         | 
| 486 | 
            +
                 * case, since header decoding speed matters more.
         | 
| 487 | 
            +
                 */
         | 
| 488 | 
            +
                if (highThreshold == tableSize - 1) {
         | 
| 489 | 
            +
                    size_t const tableMask = tableSize-1;
         | 
| 490 | 
            +
                    size_t const step = FSE_TABLESTEP(tableSize);
         | 
| 491 | 
            +
                    /* First lay down the symbols in order.
         | 
| 492 | 
            +
                     * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
         | 
| 493 | 
            +
                     * misses since small blocks generally have small table logs, so nearly
         | 
| 494 | 
            +
                     * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
         | 
| 495 | 
            +
                     * our buffer to handle the over-write.
         | 
| 496 | 
            +
                     */
         | 
| 497 | 
            +
                    {
         | 
| 498 | 
            +
                        U64 const add = 0x0101010101010101ull;
         | 
| 499 | 
            +
                        size_t pos = 0;
         | 
| 500 | 
            +
                        U64 sv = 0;
         | 
| 501 | 
            +
                        U32 s;
         | 
| 502 | 
            +
                        for (s=0; s<maxSV1; ++s, sv += add) {
         | 
| 503 | 
            +
                            int i;
         | 
| 504 | 
            +
                            int const n = normalizedCounter[s];
         | 
| 505 | 
            +
                            MEM_write64(spread + pos, sv);
         | 
| 506 | 
            +
                            for (i = 8; i < n; i += 8) {
         | 
| 507 | 
            +
                                MEM_write64(spread + pos + i, sv);
         | 
| 508 | 
            +
                            }
         | 
| 509 | 
            +
                            pos += n;
         | 
| 510 | 
            +
                        }
         | 
| 511 | 
            +
                    }
         | 
| 512 | 
            +
                    /* Now we spread those positions across the table.
         | 
| 513 | 
            +
                     * The benefit of doing it in two stages is that we avoid the
         | 
| 514 | 
            +
                     * variable size inner loop, which caused lots of branch misses.
         | 
| 515 | 
            +
                     * Now we can run through all the positions without any branch misses.
         | 
| 516 | 
            +
                     * We unroll the loop twice, since that is what empirically worked best.
         | 
| 517 | 
            +
                     */
         | 
| 518 | 
            +
                    {
         | 
| 519 | 
            +
                        size_t position = 0;
         | 
| 520 | 
            +
                        size_t s;
         | 
| 521 | 
            +
                        size_t const unroll = 2;
         | 
| 522 | 
            +
                        assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
         | 
| 523 | 
            +
                        for (s = 0; s < (size_t)tableSize; s += unroll) {
         | 
| 524 | 
            +
                            size_t u;
         | 
| 525 | 
            +
                            for (u = 0; u < unroll; ++u) {
         | 
| 526 | 
            +
                                size_t const uPosition = (position + (u * step)) & tableMask;
         | 
| 527 | 
            +
                                tableDecode[uPosition].baseValue = spread[s + u];
         | 
| 528 | 
            +
                            }
         | 
| 529 | 
            +
                            position = (position + (unroll * step)) & tableMask;
         | 
| 530 | 
            +
                        }
         | 
| 531 | 
            +
                        assert(position == 0);
         | 
| 532 | 
            +
                    }
         | 
| 533 | 
            +
                } else {
         | 
| 534 | 
            +
                    U32 const tableMask = tableSize-1;
         | 
| 404 535 | 
             
                    U32 const step = FSE_TABLESTEP(tableSize);
         | 
| 405 536 | 
             
                    U32 s, position = 0;
         | 
| 406 537 | 
             
                    for (s=0; s<maxSV1; s++) {
         | 
| 407 538 | 
             
                        int i;
         | 
| 408 | 
            -
                         | 
| 539 | 
            +
                        int const n = normalizedCounter[s];
         | 
| 540 | 
            +
                        for (i=0; i<n; i++) {
         | 
| 409 541 | 
             
                            tableDecode[position].baseValue = s;
         | 
| 410 542 | 
             
                            position = (position + step) & tableMask;
         | 
| 411 543 | 
             
                            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
         | 
| @@ -414,16 +546,56 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, | |
| 414 546 | 
             
                }
         | 
| 415 547 |  | 
| 416 548 | 
             
                /* Build Decoding table */
         | 
| 417 | 
            -
                { | 
| 549 | 
            +
                {
         | 
| 550 | 
            +
                    U32 u;
         | 
| 418 551 | 
             
                    for (u=0; u<tableSize; u++) {
         | 
| 419 552 | 
             
                        U32 const symbol = tableDecode[u].baseValue;
         | 
| 420 553 | 
             
                        U32 const nextState = symbolNext[symbol]++;
         | 
| 421 554 | 
             
                        tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
         | 
| 422 555 | 
             
                        tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
         | 
| 423 556 | 
             
                        assert(nbAdditionalBits[symbol] < 255);
         | 
| 424 | 
            -
                        tableDecode[u].nbAdditionalBits =  | 
| 557 | 
            +
                        tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
         | 
| 425 558 | 
             
                        tableDecode[u].baseValue = baseValue[symbol];
         | 
| 426 | 
            -
             | 
| 559 | 
            +
                    }
         | 
| 560 | 
            +
                }
         | 
| 561 | 
            +
            }
         | 
| 562 | 
            +
             | 
| 563 | 
            +
            /* Avoids the FORCE_INLINE of the _body() function. */
         | 
| 564 | 
            +
            static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
         | 
| 565 | 
            +
                        const short* normalizedCounter, unsigned maxSymbolValue,
         | 
| 566 | 
            +
                        const U32* baseValue, const U8* nbAdditionalBits,
         | 
| 567 | 
            +
                        unsigned tableLog, void* wksp, size_t wkspSize)
         | 
| 568 | 
            +
            {
         | 
| 569 | 
            +
                ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
         | 
| 570 | 
            +
                        baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
         | 
| 571 | 
            +
            }
         | 
| 572 | 
            +
             | 
| 573 | 
            +
            #if DYNAMIC_BMI2
         | 
| 574 | 
            +
            BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
         | 
| 575 | 
            +
                        const short* normalizedCounter, unsigned maxSymbolValue,
         | 
| 576 | 
            +
                        const U32* baseValue, const U8* nbAdditionalBits,
         | 
| 577 | 
            +
                        unsigned tableLog, void* wksp, size_t wkspSize)
         | 
| 578 | 
            +
            {
         | 
| 579 | 
            +
                ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
         | 
| 580 | 
            +
                        baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
         | 
| 581 | 
            +
            }
         | 
| 582 | 
            +
            #endif
         | 
| 583 | 
            +
             | 
| 584 | 
            +
            void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
         | 
| 585 | 
            +
                        const short* normalizedCounter, unsigned maxSymbolValue,
         | 
| 586 | 
            +
                        const U32* baseValue, const U8* nbAdditionalBits,
         | 
| 587 | 
            +
                        unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
         | 
| 588 | 
            +
            {
         | 
| 589 | 
            +
            #if DYNAMIC_BMI2
         | 
| 590 | 
            +
                if (bmi2) {
         | 
| 591 | 
            +
                    ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
         | 
| 592 | 
            +
                            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
         | 
| 593 | 
            +
                    return;
         | 
| 594 | 
            +
                }
         | 
| 595 | 
            +
            #endif
         | 
| 596 | 
            +
                (void)bmi2;
         | 
| 597 | 
            +
                ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
         | 
| 598 | 
            +
                        baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
         | 
| 427 599 | 
             
            }
         | 
| 428 600 |  | 
| 429 601 |  | 
| @@ -433,9 +605,10 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, | |
| 433 605 | 
             
            static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
         | 
| 434 606 | 
             
                                             symbolEncodingType_e type, unsigned max, U32 maxLog,
         | 
| 435 607 | 
             
                                             const void* src, size_t srcSize,
         | 
| 436 | 
            -
                                             const U32* baseValue, const  | 
| 608 | 
            +
                                             const U32* baseValue, const U8* nbAdditionalBits,
         | 
| 437 609 | 
             
                                             const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
         | 
| 438 | 
            -
                                             int ddictIsCold, int nbSeq | 
| 610 | 
            +
                                             int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
         | 
| 611 | 
            +
                                             int bmi2)
         | 
| 439 612 | 
             
            {
         | 
| 440 613 | 
             
                switch(type)
         | 
| 441 614 | 
             
                {
         | 
| @@ -444,7 +617,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb | |
| 444 617 | 
             
                    RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
         | 
| 445 618 | 
             
                    {   U32 const symbol = *(const BYTE*)src;
         | 
| 446 619 | 
             
                        U32 const baseline = baseValue[symbol];
         | 
| 447 | 
            -
                         | 
| 620 | 
            +
                        U8 const nbBits = nbAdditionalBits[symbol];
         | 
| 448 621 | 
             
                        ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
         | 
| 449 622 | 
             
                    }
         | 
| 450 623 | 
             
                    *DTablePtr = DTableSpace;
         | 
| @@ -467,7 +640,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb | |
| 467 640 | 
             
                        size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
         | 
| 468 641 | 
             
                        RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
         | 
| 469 642 | 
             
                        RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
         | 
| 470 | 
            -
                        ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
         | 
| 643 | 
            +
                        ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
         | 
| 471 644 | 
             
                        *DTablePtr = DTableSpace;
         | 
| 472 645 | 
             
                        return headerSize;
         | 
| 473 646 | 
             
                    }
         | 
| @@ -480,7 +653,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb | |
| 480 653 | 
             
            size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
         | 
| 481 654 | 
             
                                         const void* src, size_t srcSize)
         | 
| 482 655 | 
             
            {
         | 
| 483 | 
            -
                const BYTE* const istart = (const BYTE* | 
| 656 | 
            +
                const BYTE* const istart = (const BYTE*)src;
         | 
| 484 657 | 
             
                const BYTE* const iend = istart + srcSize;
         | 
| 485 658 | 
             
                const BYTE* ip = istart;
         | 
| 486 659 | 
             
                int nbSeq;
         | 
| @@ -499,7 +672,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, | |
| 499 672 | 
             
                if (nbSeq > 0x7F) {
         | 
| 500 673 | 
             
                    if (nbSeq == 0xFF) {
         | 
| 501 674 | 
             
                        RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
         | 
| 502 | 
            -
                        nbSeq = MEM_readLE16(ip) + LONGNBSEQ | 
| 675 | 
            +
                        nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
         | 
| 676 | 
            +
                        ip+=2;
         | 
| 503 677 | 
             
                    } else {
         | 
| 504 678 | 
             
                        RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
         | 
| 505 679 | 
             
                        nbSeq = ((nbSeq-0x80)<<8) + *ip++;
         | 
| @@ -520,7 +694,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, | |
| 520 694 | 
             
                                                                  ip, iend-ip,
         | 
| 521 695 | 
             
                                                                  LL_base, LL_bits,
         | 
| 522 696 | 
             
                                                                  LL_defaultDTable, dctx->fseEntropy,
         | 
| 523 | 
            -
                                                                  dctx->ddictIsCold, nbSeq | 
| 697 | 
            +
                                                                  dctx->ddictIsCold, nbSeq,
         | 
| 698 | 
            +
                                                                  dctx->workspace, sizeof(dctx->workspace),
         | 
| 699 | 
            +
                                                                  ZSTD_DCtx_get_bmi2(dctx));
         | 
| 524 700 | 
             
                        RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
         | 
| 525 701 | 
             
                        ip += llhSize;
         | 
| 526 702 | 
             
                    }
         | 
| @@ -530,7 +706,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, | |
| 530 706 | 
             
                                                                  ip, iend-ip,
         | 
| 531 707 | 
             
                                                                  OF_base, OF_bits,
         | 
| 532 708 | 
             
                                                                  OF_defaultDTable, dctx->fseEntropy,
         | 
| 533 | 
            -
                                                                  dctx->ddictIsCold, nbSeq | 
| 709 | 
            +
                                                                  dctx->ddictIsCold, nbSeq,
         | 
| 710 | 
            +
                                                                  dctx->workspace, sizeof(dctx->workspace),
         | 
| 711 | 
            +
                                                                  ZSTD_DCtx_get_bmi2(dctx));
         | 
| 534 712 | 
             
                        RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
         | 
| 535 713 | 
             
                        ip += ofhSize;
         | 
| 536 714 | 
             
                    }
         | 
| @@ -540,7 +718,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, | |
| 540 718 | 
             
                                                                  ip, iend-ip,
         | 
| 541 719 | 
             
                                                                  ML_base, ML_bits,
         | 
| 542 720 | 
             
                                                                  ML_defaultDTable, dctx->fseEntropy,
         | 
| 543 | 
            -
                                                                  dctx->ddictIsCold, nbSeq | 
| 721 | 
            +
                                                                  dctx->ddictIsCold, nbSeq,
         | 
| 722 | 
            +
                                                                  dctx->workspace, sizeof(dctx->workspace),
         | 
| 723 | 
            +
                                                                  ZSTD_DCtx_get_bmi2(dctx));
         | 
| 544 724 | 
             
                        RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
         | 
| 545 725 | 
             
                        ip += mlhSize;
         | 
| 546 726 | 
             
                    }
         | 
| @@ -554,7 +734,6 @@ typedef struct { | |
| 554 734 | 
             
                size_t litLength;
         | 
| 555 735 | 
             
                size_t matchLength;
         | 
| 556 736 | 
             
                size_t offset;
         | 
| 557 | 
            -
                const BYTE* match;
         | 
| 558 737 | 
             
            } seq_t;
         | 
| 559 738 |  | 
| 560 739 | 
             
            typedef struct {
         | 
| @@ -568,9 +747,6 @@ typedef struct { | |
| 568 747 | 
             
                ZSTD_fseState stateOffb;
         | 
| 569 748 | 
             
                ZSTD_fseState stateML;
         | 
| 570 749 | 
             
                size_t prevOffset[ZSTD_REP_NUM];
         | 
| 571 | 
            -
                const BYTE* prefixStart;
         | 
| 572 | 
            -
                const BYTE* dictEnd;
         | 
| 573 | 
            -
                size_t pos;
         | 
| 574 750 | 
             
            } seqState_t;
         | 
| 575 751 |  | 
| 576 752 | 
             
            /*! ZSTD_overlapCopy8() :
         | 
| @@ -613,7 +789,7 @@ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { | |
| 613 789 | 
             
             *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
         | 
| 614 790 | 
             
             *           The src buffer must be before the dst buffer.
         | 
| 615 791 | 
             
             */
         | 
| 616 | 
            -
            static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
         | 
| 792 | 
            +
            static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
         | 
| 617 793 | 
             
                ptrdiff_t const diff = op - ip;
         | 
| 618 794 | 
             
                BYTE* const oend = op + length;
         | 
| 619 795 |  | 
| @@ -629,6 +805,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_ | |
| 629 805 | 
             
                    /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
         | 
| 630 806 | 
             
                    assert(length >= 8);
         | 
| 631 807 | 
             
                    ZSTD_overlapCopy8(&op, &ip, diff);
         | 
| 808 | 
            +
                    length -= 8;
         | 
| 632 809 | 
             
                    assert(op - ip >= 8);
         | 
| 633 810 | 
             
                    assert(op <= oend);
         | 
| 634 811 | 
             
                }
         | 
| @@ -643,8 +820,31 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_ | |
| 643 820 | 
             
                    assert(oend > oend_w);
         | 
| 644 821 | 
             
                    ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
         | 
| 645 822 | 
             
                    ip += oend_w - op;
         | 
| 646 | 
            -
                    op  | 
| 823 | 
            +
                    op += oend_w - op;
         | 
| 824 | 
            +
                }
         | 
| 825 | 
            +
                /* Handle the leftovers. */
         | 
| 826 | 
            +
                while (op < oend) *op++ = *ip++;
         | 
| 827 | 
            +
            }
         | 
| 828 | 
            +
             | 
| 829 | 
            +
            /* ZSTD_safecopyDstBeforeSrc():
         | 
| 830 | 
            +
             * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
         | 
| 831 | 
            +
             * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
         | 
| 832 | 
            +
            static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
         | 
| 833 | 
            +
                ptrdiff_t const diff = op - ip;
         | 
| 834 | 
            +
                BYTE* const oend = op + length;
         | 
| 835 | 
            +
             | 
| 836 | 
            +
                if (length < 8 || diff > -8) {
         | 
| 837 | 
            +
                    /* Handle short lengths, close overlaps, and dst not before src. */
         | 
| 838 | 
            +
                    while (op < oend) *op++ = *ip++;
         | 
| 839 | 
            +
                    return;
         | 
| 840 | 
            +
                }
         | 
| 841 | 
            +
             | 
| 842 | 
            +
                if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
         | 
| 843 | 
            +
                    ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
         | 
| 844 | 
            +
                    ip += oend - WILDCOPY_OVERLENGTH - op;
         | 
| 845 | 
            +
                    op += oend - WILDCOPY_OVERLENGTH - op;
         | 
| 647 846 | 
             
                }
         | 
| 847 | 
            +
             | 
| 648 848 | 
             
                /* Handle the leftovers. */
         | 
| 649 849 | 
             
                while (op < oend) *op++ = *ip++;
         | 
| 650 850 | 
             
            }
         | 
| @@ -659,9 +859,9 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_ | |
| 659 859 | 
             
             */
         | 
| 660 860 | 
             
            FORCE_NOINLINE
         | 
| 661 861 | 
             
            size_t ZSTD_execSequenceEnd(BYTE* op,
         | 
| 662 | 
            -
             | 
| 663 | 
            -
             | 
| 664 | 
            -
             | 
| 862 | 
            +
                BYTE* const oend, seq_t sequence,
         | 
| 863 | 
            +
                const BYTE** litPtr, const BYTE* const litLimit,
         | 
| 864 | 
            +
                const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
         | 
| 665 865 | 
             
            {
         | 
| 666 866 | 
             
                BYTE* const oLitEnd = op + sequence.litLength;
         | 
| 667 867 | 
             
                size_t const sequenceLength = sequence.litLength + sequence.matchLength;
         | 
| @@ -684,27 +884,76 @@ size_t ZSTD_execSequenceEnd(BYTE* op, | |
| 684 884 | 
             
                if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         | 
| 685 885 | 
             
                    /* offset beyond prefix */
         | 
| 686 886 | 
             
                    RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
         | 
| 687 | 
            -
                    match = dictEnd - (prefixStart-match);
         | 
| 887 | 
            +
                    match = dictEnd - (prefixStart - match);
         | 
| 688 888 | 
             
                    if (match + sequence.matchLength <= dictEnd) {
         | 
| 689 | 
            -
                         | 
| 889 | 
            +
                        ZSTD_memmove(oLitEnd, match, sequence.matchLength);
         | 
| 690 890 | 
             
                        return sequenceLength;
         | 
| 691 891 | 
             
                    }
         | 
| 692 892 | 
             
                    /* span extDict & currentPrefixSegment */
         | 
| 693 893 | 
             
                    {   size_t const length1 = dictEnd - match;
         | 
| 694 | 
            -
             | 
| 695 | 
            -
             | 
| 696 | 
            -
             | 
| 697 | 
            -
             | 
| 698 | 
            -
             | 
| 894 | 
            +
                    ZSTD_memmove(oLitEnd, match, length1);
         | 
| 895 | 
            +
                    op = oLitEnd + length1;
         | 
| 896 | 
            +
                    sequence.matchLength -= length1;
         | 
| 897 | 
            +
                    match = prefixStart;
         | 
| 898 | 
            +
                    }
         | 
| 899 | 
            +
                }
         | 
| 900 | 
            +
                ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
         | 
| 901 | 
            +
                return sequenceLength;
         | 
| 902 | 
            +
            }
         | 
| 903 | 
            +
             | 
| 904 | 
            +
            /* ZSTD_execSequenceEndSplitLitBuffer():
         | 
| 905 | 
            +
             * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
         | 
| 906 | 
            +
             */
         | 
| 907 | 
            +
            FORCE_NOINLINE
         | 
| 908 | 
            +
            size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
         | 
| 909 | 
            +
                BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
         | 
| 910 | 
            +
                const BYTE** litPtr, const BYTE* const litLimit,
         | 
| 911 | 
            +
                const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
         | 
| 912 | 
            +
            {
         | 
| 913 | 
            +
                BYTE* const oLitEnd = op + sequence.litLength;
         | 
| 914 | 
            +
                size_t const sequenceLength = sequence.litLength + sequence.matchLength;
         | 
| 915 | 
            +
                const BYTE* const iLitEnd = *litPtr + sequence.litLength;
         | 
| 916 | 
            +
                const BYTE* match = oLitEnd - sequence.offset;
         | 
| 917 | 
            +
             | 
| 918 | 
            +
             | 
| 919 | 
            +
                /* bounds checks : careful of address space overflow in 32-bit mode */
         | 
| 920 | 
            +
                RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
         | 
| 921 | 
            +
                RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
         | 
| 922 | 
            +
                assert(op < op + sequenceLength);
         | 
| 923 | 
            +
                assert(oLitEnd < op + sequenceLength);
         | 
| 924 | 
            +
             | 
| 925 | 
            +
                /* copy literals */
         | 
| 926 | 
            +
                RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
         | 
| 927 | 
            +
                ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
         | 
| 928 | 
            +
                op = oLitEnd;
         | 
| 929 | 
            +
                *litPtr = iLitEnd;
         | 
| 930 | 
            +
             | 
| 931 | 
            +
                /* copy Match */
         | 
| 932 | 
            +
                if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         | 
| 933 | 
            +
                    /* offset beyond prefix */
         | 
| 934 | 
            +
                    RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
         | 
| 935 | 
            +
                    match = dictEnd - (prefixStart - match);
         | 
| 936 | 
            +
                    if (match + sequence.matchLength <= dictEnd) {
         | 
| 937 | 
            +
                        ZSTD_memmove(oLitEnd, match, sequence.matchLength);
         | 
| 938 | 
            +
                        return sequenceLength;
         | 
| 939 | 
            +
                    }
         | 
| 940 | 
            +
                    /* span extDict & currentPrefixSegment */
         | 
| 941 | 
            +
                    {   size_t const length1 = dictEnd - match;
         | 
| 942 | 
            +
                    ZSTD_memmove(oLitEnd, match, length1);
         | 
| 943 | 
            +
                    op = oLitEnd + length1;
         | 
| 944 | 
            +
                    sequence.matchLength -= length1;
         | 
| 945 | 
            +
                    match = prefixStart;
         | 
| 946 | 
            +
                    }
         | 
| 947 | 
            +
                }
         | 
| 699 948 | 
             
                ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
         | 
| 700 949 | 
             
                return sequenceLength;
         | 
| 701 950 | 
             
            }
         | 
| 702 951 |  | 
| 703 952 | 
             
            HINT_INLINE
         | 
| 704 953 | 
             
            size_t ZSTD_execSequence(BYTE* op,
         | 
| 705 | 
            -
             | 
| 706 | 
            -
             | 
| 707 | 
            -
             | 
| 954 | 
            +
                BYTE* const oend, seq_t sequence,
         | 
| 955 | 
            +
                const BYTE** litPtr, const BYTE* const litLimit,
         | 
| 956 | 
            +
                const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
         | 
| 708 957 | 
             
            {
         | 
| 709 958 | 
             
                BYTE* const oLitEnd = op + sequence.litLength;
         | 
| 710 959 | 
             
                size_t const sequenceLength = sequence.litLength + sequence.matchLength;
         | 
| @@ -713,6 +962,98 @@ size_t ZSTD_execSequence(BYTE* op, | |
| 713 962 | 
             
                const BYTE* const iLitEnd = *litPtr + sequence.litLength;
         | 
| 714 963 | 
             
                const BYTE* match = oLitEnd - sequence.offset;
         | 
| 715 964 |  | 
| 965 | 
            +
                assert(op != NULL /* Precondition */);
         | 
| 966 | 
            +
                assert(oend_w < oend /* No underflow */);
         | 
| 967 | 
            +
                /* Handle edge cases in a slow path:
         | 
| 968 | 
            +
                 *   - Read beyond end of literals
         | 
| 969 | 
            +
                 *   - Match end is within WILDCOPY_OVERLIMIT of oend
         | 
| 970 | 
            +
                 *   - 32-bit mode and the match length overflows
         | 
| 971 | 
            +
                 */
         | 
| 972 | 
            +
                if (UNLIKELY(
         | 
| 973 | 
            +
                    iLitEnd > litLimit ||
         | 
| 974 | 
            +
                    oMatchEnd > oend_w ||
         | 
| 975 | 
            +
                    (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
         | 
| 976 | 
            +
                    return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
         | 
| 977 | 
            +
             | 
| 978 | 
            +
                /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
         | 
| 979 | 
            +
                assert(op <= oLitEnd /* No overflow */);
         | 
| 980 | 
            +
                assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
         | 
| 981 | 
            +
                assert(oMatchEnd <= oend /* No underflow */);
         | 
| 982 | 
            +
                assert(iLitEnd <= litLimit /* Literal length is in bounds */);
         | 
| 983 | 
            +
                assert(oLitEnd <= oend_w /* Can wildcopy literals */);
         | 
| 984 | 
            +
                assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
         | 
| 985 | 
            +
             | 
| 986 | 
            +
                /* Copy Literals:
         | 
| 987 | 
            +
                 * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
         | 
| 988 | 
            +
                 * We likely don't need the full 32-byte wildcopy.
         | 
| 989 | 
            +
                 */
         | 
| 990 | 
            +
                assert(WILDCOPY_OVERLENGTH >= 16);
         | 
| 991 | 
            +
                ZSTD_copy16(op, (*litPtr));
         | 
| 992 | 
            +
                if (UNLIKELY(sequence.litLength > 16)) {
         | 
| 993 | 
            +
                    ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
         | 
| 994 | 
            +
                }
         | 
| 995 | 
            +
                op = oLitEnd;
         | 
| 996 | 
            +
                *litPtr = iLitEnd;   /* update for next sequence */
         | 
| 997 | 
            +
             | 
| 998 | 
            +
                /* Copy Match */
         | 
| 999 | 
            +
                if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         | 
| 1000 | 
            +
                    /* offset beyond prefix -> go into extDict */
         | 
| 1001 | 
            +
                    RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
         | 
| 1002 | 
            +
                    match = dictEnd + (match - prefixStart);
         | 
| 1003 | 
            +
                    if (match + sequence.matchLength <= dictEnd) {
         | 
| 1004 | 
            +
                        ZSTD_memmove(oLitEnd, match, sequence.matchLength);
         | 
| 1005 | 
            +
                        return sequenceLength;
         | 
| 1006 | 
            +
                    }
         | 
| 1007 | 
            +
                    /* span extDict & currentPrefixSegment */
         | 
| 1008 | 
            +
                    {   size_t const length1 = dictEnd - match;
         | 
| 1009 | 
            +
                    ZSTD_memmove(oLitEnd, match, length1);
         | 
| 1010 | 
            +
                    op = oLitEnd + length1;
         | 
| 1011 | 
            +
                    sequence.matchLength -= length1;
         | 
| 1012 | 
            +
                    match = prefixStart;
         | 
| 1013 | 
            +
                    }
         | 
| 1014 | 
            +
                }
         | 
| 1015 | 
            +
                /* Match within prefix of 1 or more bytes */
         | 
| 1016 | 
            +
                assert(op <= oMatchEnd);
         | 
| 1017 | 
            +
                assert(oMatchEnd <= oend_w);
         | 
| 1018 | 
            +
                assert(match >= prefixStart);
         | 
| 1019 | 
            +
                assert(sequence.matchLength >= 1);
         | 
| 1020 | 
            +
             | 
| 1021 | 
            +
                /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
         | 
| 1022 | 
            +
                 * without overlap checking.
         | 
| 1023 | 
            +
                 */
         | 
| 1024 | 
            +
                if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
         | 
| 1025 | 
            +
                    /* We bet on a full wildcopy for matches, since we expect matches to be
         | 
| 1026 | 
            +
                     * longer than literals (in general). In silesia, ~10% of matches are longer
         | 
| 1027 | 
            +
                     * than 16 bytes.
         | 
| 1028 | 
            +
                     */
         | 
| 1029 | 
            +
                    ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
         | 
| 1030 | 
            +
                    return sequenceLength;
         | 
| 1031 | 
            +
                }
         | 
| 1032 | 
            +
                assert(sequence.offset < WILDCOPY_VECLEN);
         | 
| 1033 | 
            +
             | 
| 1034 | 
            +
                /* Copy 8 bytes and spread the offset to be >= 8. */
         | 
| 1035 | 
            +
                ZSTD_overlapCopy8(&op, &match, sequence.offset);
         | 
| 1036 | 
            +
             | 
| 1037 | 
            +
                /* If the match length is > 8 bytes, then continue with the wildcopy. */
         | 
| 1038 | 
            +
                if (sequence.matchLength > 8) {
         | 
| 1039 | 
            +
                    assert(op < oMatchEnd);
         | 
| 1040 | 
            +
                    ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
         | 
| 1041 | 
            +
                }
         | 
| 1042 | 
            +
                return sequenceLength;
         | 
| 1043 | 
            +
            }
         | 
| 1044 | 
            +
             | 
| 1045 | 
            +
            HINT_INLINE
         | 
| 1046 | 
            +
            size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
         | 
| 1047 | 
            +
                BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
         | 
| 1048 | 
            +
                const BYTE** litPtr, const BYTE* const litLimit,
         | 
| 1049 | 
            +
                const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
         | 
| 1050 | 
            +
            {
         | 
| 1051 | 
            +
                BYTE* const oLitEnd = op + sequence.litLength;
         | 
| 1052 | 
            +
                size_t const sequenceLength = sequence.litLength + sequence.matchLength;
         | 
| 1053 | 
            +
                BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
         | 
| 1054 | 
            +
                const BYTE* const iLitEnd = *litPtr + sequence.litLength;
         | 
| 1055 | 
            +
                const BYTE* match = oLitEnd - sequence.offset;
         | 
| 1056 | 
            +
             | 
| 716 1057 | 
             
                assert(op != NULL /* Precondition */);
         | 
| 717 1058 | 
             
                assert(oend_w < oend /* No underflow */);
         | 
| 718 1059 | 
             
                /* Handle edge cases in a slow path:
         | 
| @@ -724,7 +1065,7 @@ size_t ZSTD_execSequence(BYTE* op, | |
| 724 1065 | 
             
                        iLitEnd > litLimit ||
         | 
| 725 1066 | 
             
                        oMatchEnd > oend_w ||
         | 
| 726 1067 | 
             
                        (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
         | 
| 727 | 
            -
                    return  | 
| 1068 | 
            +
                    return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
         | 
| 728 1069 |  | 
| 729 1070 | 
             
                /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
         | 
| 730 1071 | 
             
                assert(op <= oLitEnd /* No overflow */);
         | 
| @@ -752,12 +1093,12 @@ size_t ZSTD_execSequence(BYTE* op, | |
| 752 1093 | 
             
                    RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
         | 
| 753 1094 | 
             
                    match = dictEnd + (match - prefixStart);
         | 
| 754 1095 | 
             
                    if (match + sequence.matchLength <= dictEnd) {
         | 
| 755 | 
            -
                         | 
| 1096 | 
            +
                        ZSTD_memmove(oLitEnd, match, sequence.matchLength);
         | 
| 756 1097 | 
             
                        return sequenceLength;
         | 
| 757 1098 | 
             
                    }
         | 
| 758 1099 | 
             
                    /* span extDict & currentPrefixSegment */
         | 
| 759 1100 | 
             
                    {   size_t const length1 = dictEnd - match;
         | 
| 760 | 
            -
                         | 
| 1101 | 
            +
                        ZSTD_memmove(oLitEnd, match, length1);
         | 
| 761 1102 | 
             
                        op = oLitEnd + length1;
         | 
| 762 1103 | 
             
                        sequence.matchLength -= length1;
         | 
| 763 1104 | 
             
                        match = prefixStart;
         | 
| @@ -792,6 +1133,7 @@ size_t ZSTD_execSequence(BYTE* op, | |
| 792 1133 | 
             
                return sequenceLength;
         | 
| 793 1134 | 
             
            }
         | 
| 794 1135 |  | 
| 1136 | 
            +
             | 
| 795 1137 | 
             
            static void
         | 
| 796 1138 | 
             
            ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
         | 
| 797 1139 | 
             
            {
         | 
| @@ -805,20 +1147,10 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS | |
| 805 1147 | 
             
            }
         | 
| 806 1148 |  | 
| 807 1149 | 
             
            FORCE_INLINE_TEMPLATE void
         | 
| 808 | 
            -
             | 
| 1150 | 
            +
            ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
         | 
| 809 1151 | 
             
            {
         | 
| 810 | 
            -
                ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
         | 
| 811 | 
            -
                U32 const nbBits = DInfo.nbBits;
         | 
| 812 1152 | 
             
                size_t const lowBits = BIT_readBits(bitD, nbBits);
         | 
| 813 | 
            -
                DStatePtr->state =  | 
| 814 | 
            -
            }
         | 
| 815 | 
            -
             | 
| 816 | 
            -
            FORCE_INLINE_TEMPLATE void
         | 
| 817 | 
            -
            ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
         | 
| 818 | 
            -
            {
         | 
| 819 | 
            -
                U32 const nbBits = DInfo.nbBits;
         | 
| 820 | 
            -
                size_t const lowBits = BIT_readBits(bitD, nbBits);
         | 
| 821 | 
            -
                DStatePtr->state = DInfo.nextState + lowBits;
         | 
| 1153 | 
            +
                DStatePtr->state = nextState + lowBits;
         | 
| 822 1154 | 
             
            }
         | 
| 823 1155 |  | 
| 824 1156 | 
             
            /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
         | 
| @@ -832,123 +1164,112 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD | |
| 832 1164 | 
             
                    : 0)
         | 
| 833 1165 |  | 
| 834 1166 | 
             
            typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
         | 
| 835 | 
            -
            typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
         | 
| 836 1167 |  | 
| 837 1168 | 
             
            FORCE_INLINE_TEMPLATE seq_t
         | 
| 838 | 
            -
            ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets | 
| 1169 | 
            +
            ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
         | 
| 839 1170 | 
             
            {
         | 
| 840 1171 | 
             
                seq_t seq;
         | 
| 841 | 
            -
                ZSTD_seqSymbol const llDInfo = seqState->stateLL.table | 
| 842 | 
            -
                ZSTD_seqSymbol const mlDInfo = seqState->stateML.table | 
| 843 | 
            -
                ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table | 
| 844 | 
            -
                 | 
| 845 | 
            -
                 | 
| 846 | 
            -
                U32 const ofBase = ofDInfo | 
| 847 | 
            -
             | 
| 848 | 
            -
             | 
| 849 | 
            -
             | 
| 850 | 
            -
             | 
| 851 | 
            -
             | 
| 852 | 
            -
             | 
| 853 | 
            -
             | 
| 854 | 
            -
                     | 
| 855 | 
            -
             | 
| 856 | 
            -
             | 
| 857 | 
            -
             | 
| 858 | 
            -
             | 
| 859 | 
            -
             | 
| 860 | 
            -
             | 
| 861 | 
            -
             | 
| 862 | 
            -
             | 
| 863 | 
            -
             | 
| 864 | 
            -
             | 
| 865 | 
            -
             | 
| 866 | 
            -
             | 
| 867 | 
            -
                         | 
| 868 | 
            -
             | 
| 869 | 
            -
                         | 
| 870 | 
            -
             | 
| 871 | 
            -
             | 
| 872 | 
            -
             | 
| 873 | 
            -
             | 
| 874 | 
            -
                            if ( | 
| 875 | 
            -
                                 | 
| 876 | 
            -
             | 
| 877 | 
            -
                                 | 
| 878 | 
            -
                                 | 
| 879 | 
            -
                                 | 
| 1172 | 
            +
                const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
         | 
| 1173 | 
            +
                const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
         | 
| 1174 | 
            +
                const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
         | 
| 1175 | 
            +
                seq.matchLength = mlDInfo->baseValue;
         | 
| 1176 | 
            +
                seq.litLength = llDInfo->baseValue;
         | 
| 1177 | 
            +
                {   U32 const ofBase = ofDInfo->baseValue;
         | 
| 1178 | 
            +
                    BYTE const llBits = llDInfo->nbAdditionalBits;
         | 
| 1179 | 
            +
                    BYTE const mlBits = mlDInfo->nbAdditionalBits;
         | 
| 1180 | 
            +
                    BYTE const ofBits = ofDInfo->nbAdditionalBits;
         | 
| 1181 | 
            +
                    BYTE const totalBits = llBits+mlBits+ofBits;
         | 
| 1182 | 
            +
             | 
| 1183 | 
            +
                    U16 const llNext = llDInfo->nextState;
         | 
| 1184 | 
            +
                    U16 const mlNext = mlDInfo->nextState;
         | 
| 1185 | 
            +
                    U16 const ofNext = ofDInfo->nextState;
         | 
| 1186 | 
            +
                    U32 const llnbBits = llDInfo->nbBits;
         | 
| 1187 | 
            +
                    U32 const mlnbBits = mlDInfo->nbBits;
         | 
| 1188 | 
            +
                    U32 const ofnbBits = ofDInfo->nbBits;
         | 
| 1189 | 
            +
                    /*
         | 
| 1190 | 
            +
                     * As gcc has better branch and block analyzers, sometimes it is only
         | 
| 1191 | 
            +
                     * valuable to mark likelyness for clang, it gives around 3-4% of
         | 
| 1192 | 
            +
                     * performance.
         | 
| 1193 | 
            +
                     */
         | 
| 1194 | 
            +
             | 
| 1195 | 
            +
                    /* sequence */
         | 
| 1196 | 
            +
                    {   size_t offset;
         | 
| 1197 | 
            +
                #if defined(__clang__)
         | 
| 1198 | 
            +
                        if (LIKELY(ofBits > 1)) {
         | 
| 1199 | 
            +
                #else
         | 
| 1200 | 
            +
                        if (ofBits > 1) {
         | 
| 1201 | 
            +
                #endif
         | 
| 1202 | 
            +
                            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
         | 
| 1203 | 
            +
                            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
         | 
| 1204 | 
            +
                            assert(ofBits <= MaxOff);
         | 
| 1205 | 
            +
                            if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
         | 
| 1206 | 
            +
                                U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
         | 
| 1207 | 
            +
                                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
         | 
| 1208 | 
            +
                                BIT_reloadDStream(&seqState->DStream);
         | 
| 1209 | 
            +
                                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
         | 
| 1210 | 
            +
                                assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
         | 
| 1211 | 
            +
                            } else {
         | 
| 1212 | 
            +
                                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
         | 
| 1213 | 
            +
                                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
         | 
| 880 1214 | 
             
                            }
         | 
| 1215 | 
            +
                            seqState->prevOffset[2] = seqState->prevOffset[1];
         | 
| 1216 | 
            +
                            seqState->prevOffset[1] = seqState->prevOffset[0];
         | 
| 1217 | 
            +
                            seqState->prevOffset[0] = offset;
         | 
| 881 1218 | 
             
                        } else {
         | 
| 882 | 
            -
                             | 
| 883 | 
            -
                             | 
| 884 | 
            -
                                 | 
| 885 | 
            -
                                 | 
| 886 | 
            -
                                seqState->prevOffset[ | 
| 887 | 
            -
             | 
| 888 | 
            -
             | 
| 889 | 
            -
             | 
| 890 | 
            -
             | 
| 891 | 
            -
             | 
| 892 | 
            -
             | 
| 893 | 
            -
             | 
| 894 | 
            -
             | 
| 895 | 
            -
             | 
| 896 | 
            -
                if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
         | 
| 897 | 
            -
                    BIT_reloadDStream(&seqState->DStream);
         | 
| 898 | 
            -
                if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
         | 
| 899 | 
            -
                    BIT_reloadDStream(&seqState->DStream);
         | 
| 900 | 
            -
                /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
         | 
| 901 | 
            -
                ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
         | 
| 902 | 
            -
             | 
| 903 | 
            -
                seq.litLength = llBase;
         | 
| 904 | 
            -
                if (llBits > 0)
         | 
| 905 | 
            -
                    seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
         | 
| 906 | 
            -
             | 
| 907 | 
            -
                if (MEM_32bits())
         | 
| 908 | 
            -
                    BIT_reloadDStream(&seqState->DStream);
         | 
| 909 | 
            -
             | 
| 910 | 
            -
                DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
         | 
| 911 | 
            -
                            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
         | 
| 912 | 
            -
             | 
| 913 | 
            -
                if (prefetch == ZSTD_p_prefetch) {
         | 
| 914 | 
            -
                    size_t const pos = seqState->pos + seq.litLength;
         | 
| 915 | 
            -
                    const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
         | 
| 916 | 
            -
                    seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
         | 
| 917 | 
            -
                                                                * No consequence though : no memory access will occur, offset is only used for prefetching */
         | 
| 918 | 
            -
                    seqState->pos = pos + seq.matchLength;
         | 
| 919 | 
            -
                }
         | 
| 920 | 
            -
             | 
| 921 | 
            -
                /* ANS state update
         | 
| 922 | 
            -
                 * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
         | 
| 923 | 
            -
                 * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
         | 
| 924 | 
            -
                 * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
         | 
| 925 | 
            -
                 * better option, so it is the default for other compilers. But, if you
         | 
| 926 | 
            -
                 * measure that it is worse, please put up a pull request.
         | 
| 927 | 
            -
                 */
         | 
| 928 | 
            -
                {
         | 
| 929 | 
            -
            #if defined(__GNUC__) && !defined(__clang__)
         | 
| 930 | 
            -
                    const int kUseUpdateFseState = 1;
         | 
| 931 | 
            -
            #else
         | 
| 932 | 
            -
                    const int kUseUpdateFseState = 0;
         | 
| 933 | 
            -
            #endif
         | 
| 934 | 
            -
                    if (kUseUpdateFseState) {
         | 
| 935 | 
            -
                        ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
         | 
| 936 | 
            -
                        ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
         | 
| 937 | 
            -
                        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
         | 
| 938 | 
            -
                        ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
         | 
| 939 | 
            -
                    } else {
         | 
| 940 | 
            -
                        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo);    /* <=  9 bits */
         | 
| 941 | 
            -
                        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo);    /* <=  9 bits */
         | 
| 942 | 
            -
                        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
         | 
| 943 | 
            -
                        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo);  /* <=  8 bits */
         | 
| 1219 | 
            +
                            U32 const ll0 = (llDInfo->baseValue == 0);
         | 
| 1220 | 
            +
                            if (LIKELY((ofBits == 0))) {
         | 
| 1221 | 
            +
                                offset = seqState->prevOffset[ll0];
         | 
| 1222 | 
            +
                                seqState->prevOffset[1] = seqState->prevOffset[!ll0];
         | 
| 1223 | 
            +
                                seqState->prevOffset[0] = offset;
         | 
| 1224 | 
            +
                            } else {
         | 
| 1225 | 
            +
                                offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
         | 
| 1226 | 
            +
                                {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
         | 
| 1227 | 
            +
                                    temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
         | 
| 1228 | 
            +
                                    if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
         | 
| 1229 | 
            +
                                    seqState->prevOffset[1] = seqState->prevOffset[0];
         | 
| 1230 | 
            +
                                    seqState->prevOffset[0] = offset = temp;
         | 
| 1231 | 
            +
                        }   }   }
         | 
| 1232 | 
            +
                        seq.offset = offset;
         | 
| 944 1233 | 
             
                    }
         | 
| 1234 | 
            +
             | 
| 1235 | 
            +
                #if defined(__clang__)
         | 
| 1236 | 
            +
                    if (UNLIKELY(mlBits > 0))
         | 
| 1237 | 
            +
                #else
         | 
| 1238 | 
            +
                    if (mlBits > 0)
         | 
| 1239 | 
            +
                #endif
         | 
| 1240 | 
            +
                        seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
         | 
| 1241 | 
            +
             | 
| 1242 | 
            +
                    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
         | 
| 1243 | 
            +
                        BIT_reloadDStream(&seqState->DStream);
         | 
| 1244 | 
            +
                    if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
         | 
| 1245 | 
            +
                        BIT_reloadDStream(&seqState->DStream);
         | 
| 1246 | 
            +
                    /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
         | 
| 1247 | 
            +
                    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
         | 
| 1248 | 
            +
             | 
| 1249 | 
            +
                #if defined(__clang__)
         | 
| 1250 | 
            +
                    if (UNLIKELY(llBits > 0))
         | 
| 1251 | 
            +
                #else
         | 
| 1252 | 
            +
                    if (llBits > 0)
         | 
| 1253 | 
            +
                #endif
         | 
| 1254 | 
            +
                        seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
         | 
| 1255 | 
            +
             | 
| 1256 | 
            +
                    if (MEM_32bits())
         | 
| 1257 | 
            +
                        BIT_reloadDStream(&seqState->DStream);
         | 
| 1258 | 
            +
             | 
| 1259 | 
            +
                    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
         | 
| 1260 | 
            +
                                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
         | 
| 1261 | 
            +
             | 
| 1262 | 
            +
                    ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
         | 
| 1263 | 
            +
                    ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
         | 
| 1264 | 
            +
                    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
         | 
| 1265 | 
            +
                    ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
         | 
| 945 1266 | 
             
                }
         | 
| 946 1267 |  | 
| 947 1268 | 
             
                return seq;
         | 
| 948 1269 | 
             
            }
         | 
| 949 1270 |  | 
| 950 1271 | 
             
            #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
         | 
| 951 | 
            -
             | 
| 1272 | 
            +
            MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
         | 
| 952 1273 | 
             
            {
         | 
| 953 1274 | 
             
                size_t const windowSize = dctx->fParams.windowSize;
         | 
| 954 1275 | 
             
                /* No dictionary used. */
         | 
| @@ -969,6 +1290,7 @@ MEM_STATIC void ZSTD_assertValidSequence( | |
| 969 1290 | 
             
                    seq_t const seq,
         | 
| 970 1291 | 
             
                    BYTE const* prefixStart, BYTE const* virtualStart)
         | 
| 971 1292 | 
             
            {
         | 
| 1293 | 
            +
            #if DEBUGLEVEL >= 1
         | 
| 972 1294 | 
             
                size_t const windowSize = dctx->fParams.windowSize;
         | 
| 973 1295 | 
             
                size_t const sequenceSize = seq.litLength + seq.matchLength;
         | 
| 974 1296 | 
             
                BYTE const* const oLitEnd = op + seq.litLength;
         | 
| @@ -986,13 +1308,18 @@ MEM_STATIC void ZSTD_assertValidSequence( | |
| 986 1308 | 
             
                    /* Offset must be within our window. */
         | 
| 987 1309 | 
             
                    assert(seq.offset <= windowSize);
         | 
| 988 1310 | 
             
                }
         | 
| 1311 | 
            +
            #else
         | 
| 1312 | 
            +
                (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
         | 
| 1313 | 
            +
            #endif
         | 
| 989 1314 | 
             
            }
         | 
| 990 1315 | 
             
            #endif
         | 
| 991 1316 |  | 
| 992 1317 | 
             
            #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
         | 
| 1318 | 
            +
             | 
| 1319 | 
            +
             | 
| 993 1320 | 
             
            FORCE_INLINE_TEMPLATE size_t
         | 
| 994 1321 | 
             
            DONT_VECTORIZE
         | 
| 995 | 
            -
             | 
| 1322 | 
            +
            ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
         | 
| 996 1323 | 
             
                                           void* dst, size_t maxDstSize,
         | 
| 997 1324 | 
             
                                     const void* seqStart, size_t seqSize, int nbSeq,
         | 
| 998 1325 | 
             
                                     const ZSTD_longOffset_e isLongOffset,
         | 
| @@ -1000,21 +1327,20 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, | |
| 1000 1327 | 
             
            {
         | 
| 1001 1328 | 
             
                const BYTE* ip = (const BYTE*)seqStart;
         | 
| 1002 1329 | 
             
                const BYTE* const iend = ip + seqSize;
         | 
| 1003 | 
            -
                BYTE* const ostart = (BYTE* | 
| 1330 | 
            +
                BYTE* const ostart = (BYTE*)dst;
         | 
| 1004 1331 | 
             
                BYTE* const oend = ostart + maxDstSize;
         | 
| 1005 1332 | 
             
                BYTE* op = ostart;
         | 
| 1006 1333 | 
             
                const BYTE* litPtr = dctx->litPtr;
         | 
| 1007 | 
            -
                const BYTE*  | 
| 1334 | 
            +
                const BYTE* litBufferEnd = dctx->litBufferEnd;
         | 
| 1008 1335 | 
             
                const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
         | 
| 1009 1336 | 
             
                const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
         | 
| 1010 1337 | 
             
                const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
         | 
| 1011 | 
            -
                DEBUGLOG(5, " | 
| 1338 | 
            +
                DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
         | 
| 1012 1339 | 
             
                (void)frame;
         | 
| 1013 1340 |  | 
| 1014 1341 | 
             
                /* Regen sequences */
         | 
| 1015 1342 | 
             
                if (nbSeq) {
         | 
| 1016 1343 | 
             
                    seqState_t seqState;
         | 
| 1017 | 
            -
                    size_t error = 0;
         | 
| 1018 1344 | 
             
                    dctx->fseEntropy = 1;
         | 
| 1019 1345 | 
             
                    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         | 
| 1020 1346 | 
             
                    RETURN_ERROR_IF(
         | 
| @@ -1030,70 +1356,255 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, | |
| 1030 1356 | 
             
                            BIT_DStream_endOfBuffer < BIT_DStream_completed &&
         | 
| 1031 1357 | 
             
                            BIT_DStream_completed < BIT_DStream_overflow);
         | 
| 1032 1358 |  | 
| 1359 | 
            +
                    /* decompress without overrunning litPtr begins */
         | 
| 1360 | 
            +
                    {
         | 
| 1361 | 
            +
                        seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
         | 
| 1362 | 
            +
                        /* Align the decompression loop to 32 + 16 bytes.
         | 
| 1363 | 
            +
                            *
         | 
| 1364 | 
            +
                            * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
         | 
| 1365 | 
            +
                            * speed swings based on the alignment of the decompression loop. This
         | 
| 1366 | 
            +
                            * performance swing is caused by parts of the decompression loop falling
         | 
| 1367 | 
            +
                            * out of the DSB. The entire decompression loop should fit in the DSB,
         | 
| 1368 | 
            +
                            * when it can't we get much worse performance. You can measure if you've
         | 
| 1369 | 
            +
                            * hit the good case or the bad case with this perf command for some
         | 
| 1370 | 
            +
                            * compressed file test.zst:
         | 
| 1371 | 
            +
                            *
         | 
| 1372 | 
            +
                            *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
         | 
| 1373 | 
            +
                            *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
         | 
| 1374 | 
            +
                            *
         | 
| 1375 | 
            +
                            * If you see most cycles served out of the MITE you've hit the bad case.
         | 
| 1376 | 
            +
                            * If you see most cycles served out of the DSB you've hit the good case.
         | 
| 1377 | 
            +
                            * If it is pretty even then you may be in an okay case.
         | 
| 1378 | 
            +
                            *
         | 
| 1379 | 
            +
                            * This issue has been reproduced on the following CPUs:
         | 
| 1380 | 
            +
                            *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
         | 
| 1381 | 
            +
                            *               Use Instruments->Counters to get DSB/MITE cycles.
         | 
| 1382 | 
            +
                            *               I never got performance swings, but I was able to
         | 
| 1383 | 
            +
                            *               go from the good case of mostly DSB to half of the
         | 
| 1384 | 
            +
                            *               cycles served from MITE.
         | 
| 1385 | 
            +
                            *   - Coffeelake: Intel i9-9900k
         | 
| 1386 | 
            +
                            *   - Coffeelake: Intel i7-9700k
         | 
| 1387 | 
            +
                            *
         | 
| 1388 | 
            +
                            * I haven't been able to reproduce the instability or DSB misses on any
         | 
| 1389 | 
            +
                            * of the following CPUS:
         | 
| 1390 | 
            +
                            *   - Haswell
         | 
| 1391 | 
            +
                            *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
         | 
| 1392 | 
            +
                            *   - Skylake
         | 
| 1393 | 
            +
                            *
         | 
| 1394 | 
            +
                            * Alignment is done for each of the three major decompression loops:
         | 
| 1395 | 
            +
                            *   - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
         | 
| 1396 | 
            +
                            *   - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
         | 
| 1397 | 
            +
                            *   - ZSTD_decompressSequences_body
         | 
| 1398 | 
            +
                            * Alignment choices are made to minimize large swings on bad cases and influence on performance
         | 
| 1399 | 
            +
                            * from changes external to this code, rather than to overoptimize on the current commit.
         | 
| 1400 | 
            +
                            *
         | 
| 1401 | 
            +
                            * If you are seeing performance stability this script can help test.
         | 
| 1402 | 
            +
                            * It tests on 4 commits in zstd where I saw performance change.
         | 
| 1403 | 
            +
                            *
         | 
| 1404 | 
            +
                            *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
         | 
| 1405 | 
            +
                            */
         | 
| 1033 1406 | 
             
            #if defined(__GNUC__) && defined(__x86_64__)
         | 
| 1034 | 
            -
             | 
| 1035 | 
            -
             | 
| 1036 | 
            -
             | 
| 1037 | 
            -
             | 
| 1038 | 
            -
             | 
| 1039 | 
            -
             | 
| 1040 | 
            -
             | 
| 1041 | 
            -
             | 
| 1042 | 
            -
             | 
| 1043 | 
            -
             | 
| 1044 | 
            -
             | 
| 1045 | 
            -
             | 
| 1046 | 
            -
             | 
| 1047 | 
            -
             | 
| 1048 | 
            -
             | 
| 1049 | 
            -
             | 
| 1050 | 
            -
             | 
| 1051 | 
            -
             | 
| 1052 | 
            -
             | 
| 1053 | 
            -
             | 
| 1054 | 
            -
             | 
| 1055 | 
            -
             | 
| 1056 | 
            -
             | 
| 1057 | 
            -
             | 
| 1058 | 
            -
             | 
| 1059 | 
            -
             | 
| 1060 | 
            -
             | 
| 1061 | 
            -
             | 
| 1062 | 
            -
             | 
| 1063 | 
            -
             | 
| 1064 | 
            -
             | 
| 1065 | 
            -
             | 
| 1066 | 
            -
             | 
| 1067 | 
            -
             | 
| 1068 | 
            -
             | 
| 1069 | 
            -
             | 
| 1070 | 
            -
             | 
| 1071 | 
            -
             | 
| 1072 | 
            -
             | 
| 1407 | 
            +
                        __asm__(".p2align 6");
         | 
| 1408 | 
            +
            #  if __GNUC__ >= 7
         | 
| 1409 | 
            +
            	    /* good for gcc-7, gcc-9, and gcc-11 */
         | 
| 1410 | 
            +
                        __asm__("nop");
         | 
| 1411 | 
            +
                        __asm__(".p2align 5");
         | 
| 1412 | 
            +
                        __asm__("nop");
         | 
| 1413 | 
            +
                        __asm__(".p2align 4");
         | 
| 1414 | 
            +
            #    if __GNUC__ == 8 || __GNUC__ == 10
         | 
| 1415 | 
            +
            	    /* good for gcc-8 and gcc-10 */
         | 
| 1416 | 
            +
                        __asm__("nop");
         | 
| 1417 | 
            +
                        __asm__(".p2align 3");
         | 
| 1418 | 
            +
            #    endif
         | 
| 1419 | 
            +
            #  endif
         | 
| 1420 | 
            +
            #endif
         | 
| 1421 | 
            +
             | 
| 1422 | 
            +
                        /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
         | 
| 1423 | 
            +
                        for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
         | 
| 1424 | 
            +
                            size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
         | 
| 1425 | 
            +
            #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
         | 
| 1426 | 
            +
                            assert(!ZSTD_isError(oneSeqSize));
         | 
| 1427 | 
            +
                            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
         | 
| 1428 | 
            +
            #endif
         | 
| 1429 | 
            +
                            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
         | 
| 1430 | 
            +
                                return oneSeqSize;
         | 
| 1431 | 
            +
                            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
         | 
| 1432 | 
            +
                            op += oneSeqSize;
         | 
| 1433 | 
            +
                            if (UNLIKELY(!--nbSeq))
         | 
| 1434 | 
            +
                                break;
         | 
| 1435 | 
            +
                            BIT_reloadDStream(&(seqState.DStream));
         | 
| 1436 | 
            +
                            sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
         | 
| 1437 | 
            +
                        }
         | 
| 1438 | 
            +
             | 
| 1439 | 
            +
                        /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
         | 
| 1440 | 
            +
                        if (nbSeq > 0) {
         | 
| 1441 | 
            +
                            const size_t leftoverLit = dctx->litBufferEnd - litPtr;
         | 
| 1442 | 
            +
                            if (leftoverLit)
         | 
| 1443 | 
            +
                            {
         | 
| 1444 | 
            +
                                RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
         | 
| 1445 | 
            +
                                ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
         | 
| 1446 | 
            +
                                sequence.litLength -= leftoverLit;
         | 
| 1447 | 
            +
                                op += leftoverLit;
         | 
| 1448 | 
            +
                            }
         | 
| 1449 | 
            +
                            litPtr = dctx->litExtraBuffer;
         | 
| 1450 | 
            +
                            litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
         | 
| 1451 | 
            +
                            dctx->litBufferLocation = ZSTD_not_in_dst;
         | 
| 1452 | 
            +
                            {
         | 
| 1453 | 
            +
                                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
         | 
| 1454 | 
            +
            #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
         | 
| 1455 | 
            +
                                assert(!ZSTD_isError(oneSeqSize));
         | 
| 1456 | 
            +
                                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
         | 
| 1457 | 
            +
            #endif
         | 
| 1458 | 
            +
                                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
         | 
| 1459 | 
            +
                                    return oneSeqSize;
         | 
| 1460 | 
            +
                                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
         | 
| 1461 | 
            +
                                op += oneSeqSize;
         | 
| 1462 | 
            +
                                if (--nbSeq)
         | 
| 1463 | 
            +
                                    BIT_reloadDStream(&(seqState.DStream));
         | 
| 1464 | 
            +
                            }
         | 
| 1465 | 
            +
                        }
         | 
| 1466 | 
            +
                    }
         | 
| 1467 | 
            +
             | 
| 1468 | 
            +
                    if (nbSeq > 0) /* there is remaining lit from extra buffer */
         | 
| 1469 | 
            +
                    {
         | 
| 1470 | 
            +
             | 
| 1471 | 
            +
            #if defined(__GNUC__) && defined(__x86_64__)
         | 
| 1472 | 
            +
                        __asm__(".p2align 6");
         | 
| 1473 | 
            +
                        __asm__("nop");
         | 
| 1474 | 
            +
            #  if __GNUC__ != 7
         | 
| 1475 | 
            +
                        /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
         | 
| 1476 | 
            +
                        __asm__(".p2align 4");
         | 
| 1477 | 
            +
                        __asm__("nop");
         | 
| 1478 | 
            +
                        __asm__(".p2align 3");
         | 
| 1479 | 
            +
            #  elif __GNUC__ >= 11
         | 
| 1480 | 
            +
                        __asm__(".p2align 3");
         | 
| 1481 | 
            +
            #  else
         | 
| 1482 | 
            +
                        __asm__(".p2align 5");
         | 
| 1483 | 
            +
                        __asm__("nop");
         | 
| 1484 | 
            +
                        __asm__(".p2align 3");
         | 
| 1485 | 
            +
            #  endif
         | 
| 1486 | 
            +
            #endif
         | 
| 1487 | 
            +
             | 
| 1488 | 
            +
                        for (; ; ) {
         | 
| 1489 | 
            +
                            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
         | 
| 1490 | 
            +
                            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
         | 
| 1491 | 
            +
            #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
         | 
| 1492 | 
            +
                            assert(!ZSTD_isError(oneSeqSize));
         | 
| 1493 | 
            +
                            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
         | 
| 1494 | 
            +
            #endif
         | 
| 1495 | 
            +
                            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
         | 
| 1496 | 
            +
                                return oneSeqSize;
         | 
| 1497 | 
            +
                            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
         | 
| 1498 | 
            +
                            op += oneSeqSize;
         | 
| 1499 | 
            +
                            if (UNLIKELY(!--nbSeq))
         | 
| 1500 | 
            +
                                break;
         | 
| 1501 | 
            +
                            BIT_reloadDStream(&(seqState.DStream));
         | 
| 1502 | 
            +
                        }
         | 
| 1503 | 
            +
                    }
         | 
| 1504 | 
            +
             | 
| 1505 | 
            +
                    /* check if reached exact end */
         | 
| 1506 | 
            +
                    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
         | 
| 1507 | 
            +
                    RETURN_ERROR_IF(nbSeq, corruption_detected, "");
         | 
| 1508 | 
            +
                    RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
         | 
| 1509 | 
            +
                    /* save reps for next block */
         | 
| 1510 | 
            +
                    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
         | 
| 1511 | 
            +
                }
         | 
| 1512 | 
            +
             | 
| 1513 | 
            +
                /* last literal segment */
         | 
| 1514 | 
            +
                if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
         | 
| 1515 | 
            +
                {
         | 
| 1516 | 
            +
                    size_t const lastLLSize = litBufferEnd - litPtr;
         | 
| 1517 | 
            +
                    RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
         | 
| 1518 | 
            +
                    if (op != NULL) {
         | 
| 1519 | 
            +
                        ZSTD_memmove(op, litPtr, lastLLSize);
         | 
| 1520 | 
            +
                        op += lastLLSize;
         | 
| 1521 | 
            +
                    }
         | 
| 1522 | 
            +
                    litPtr = dctx->litExtraBuffer;
         | 
| 1523 | 
            +
                    litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
         | 
| 1524 | 
            +
                    dctx->litBufferLocation = ZSTD_not_in_dst;
         | 
| 1525 | 
            +
                }
         | 
| 1526 | 
            +
                {   size_t const lastLLSize = litBufferEnd - litPtr;
         | 
| 1527 | 
            +
                    RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         | 
| 1528 | 
            +
                    if (op != NULL) {
         | 
| 1529 | 
            +
                        ZSTD_memcpy(op, litPtr, lastLLSize);
         | 
| 1530 | 
            +
                        op += lastLLSize;
         | 
| 1531 | 
            +
                    }
         | 
| 1532 | 
            +
                }
         | 
| 1533 | 
            +
             | 
| 1534 | 
            +
                return op-ostart;
         | 
| 1535 | 
            +
            }
         | 
| 1536 | 
            +
             | 
| 1537 | 
            +
            FORCE_INLINE_TEMPLATE size_t
         | 
| 1538 | 
            +
            DONT_VECTORIZE
         | 
| 1539 | 
            +
            ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
         | 
| 1540 | 
            +
                void* dst, size_t maxDstSize,
         | 
| 1541 | 
            +
                const void* seqStart, size_t seqSize, int nbSeq,
         | 
| 1542 | 
            +
                const ZSTD_longOffset_e isLongOffset,
         | 
| 1543 | 
            +
                const int frame)
         | 
| 1544 | 
            +
            {
         | 
| 1545 | 
            +
                const BYTE* ip = (const BYTE*)seqStart;
         | 
| 1546 | 
            +
                const BYTE* const iend = ip + seqSize;
         | 
| 1547 | 
            +
                BYTE* const ostart = (BYTE*)dst;
         | 
| 1548 | 
            +
                BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
         | 
| 1549 | 
            +
                BYTE* op = ostart;
         | 
| 1550 | 
            +
                const BYTE* litPtr = dctx->litPtr;
         | 
| 1551 | 
            +
                const BYTE* const litEnd = litPtr + dctx->litSize;
         | 
| 1552 | 
            +
                const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
         | 
| 1553 | 
            +
                const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
         | 
| 1554 | 
            +
                const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
         | 
| 1555 | 
            +
                DEBUGLOG(5, "ZSTD_decompressSequences_body");
         | 
| 1556 | 
            +
                (void)frame;
         | 
| 1557 | 
            +
             | 
| 1558 | 
            +
                /* Regen sequences */
         | 
| 1559 | 
            +
                if (nbSeq) {
         | 
| 1560 | 
            +
                    seqState_t seqState;
         | 
| 1561 | 
            +
                    dctx->fseEntropy = 1;
         | 
| 1562 | 
            +
                    { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         | 
| 1563 | 
            +
                    RETURN_ERROR_IF(
         | 
| 1564 | 
            +
                        ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
         | 
| 1565 | 
            +
                        corruption_detected, "");
         | 
| 1566 | 
            +
                    ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         | 
| 1567 | 
            +
                    ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
         | 
| 1568 | 
            +
                    ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
         | 
| 1569 | 
            +
                    assert(dst != NULL);
         | 
| 1570 | 
            +
             | 
| 1571 | 
            +
                    ZSTD_STATIC_ASSERT(
         | 
| 1572 | 
            +
                        BIT_DStream_unfinished < BIT_DStream_completed &&
         | 
| 1573 | 
            +
                        BIT_DStream_endOfBuffer < BIT_DStream_completed &&
         | 
| 1574 | 
            +
                        BIT_DStream_completed < BIT_DStream_overflow);
         | 
| 1575 | 
            +
             | 
| 1576 | 
            +
            #if defined(__GNUC__) && defined(__x86_64__)
         | 
| 1577 | 
            +
                        __asm__(".p2align 6");
         | 
| 1578 | 
            +
                        __asm__("nop");
         | 
| 1579 | 
            +
            #  if __GNUC__ >= 7
         | 
| 1580 | 
            +
                        __asm__(".p2align 5");
         | 
| 1581 | 
            +
                        __asm__("nop");
         | 
| 1582 | 
            +
                        __asm__(".p2align 3");
         | 
| 1583 | 
            +
            #  else
         | 
| 1584 | 
            +
                        __asm__(".p2align 4");
         | 
| 1585 | 
            +
                        __asm__("nop");
         | 
| 1586 | 
            +
                        __asm__(".p2align 3");
         | 
| 1587 | 
            +
            #  endif
         | 
| 1073 1588 | 
             
            #endif
         | 
| 1589 | 
            +
             | 
| 1074 1590 | 
             
                    for ( ; ; ) {
         | 
| 1075 | 
            -
                        seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset | 
| 1591 | 
            +
                        seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
         | 
| 1076 1592 | 
             
                        size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
         | 
| 1077 1593 | 
             
            #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
         | 
| 1078 1594 | 
             
                        assert(!ZSTD_isError(oneSeqSize));
         | 
| 1079 1595 | 
             
                        if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
         | 
| 1080 1596 | 
             
            #endif
         | 
| 1597 | 
            +
                        if (UNLIKELY(ZSTD_isError(oneSeqSize)))
         | 
| 1598 | 
            +
                            return oneSeqSize;
         | 
| 1081 1599 | 
             
                        DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
         | 
| 1600 | 
            +
                        op += oneSeqSize;
         | 
| 1601 | 
            +
                        if (UNLIKELY(!--nbSeq))
         | 
| 1602 | 
            +
                            break;
         | 
| 1082 1603 | 
             
                        BIT_reloadDStream(&(seqState.DStream));
         | 
| 1083 | 
            -
                        /* gcc and clang both don't like early returns in this loop.
         | 
| 1084 | 
            -
                         * gcc doesn't like early breaks either.
         | 
| 1085 | 
            -
                         * Instead save an error and report it at the end.
         | 
| 1086 | 
            -
                         * When there is an error, don't increment op, so we don't
         | 
| 1087 | 
            -
                         * overwrite.
         | 
| 1088 | 
            -
                         */
         | 
| 1089 | 
            -
                        if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
         | 
| 1090 | 
            -
                        else op += oneSeqSize;
         | 
| 1091 | 
            -
                        if (UNLIKELY(!--nbSeq)) break;
         | 
| 1092 1604 | 
             
                    }
         | 
| 1093 1605 |  | 
| 1094 1606 | 
             
                    /* check if reached exact end */
         | 
| 1095 1607 | 
             
                    DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
         | 
| 1096 | 
            -
                    if (ZSTD_isError(error)) return error;
         | 
| 1097 1608 | 
             
                    RETURN_ERROR_IF(nbSeq, corruption_detected, "");
         | 
| 1098 1609 | 
             
                    RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
         | 
| 1099 1610 | 
             
                    /* save reps for next block */
         | 
| @@ -1104,7 +1615,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, | |
| 1104 1615 | 
             
                {   size_t const lastLLSize = litEnd - litPtr;
         | 
| 1105 1616 | 
             
                    RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         | 
| 1106 1617 | 
             
                    if (op != NULL) {
         | 
| 1107 | 
            -
                         | 
| 1618 | 
            +
                        ZSTD_memcpy(op, litPtr, lastLLSize);
         | 
| 1108 1619 | 
             
                        op += lastLLSize;
         | 
| 1109 1620 | 
             
                    }
         | 
| 1110 1621 | 
             
                }
         | 
| @@ -1121,9 +1632,37 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, | |
| 1121 1632 | 
             
            {
         | 
| 1122 1633 | 
             
                return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
         | 
| 1123 1634 | 
             
            }
         | 
| 1635 | 
            +
             | 
| 1636 | 
            +
            static size_t
         | 
| 1637 | 
            +
            ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
         | 
| 1638 | 
            +
                                                           void* dst, size_t maxDstSize,
         | 
| 1639 | 
            +
                                                     const void* seqStart, size_t seqSize, int nbSeq,
         | 
| 1640 | 
            +
                                                     const ZSTD_longOffset_e isLongOffset,
         | 
| 1641 | 
            +
                                                     const int frame)
         | 
| 1642 | 
            +
            {
         | 
| 1643 | 
            +
                return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
         | 
| 1644 | 
            +
            }
         | 
| 1124 1645 | 
             
            #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
         | 
| 1125 1646 |  | 
| 1126 1647 | 
             
            #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
         | 
| 1648 | 
            +
             | 
| 1649 | 
            +
            FORCE_INLINE_TEMPLATE size_t
         | 
| 1650 | 
            +
            ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
         | 
| 1651 | 
            +
                               const BYTE* const prefixStart, const BYTE* const dictEnd)
         | 
| 1652 | 
            +
            {
         | 
| 1653 | 
            +
                prefetchPos += sequence.litLength;
         | 
| 1654 | 
            +
                {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
         | 
| 1655 | 
            +
                    const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
         | 
| 1656 | 
            +
                                                                                          * No consequence though : memory address is only used for prefetching, not for dereferencing */
         | 
| 1657 | 
            +
                    PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
         | 
| 1658 | 
            +
                }
         | 
| 1659 | 
            +
                return prefetchPos + sequence.matchLength;
         | 
| 1660 | 
            +
            }
         | 
| 1661 | 
            +
             | 
| 1662 | 
            +
            /* This decoding function employs prefetching
         | 
| 1663 | 
            +
             * to reduce latency impact of cache misses.
         | 
| 1664 | 
            +
             * It's generally employed when block contains a significant portion of long-distance matches
         | 
| 1665 | 
            +
             * or when coupled with a "cold" dictionary */
         | 
| 1127 1666 | 
             
            FORCE_INLINE_TEMPLATE size_t
         | 
| 1128 1667 | 
             
            ZSTD_decompressSequencesLong_body(
         | 
| 1129 1668 | 
             
                                           ZSTD_DCtx* dctx,
         | 
| @@ -1134,11 +1673,11 @@ ZSTD_decompressSequencesLong_body( | |
| 1134 1673 | 
             
            {
         | 
| 1135 1674 | 
             
                const BYTE* ip = (const BYTE*)seqStart;
         | 
| 1136 1675 | 
             
                const BYTE* const iend = ip + seqSize;
         | 
| 1137 | 
            -
                BYTE* const ostart = (BYTE* | 
| 1138 | 
            -
                BYTE* const oend = ostart + maxDstSize;
         | 
| 1676 | 
            +
                BYTE* const ostart = (BYTE*)dst;
         | 
| 1677 | 
            +
                BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
         | 
| 1139 1678 | 
             
                BYTE* op = ostart;
         | 
| 1140 1679 | 
             
                const BYTE* litPtr = dctx->litPtr;
         | 
| 1141 | 
            -
                const BYTE*  | 
| 1680 | 
            +
                const BYTE* litBufferEnd = dctx->litBufferEnd;
         | 
| 1142 1681 | 
             
                const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
         | 
| 1143 1682 | 
             
                const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
         | 
| 1144 1683 | 
             
                const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
         | 
| @@ -1146,18 +1685,17 @@ ZSTD_decompressSequencesLong_body( | |
| 1146 1685 |  | 
| 1147 1686 | 
             
                /* Regen sequences */
         | 
| 1148 1687 | 
             
                if (nbSeq) {
         | 
| 1149 | 
            -
            #define STORED_SEQS  | 
| 1688 | 
            +
            #define STORED_SEQS 8
         | 
| 1150 1689 | 
             
            #define STORED_SEQS_MASK (STORED_SEQS-1)
         | 
| 1151 | 
            -
            #define ADVANCED_SEQS  | 
| 1690 | 
            +
            #define ADVANCED_SEQS STORED_SEQS
         | 
| 1152 1691 | 
             
                    seq_t sequences[STORED_SEQS];
         | 
| 1153 1692 | 
             
                    int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
         | 
| 1154 1693 | 
             
                    seqState_t seqState;
         | 
| 1155 1694 | 
             
                    int seqNb;
         | 
| 1695 | 
            +
                    size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
         | 
| 1696 | 
            +
             | 
| 1156 1697 | 
             
                    dctx->fseEntropy = 1;
         | 
| 1157 1698 | 
             
                    { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         | 
| 1158 | 
            -
                    seqState.prefixStart = prefixStart;
         | 
| 1159 | 
            -
                    seqState.pos = (size_t)(op-prefixStart);
         | 
| 1160 | 
            -
                    seqState.dictEnd = dictEnd;
         | 
| 1161 1699 | 
             
                    assert(dst != NULL);
         | 
| 1162 1700 | 
             
                    assert(iend >= ip);
         | 
| 1163 1701 | 
             
                    RETURN_ERROR_IF(
         | 
| @@ -1169,36 +1707,100 @@ ZSTD_decompressSequencesLong_body( | |
| 1169 1707 |  | 
| 1170 1708 | 
             
                    /* prepare in advance */
         | 
| 1171 1709 | 
             
                    for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
         | 
| 1172 | 
            -
                         | 
| 1173 | 
            -
                         | 
| 1710 | 
            +
                        seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
         | 
| 1711 | 
            +
                        prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
         | 
| 1712 | 
            +
                        sequences[seqNb] = sequence;
         | 
| 1174 1713 | 
             
                    }
         | 
| 1175 1714 | 
             
                    RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
         | 
| 1176 1715 |  | 
| 1177 | 
            -
                    /*  | 
| 1178 | 
            -
                    for ( | 
| 1179 | 
            -
                        seq_t  | 
| 1180 | 
            -
                        size_t  | 
| 1716 | 
            +
                    /* decompress without stomping litBuffer */
         | 
| 1717 | 
            +
                    for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
         | 
| 1718 | 
            +
                        seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
         | 
| 1719 | 
            +
                        size_t oneSeqSize;
         | 
| 1720 | 
            +
             | 
| 1721 | 
            +
                        if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
         | 
| 1722 | 
            +
                        {
         | 
| 1723 | 
            +
                            /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
         | 
| 1724 | 
            +
                            const size_t leftoverLit = dctx->litBufferEnd - litPtr;
         | 
| 1725 | 
            +
                            if (leftoverLit)
         | 
| 1726 | 
            +
                            {
         | 
| 1727 | 
            +
                                RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
         | 
| 1728 | 
            +
                                ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
         | 
| 1729 | 
            +
                                sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
         | 
| 1730 | 
            +
                                op += leftoverLit;
         | 
| 1731 | 
            +
                            }
         | 
| 1732 | 
            +
                            litPtr = dctx->litExtraBuffer;
         | 
| 1733 | 
            +
                            litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
         | 
| 1734 | 
            +
                            dctx->litBufferLocation = ZSTD_not_in_dst;
         | 
| 1735 | 
            +
                            oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
         | 
| 1181 1736 | 
             
            #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
         | 
| 1182 | 
            -
             | 
| 1183 | 
            -
             | 
| 1737 | 
            +
                            assert(!ZSTD_isError(oneSeqSize));
         | 
| 1738 | 
            +
                            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
         | 
| 1184 1739 | 
             
            #endif
         | 
| 1185 | 
            -
             | 
| 1186 | 
            -
             | 
| 1187 | 
            -
             | 
| 1188 | 
            -
             | 
| 1740 | 
            +
                            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
         | 
| 1741 | 
            +
             | 
| 1742 | 
            +
                            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
         | 
| 1743 | 
            +
                            sequences[seqNb & STORED_SEQS_MASK] = sequence;
         | 
| 1744 | 
            +
                            op += oneSeqSize;
         | 
| 1745 | 
            +
                        }
         | 
| 1746 | 
            +
                        else
         | 
| 1747 | 
            +
                        {
         | 
| 1748 | 
            +
                            /* lit buffer is either wholly contained in first or second split, or not split at all*/
         | 
| 1749 | 
            +
                            oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
         | 
| 1750 | 
            +
                                ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
         | 
| 1751 | 
            +
                                ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
         | 
| 1752 | 
            +
            #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
         | 
| 1753 | 
            +
                            assert(!ZSTD_isError(oneSeqSize));
         | 
| 1754 | 
            +
                            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
         | 
| 1755 | 
            +
            #endif
         | 
| 1756 | 
            +
                            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
         | 
| 1757 | 
            +
             | 
| 1758 | 
            +
                            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
         | 
| 1759 | 
            +
                            sequences[seqNb & STORED_SEQS_MASK] = sequence;
         | 
| 1760 | 
            +
                            op += oneSeqSize;
         | 
| 1761 | 
            +
                        }
         | 
| 1189 1762 | 
             
                    }
         | 
| 1190 1763 | 
             
                    RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
         | 
| 1191 1764 |  | 
| 1192 1765 | 
             
                    /* finish queue */
         | 
| 1193 1766 | 
             
                    seqNb -= seqAdvance;
         | 
| 1194 1767 | 
             
                    for ( ; seqNb<nbSeq ; seqNb++) {
         | 
| 1195 | 
            -
                         | 
| 1768 | 
            +
                        seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
         | 
| 1769 | 
            +
                        if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
         | 
| 1770 | 
            +
                        {
         | 
| 1771 | 
            +
                            const size_t leftoverLit = dctx->litBufferEnd - litPtr;
         | 
| 1772 | 
            +
                            if (leftoverLit)
         | 
| 1773 | 
            +
                            {
         | 
| 1774 | 
            +
                                RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
         | 
| 1775 | 
            +
                                ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
         | 
| 1776 | 
            +
                                sequence->litLength -= leftoverLit;
         | 
| 1777 | 
            +
                                op += leftoverLit;
         | 
| 1778 | 
            +
                            }
         | 
| 1779 | 
            +
                            litPtr = dctx->litExtraBuffer;
         | 
| 1780 | 
            +
                            litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
         | 
| 1781 | 
            +
                            dctx->litBufferLocation = ZSTD_not_in_dst;
         | 
| 1782 | 
            +
                            {
         | 
| 1783 | 
            +
                                size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
         | 
| 1196 1784 | 
             
            #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
         | 
| 1197 | 
            -
             | 
| 1198 | 
            -
             | 
| 1785 | 
            +
                                assert(!ZSTD_isError(oneSeqSize));
         | 
| 1786 | 
            +
                                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
         | 
| 1199 1787 | 
             
            #endif
         | 
| 1200 | 
            -
             | 
| 1201 | 
            -
             | 
| 1788 | 
            +
                                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
         | 
| 1789 | 
            +
                                op += oneSeqSize;
         | 
| 1790 | 
            +
                            }
         | 
| 1791 | 
            +
                        }
         | 
| 1792 | 
            +
                        else
         | 
| 1793 | 
            +
                        {
         | 
| 1794 | 
            +
                            size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
         | 
| 1795 | 
            +
                                ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
         | 
| 1796 | 
            +
                                ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
         | 
| 1797 | 
            +
            #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
         | 
| 1798 | 
            +
                            assert(!ZSTD_isError(oneSeqSize));
         | 
| 1799 | 
            +
                            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
         | 
| 1800 | 
            +
            #endif
         | 
| 1801 | 
            +
                            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
         | 
| 1802 | 
            +
                            op += oneSeqSize;
         | 
| 1803 | 
            +
                        }
         | 
| 1202 1804 | 
             
                    }
         | 
| 1203 1805 |  | 
| 1204 1806 | 
             
                    /* save reps for next block */
         | 
| @@ -1206,10 +1808,21 @@ ZSTD_decompressSequencesLong_body( | |
| 1206 1808 | 
             
                }
         | 
| 1207 1809 |  | 
| 1208 1810 | 
             
                /* last literal segment */
         | 
| 1209 | 
            -
                 | 
| 1811 | 
            +
                if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */
         | 
| 1812 | 
            +
                {
         | 
| 1813 | 
            +
                    size_t const lastLLSize = litBufferEnd - litPtr;
         | 
| 1814 | 
            +
                    RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
         | 
| 1815 | 
            +
                    if (op != NULL) {
         | 
| 1816 | 
            +
                        ZSTD_memmove(op, litPtr, lastLLSize);
         | 
| 1817 | 
            +
                        op += lastLLSize;
         | 
| 1818 | 
            +
                    }
         | 
| 1819 | 
            +
                    litPtr = dctx->litExtraBuffer;
         | 
| 1820 | 
            +
                    litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
         | 
| 1821 | 
            +
                }
         | 
| 1822 | 
            +
                {   size_t const lastLLSize = litBufferEnd - litPtr;
         | 
| 1210 1823 | 
             
                    RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         | 
| 1211 1824 | 
             
                    if (op != NULL) {
         | 
| 1212 | 
            -
                         | 
| 1825 | 
            +
                        ZSTD_memmove(op, litPtr, lastLLSize);
         | 
| 1213 1826 | 
             
                        op += lastLLSize;
         | 
| 1214 1827 | 
             
                    }
         | 
| 1215 1828 | 
             
                }
         | 
| @@ -1233,7 +1846,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, | |
| 1233 1846 | 
             
            #if DYNAMIC_BMI2
         | 
| 1234 1847 |  | 
| 1235 1848 | 
             
            #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
         | 
| 1236 | 
            -
            static  | 
| 1849 | 
            +
            static BMI2_TARGET_ATTRIBUTE size_t
         | 
| 1237 1850 | 
             
            DONT_VECTORIZE
         | 
| 1238 1851 | 
             
            ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
         | 
| 1239 1852 | 
             
                                             void* dst, size_t maxDstSize,
         | 
| @@ -1243,10 +1856,20 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, | |
| 1243 1856 | 
             
            {
         | 
| 1244 1857 | 
             
                return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
         | 
| 1245 1858 | 
             
            }
         | 
| 1859 | 
            +
            static BMI2_TARGET_ATTRIBUTE size_t
         | 
| 1860 | 
            +
            DONT_VECTORIZE
         | 
| 1861 | 
            +
            ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
         | 
| 1862 | 
            +
                                             void* dst, size_t maxDstSize,
         | 
| 1863 | 
            +
                                       const void* seqStart, size_t seqSize, int nbSeq,
         | 
| 1864 | 
            +
                                       const ZSTD_longOffset_e isLongOffset,
         | 
| 1865 | 
            +
                                       const int frame)
         | 
| 1866 | 
            +
            {
         | 
| 1867 | 
            +
                return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
         | 
| 1868 | 
            +
            }
         | 
| 1246 1869 | 
             
            #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
         | 
| 1247 1870 |  | 
| 1248 1871 | 
             
            #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
         | 
| 1249 | 
            -
            static  | 
| 1872 | 
            +
            static BMI2_TARGET_ATTRIBUTE size_t
         | 
| 1250 1873 | 
             
            ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
         | 
| 1251 1874 | 
             
                                             void* dst, size_t maxDstSize,
         | 
| 1252 1875 | 
             
                                       const void* seqStart, size_t seqSize, int nbSeq,
         | 
| @@ -1275,11 +1898,25 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, | |
| 1275 1898 | 
             
            {
         | 
| 1276 1899 | 
             
                DEBUGLOG(5, "ZSTD_decompressSequences");
         | 
| 1277 1900 | 
             
            #if DYNAMIC_BMI2
         | 
| 1278 | 
            -
                if (dctx | 
| 1901 | 
            +
                if (ZSTD_DCtx_get_bmi2(dctx)) {
         | 
| 1279 1902 | 
             
                    return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
         | 
| 1280 1903 | 
             
                }
         | 
| 1281 1904 | 
             
            #endif
         | 
| 1282 | 
            -
             | 
| 1905 | 
            +
                return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
         | 
| 1906 | 
            +
            }
         | 
| 1907 | 
            +
            static size_t
         | 
| 1908 | 
            +
            ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
         | 
| 1909 | 
            +
                                             const void* seqStart, size_t seqSize, int nbSeq,
         | 
| 1910 | 
            +
                                             const ZSTD_longOffset_e isLongOffset,
         | 
| 1911 | 
            +
                                             const int frame)
         | 
| 1912 | 
            +
            {
         | 
| 1913 | 
            +
                DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
         | 
| 1914 | 
            +
            #if DYNAMIC_BMI2
         | 
| 1915 | 
            +
                if (ZSTD_DCtx_get_bmi2(dctx)) {
         | 
| 1916 | 
            +
                    return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
         | 
| 1917 | 
            +
                }
         | 
| 1918 | 
            +
            #endif
         | 
| 1919 | 
            +
                return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
         | 
| 1283 1920 | 
             
            }
         | 
| 1284 1921 | 
             
            #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
         | 
| 1285 1922 |  | 
| @@ -1299,7 +1936,7 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, | |
| 1299 1936 | 
             
            {
         | 
| 1300 1937 | 
             
                DEBUGLOG(5, "ZSTD_decompressSequencesLong");
         | 
| 1301 1938 | 
             
            #if DYNAMIC_BMI2
         | 
| 1302 | 
            -
                if (dctx->bmi2) {
         | 
| 1939 | 
            +
                if (ZSTD_DCtx_get_bmi2(dctx)) {
         | 
| 1303 1940 | 
             
                    return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
         | 
| 1304 1941 | 
             
                }
         | 
| 1305 1942 | 
             
            #endif
         | 
| @@ -1340,7 +1977,7 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) | |
| 1340 1977 | 
             
            size_t
         | 
| 1341 1978 | 
             
            ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
         | 
| 1342 1979 | 
             
                                          void* dst, size_t dstCapacity,
         | 
| 1343 | 
            -
                                    const void* src, size_t srcSize, const int frame)
         | 
| 1980 | 
            +
                                    const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
         | 
| 1344 1981 | 
             
            {   /* blockType == blockCompressed */
         | 
| 1345 1982 | 
             
                const BYTE* ip = (const BYTE*)src;
         | 
| 1346 1983 | 
             
                /* isLongOffset must be true if there are long offsets.
         | 
| @@ -1355,7 +1992,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, | |
| 1355 1992 | 
             
                RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
         | 
| 1356 1993 |  | 
| 1357 1994 | 
             
                /* Decode literals section */
         | 
| 1358 | 
            -
                {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
         | 
| 1995 | 
            +
                {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
         | 
| 1359 1996 | 
             
                    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
         | 
| 1360 1997 | 
             
                    if (ZSTD_isError(litCSize)) return litCSize;
         | 
| 1361 1998 | 
             
                    ip += litCSize;
         | 
| @@ -1403,15 +2040,18 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, | |
| 1403 2040 |  | 
| 1404 2041 | 
             
            #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
         | 
| 1405 2042 | 
             
                    /* else */
         | 
| 1406 | 
            -
                    return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
         | 
| 2043 | 
            +
                    if (dctx->litBufferLocation == ZSTD_split)
         | 
| 2044 | 
            +
                        return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
         | 
| 2045 | 
            +
                    else
         | 
| 2046 | 
            +
                        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
         | 
| 1407 2047 | 
             
            #endif
         | 
| 1408 2048 | 
             
                }
         | 
| 1409 2049 | 
             
            }
         | 
| 1410 2050 |  | 
| 1411 2051 |  | 
| 1412 | 
            -
            void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
         | 
| 2052 | 
            +
            void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
         | 
| 1413 2053 | 
             
            {
         | 
| 1414 | 
            -
                if (dst != dctx->previousDstEnd) {   /* not contiguous */
         | 
| 2054 | 
            +
                if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
         | 
| 1415 2055 | 
             
                    dctx->dictEnd = dctx->previousDstEnd;
         | 
| 1416 2056 | 
             
                    dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
         | 
| 1417 2057 | 
             
                    dctx->prefixStart = dst;
         | 
| @@ -1425,8 +2065,8 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, | |
| 1425 2065 | 
             
                                  const void* src, size_t srcSize)
         | 
| 1426 2066 | 
             
            {
         | 
| 1427 2067 | 
             
                size_t dSize;
         | 
| 1428 | 
            -
                ZSTD_checkContinuity(dctx, dst);
         | 
| 1429 | 
            -
                dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
         | 
| 2068 | 
            +
                ZSTD_checkContinuity(dctx, dst, dstCapacity);
         | 
| 2069 | 
            +
                dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
         | 
| 1430 2070 | 
             
                dctx->previousDstEnd = (char*)dst + dSize;
         | 
| 1431 2071 | 
             
                return dSize;
         | 
| 1432 2072 | 
             
            }
         |