zstdlib 0.10.0-arm64-darwin → 0.11.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +8 -0
- data/ext/zstdlib_c/extconf.rb +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.2/zstdlib.c +5090 -0
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/allocations.h +55 -0
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/bits.h +200 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/bitstream.h +19 -60
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/compiler.h +26 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/cpu.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/entropy_common.c +12 -40
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.c +9 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse.h +5 -83
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse_decompress.c +7 -99
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/huf.h +65 -156
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/mem.h +39 -46
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.c +26 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.h +7 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/portability_macros.h +22 -3
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/threading.c +176 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/threading.h +5 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.c +2 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.h +8 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_common.c +1 -36
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_deps.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_internal.h +17 -118
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_trace.h +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/clevels.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/fse_compress.c +7 -124
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/huf_compress.c +234 -169
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress.c +1243 -538
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_internal.h +225 -151
- data/ext/zstdlib_c/zstd-1.5.5/lib/compress/zstd_compress_literals.c +235 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_literals.h +16 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.c +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_cwksp.h +128 -62
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.c +95 -33
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.c +433 -148
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.c +398 -345
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.h +4 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.c +5 -5
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm_geartab.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.c +106 -80
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.c +17 -9
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress.c +434 -441
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress_amd64.S +30 -39
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.c +4 -4
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress.c +205 -80
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.c +201 -81
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.h +6 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_internal.h +4 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zdict.h +53 -31
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd.h +580 -135
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd_errors.h +27 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzcompatibility.h +8 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzguts.h +10 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzlib.c +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzread.c +10 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzwrite.c +5 -5
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.c +46 -44
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.h +4 -1
- data/lib/2.4/zstdlib_c.bundle +0 -0
- data/lib/2.5/zstdlib_c.bundle +0 -0
- data/lib/2.6/zstdlib_c.bundle +0 -0
- data/lib/2.7/zstdlib_c.bundle +0 -0
- data/lib/3.0/zstdlib_c.bundle +0 -0
- data/lib/3.1/zstdlib_c.bundle +0 -0
- data/lib/3.2/zstdlib_c.bundle +0 -0
- metadata +82 -78
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/threading.c +0 -122
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_literals.c +0 -159
| @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            /* ******************************************************************
         | 
| 2 2 | 
             
             * huff0 huffman decoder,
         | 
| 3 3 | 
             
             * part of Finite State Entropy library
         | 
| 4 | 
            -
             * Copyright (c)  | 
| 4 | 
            +
             * Copyright (c) Meta Platforms, Inc. and affiliates.
         | 
| 5 5 | 
             
             *
         | 
| 6 6 | 
             
             *  You can contact the author at :
         | 
| 7 7 | 
             
             *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
         | 
| @@ -19,10 +19,10 @@ | |
| 19 19 | 
             
            #include "../common/compiler.h"
         | 
| 20 20 | 
             
            #include "../common/bitstream.h"  /* BIT_* */
         | 
| 21 21 | 
             
            #include "../common/fse.h"        /* to compress headers */
         | 
| 22 | 
            -
            #define HUF_STATIC_LINKING_ONLY
         | 
| 23 22 | 
             
            #include "../common/huf.h"
         | 
| 24 23 | 
             
            #include "../common/error_private.h"
         | 
| 25 24 | 
             
            #include "../common/zstd_internal.h"
         | 
| 25 | 
            +
            #include "../common/bits.h"       /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */
         | 
| 26 26 |  | 
| 27 27 | 
             
            /* **************************************************************
         | 
| 28 28 | 
             
            *  Constants
         | 
| @@ -43,10 +43,14 @@ | |
| 43 43 | 
             
            #error "Cannot force the use of the X1 and X2 decoders at the same time!"
         | 
| 44 44 | 
             
            #endif
         | 
| 45 45 |  | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 46 | 
            +
            /* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is
         | 
| 47 | 
            +
             * supported at runtime, so we can add the BMI2 target attribute.
         | 
| 48 | 
            +
             * When it is disabled, we will still get BMI2 if it is enabled statically.
         | 
| 49 | 
            +
             */
         | 
| 50 | 
            +
            #if DYNAMIC_BMI2
         | 
| 51 | 
            +
            # define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
         | 
| 48 52 | 
             
            #else
         | 
| 49 | 
            -
            # define  | 
| 53 | 
            +
            # define HUF_FAST_BMI2_ATTRS
         | 
| 50 54 | 
             
            #endif
         | 
| 51 55 |  | 
| 52 56 | 
             
            #ifdef __cplusplus
         | 
| @@ -56,18 +60,12 @@ | |
| 56 60 | 
             
            #endif
         | 
| 57 61 | 
             
            #define HUF_ASM_DECL HUF_EXTERN_C
         | 
| 58 62 |  | 
| 59 | 
            -
            #if DYNAMIC_BMI2 | 
| 63 | 
            +
            #if DYNAMIC_BMI2
         | 
| 60 64 | 
             
            # define HUF_NEED_BMI2_FUNCTION 1
         | 
| 61 65 | 
             
            #else
         | 
| 62 66 | 
             
            # define HUF_NEED_BMI2_FUNCTION 0
         | 
| 63 67 | 
             
            #endif
         | 
| 64 68 |  | 
| 65 | 
            -
            #if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
         | 
| 66 | 
            -
            # define HUF_NEED_DEFAULT_FUNCTION 1
         | 
| 67 | 
            -
            #else
         | 
| 68 | 
            -
            # define HUF_NEED_DEFAULT_FUNCTION 0
         | 
| 69 | 
            -
            #endif
         | 
| 70 | 
            -
             | 
| 71 69 | 
             
            /* **************************************************************
         | 
| 72 70 | 
             
            *  Error Management
         | 
| 73 71 | 
             
            ****************************************************************/
         | 
| @@ -84,6 +82,11 @@ | |
| 84 82 | 
             
            /* **************************************************************
         | 
| 85 83 | 
             
            *  BMI2 Variant Wrappers
         | 
| 86 84 | 
             
            ****************************************************************/
         | 
| 85 | 
            +
            typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize,
         | 
| 86 | 
            +
                                                          const void *cSrc,
         | 
| 87 | 
            +
                                                          size_t cSrcSize,
         | 
| 88 | 
            +
                                                          const HUF_DTable *DTable);
         | 
| 89 | 
            +
             | 
| 87 90 | 
             
            #if DYNAMIC_BMI2
         | 
| 88 91 |  | 
| 89 92 | 
             
            #define HUF_DGEN(fn)                                                        \
         | 
| @@ -105,9 +108,9 @@ | |
| 105 108 | 
             
                }                                                                       \
         | 
| 106 109 | 
             
                                                                                        \
         | 
| 107 110 | 
             
                static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
         | 
| 108 | 
            -
                                 size_t cSrcSize, HUF_DTable const* DTable, int  | 
| 111 | 
            +
                                 size_t cSrcSize, HUF_DTable const* DTable, int flags)  \
         | 
| 109 112 | 
             
                {                                                                       \
         | 
| 110 | 
            -
                    if ( | 
| 113 | 
            +
                    if (flags & HUF_flags_bmi2) {                                       \
         | 
| 111 114 | 
             
                        return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
         | 
| 112 115 | 
             
                    }                                                                   \
         | 
| 113 116 | 
             
                    return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
         | 
| @@ -117,9 +120,9 @@ | |
| 117 120 |  | 
| 118 121 | 
             
            #define HUF_DGEN(fn)                                                        \
         | 
| 119 122 | 
             
                static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
         | 
| 120 | 
            -
                                 size_t cSrcSize, HUF_DTable const* DTable, int  | 
| 123 | 
            +
                                 size_t cSrcSize, HUF_DTable const* DTable, int flags)  \
         | 
| 121 124 | 
             
                {                                                                       \
         | 
| 122 | 
            -
                    (void) | 
| 125 | 
            +
                    (void)flags;                                                        \
         | 
| 123 126 | 
             
                    return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
         | 
| 124 127 | 
             
                }
         | 
| 125 128 |  | 
| @@ -138,15 +141,28 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) | |
| 138 141 | 
             
                return dtd;
         | 
| 139 142 | 
             
            }
         | 
| 140 143 |  | 
| 141 | 
            -
             | 
| 142 | 
            -
             | 
| 143 | 
            -
            static size_t HUF_initDStream(BYTE const* ip) {
         | 
| 144 | 
            +
            static size_t HUF_initFastDStream(BYTE const* ip) {
         | 
| 144 145 | 
             
                BYTE const lastByte = ip[7];
         | 
| 145 | 
            -
                size_t const bitsConsumed = lastByte ? 8 -  | 
| 146 | 
            +
                size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
         | 
| 146 147 | 
             
                size_t const value = MEM_readLEST(ip) | 1;
         | 
| 147 148 | 
             
                assert(bitsConsumed <= 8);
         | 
| 149 | 
            +
                assert(sizeof(size_t) == 8);
         | 
| 148 150 | 
             
                return value << bitsConsumed;
         | 
| 149 151 | 
             
            }
         | 
| 152 | 
            +
             | 
| 153 | 
            +
             | 
| 154 | 
            +
            /**
         | 
| 155 | 
            +
             * The input/output arguments to the Huffman fast decoding loop:
         | 
| 156 | 
            +
             *
         | 
| 157 | 
            +
             * ip [in/out] - The input pointers, must be updated to reflect what is consumed.
         | 
| 158 | 
            +
             * op [in/out] - The output pointers, must be updated to reflect what is written.
         | 
| 159 | 
            +
             * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
         | 
| 160 | 
            +
             * dt [in] - The decoding table.
         | 
| 161 | 
            +
             * ilimit [in] - The input limit, stop when any input pointer is below ilimit.
         | 
| 162 | 
            +
             * oend [in] - The end of the output stream. op[3] must not cross oend.
         | 
| 163 | 
            +
             * iend [in] - The end of each input stream. ip[i] may cross iend[i],
         | 
| 164 | 
            +
             *             as long as it is above ilimit, but that indicates corruption.
         | 
| 165 | 
            +
             */
         | 
| 150 166 | 
             
            typedef struct {
         | 
| 151 167 | 
             
                BYTE const* ip[4];
         | 
| 152 168 | 
             
                BYTE* op[4];
         | 
| @@ -155,15 +171,17 @@ typedef struct { | |
| 155 171 | 
             
                BYTE const* ilimit;
         | 
| 156 172 | 
             
                BYTE* oend;
         | 
| 157 173 | 
             
                BYTE const* iend[4];
         | 
| 158 | 
            -
            }  | 
| 174 | 
            +
            } HUF_DecompressFastArgs;
         | 
| 175 | 
            +
             | 
| 176 | 
            +
            typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
         | 
| 159 177 |  | 
| 160 178 | 
             
            /**
         | 
| 161 | 
            -
             * Initializes args for the  | 
| 162 | 
            -
             * @returns  | 
| 163 | 
            -
             *           | 
| 179 | 
            +
             * Initializes args for the fast decoding loop.
         | 
| 180 | 
            +
             * @returns 1 on success
         | 
| 181 | 
            +
             *          0 if the fallback implementation should be used.
         | 
| 164 182 | 
             
             *          Or an error code on failure.
         | 
| 165 183 | 
             
             */
         | 
| 166 | 
            -
            static size_t  | 
| 184 | 
            +
            static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
         | 
| 167 185 | 
             
            {
         | 
| 168 186 | 
             
                void const* dt = DTable + 1;
         | 
| 169 187 | 
             
                U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
         | 
| @@ -172,9 +190,11 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, | |
| 172 190 |  | 
| 173 191 | 
             
                BYTE* const oend = (BYTE*)dst + dstSize;
         | 
| 174 192 |  | 
| 175 | 
            -
                /* The  | 
| 176 | 
            -
                 *  | 
| 177 | 
            -
             | 
| 193 | 
            +
                /* The fast decoding loop assumes 64-bit little-endian.
         | 
| 194 | 
            +
                 * This condition is false on x32.
         | 
| 195 | 
            +
                 */
         | 
| 196 | 
            +
                if (!MEM_isLittleEndian() || MEM_32bits())
         | 
| 197 | 
            +
                    return 0;
         | 
| 178 198 |  | 
| 179 199 | 
             
                /* strict minimum : jump table + 1 byte per stream */
         | 
| 180 200 | 
             
                if (srcSize < 10)
         | 
| @@ -185,7 +205,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, | |
| 185 205 | 
             
                 * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
         | 
| 186 206 | 
             
                 */
         | 
| 187 207 | 
             
                if (dtLog != HUF_DECODER_FAST_TABLELOG)
         | 
| 188 | 
            -
                    return  | 
| 208 | 
            +
                    return 0;
         | 
| 189 209 |  | 
| 190 210 | 
             
                /* Read the jump table. */
         | 
| 191 211 | 
             
                {
         | 
| @@ -199,13 +219,13 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, | |
| 199 219 | 
             
                    args->iend[2] = args->iend[1] + length2;
         | 
| 200 220 | 
             
                    args->iend[3] = args->iend[2] + length3;
         | 
| 201 221 |  | 
| 202 | 
            -
                    /*  | 
| 222 | 
            +
                    /* HUF_initFastDStream() requires this, and this small of an input
         | 
| 203 223 | 
             
                     * won't benefit from the ASM loop anyways.
         | 
| 204 224 | 
             
                     * length1 must be >= 16 so that ip[0] >= ilimit before the loop
         | 
| 205 225 | 
             
                     * starts.
         | 
| 206 226 | 
             
                     */
         | 
| 207 227 | 
             
                    if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
         | 
| 208 | 
            -
                        return  | 
| 228 | 
            +
                        return 0;
         | 
| 209 229 | 
             
                    if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
         | 
| 210 230 | 
             
                }
         | 
| 211 231 | 
             
                /* ip[] contains the position that is currently loaded into bits[]. */
         | 
| @@ -222,7 +242,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, | |
| 222 242 |  | 
| 223 243 | 
             
                /* No point to call the ASM loop for tiny outputs. */
         | 
| 224 244 | 
             
                if (args->op[3] >= oend)
         | 
| 225 | 
            -
                    return  | 
| 245 | 
            +
                    return 0;
         | 
| 226 246 |  | 
| 227 247 | 
             
                /* bits[] is the bit container.
         | 
| 228 248 | 
             
                    * It is read from the MSB down to the LSB.
         | 
| @@ -231,10 +251,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, | |
| 231 251 | 
             
                    * set, so that CountTrailingZeros(bits[]) can be used
         | 
| 232 252 | 
             
                    * to count how many bits we've consumed.
         | 
| 233 253 | 
             
                    */
         | 
| 234 | 
            -
                args->bits[0] =  | 
| 235 | 
            -
                args->bits[1] =  | 
| 236 | 
            -
                args->bits[2] =  | 
| 237 | 
            -
                args->bits[3] =  | 
| 254 | 
            +
                args->bits[0] = HUF_initFastDStream(args->ip[0]);
         | 
| 255 | 
            +
                args->bits[1] = HUF_initFastDStream(args->ip[1]);
         | 
| 256 | 
            +
                args->bits[2] = HUF_initFastDStream(args->ip[2]);
         | 
| 257 | 
            +
                args->bits[3] = HUF_initFastDStream(args->ip[3]);
         | 
| 238 258 |  | 
| 239 259 | 
             
                /* If ip[] >= ilimit, it is guaranteed to be safe to
         | 
| 240 260 | 
             
                    * reload bits[]. It may be beyond its section, but is
         | 
| @@ -245,10 +265,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, | |
| 245 265 | 
             
                args->oend = oend;
         | 
| 246 266 | 
             
                args->dt = dt;
         | 
| 247 267 |  | 
| 248 | 
            -
                return  | 
| 268 | 
            +
                return 1;
         | 
| 249 269 | 
             
            }
         | 
| 250 270 |  | 
| 251 | 
            -
            static size_t HUF_initRemainingDStream(BIT_DStream_t* bit,  | 
| 271 | 
            +
            static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd)
         | 
| 252 272 | 
             
            {
         | 
| 253 273 | 
             
                /* Validate that we haven't overwritten. */
         | 
| 254 274 | 
             
                if (args->op[stream] > segmentEnd)
         | 
| @@ -262,15 +282,15 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs | |
| 262 282 | 
             
                    return ERROR(corruption_detected);
         | 
| 263 283 |  | 
| 264 284 | 
             
                /* Construct the BIT_DStream_t. */
         | 
| 265 | 
            -
                 | 
| 266 | 
            -
                bit-> | 
| 285 | 
            +
                assert(sizeof(size_t) == 8);
         | 
| 286 | 
            +
                bit->bitContainer = MEM_readLEST(args->ip[stream]);
         | 
| 287 | 
            +
                bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
         | 
| 267 288 | 
             
                bit->start = (const char*)args->iend[0];
         | 
| 268 289 | 
             
                bit->limitPtr = bit->start + sizeof(size_t);
         | 
| 269 290 | 
             
                bit->ptr = (const char*)args->ip[stream];
         | 
| 270 291 |  | 
| 271 292 | 
             
                return 0;
         | 
| 272 293 | 
             
            }
         | 
| 273 | 
            -
            #endif
         | 
| 274 294 |  | 
| 275 295 |  | 
| 276 296 | 
             
            #ifndef HUF_FORCE_DECOMPRESS_X2
         | 
| @@ -287,10 +307,11 @@ typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1;   /* single-symbol decodi | |
| 287 307 | 
             
            static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
         | 
| 288 308 | 
             
                U64 D4;
         | 
| 289 309 | 
             
                if (MEM_isLittleEndian()) {
         | 
| 290 | 
            -
                    D4 = (symbol << 8) + nbBits;
         | 
| 310 | 
            +
                    D4 = (U64)((symbol << 8) + nbBits);
         | 
| 291 311 | 
             
                } else {
         | 
| 292 | 
            -
                    D4 = symbol + (nbBits << 8);
         | 
| 312 | 
            +
                    D4 = (U64)(symbol + (nbBits << 8));
         | 
| 293 313 | 
             
                }
         | 
| 314 | 
            +
                assert(D4 < (1U << 16));
         | 
| 294 315 | 
             
                D4 *= 0x0001000100010001ULL;
         | 
| 295 316 | 
             
                return D4;
         | 
| 296 317 | 
             
            }
         | 
| @@ -333,13 +354,7 @@ typedef struct { | |
| 333 354 | 
             
                    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
         | 
| 334 355 | 
             
            } HUF_ReadDTableX1_Workspace;
         | 
| 335 356 |  | 
| 336 | 
            -
             | 
| 337 | 
            -
            size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
         | 
| 338 | 
            -
            {
         | 
| 339 | 
            -
                return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
         | 
| 340 | 
            -
            }
         | 
| 341 | 
            -
             | 
| 342 | 
            -
            size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
         | 
| 357 | 
            +
            size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags)
         | 
| 343 358 | 
             
            {
         | 
| 344 359 | 
             
                U32 tableLog = 0;
         | 
| 345 360 | 
             
                U32 nbSymbols = 0;
         | 
| @@ -354,7 +369,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr | |
| 354 369 | 
             
                DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
         | 
| 355 370 | 
             
                /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
         | 
| 356 371 |  | 
| 357 | 
            -
                iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp),  | 
| 372 | 
            +
                iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags);
         | 
| 358 373 | 
             
                if (HUF_isError(iSize)) return iSize;
         | 
| 359 374 |  | 
| 360 375 |  | 
| @@ -381,9 +396,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr | |
| 381 396 | 
             
                 * rankStart[0] is not filled because there are no entries in the table for
         | 
| 382 397 | 
             
                 * weight 0.
         | 
| 383 398 | 
             
                 */
         | 
| 384 | 
            -
                {
         | 
| 385 | 
            -
                     | 
| 386 | 
            -
                    int nextRankStart = 0;
         | 
| 399 | 
            +
                {   int n;
         | 
| 400 | 
            +
                    U32 nextRankStart = 0;
         | 
| 387 401 | 
             
                    int const unroll = 4;
         | 
| 388 402 | 
             
                    int const nLimit = (int)nbSymbols - unroll + 1;
         | 
| 389 403 | 
             
                    for (n=0; n<(int)tableLog+1; n++) {
         | 
| @@ -410,10 +424,9 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr | |
| 410 424 | 
             
                 * We can switch based on the length to a different inner loop which is
         | 
| 411 425 | 
             
                 * optimized for that particular case.
         | 
| 412 426 | 
             
                 */
         | 
| 413 | 
            -
                {
         | 
| 414 | 
            -
                     | 
| 415 | 
            -
                    int  | 
| 416 | 
            -
                    int rankStart=0;
         | 
| 427 | 
            +
                {   U32 w;
         | 
| 428 | 
            +
                    int symbol = wksp->rankVal[0];
         | 
| 429 | 
            +
                    int rankStart = 0;
         | 
| 417 430 | 
             
                    for (w=1; w<tableLog+1; ++w) {
         | 
| 418 431 | 
             
                        int const symbolCount = wksp->rankVal[w];
         | 
| 419 432 | 
             
                        int const length = (1 << w) >> 1;
         | 
| @@ -523,7 +536,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons | |
| 523 536 | 
             
                while (p < pEnd)
         | 
| 524 537 | 
             
                    HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
         | 
| 525 538 |  | 
| 526 | 
            -
                return pEnd-pStart;
         | 
| 539 | 
            +
                return (size_t)(pEnd-pStart);
         | 
| 527 540 | 
             
            }
         | 
| 528 541 |  | 
| 529 542 | 
             
            FORCE_INLINE_TEMPLATE size_t
         | 
| @@ -549,6 +562,10 @@ HUF_decompress1X1_usingDTable_internal_body( | |
| 549 562 | 
             
                return dstSize;
         | 
| 550 563 | 
             
            }
         | 
| 551 564 |  | 
| 565 | 
            +
            /* HUF_decompress4X1_usingDTable_internal_body():
         | 
| 566 | 
            +
             * Conditions :
         | 
| 567 | 
            +
             * @dstSize >= 6
         | 
| 568 | 
            +
             */
         | 
| 552 569 | 
             
            FORCE_INLINE_TEMPLATE size_t
         | 
| 553 570 | 
             
            HUF_decompress4X1_usingDTable_internal_body(
         | 
| 554 571 | 
             
                      void* dst,  size_t dstSize,
         | 
| @@ -592,6 +609,7 @@ HUF_decompress4X1_usingDTable_internal_body( | |
| 592 609 |  | 
| 593 610 | 
             
                    if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
         | 
| 594 611 | 
             
                    if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
         | 
| 612 | 
            +
                    if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
         | 
| 595 613 | 
             
                    CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
         | 
| 596 614 | 
             
                    CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
         | 
| 597 615 | 
             
                    CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
         | 
| @@ -654,38 +672,142 @@ size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo | |
| 654 672 | 
             
            }
         | 
| 655 673 | 
             
            #endif
         | 
| 656 674 |  | 
| 657 | 
            -
            #if HUF_NEED_DEFAULT_FUNCTION
         | 
| 658 675 | 
             
            static
         | 
| 659 676 | 
             
            size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
         | 
| 660 677 | 
             
                                size_t cSrcSize, HUF_DTable const* DTable) {
         | 
| 661 678 | 
             
                return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| 662 679 | 
             
            }
         | 
| 663 | 
            -
            #endif
         | 
| 664 680 |  | 
| 665 681 | 
             
            #if ZSTD_ENABLE_ASM_X86_64_BMI2
         | 
| 666 682 |  | 
| 667 | 
            -
            HUF_ASM_DECL void  | 
| 683 | 
            +
            HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
         | 
| 668 684 |  | 
| 669 | 
            -
             | 
| 685 | 
            +
            #endif
         | 
| 686 | 
            +
             | 
| 687 | 
            +
            static HUF_FAST_BMI2_ATTRS
         | 
| 688 | 
            +
            void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
         | 
| 689 | 
            +
            {
         | 
| 690 | 
            +
                U64 bits[4];
         | 
| 691 | 
            +
                BYTE const* ip[4];
         | 
| 692 | 
            +
                BYTE* op[4];
         | 
| 693 | 
            +
                U16 const* const dtable = (U16 const*)args->dt;
         | 
| 694 | 
            +
                BYTE* const oend = args->oend;
         | 
| 695 | 
            +
                BYTE const* const ilimit = args->ilimit;
         | 
| 696 | 
            +
             | 
| 697 | 
            +
                /* Copy the arguments to local variables */
         | 
| 698 | 
            +
                ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
         | 
| 699 | 
            +
                ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
         | 
| 700 | 
            +
                ZSTD_memcpy(&op, &args->op, sizeof(op));
         | 
| 701 | 
            +
             | 
| 702 | 
            +
                assert(MEM_isLittleEndian());
         | 
| 703 | 
            +
                assert(!MEM_32bits());
         | 
| 704 | 
            +
             | 
| 705 | 
            +
                for (;;) {
         | 
| 706 | 
            +
                    BYTE* olimit;
         | 
| 707 | 
            +
                    int stream;
         | 
| 708 | 
            +
                    int symbol;
         | 
| 709 | 
            +
             | 
| 710 | 
            +
                    /* Assert loop preconditions */
         | 
| 711 | 
            +
            #ifndef NDEBUG
         | 
| 712 | 
            +
                    for (stream = 0; stream < 4; ++stream) {
         | 
| 713 | 
            +
                        assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
         | 
| 714 | 
            +
                        assert(ip[stream] >= ilimit);
         | 
| 715 | 
            +
                    }
         | 
| 716 | 
            +
            #endif
         | 
| 717 | 
            +
                    /* Compute olimit */
         | 
| 718 | 
            +
                    {
         | 
| 719 | 
            +
                        /* Each iteration produces 5 output symbols per stream */
         | 
| 720 | 
            +
                        size_t const oiters = (size_t)(oend - op[3]) / 5;
         | 
| 721 | 
            +
                        /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
         | 
| 722 | 
            +
                         * per stream.
         | 
| 723 | 
            +
                         */
         | 
| 724 | 
            +
                        size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
         | 
| 725 | 
            +
                        /* We can safely run iters iterations before running bounds checks */
         | 
| 726 | 
            +
                        size_t const iters = MIN(oiters, iiters);
         | 
| 727 | 
            +
                        size_t const symbols = iters * 5;
         | 
| 728 | 
            +
             | 
| 729 | 
            +
                        /* We can simply check that op[3] < olimit, instead of checking all
         | 
| 730 | 
            +
                         * of our bounds, since we can't hit the other bounds until we've run
         | 
| 731 | 
            +
                         * iters iterations, which only happens when op[3] == olimit.
         | 
| 732 | 
            +
                         */
         | 
| 733 | 
            +
                        olimit = op[3] + symbols;
         | 
| 734 | 
            +
             | 
| 735 | 
            +
                        /* Exit fast decoding loop once we get close to the end. */
         | 
| 736 | 
            +
                        if (op[3] + 20 > olimit)
         | 
| 737 | 
            +
                            break;
         | 
| 738 | 
            +
             | 
| 739 | 
            +
                        /* Exit the decoding loop if any input pointer has crossed the
         | 
| 740 | 
            +
                         * previous one. This indicates corruption, and a precondition
         | 
| 741 | 
            +
                         * to our loop is that ip[i] >= ip[0].
         | 
| 742 | 
            +
                         */
         | 
| 743 | 
            +
                        for (stream = 1; stream < 4; ++stream) {
         | 
| 744 | 
            +
                            if (ip[stream] < ip[stream - 1])
         | 
| 745 | 
            +
                                goto _out;
         | 
| 746 | 
            +
                        }
         | 
| 747 | 
            +
                    }
         | 
| 748 | 
            +
             | 
| 749 | 
            +
            #ifndef NDEBUG
         | 
| 750 | 
            +
                    for (stream = 1; stream < 4; ++stream) {
         | 
| 751 | 
            +
                        assert(ip[stream] >= ip[stream - 1]);
         | 
| 752 | 
            +
                    }
         | 
| 753 | 
            +
            #endif
         | 
| 754 | 
            +
             | 
| 755 | 
            +
                    do {
         | 
| 756 | 
            +
                        /* Decode 5 symbols in each of the 4 streams */
         | 
| 757 | 
            +
                        for (symbol = 0; symbol < 5; ++symbol) {
         | 
| 758 | 
            +
                            for (stream = 0; stream < 4; ++stream) {
         | 
| 759 | 
            +
                                int const index = (int)(bits[stream] >> 53);
         | 
| 760 | 
            +
                                int const entry = (int)dtable[index];
         | 
| 761 | 
            +
                                bits[stream] <<= (entry & 63);
         | 
| 762 | 
            +
                                op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
         | 
| 763 | 
            +
                            }
         | 
| 764 | 
            +
                        }
         | 
| 765 | 
            +
                        /* Reload the bitstreams */
         | 
| 766 | 
            +
                        for (stream = 0; stream < 4; ++stream) {
         | 
| 767 | 
            +
                            int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
         | 
| 768 | 
            +
                            int const nbBits = ctz & 7;
         | 
| 769 | 
            +
                            int const nbBytes = ctz >> 3;
         | 
| 770 | 
            +
                            op[stream] += 5;
         | 
| 771 | 
            +
                            ip[stream] -= nbBytes;
         | 
| 772 | 
            +
                            bits[stream] = MEM_read64(ip[stream]) | 1;
         | 
| 773 | 
            +
                            bits[stream] <<= nbBits;
         | 
| 774 | 
            +
                        }
         | 
| 775 | 
            +
                    } while (op[3] < olimit);
         | 
| 776 | 
            +
                }
         | 
| 777 | 
            +
             | 
| 778 | 
            +
            _out:
         | 
| 779 | 
            +
             | 
| 780 | 
            +
                /* Save the final values of each of the state variables back to args. */
         | 
| 781 | 
            +
                ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
         | 
| 782 | 
            +
                ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
         | 
| 783 | 
            +
                ZSTD_memcpy(&args->op, &op, sizeof(op));
         | 
| 784 | 
            +
            }
         | 
| 785 | 
            +
             | 
| 786 | 
            +
            /**
         | 
| 787 | 
            +
             * @returns @p dstSize on success (>= 6)
         | 
| 788 | 
            +
             *          0 if the fallback implementation should be used
         | 
| 789 | 
            +
             *          An error if an error occurred
         | 
| 790 | 
            +
             */
         | 
| 791 | 
            +
            static HUF_FAST_BMI2_ATTRS
         | 
| 670 792 | 
             
            size_t
         | 
| 671 | 
            -
             | 
| 793 | 
            +
            HUF_decompress4X1_usingDTable_internal_fast(
         | 
| 672 794 | 
             
                      void* dst,  size_t dstSize,
         | 
| 673 795 | 
             
                const void* cSrc, size_t cSrcSize,
         | 
| 674 | 
            -
                const HUF_DTable* DTable | 
| 796 | 
            +
                const HUF_DTable* DTable,
         | 
| 797 | 
            +
                HUF_DecompressFastLoopFn loopFn)
         | 
| 675 798 | 
             
            {
         | 
| 676 799 | 
             
                void const* dt = DTable + 1;
         | 
| 677 800 | 
             
                const BYTE* const iend = (const BYTE*)cSrc + 6;
         | 
| 678 801 | 
             
                BYTE* const oend = (BYTE*)dst + dstSize;
         | 
| 679 | 
            -
                 | 
| 680 | 
            -
                {
         | 
| 681 | 
            -
                     | 
| 682 | 
            -
                     | 
| 683 | 
            -
             | 
| 684 | 
            -
                        return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| 802 | 
            +
                HUF_DecompressFastArgs args;
         | 
| 803 | 
            +
                {   size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| 804 | 
            +
                    FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
         | 
| 805 | 
            +
                    if (ret == 0)
         | 
| 806 | 
            +
                        return 0;
         | 
| 685 807 | 
             
                }
         | 
| 686 808 |  | 
| 687 809 | 
             
                assert(args.ip[0] >= args.ilimit);
         | 
| 688 | 
            -
                 | 
| 810 | 
            +
                loopFn(&args);
         | 
| 689 811 |  | 
| 690 812 | 
             
                /* Our loop guarantees that ip[] >= ilimit and that we haven't
         | 
| 691 813 | 
             
                * overwritten any op[].
         | 
| @@ -698,8 +820,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm( | |
| 698 820 | 
             
                (void)iend;
         | 
| 699 821 |  | 
| 700 822 | 
             
                /* finish bit streams one by one. */
         | 
| 701 | 
            -
                {
         | 
| 702 | 
            -
                    size_t const segmentSize = (dstSize+3) / 4;
         | 
| 823 | 
            +
                {   size_t const segmentSize = (dstSize+3) / 4;
         | 
| 703 824 | 
             
                    BYTE* segmentEnd = (BYTE*)dst;
         | 
| 704 825 | 
             
                    int i;
         | 
| 705 826 | 
             
                    for (i = 0; i < 4; ++i) {
         | 
| @@ -716,97 +837,59 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm( | |
| 716 837 | 
             
                }
         | 
| 717 838 |  | 
| 718 839 | 
             
                /* decoded size */
         | 
| 840 | 
            +
                assert(dstSize != 0);
         | 
| 719 841 | 
             
                return dstSize;
         | 
| 720 842 | 
             
            }
         | 
| 721 | 
            -
            #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
         | 
| 722 | 
            -
             | 
| 723 | 
            -
            typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
         | 
| 724 | 
            -
                                                           const void *cSrc,
         | 
| 725 | 
            -
                                                           size_t cSrcSize,
         | 
| 726 | 
            -
                                                           const HUF_DTable *DTable);
         | 
| 727 843 |  | 
| 728 844 | 
             
            HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
         | 
| 729 845 |  | 
| 730 846 | 
             
            static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
         | 
| 731 | 
            -
                                size_t cSrcSize, HUF_DTable const* DTable, int  | 
| 847 | 
            +
                                size_t cSrcSize, HUF_DTable const* DTable, int flags)
         | 
| 732 848 | 
             
            {
         | 
| 849 | 
            +
                HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default;
         | 
| 850 | 
            +
                HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop;
         | 
| 851 | 
            +
             | 
| 733 852 | 
             
            #if DYNAMIC_BMI2
         | 
| 734 | 
            -
                if ( | 
| 853 | 
            +
                if (flags & HUF_flags_bmi2) {
         | 
| 854 | 
            +
                    fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
         | 
| 735 855 | 
             
            # if ZSTD_ENABLE_ASM_X86_64_BMI2
         | 
| 736 | 
            -
                     | 
| 737 | 
            -
             | 
| 738 | 
            -
                     | 
| 856 | 
            +
                    if (!(flags & HUF_flags_disableAsm)) {
         | 
| 857 | 
            +
                        loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
         | 
| 858 | 
            +
                    }
         | 
| 739 859 | 
             
            # endif
         | 
| 860 | 
            +
                } else {
         | 
| 861 | 
            +
                    return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| 740 862 | 
             
                }
         | 
| 741 | 
            -
            #else
         | 
| 742 | 
            -
                (void)bmi2;
         | 
| 743 863 | 
             
            #endif
         | 
| 744 864 |  | 
| 745 865 | 
             
            #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
         | 
| 746 | 
            -
                 | 
| 747 | 
            -
             | 
| 748 | 
            -
                 | 
| 866 | 
            +
                if (!(flags & HUF_flags_disableAsm)) {
         | 
| 867 | 
            +
                    loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
         | 
| 868 | 
            +
                }
         | 
| 749 869 | 
             
            #endif
         | 
| 750 | 
            -
            }
         | 
| 751 870 |  | 
| 752 | 
            -
             | 
| 753 | 
            -
            size_t  | 
| 754 | 
            -
             | 
| 755 | 
            -
             | 
| 756 | 
            -
                 | 
| 757 | 
            -
             | 
| 758 | 
            -
                DTableDesc dtd = HUF_getDTableDesc(DTable);
         | 
| 759 | 
            -
                if (dtd.tableType != 0) return ERROR(GENERIC);
         | 
| 760 | 
            -
                return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
         | 
| 761 | 
            -
            }
         | 
| 762 | 
            -
             | 
| 763 | 
            -
            size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
         | 
| 764 | 
            -
                                               const void* cSrc, size_t cSrcSize,
         | 
| 765 | 
            -
                                               void* workSpace, size_t wkspSize)
         | 
| 766 | 
            -
            {
         | 
| 767 | 
            -
                const BYTE* ip = (const BYTE*) cSrc;
         | 
| 768 | 
            -
             | 
| 769 | 
            -
                size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
         | 
| 770 | 
            -
                if (HUF_isError(hSize)) return hSize;
         | 
| 771 | 
            -
                if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
         | 
| 772 | 
            -
                ip += hSize; cSrcSize -= hSize;
         | 
| 773 | 
            -
             | 
| 774 | 
            -
                return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
         | 
| 775 | 
            -
            }
         | 
| 776 | 
            -
             | 
| 777 | 
            -
             | 
| 778 | 
            -
            size_t HUF_decompress4X1_usingDTable(
         | 
| 779 | 
            -
                      void* dst,  size_t dstSize,
         | 
| 780 | 
            -
                const void* cSrc, size_t cSrcSize,
         | 
| 781 | 
            -
                const HUF_DTable* DTable)
         | 
| 782 | 
            -
            {
         | 
| 783 | 
            -
                DTableDesc dtd = HUF_getDTableDesc(DTable);
         | 
| 784 | 
            -
                if (dtd.tableType != 0) return ERROR(GENERIC);
         | 
| 785 | 
            -
                return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
         | 
| 871 | 
            +
                if (!(flags & HUF_flags_disableFast)) {
         | 
| 872 | 
            +
                    size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
         | 
| 873 | 
            +
                    if (ret != 0)
         | 
| 874 | 
            +
                        return ret;
         | 
| 875 | 
            +
                }
         | 
| 876 | 
            +
                return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| 786 877 | 
             
            }
         | 
| 787 878 |  | 
| 788 | 
            -
            static size_t  | 
| 879 | 
            +
            static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
         | 
| 789 880 | 
             
                                               const void* cSrc, size_t cSrcSize,
         | 
| 790 | 
            -
                                               void* workSpace, size_t wkspSize, int  | 
| 881 | 
            +
                                               void* workSpace, size_t wkspSize, int flags)
         | 
| 791 882 | 
             
            {
         | 
| 792 883 | 
             
                const BYTE* ip = (const BYTE*) cSrc;
         | 
| 793 884 |  | 
| 794 | 
            -
                size_t const hSize =  | 
| 885 | 
            +
                size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
         | 
| 795 886 | 
             
                if (HUF_isError(hSize)) return hSize;
         | 
| 796 887 | 
             
                if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
         | 
| 797 888 | 
             
                ip += hSize; cSrcSize -= hSize;
         | 
| 798 889 |  | 
| 799 | 
            -
                return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,  | 
| 800 | 
            -
            }
         | 
| 801 | 
            -
             | 
| 802 | 
            -
            size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
         | 
| 803 | 
            -
                                               const void* cSrc, size_t cSrcSize,
         | 
| 804 | 
            -
                                               void* workSpace, size_t wkspSize)
         | 
| 805 | 
            -
            {
         | 
| 806 | 
            -
                return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
         | 
| 890 | 
            +
                return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
         | 
| 807 891 | 
             
            }
         | 
| 808 892 |  | 
| 809 | 
            -
             | 
| 810 893 | 
             
            #endif /* HUF_FORCE_DECOMPRESS_X2 */
         | 
| 811 894 |  | 
| 812 895 |  | 
| @@ -989,7 +1072,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 | |
| 989 1072 |  | 
| 990 1073 | 
             
            static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
         | 
| 991 1074 | 
             
                                       const sortedSymbol_t* sortedList,
         | 
| 992 | 
            -
                                       const U32* rankStart,  | 
| 1075 | 
            +
                                       const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight,
         | 
| 993 1076 | 
             
                                       const U32 nbBitsBaseline)
         | 
| 994 1077 | 
             
            {
         | 
| 995 1078 | 
             
                U32* const rankVal = rankValOrigin[0];
         | 
| @@ -1044,14 +1127,7 @@ typedef struct { | |
| 1044 1127 |  | 
| 1045 1128 | 
             
            size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
         | 
| 1046 1129 | 
             
                                   const void* src, size_t srcSize,
         | 
| 1047 | 
            -
                                         void* workSpace, size_t wkspSize)
         | 
| 1048 | 
            -
            {
         | 
| 1049 | 
            -
                return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
         | 
| 1050 | 
            -
            }
         | 
| 1051 | 
            -
             | 
| 1052 | 
            -
            size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
         | 
| 1053 | 
            -
                                   const void* src, size_t srcSize,
         | 
| 1054 | 
            -
                                         void* workSpace, size_t wkspSize, int bmi2)
         | 
| 1130 | 
            +
                                         void* workSpace, size_t wkspSize, int flags)
         | 
| 1055 1131 | 
             
            {
         | 
| 1056 1132 | 
             
                U32 tableLog, maxW, nbSymbols;
         | 
| 1057 1133 | 
             
                DTableDesc dtd = HUF_getDTableDesc(DTable);
         | 
| @@ -1073,7 +1149,7 @@ size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, | |
| 1073 1149 | 
             
                if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
         | 
| 1074 1150 | 
             
                /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
         | 
| 1075 1151 |  | 
| 1076 | 
            -
                iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp),  | 
| 1152 | 
            +
                iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags);
         | 
| 1077 1153 | 
             
                if (HUF_isError(iSize)) return iSize;
         | 
| 1078 1154 |  | 
| 1079 1155 | 
             
                /* check result */
         | 
| @@ -1244,6 +1320,11 @@ HUF_decompress1X2_usingDTable_internal_body( | |
| 1244 1320 | 
             
                /* decoded size */
         | 
| 1245 1321 | 
             
                return dstSize;
         | 
| 1246 1322 | 
             
            }
         | 
| 1323 | 
            +
             | 
| 1324 | 
            +
            /* HUF_decompress4X2_usingDTable_internal_body():
         | 
| 1325 | 
            +
             * Conditions:
         | 
| 1326 | 
            +
             * @dstSize >= 6
         | 
| 1327 | 
            +
             */
         | 
| 1247 1328 | 
             
            FORCE_INLINE_TEMPLATE size_t
         | 
| 1248 1329 | 
             
            HUF_decompress4X2_usingDTable_internal_body(
         | 
| 1249 1330 | 
             
                      void* dst,  size_t dstSize,
         | 
| @@ -1284,8 +1365,9 @@ HUF_decompress4X2_usingDTable_internal_body( | |
| 1284 1365 | 
             
                    DTableDesc const dtd = HUF_getDTableDesc(DTable);
         | 
| 1285 1366 | 
             
                    U32 const dtLog = dtd.tableLog;
         | 
| 1286 1367 |  | 
| 1287 | 
            -
                    if (length4 > cSrcSize) return ERROR(corruption_detected); | 
| 1288 | 
            -
                    if (opStart4 > oend) return ERROR(corruption_detected); | 
| 1368 | 
            +
                    if (length4 > cSrcSize) return ERROR(corruption_detected);  /* overflow */
         | 
| 1369 | 
            +
                    if (opStart4 > oend) return ERROR(corruption_detected);     /* overflow */
         | 
| 1370 | 
            +
                    if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
         | 
| 1289 1371 | 
             
                    CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
         | 
| 1290 1372 | 
             
                    CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
         | 
| 1291 1373 | 
             
                    CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
         | 
| @@ -1370,36 +1452,177 @@ size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo | |
| 1370 1452 | 
             
            }
         | 
| 1371 1453 | 
             
            #endif
         | 
| 1372 1454 |  | 
| 1373 | 
            -
            #if HUF_NEED_DEFAULT_FUNCTION
         | 
| 1374 1455 | 
             
            static
         | 
| 1375 1456 | 
             
            size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
         | 
| 1376 1457 | 
             
                                size_t cSrcSize, HUF_DTable const* DTable) {
         | 
| 1377 1458 | 
             
                return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| 1378 1459 | 
             
            }
         | 
| 1379 | 
            -
            #endif
         | 
| 1380 1460 |  | 
| 1381 1461 | 
             
            #if ZSTD_ENABLE_ASM_X86_64_BMI2
         | 
| 1382 1462 |  | 
| 1383 | 
            -
            HUF_ASM_DECL void  | 
| 1463 | 
            +
            HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
         | 
| 1464 | 
            +
             | 
| 1465 | 
            +
            #endif
         | 
| 1466 | 
            +
             | 
| 1467 | 
            +
            static HUF_FAST_BMI2_ATTRS
         | 
| 1468 | 
            +
            void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
         | 
| 1469 | 
            +
            {
         | 
| 1470 | 
            +
                U64 bits[4];
         | 
| 1471 | 
            +
                BYTE const* ip[4];
         | 
| 1472 | 
            +
                BYTE* op[4];
         | 
| 1473 | 
            +
                BYTE* oend[4];
         | 
| 1474 | 
            +
                HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
         | 
| 1475 | 
            +
                BYTE const* const ilimit = args->ilimit;
         | 
| 1476 | 
            +
             | 
| 1477 | 
            +
                /* Copy the arguments to local registers. */
         | 
| 1478 | 
            +
                ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
         | 
| 1479 | 
            +
                ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
         | 
| 1480 | 
            +
                ZSTD_memcpy(&op, &args->op, sizeof(op));
         | 
| 1481 | 
            +
             | 
| 1482 | 
            +
                oend[0] = op[1];
         | 
| 1483 | 
            +
                oend[1] = op[2];
         | 
| 1484 | 
            +
                oend[2] = op[3];
         | 
| 1485 | 
            +
                oend[3] = args->oend;
         | 
| 1486 | 
            +
             | 
| 1487 | 
            +
                assert(MEM_isLittleEndian());
         | 
| 1488 | 
            +
                assert(!MEM_32bits());
         | 
| 1489 | 
            +
             | 
| 1490 | 
            +
                for (;;) {
         | 
| 1491 | 
            +
                    BYTE* olimit;
         | 
| 1492 | 
            +
                    int stream;
         | 
| 1493 | 
            +
                    int symbol;
         | 
| 1494 | 
            +
             | 
| 1495 | 
            +
                    /* Assert loop preconditions */
         | 
| 1496 | 
            +
            #ifndef NDEBUG
         | 
| 1497 | 
            +
                    for (stream = 0; stream < 4; ++stream) {
         | 
| 1498 | 
            +
                        assert(op[stream] <= oend[stream]);
         | 
| 1499 | 
            +
                        assert(ip[stream] >= ilimit);
         | 
| 1500 | 
            +
                    }
         | 
| 1501 | 
            +
            #endif
         | 
| 1502 | 
            +
                    /* Compute olimit */
         | 
| 1503 | 
            +
                    {
         | 
| 1504 | 
            +
                        /* Each loop does 5 table lookups for each of the 4 streams.
         | 
| 1505 | 
            +
                         * Each table lookup consumes up to 11 bits of input, and produces
         | 
| 1506 | 
            +
                         * up to 2 bytes of output.
         | 
| 1507 | 
            +
                         */
         | 
| 1508 | 
            +
                        /* We can consume up to 7 bytes of input per iteration per stream.
         | 
| 1509 | 
            +
                         * We also know that each input pointer is >= ip[0]. So we can run
         | 
| 1510 | 
            +
                         * iters loops before running out of input.
         | 
| 1511 | 
            +
                         */
         | 
| 1512 | 
            +
                        size_t iters = (size_t)(ip[0] - ilimit) / 7;
         | 
| 1513 | 
            +
                        /* Each iteration can produce up to 10 bytes of output per stream.
         | 
| 1514 | 
            +
                         * Each output stream may advance at different rates. So take the
         | 
| 1515 | 
            +
                         * minimum number of safe iterations among all the output streams.
         | 
| 1516 | 
            +
                         */
         | 
| 1517 | 
            +
                        for (stream = 0; stream < 4; ++stream) {
         | 
| 1518 | 
            +
                            size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
         | 
| 1519 | 
            +
                            iters = MIN(iters, oiters);
         | 
| 1520 | 
            +
                        }
         | 
| 1521 | 
            +
             | 
| 1522 | 
            +
                        /* Each iteration produces at least 5 output symbols. So until
         | 
| 1523 | 
            +
                         * op[3] crosses olimit, we know we haven't executed iters
         | 
| 1524 | 
            +
                         * iterations yet. This saves us maintaining an iters counter,
         | 
| 1525 | 
            +
                         * at the expense of computing the remaining # of iterations
         | 
| 1526 | 
            +
                         * more frequently.
         | 
| 1527 | 
            +
                         */
         | 
| 1528 | 
            +
                        olimit = op[3] + (iters * 5);
         | 
| 1529 | 
            +
             | 
| 1530 | 
            +
                        /* Exit the fast decoding loop if we are too close to the end. */
         | 
| 1531 | 
            +
                        if (op[3] + 10 > olimit)
         | 
| 1532 | 
            +
                            break;
         | 
| 1533 | 
            +
             | 
| 1534 | 
            +
                        /* Exit the decoding loop if any input pointer has crossed the
         | 
| 1535 | 
            +
                         * previous one. This indicates corruption, and a precondition
         | 
| 1536 | 
            +
                         * to our loop is that ip[i] >= ip[0].
         | 
| 1537 | 
            +
                         */
         | 
| 1538 | 
            +
                        for (stream = 1; stream < 4; ++stream) {
         | 
| 1539 | 
            +
                            if (ip[stream] < ip[stream - 1])
         | 
| 1540 | 
            +
                                goto _out;
         | 
| 1541 | 
            +
                        }
         | 
| 1542 | 
            +
                    }
         | 
| 1543 | 
            +
             | 
| 1544 | 
            +
            #ifndef NDEBUG
         | 
| 1545 | 
            +
                    for (stream = 1; stream < 4; ++stream) {
         | 
| 1546 | 
            +
                        assert(ip[stream] >= ip[stream - 1]);
         | 
| 1547 | 
            +
                    }
         | 
| 1548 | 
            +
            #endif
         | 
| 1549 | 
            +
             | 
| 1550 | 
            +
                    do {
         | 
| 1551 | 
            +
                        /* Do 5 table lookups for each of the first 3 streams */
         | 
| 1552 | 
            +
                        for (symbol = 0; symbol < 5; ++symbol) {
         | 
| 1553 | 
            +
                            for (stream = 0; stream < 3; ++stream) {
         | 
| 1554 | 
            +
                                int const index = (int)(bits[stream] >> 53);
         | 
| 1555 | 
            +
                                HUF_DEltX2 const entry = dtable[index];
         | 
| 1556 | 
            +
                                MEM_write16(op[stream], entry.sequence);
         | 
| 1557 | 
            +
                                bits[stream] <<= (entry.nbBits);
         | 
| 1558 | 
            +
                                op[stream] += (entry.length);
         | 
| 1559 | 
            +
                            }
         | 
| 1560 | 
            +
                        }
         | 
| 1561 | 
            +
                        /* Do 1 table lookup from the final stream */
         | 
| 1562 | 
            +
                        {
         | 
| 1563 | 
            +
                            int const index = (int)(bits[3] >> 53);
         | 
| 1564 | 
            +
                            HUF_DEltX2 const entry = dtable[index];
         | 
| 1565 | 
            +
                            MEM_write16(op[3], entry.sequence);
         | 
| 1566 | 
            +
                            bits[3] <<= (entry.nbBits);
         | 
| 1567 | 
            +
                            op[3] += (entry.length);
         | 
| 1568 | 
            +
                        }
         | 
| 1569 | 
            +
                        /* Do 4 table lookups from the final stream & reload bitstreams */
         | 
| 1570 | 
            +
                        for (stream = 0; stream < 4; ++stream) {
         | 
| 1571 | 
            +
                            /* Do a table lookup from the final stream.
         | 
| 1572 | 
            +
                             * This is interleaved with the reloading to reduce register
         | 
| 1573 | 
            +
                             * pressure. This shouldn't be necessary, but compilers can
         | 
| 1574 | 
            +
                             * struggle with codegen with high register pressure.
         | 
| 1575 | 
            +
                             */
         | 
| 1576 | 
            +
                            {
         | 
| 1577 | 
            +
                                int const index = (int)(bits[3] >> 53);
         | 
| 1578 | 
            +
                                HUF_DEltX2 const entry = dtable[index];
         | 
| 1579 | 
            +
                                MEM_write16(op[3], entry.sequence);
         | 
| 1580 | 
            +
                                bits[3] <<= (entry.nbBits);
         | 
| 1581 | 
            +
                                op[3] += (entry.length);
         | 
| 1582 | 
            +
                            }
         | 
| 1583 | 
            +
                            /* Reload the bitstreams. The final bitstream must be reloaded
         | 
| 1584 | 
            +
                             * after the 5th symbol was decoded.
         | 
| 1585 | 
            +
                             */
         | 
| 1586 | 
            +
                            {
         | 
| 1587 | 
            +
                                int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
         | 
| 1588 | 
            +
                                int const nbBits = ctz & 7;
         | 
| 1589 | 
            +
                                int const nbBytes = ctz >> 3;
         | 
| 1590 | 
            +
                                ip[stream] -= nbBytes;
         | 
| 1591 | 
            +
                                bits[stream] = MEM_read64(ip[stream]) | 1;
         | 
| 1592 | 
            +
                                bits[stream] <<= nbBits;
         | 
| 1593 | 
            +
                            }
         | 
| 1594 | 
            +
                        }
         | 
| 1595 | 
            +
                    } while (op[3] < olimit);
         | 
| 1596 | 
            +
                }
         | 
| 1384 1597 |  | 
| 1385 | 
            -
             | 
| 1386 | 
            -
             | 
| 1598 | 
            +
            _out:
         | 
| 1599 | 
            +
             | 
| 1600 | 
            +
                /* Save the final values of each of the state variables back to args. */
         | 
| 1601 | 
            +
                ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
         | 
| 1602 | 
            +
                ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
         | 
| 1603 | 
            +
                ZSTD_memcpy(&args->op, &op, sizeof(op));
         | 
| 1604 | 
            +
            }
         | 
| 1605 | 
            +
             | 
| 1606 | 
            +
             | 
| 1607 | 
            +
            static HUF_FAST_BMI2_ATTRS size_t
         | 
| 1608 | 
            +
            HUF_decompress4X2_usingDTable_internal_fast(
         | 
| 1387 1609 | 
             
                      void* dst,  size_t dstSize,
         | 
| 1388 1610 | 
             
                const void* cSrc, size_t cSrcSize,
         | 
| 1389 | 
            -
                const HUF_DTable* DTable | 
| 1611 | 
            +
                const HUF_DTable* DTable,
         | 
| 1612 | 
            +
                HUF_DecompressFastLoopFn loopFn) {
         | 
| 1390 1613 | 
             
                void const* dt = DTable + 1;
         | 
| 1391 1614 | 
             
                const BYTE* const iend = (const BYTE*)cSrc + 6;
         | 
| 1392 1615 | 
             
                BYTE* const oend = (BYTE*)dst + dstSize;
         | 
| 1393 | 
            -
                 | 
| 1616 | 
            +
                HUF_DecompressFastArgs args;
         | 
| 1394 1617 | 
             
                {
         | 
| 1395 | 
            -
                    size_t const ret =  | 
| 1618 | 
            +
                    size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| 1396 1619 | 
             
                    FORWARD_IF_ERROR(ret, "Failed to init asm args");
         | 
| 1397 | 
            -
                    if (ret  | 
| 1398 | 
            -
                        return  | 
| 1620 | 
            +
                    if (ret == 0)
         | 
| 1621 | 
            +
                        return 0;
         | 
| 1399 1622 | 
             
                }
         | 
| 1400 1623 |  | 
| 1401 1624 | 
             
                assert(args.ip[0] >= args.ilimit);
         | 
| 1402 | 
            -
                 | 
| 1625 | 
            +
                loopFn(&args);
         | 
| 1403 1626 |  | 
| 1404 1627 | 
             
                /* note : op4 already verified within main loop */
         | 
| 1405 1628 | 
             
                assert(args.ip[0] >= iend);
         | 
| @@ -1430,91 +1653,72 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm( | |
| 1430 1653 | 
             
                /* decoded size */
         | 
| 1431 1654 | 
             
                return dstSize;
         | 
| 1432 1655 | 
             
            }
         | 
| 1433 | 
            -
            #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
         | 
| 1434 1656 |  | 
| 1435 1657 | 
             
            static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
         | 
| 1436 | 
            -
                                size_t cSrcSize, HUF_DTable const* DTable, int  | 
| 1658 | 
            +
                                size_t cSrcSize, HUF_DTable const* DTable, int flags)
         | 
| 1437 1659 | 
             
            {
         | 
| 1660 | 
            +
                HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default;
         | 
| 1661 | 
            +
                HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop;
         | 
| 1662 | 
            +
             | 
| 1438 1663 | 
             
            #if DYNAMIC_BMI2
         | 
| 1439 | 
            -
                if ( | 
| 1664 | 
            +
                if (flags & HUF_flags_bmi2) {
         | 
| 1665 | 
            +
                    fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
         | 
| 1440 1666 | 
             
            # if ZSTD_ENABLE_ASM_X86_64_BMI2
         | 
| 1441 | 
            -
                     | 
| 1442 | 
            -
             | 
| 1443 | 
            -
                     | 
| 1667 | 
            +
                    if (!(flags & HUF_flags_disableAsm)) {
         | 
| 1668 | 
            +
                        loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
         | 
| 1669 | 
            +
                    }
         | 
| 1444 1670 | 
             
            # endif
         | 
| 1671 | 
            +
                } else {
         | 
| 1672 | 
            +
                    return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| 1445 1673 | 
             
                }
         | 
| 1446 | 
            -
            #else
         | 
| 1447 | 
            -
                (void)bmi2;
         | 
| 1448 1674 | 
             
            #endif
         | 
| 1449 1675 |  | 
| 1450 1676 | 
             
            #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
         | 
| 1451 | 
            -
                 | 
| 1452 | 
            -
             | 
| 1453 | 
            -
                 | 
| 1677 | 
            +
                if (!(flags & HUF_flags_disableAsm)) {
         | 
| 1678 | 
            +
                    loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
         | 
| 1679 | 
            +
                }
         | 
| 1454 1680 | 
             
            #endif
         | 
| 1681 | 
            +
             | 
| 1682 | 
            +
                if (!(flags & HUF_flags_disableFast)) {
         | 
| 1683 | 
            +
                    size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
         | 
| 1684 | 
            +
                    if (ret != 0)
         | 
| 1685 | 
            +
                        return ret;
         | 
| 1686 | 
            +
                }
         | 
| 1687 | 
            +
                return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| 1455 1688 | 
             
            }
         | 
| 1456 1689 |  | 
| 1457 1690 | 
             
            HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
         | 
| 1458 1691 |  | 
| 1459 | 
            -
            size_t HUF_decompress1X2_usingDTable(
         | 
| 1460 | 
            -
                      void* dst,  size_t dstSize,
         | 
| 1461 | 
            -
                const void* cSrc, size_t cSrcSize,
         | 
| 1462 | 
            -
                const HUF_DTable* DTable)
         | 
| 1463 | 
            -
            {
         | 
| 1464 | 
            -
                DTableDesc dtd = HUF_getDTableDesc(DTable);
         | 
| 1465 | 
            -
                if (dtd.tableType != 1) return ERROR(GENERIC);
         | 
| 1466 | 
            -
                return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
         | 
| 1467 | 
            -
            }
         | 
| 1468 | 
            -
             | 
| 1469 1692 | 
             
            size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
         | 
| 1470 1693 | 
             
                                               const void* cSrc, size_t cSrcSize,
         | 
| 1471 | 
            -
                                               void* workSpace, size_t wkspSize)
         | 
| 1694 | 
            +
                                               void* workSpace, size_t wkspSize, int flags)
         | 
| 1472 1695 | 
             
            {
         | 
| 1473 1696 | 
             
                const BYTE* ip = (const BYTE*) cSrc;
         | 
| 1474 1697 |  | 
| 1475 1698 | 
             
                size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
         | 
| 1476 | 
            -
                                                           workSpace, wkspSize);
         | 
| 1699 | 
            +
                                                           workSpace, wkspSize, flags);
         | 
| 1477 1700 | 
             
                if (HUF_isError(hSize)) return hSize;
         | 
| 1478 1701 | 
             
                if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
         | 
| 1479 1702 | 
             
                ip += hSize; cSrcSize -= hSize;
         | 
| 1480 1703 |  | 
| 1481 | 
            -
                return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx,  | 
| 1482 | 
            -
            }
         | 
| 1483 | 
            -
             | 
| 1484 | 
            -
             | 
| 1485 | 
            -
            size_t HUF_decompress4X2_usingDTable(
         | 
| 1486 | 
            -
                      void* dst,  size_t dstSize,
         | 
| 1487 | 
            -
                const void* cSrc, size_t cSrcSize,
         | 
| 1488 | 
            -
                const HUF_DTable* DTable)
         | 
| 1489 | 
            -
            {
         | 
| 1490 | 
            -
                DTableDesc dtd = HUF_getDTableDesc(DTable);
         | 
| 1491 | 
            -
                if (dtd.tableType != 1) return ERROR(GENERIC);
         | 
| 1492 | 
            -
                return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
         | 
| 1704 | 
            +
                return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
         | 
| 1493 1705 | 
             
            }
         | 
| 1494 1706 |  | 
| 1495 | 
            -
            static size_t  | 
| 1707 | 
            +
            static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
         | 
| 1496 1708 | 
             
                                               const void* cSrc, size_t cSrcSize,
         | 
| 1497 | 
            -
                                               void* workSpace, size_t wkspSize, int  | 
| 1709 | 
            +
                                               void* workSpace, size_t wkspSize, int flags)
         | 
| 1498 1710 | 
             
            {
         | 
| 1499 1711 | 
             
                const BYTE* ip = (const BYTE*) cSrc;
         | 
| 1500 1712 |  | 
| 1501 1713 | 
             
                size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
         | 
| 1502 | 
            -
                                                     workSpace, wkspSize);
         | 
| 1714 | 
            +
                                                     workSpace, wkspSize, flags);
         | 
| 1503 1715 | 
             
                if (HUF_isError(hSize)) return hSize;
         | 
| 1504 1716 | 
             
                if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
         | 
| 1505 1717 | 
             
                ip += hSize; cSrcSize -= hSize;
         | 
| 1506 1718 |  | 
| 1507 | 
            -
                return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,  | 
| 1719 | 
            +
                return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
         | 
| 1508 1720 | 
             
            }
         | 
| 1509 1721 |  | 
| 1510 | 
            -
            size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
         | 
| 1511 | 
            -
                                               const void* cSrc, size_t cSrcSize,
         | 
| 1512 | 
            -
                                               void* workSpace, size_t wkspSize)
         | 
| 1513 | 
            -
            {
         | 
| 1514 | 
            -
                return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
         | 
| 1515 | 
            -
            }
         | 
| 1516 | 
            -
             | 
| 1517 | 
            -
             | 
| 1518 1722 | 
             
            #endif /* HUF_FORCE_DECOMPRESS_X1 */
         | 
| 1519 1723 |  | 
| 1520 1724 |  | 
| @@ -1522,44 +1726,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, | |
| 1522 1726 | 
             
            /* Universal decompression selectors */
         | 
| 1523 1727 | 
             
            /* ***********************************/
         | 
| 1524 1728 |  | 
| 1525 | 
            -
            size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
         | 
| 1526 | 
            -
                                                const void* cSrc, size_t cSrcSize,
         | 
| 1527 | 
            -
                                                const HUF_DTable* DTable)
         | 
| 1528 | 
            -
            {
         | 
| 1529 | 
            -
                DTableDesc const dtd = HUF_getDTableDesc(DTable);
         | 
| 1530 | 
            -
            #if defined(HUF_FORCE_DECOMPRESS_X1)
         | 
| 1531 | 
            -
                (void)dtd;
         | 
| 1532 | 
            -
                assert(dtd.tableType == 0);
         | 
| 1533 | 
            -
                return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
         | 
| 1534 | 
            -
            #elif defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1535 | 
            -
                (void)dtd;
         | 
| 1536 | 
            -
                assert(dtd.tableType == 1);
         | 
| 1537 | 
            -
                return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
         | 
| 1538 | 
            -
            #else
         | 
| 1539 | 
            -
                return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
         | 
| 1540 | 
            -
                                       HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
         | 
| 1541 | 
            -
            #endif
         | 
| 1542 | 
            -
            }
         | 
| 1543 | 
            -
             | 
| 1544 | 
            -
            size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
         | 
| 1545 | 
            -
                                                const void* cSrc, size_t cSrcSize,
         | 
| 1546 | 
            -
                                                const HUF_DTable* DTable)
         | 
| 1547 | 
            -
            {
         | 
| 1548 | 
            -
                DTableDesc const dtd = HUF_getDTableDesc(DTable);
         | 
| 1549 | 
            -
            #if defined(HUF_FORCE_DECOMPRESS_X1)
         | 
| 1550 | 
            -
                (void)dtd;
         | 
| 1551 | 
            -
                assert(dtd.tableType == 0);
         | 
| 1552 | 
            -
                return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
         | 
| 1553 | 
            -
            #elif defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1554 | 
            -
                (void)dtd;
         | 
| 1555 | 
            -
                assert(dtd.tableType == 1);
         | 
| 1556 | 
            -
                return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
         | 
| 1557 | 
            -
            #else
         | 
| 1558 | 
            -
                return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
         | 
| 1559 | 
            -
                                       HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
         | 
| 1560 | 
            -
            #endif
         | 
| 1561 | 
            -
            }
         | 
| 1562 | 
            -
             | 
| 1563 1729 |  | 
| 1564 1730 | 
             
            #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1565 1731 | 
             
            typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
         | 
| @@ -1614,36 +1780,9 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) | |
| 1614 1780 | 
             
            #endif
         | 
| 1615 1781 | 
             
            }
         | 
| 1616 1782 |  | 
| 1617 | 
            -
             | 
| 1618 | 
            -
            size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
         | 
| 1619 | 
            -
                                                 size_t dstSize, const void* cSrc,
         | 
| 1620 | 
            -
                                                 size_t cSrcSize, void* workSpace,
         | 
| 1621 | 
            -
                                                 size_t wkspSize)
         | 
| 1622 | 
            -
            {
         | 
| 1623 | 
            -
                /* validation checks */
         | 
| 1624 | 
            -
                if (dstSize == 0) return ERROR(dstSize_tooSmall);
         | 
| 1625 | 
            -
                if (cSrcSize == 0) return ERROR(corruption_detected);
         | 
| 1626 | 
            -
             | 
| 1627 | 
            -
                {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
         | 
| 1628 | 
            -
            #if defined(HUF_FORCE_DECOMPRESS_X1)
         | 
| 1629 | 
            -
                    (void)algoNb;
         | 
| 1630 | 
            -
                    assert(algoNb == 0);
         | 
| 1631 | 
            -
                    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
         | 
| 1632 | 
            -
            #elif defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1633 | 
            -
                    (void)algoNb;
         | 
| 1634 | 
            -
                    assert(algoNb == 1);
         | 
| 1635 | 
            -
                    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
         | 
| 1636 | 
            -
            #else
         | 
| 1637 | 
            -
                    return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
         | 
| 1638 | 
            -
                                        cSrcSize, workSpace, wkspSize):
         | 
| 1639 | 
            -
                                    HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
         | 
| 1640 | 
            -
            #endif
         | 
| 1641 | 
            -
                }
         | 
| 1642 | 
            -
            }
         | 
| 1643 | 
            -
             | 
| 1644 1783 | 
             
            size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
         | 
| 1645 1784 | 
             
                                              const void* cSrc, size_t cSrcSize,
         | 
| 1646 | 
            -
                                              void* workSpace, size_t wkspSize)
         | 
| 1785 | 
            +
                                              void* workSpace, size_t wkspSize, int flags)
         | 
| 1647 1786 | 
             
            {
         | 
| 1648 1787 | 
             
                /* validation checks */
         | 
| 1649 1788 | 
             
                if (dstSize == 0) return ERROR(dstSize_tooSmall);
         | 
| @@ -1656,71 +1795,71 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, | |
| 1656 1795 | 
             
                    (void)algoNb;
         | 
| 1657 1796 | 
             
                    assert(algoNb == 0);
         | 
| 1658 1797 | 
             
                    return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
         | 
| 1659 | 
            -
                                            cSrcSize, workSpace, wkspSize);
         | 
| 1798 | 
            +
                                            cSrcSize, workSpace, wkspSize, flags);
         | 
| 1660 1799 | 
             
            #elif defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1661 1800 | 
             
                    (void)algoNb;
         | 
| 1662 1801 | 
             
                    assert(algoNb == 1);
         | 
| 1663 1802 | 
             
                    return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
         | 
| 1664 | 
            -
                                            cSrcSize, workSpace, wkspSize);
         | 
| 1803 | 
            +
                                            cSrcSize, workSpace, wkspSize, flags);
         | 
| 1665 1804 | 
             
            #else
         | 
| 1666 1805 | 
             
                    return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
         | 
| 1667 | 
            -
                                            cSrcSize, workSpace, wkspSize):
         | 
| 1806 | 
            +
                                            cSrcSize, workSpace, wkspSize, flags):
         | 
| 1668 1807 | 
             
                                    HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
         | 
| 1669 | 
            -
                                            cSrcSize, workSpace, wkspSize);
         | 
| 1808 | 
            +
                                            cSrcSize, workSpace, wkspSize, flags);
         | 
| 1670 1809 | 
             
            #endif
         | 
| 1671 1810 | 
             
                }
         | 
| 1672 1811 | 
             
            }
         | 
| 1673 1812 |  | 
| 1674 1813 |  | 
| 1675 | 
            -
            size_t  | 
| 1814 | 
            +
            size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
         | 
| 1676 1815 | 
             
            {
         | 
| 1677 1816 | 
             
                DTableDesc const dtd = HUF_getDTableDesc(DTable);
         | 
| 1678 1817 | 
             
            #if defined(HUF_FORCE_DECOMPRESS_X1)
         | 
| 1679 1818 | 
             
                (void)dtd;
         | 
| 1680 1819 | 
             
                assert(dtd.tableType == 0);
         | 
| 1681 | 
            -
                return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,  | 
| 1820 | 
            +
                return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
         | 
| 1682 1821 | 
             
            #elif defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1683 1822 | 
             
                (void)dtd;
         | 
| 1684 1823 | 
             
                assert(dtd.tableType == 1);
         | 
| 1685 | 
            -
                return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,  | 
| 1824 | 
            +
                return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
         | 
| 1686 1825 | 
             
            #else
         | 
| 1687 | 
            -
                return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,  | 
| 1688 | 
            -
                                       HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,  | 
| 1826 | 
            +
                return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
         | 
| 1827 | 
            +
                                       HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
         | 
| 1689 1828 | 
             
            #endif
         | 
| 1690 1829 | 
             
            }
         | 
| 1691 1830 |  | 
| 1692 1831 | 
             
            #ifndef HUF_FORCE_DECOMPRESS_X2
         | 
| 1693 | 
            -
            size_t  | 
| 1832 | 
            +
            size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
         | 
| 1694 1833 | 
             
            {
         | 
| 1695 1834 | 
             
                const BYTE* ip = (const BYTE*) cSrc;
         | 
| 1696 1835 |  | 
| 1697 | 
            -
                size_t const hSize =  | 
| 1836 | 
            +
                size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
         | 
| 1698 1837 | 
             
                if (HUF_isError(hSize)) return hSize;
         | 
| 1699 1838 | 
             
                if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
         | 
| 1700 1839 | 
             
                ip += hSize; cSrcSize -= hSize;
         | 
| 1701 1840 |  | 
| 1702 | 
            -
                return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,  | 
| 1841 | 
            +
                return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
         | 
| 1703 1842 | 
             
            }
         | 
| 1704 1843 | 
             
            #endif
         | 
| 1705 1844 |  | 
| 1706 | 
            -
            size_t  | 
| 1845 | 
            +
            size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
         | 
| 1707 1846 | 
             
            {
         | 
| 1708 1847 | 
             
                DTableDesc const dtd = HUF_getDTableDesc(DTable);
         | 
| 1709 1848 | 
             
            #if defined(HUF_FORCE_DECOMPRESS_X1)
         | 
| 1710 1849 | 
             
                (void)dtd;
         | 
| 1711 1850 | 
             
                assert(dtd.tableType == 0);
         | 
| 1712 | 
            -
                return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,  | 
| 1851 | 
            +
                return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
         | 
| 1713 1852 | 
             
            #elif defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1714 1853 | 
             
                (void)dtd;
         | 
| 1715 1854 | 
             
                assert(dtd.tableType == 1);
         | 
| 1716 | 
            -
                return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,  | 
| 1855 | 
            +
                return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
         | 
| 1717 1856 | 
             
            #else
         | 
| 1718 | 
            -
                return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,  | 
| 1719 | 
            -
                                       HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,  | 
| 1857 | 
            +
                return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
         | 
| 1858 | 
            +
                                       HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
         | 
| 1720 1859 | 
             
            #endif
         | 
| 1721 1860 | 
             
            }
         | 
| 1722 1861 |  | 
| 1723 | 
            -
            size_t  | 
| 1862 | 
            +
            size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
         | 
| 1724 1863 | 
             
            {
         | 
| 1725 1864 | 
             
                /* validation checks */
         | 
| 1726 1865 | 
             
                if (dstSize == 0) return ERROR(dstSize_tooSmall);
         | 
| @@ -1730,160 +1869,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds | |
| 1730 1869 | 
             
            #if defined(HUF_FORCE_DECOMPRESS_X1)
         | 
| 1731 1870 | 
             
                    (void)algoNb;
         | 
| 1732 1871 | 
             
                    assert(algoNb == 0);
         | 
| 1733 | 
            -
                    return  | 
| 1872 | 
            +
                    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
         | 
| 1734 1873 | 
             
            #elif defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1735 1874 | 
             
                    (void)algoNb;
         | 
| 1736 1875 | 
             
                    assert(algoNb == 1);
         | 
| 1737 | 
            -
                    return  | 
| 1876 | 
            +
                    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
         | 
| 1738 1877 | 
             
            #else
         | 
| 1739 | 
            -
                    return algoNb ?  | 
| 1740 | 
            -
                                     | 
| 1878 | 
            +
                    return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) :
         | 
| 1879 | 
            +
                                    HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
         | 
| 1741 1880 | 
             
            #endif
         | 
| 1742 1881 | 
             
                }
         | 
| 1743 1882 | 
             
            }
         | 
| 1744 | 
            -
             | 
| 1745 | 
            -
            #ifndef ZSTD_NO_UNUSED_FUNCTIONS
         | 
| 1746 | 
            -
            #ifndef HUF_FORCE_DECOMPRESS_X2
         | 
| 1747 | 
            -
            size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
         | 
| 1748 | 
            -
            {
         | 
| 1749 | 
            -
                U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
         | 
| 1750 | 
            -
                return HUF_readDTableX1_wksp(DTable, src, srcSize,
         | 
| 1751 | 
            -
                                             workSpace, sizeof(workSpace));
         | 
| 1752 | 
            -
            }
         | 
| 1753 | 
            -
             | 
| 1754 | 
            -
            size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
         | 
| 1755 | 
            -
                                          const void* cSrc, size_t cSrcSize)
         | 
| 1756 | 
            -
            {
         | 
| 1757 | 
            -
                U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
         | 
| 1758 | 
            -
                return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
         | 
| 1759 | 
            -
                                                   workSpace, sizeof(workSpace));
         | 
| 1760 | 
            -
            }
         | 
| 1761 | 
            -
             | 
| 1762 | 
            -
            size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
         | 
| 1763 | 
            -
            {
         | 
| 1764 | 
            -
                HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
         | 
| 1765 | 
            -
                return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
         | 
| 1766 | 
            -
            }
         | 
| 1767 | 
            -
            #endif
         | 
| 1768 | 
            -
             | 
| 1769 | 
            -
            #ifndef HUF_FORCE_DECOMPRESS_X1
         | 
| 1770 | 
            -
            size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
         | 
| 1771 | 
            -
            {
         | 
| 1772 | 
            -
              U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
         | 
| 1773 | 
            -
              return HUF_readDTableX2_wksp(DTable, src, srcSize,
         | 
| 1774 | 
            -
                                           workSpace, sizeof(workSpace));
         | 
| 1775 | 
            -
            }
         | 
| 1776 | 
            -
             | 
| 1777 | 
            -
            size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
         | 
| 1778 | 
            -
                                          const void* cSrc, size_t cSrcSize)
         | 
| 1779 | 
            -
            {
         | 
| 1780 | 
            -
                U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
         | 
| 1781 | 
            -
                return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
         | 
| 1782 | 
            -
                                                   workSpace, sizeof(workSpace));
         | 
| 1783 | 
            -
            }
         | 
| 1784 | 
            -
             | 
| 1785 | 
            -
            size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
         | 
| 1786 | 
            -
            {
         | 
| 1787 | 
            -
                HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
         | 
| 1788 | 
            -
                return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
         | 
| 1789 | 
            -
            }
         | 
| 1790 | 
            -
            #endif
         | 
| 1791 | 
            -
             | 
| 1792 | 
            -
            #ifndef HUF_FORCE_DECOMPRESS_X2
         | 
| 1793 | 
            -
            size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
         | 
| 1794 | 
            -
            {
         | 
| 1795 | 
            -
                U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
         | 
| 1796 | 
            -
                return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
         | 
| 1797 | 
            -
                                                   workSpace, sizeof(workSpace));
         | 
| 1798 | 
            -
            }
         | 
| 1799 | 
            -
            size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
         | 
| 1800 | 
            -
            {
         | 
| 1801 | 
            -
                HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
         | 
| 1802 | 
            -
                return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
         | 
| 1803 | 
            -
            }
         | 
| 1804 | 
            -
            #endif
         | 
| 1805 | 
            -
             | 
| 1806 | 
            -
            #ifndef HUF_FORCE_DECOMPRESS_X1
         | 
| 1807 | 
            -
            size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
         | 
| 1808 | 
            -
                                          const void* cSrc, size_t cSrcSize)
         | 
| 1809 | 
            -
            {
         | 
| 1810 | 
            -
                U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
         | 
| 1811 | 
            -
                return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
         | 
| 1812 | 
            -
                                                   workSpace, sizeof(workSpace));
         | 
| 1813 | 
            -
            }
         | 
| 1814 | 
            -
             | 
| 1815 | 
            -
            size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
         | 
| 1816 | 
            -
            {
         | 
| 1817 | 
            -
                HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
         | 
| 1818 | 
            -
                return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
         | 
| 1819 | 
            -
            }
         | 
| 1820 | 
            -
            #endif
         | 
| 1821 | 
            -
             | 
| 1822 | 
            -
            typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
         | 
| 1823 | 
            -
             | 
| 1824 | 
            -
            size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
         | 
| 1825 | 
            -
            {
         | 
| 1826 | 
            -
            #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1827 | 
            -
                static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
         | 
| 1828 | 
            -
            #endif
         | 
| 1829 | 
            -
             | 
| 1830 | 
            -
                /* validation checks */
         | 
| 1831 | 
            -
                if (dstSize == 0) return ERROR(dstSize_tooSmall);
         | 
| 1832 | 
            -
                if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
         | 
| 1833 | 
            -
                if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
         | 
| 1834 | 
            -
                if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
         | 
| 1835 | 
            -
             | 
| 1836 | 
            -
                {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
         | 
| 1837 | 
            -
            #if defined(HUF_FORCE_DECOMPRESS_X1)
         | 
| 1838 | 
            -
                    (void)algoNb;
         | 
| 1839 | 
            -
                    assert(algoNb == 0);
         | 
| 1840 | 
            -
                    return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
         | 
| 1841 | 
            -
            #elif defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1842 | 
            -
                    (void)algoNb;
         | 
| 1843 | 
            -
                    assert(algoNb == 1);
         | 
| 1844 | 
            -
                    return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
         | 
| 1845 | 
            -
            #else
         | 
| 1846 | 
            -
                    return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
         | 
| 1847 | 
            -
            #endif
         | 
| 1848 | 
            -
                }
         | 
| 1849 | 
            -
            }
         | 
| 1850 | 
            -
             | 
| 1851 | 
            -
            size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
         | 
| 1852 | 
            -
            {
         | 
| 1853 | 
            -
                /* validation checks */
         | 
| 1854 | 
            -
                if (dstSize == 0) return ERROR(dstSize_tooSmall);
         | 
| 1855 | 
            -
                if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
         | 
| 1856 | 
            -
                if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
         | 
| 1857 | 
            -
                if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
         | 
| 1858 | 
            -
             | 
| 1859 | 
            -
                {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
         | 
| 1860 | 
            -
            #if defined(HUF_FORCE_DECOMPRESS_X1)
         | 
| 1861 | 
            -
                    (void)algoNb;
         | 
| 1862 | 
            -
                    assert(algoNb == 0);
         | 
| 1863 | 
            -
                    return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
         | 
| 1864 | 
            -
            #elif defined(HUF_FORCE_DECOMPRESS_X2)
         | 
| 1865 | 
            -
                    (void)algoNb;
         | 
| 1866 | 
            -
                    assert(algoNb == 1);
         | 
| 1867 | 
            -
                    return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
         | 
| 1868 | 
            -
            #else
         | 
| 1869 | 
            -
                    return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
         | 
| 1870 | 
            -
                                    HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
         | 
| 1871 | 
            -
            #endif
         | 
| 1872 | 
            -
                }
         | 
| 1873 | 
            -
            }
         | 
| 1874 | 
            -
             | 
| 1875 | 
            -
            size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
         | 
| 1876 | 
            -
            {
         | 
| 1877 | 
            -
                U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
         | 
| 1878 | 
            -
                return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
         | 
| 1879 | 
            -
                                                     workSpace, sizeof(workSpace));
         | 
| 1880 | 
            -
            }
         | 
| 1881 | 
            -
             | 
| 1882 | 
            -
            size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
         | 
| 1883 | 
            -
                                         const void* cSrc, size_t cSrcSize)
         | 
| 1884 | 
            -
            {
         | 
| 1885 | 
            -
                U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
         | 
| 1886 | 
            -
                return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
         | 
| 1887 | 
            -
                                                  workSpace, sizeof(workSpace));
         | 
| 1888 | 
            -
            }
         | 
| 1889 | 
            -
            #endif
         |