zstdlib 0.12.0-x86_64-darwin → 0.13.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +7 -0
- data/Rakefile +1 -1
- data/ext/zstdlib_c/extconf.rb +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/allocations.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/bitstream.h +49 -29
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/compiler.h +114 -22
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/cpu.h +36 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/debug.c +6 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/debug.h +20 -11
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/error_private.h +45 -36
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/fse.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/fse_decompress.c +19 -17
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/huf.h +14 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/mem.h +0 -9
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/pool.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/pool.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/portability_macros.h +2 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/threading.c +8 -2
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/xxhash.c +5 -11
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/xxhash.h +2341 -1007
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_internal.h +5 -5
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/fse_compress.c +8 -7
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/huf_compress.c +54 -25
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress.c +282 -161
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_internal.h +29 -27
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_superblock.c +224 -113
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_cwksp.h +19 -13
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_double_fast.c +17 -5
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_double_fast.h +11 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_fast.c +14 -6
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_lazy.c +129 -87
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_lazy.h +103 -28
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_ldm.c +8 -2
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_opt.c +216 -112
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_opt.h +31 -7
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstdmt_compress.c +94 -79
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/huf_decompress.c +188 -126
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/huf_decompress_amd64.S +38 -19
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress.c +84 -32
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress_block.c +231 -208
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress_block.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress_internal.h +2 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/zstd.h +129 -60
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzclose.c +1 -3
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzlib.c +20 -73
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzread.c +17 -58
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzwrite.c +18 -58
- data/lib/2.4/zstdlib_c.bundle +0 -0
- data/lib/2.5/zstdlib_c.bundle +0 -0
- data/lib/2.6/zstdlib_c.bundle +0 -0
- data/lib/2.7/zstdlib_c.bundle +0 -0
- data/lib/3.0/zstdlib_c.bundle +0 -0
- data/lib/3.1/zstdlib_c.bundle +0 -0
- data/lib/3.2/zstdlib_c.bundle +0 -0
- data/lib/3.3/zstdlib_c.bundle +0 -0
- metadata +75 -75
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/bits.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/entropy_common.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/error_private.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/threading.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_common.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_deps.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_trace.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/clevels.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/hist.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/hist.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_literals.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_literals.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_sequences.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_sequences.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_superblock.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_fast.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_ldm.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_ldm_geartab.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstdmt_compress.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_ddict.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_ddict.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/zdict.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/zstd_errors.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzcompatibility.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzguts.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/zstd_zlibwrapper.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/zstd_zlibwrapper.h +0 -0
| @@ -34,6 +34,12 @@ | |
| 34 34 | 
             
            *  Macros
         | 
| 35 35 | 
             
            ****************************************************************/
         | 
| 36 36 |  | 
| 37 | 
            +
            #ifdef HUF_DISABLE_FAST_DECODE
         | 
| 38 | 
            +
            # define HUF_ENABLE_FAST_DECODE 0
         | 
| 39 | 
            +
            #else
         | 
| 40 | 
            +
            # define HUF_ENABLE_FAST_DECODE 1
         | 
| 41 | 
            +
            #endif
         | 
| 42 | 
            +
             | 
| 37 43 | 
             
            /* These two optional macros force the use one way or another of the two
         | 
| 38 44 | 
             
             * Huffman decompression implementations. You can't force in both directions
         | 
| 39 45 | 
             
             * at the same time.
         | 
| @@ -158,17 +164,18 @@ static size_t HUF_initFastDStream(BYTE const* ip) { | |
| 158 164 | 
             
             * op [in/out] - The output pointers, must be updated to reflect what is written.
         | 
| 159 165 | 
             
             * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
         | 
| 160 166 | 
             
             * dt [in] - The decoding table.
         | 
| 161 | 
            -
             *  | 
| 167 | 
            +
             * ilowest [in] - The beginning of the valid range of the input. Decoders may read
         | 
| 168 | 
            +
             *                down to this pointer. It may be below iend[0].
         | 
| 162 169 | 
             
             * oend [in] - The end of the output stream. op[3] must not cross oend.
         | 
| 163 170 | 
             
             * iend [in] - The end of each input stream. ip[i] may cross iend[i],
         | 
| 164 | 
            -
             *             as long as it is above  | 
| 171 | 
            +
             *             as long as it is above ilowest, but that indicates corruption.
         | 
| 165 172 | 
             
             */
         | 
| 166 173 | 
             
            typedef struct {
         | 
| 167 174 | 
             
                BYTE const* ip[4];
         | 
| 168 175 | 
             
                BYTE* op[4];
         | 
| 169 176 | 
             
                U64 bits[4];
         | 
| 170 177 | 
             
                void const* dt;
         | 
| 171 | 
            -
                BYTE const*  | 
| 178 | 
            +
                BYTE const* ilowest;
         | 
| 172 179 | 
             
                BYTE* oend;
         | 
| 173 180 | 
             
                BYTE const* iend[4];
         | 
| 174 181 | 
             
            } HUF_DecompressFastArgs;
         | 
| @@ -186,9 +193,9 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds | |
| 186 193 | 
             
                void const* dt = DTable + 1;
         | 
| 187 194 | 
             
                U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
         | 
| 188 195 |  | 
| 189 | 
            -
                const BYTE* const  | 
| 196 | 
            +
                const BYTE* const istart = (const BYTE*)src;
         | 
| 190 197 |  | 
| 191 | 
            -
                BYTE* const oend = (BYTE*)dst  | 
| 198 | 
            +
                BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
         | 
| 192 199 |  | 
| 193 200 | 
             
                /* The fast decoding loop assumes 64-bit little-endian.
         | 
| 194 201 | 
             
                 * This condition is false on x32.
         | 
| @@ -196,6 +203,11 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds | |
| 196 203 | 
             
                if (!MEM_isLittleEndian() || MEM_32bits())
         | 
| 197 204 | 
             
                    return 0;
         | 
| 198 205 |  | 
| 206 | 
            +
                /* Avoid nullptr addition */
         | 
| 207 | 
            +
                if (dstSize == 0)
         | 
| 208 | 
            +
                    return 0;
         | 
| 209 | 
            +
                assert(dst != NULL);
         | 
| 210 | 
            +
             | 
| 199 211 | 
             
                /* strict minimum : jump table + 1 byte per stream */
         | 
| 200 212 | 
             
                if (srcSize < 10)
         | 
| 201 213 | 
             
                    return ERROR(corruption_detected);
         | 
| @@ -209,7 +221,6 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds | |
| 209 221 |  | 
| 210 222 | 
             
                /* Read the jump table. */
         | 
| 211 223 | 
             
                {
         | 
| 212 | 
            -
                    const BYTE* const istart = (const BYTE*)src;
         | 
| 213 224 | 
             
                    size_t const length1 = MEM_readLE16(istart);
         | 
| 214 225 | 
             
                    size_t const length2 = MEM_readLE16(istart+2);
         | 
| 215 226 | 
             
                    size_t const length3 = MEM_readLE16(istart+4);
         | 
| @@ -221,10 +232,8 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds | |
| 221 232 |  | 
| 222 233 | 
             
                    /* HUF_initFastDStream() requires this, and this small of an input
         | 
| 223 234 | 
             
                     * won't benefit from the ASM loop anyways.
         | 
| 224 | 
            -
                     * length1 must be >= 16 so that ip[0] >= ilimit before the loop
         | 
| 225 | 
            -
                     * starts.
         | 
| 226 235 | 
             
                     */
         | 
| 227 | 
            -
                    if (length1 <  | 
| 236 | 
            +
                    if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
         | 
| 228 237 | 
             
                        return 0;
         | 
| 229 238 | 
             
                    if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
         | 
| 230 239 | 
             
                }
         | 
| @@ -256,11 +265,12 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds | |
| 256 265 | 
             
                args->bits[2] = HUF_initFastDStream(args->ip[2]);
         | 
| 257 266 | 
             
                args->bits[3] = HUF_initFastDStream(args->ip[3]);
         | 
| 258 267 |  | 
| 259 | 
            -
                /*  | 
| 260 | 
            -
             | 
| 261 | 
            -
             | 
| 262 | 
            -
             | 
| 263 | 
            -
             | 
| 268 | 
            +
                /* The decoders must be sure to never read beyond ilowest.
         | 
| 269 | 
            +
                 * This is lower than iend[0], but allowing decoders to read
         | 
| 270 | 
            +
                 * down to ilowest can allow an extra iteration or two in the
         | 
| 271 | 
            +
                 * fast loop.
         | 
| 272 | 
            +
                 */
         | 
| 273 | 
            +
                args->ilowest = istart;
         | 
| 264 274 |  | 
| 265 275 | 
             
                args->oend = oend;
         | 
| 266 276 | 
             
                args->dt = dt;
         | 
| @@ -285,13 +295,31 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArg | |
| 285 295 | 
             
                assert(sizeof(size_t) == 8);
         | 
| 286 296 | 
             
                bit->bitContainer = MEM_readLEST(args->ip[stream]);
         | 
| 287 297 | 
             
                bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
         | 
| 288 | 
            -
                bit->start = (const char*)args-> | 
| 298 | 
            +
                bit->start = (const char*)args->ilowest;
         | 
| 289 299 | 
             
                bit->limitPtr = bit->start + sizeof(size_t);
         | 
| 290 300 | 
             
                bit->ptr = (const char*)args->ip[stream];
         | 
| 291 301 |  | 
| 292 302 | 
             
                return 0;
         | 
| 293 303 | 
             
            }
         | 
| 294 304 |  | 
| 305 | 
            +
            /* Calls X(N) for each stream 0, 1, 2, 3. */
         | 
| 306 | 
            +
            #define HUF_4X_FOR_EACH_STREAM(X) \
         | 
| 307 | 
            +
                do {                          \
         | 
| 308 | 
            +
                    X(0);                     \
         | 
| 309 | 
            +
                    X(1);                     \
         | 
| 310 | 
            +
                    X(2);                     \
         | 
| 311 | 
            +
                    X(3);                     \
         | 
| 312 | 
            +
                } while (0)
         | 
| 313 | 
            +
             | 
| 314 | 
            +
            /* Calls X(N, var) for each stream 0, 1, 2, 3. */
         | 
| 315 | 
            +
            #define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
         | 
| 316 | 
            +
                do {                                        \
         | 
| 317 | 
            +
                    X(0, (var));                            \
         | 
| 318 | 
            +
                    X(1, (var));                            \
         | 
| 319 | 
            +
                    X(2, (var));                            \
         | 
| 320 | 
            +
                    X(3, (var));                            \
         | 
| 321 | 
            +
                } while (0)
         | 
| 322 | 
            +
             | 
| 295 323 |  | 
| 296 324 | 
             
            #ifndef HUF_FORCE_DECOMPRESS_X2
         | 
| 297 325 |  | 
| @@ -500,15 +528,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog | |
| 500 528 | 
             
            }
         | 
| 501 529 |  | 
| 502 530 | 
             
            #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
         | 
| 503 | 
            -
                *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
         | 
| 531 | 
            +
                do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
         | 
| 504 532 |  | 
| 505 | 
            -
            #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) | 
| 506 | 
            -
                 | 
| 507 | 
            -
                     | 
| 533 | 
            +
            #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)      \
         | 
| 534 | 
            +
                do {                                            \
         | 
| 535 | 
            +
                    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
         | 
| 536 | 
            +
                        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
         | 
| 537 | 
            +
                } while (0)
         | 
| 508 538 |  | 
| 509 | 
            -
            #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) | 
| 510 | 
            -
                 | 
| 511 | 
            -
                     | 
| 539 | 
            +
            #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr)      \
         | 
| 540 | 
            +
                do {                                            \
         | 
| 541 | 
            +
                    if (MEM_64bits())                           \
         | 
| 542 | 
            +
                        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
         | 
| 543 | 
            +
                } while (0)
         | 
| 512 544 |  | 
| 513 545 | 
             
            HINT_INLINE size_t
         | 
| 514 546 | 
             
            HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
         | 
| @@ -546,7 +578,7 @@ HUF_decompress1X1_usingDTable_internal_body( | |
| 546 578 | 
             
                const HUF_DTable* DTable)
         | 
| 547 579 | 
             
            {
         | 
| 548 580 | 
             
                BYTE* op = (BYTE*)dst;
         | 
| 549 | 
            -
                BYTE* const oend = op  | 
| 581 | 
            +
                BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
         | 
| 550 582 | 
             
                const void* dtPtr = DTable + 1;
         | 
| 551 583 | 
             
                const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
         | 
| 552 584 | 
             
                BIT_DStream_t bitD;
         | 
| @@ -574,6 +606,7 @@ HUF_decompress4X1_usingDTable_internal_body( | |
| 574 606 | 
             
            {
         | 
| 575 607 | 
             
                /* Check */
         | 
| 576 608 | 
             
                if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
         | 
| 609 | 
            +
                if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
         | 
| 577 610 |  | 
| 578 611 | 
             
                {   const BYTE* const istart = (const BYTE*) cSrc;
         | 
| 579 612 | 
             
                    BYTE* const ostart = (BYTE*) dst;
         | 
| @@ -609,7 +642,7 @@ HUF_decompress4X1_usingDTable_internal_body( | |
| 609 642 |  | 
| 610 643 | 
             
                    if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
         | 
| 611 644 | 
             
                    if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
         | 
| 612 | 
            -
                     | 
| 645 | 
            +
                    assert(dstSize >= 6); /* validated above */
         | 
| 613 646 | 
             
                    CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
         | 
| 614 647 | 
             
                    CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
         | 
| 615 648 | 
             
                    CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
         | 
| @@ -692,7 +725,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* | |
| 692 725 | 
             
                BYTE* op[4];
         | 
| 693 726 | 
             
                U16 const* const dtable = (U16 const*)args->dt;
         | 
| 694 727 | 
             
                BYTE* const oend = args->oend;
         | 
| 695 | 
            -
                BYTE const* const  | 
| 728 | 
            +
                BYTE const* const ilowest = args->ilowest;
         | 
| 696 729 |  | 
| 697 730 | 
             
                /* Copy the arguments to local variables */
         | 
| 698 731 | 
             
                ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
         | 
| @@ -705,13 +738,12 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* | |
| 705 738 | 
             
                for (;;) {
         | 
| 706 739 | 
             
                    BYTE* olimit;
         | 
| 707 740 | 
             
                    int stream;
         | 
| 708 | 
            -
                    int symbol;
         | 
| 709 741 |  | 
| 710 742 | 
             
                    /* Assert loop preconditions */
         | 
| 711 743 | 
             
            #ifndef NDEBUG
         | 
| 712 744 | 
             
                    for (stream = 0; stream < 4; ++stream) {
         | 
| 713 745 | 
             
                        assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
         | 
| 714 | 
            -
                        assert(ip[stream] >=  | 
| 746 | 
            +
                        assert(ip[stream] >= ilowest);
         | 
| 715 747 | 
             
                    }
         | 
| 716 748 | 
             
            #endif
         | 
| 717 749 | 
             
                    /* Compute olimit */
         | 
| @@ -721,7 +753,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* | |
| 721 753 | 
             
                        /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
         | 
| 722 754 | 
             
                         * per stream.
         | 
| 723 755 | 
             
                         */
         | 
| 724 | 
            -
                        size_t const iiters = (size_t)(ip[0] -  | 
| 756 | 
            +
                        size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
         | 
| 725 757 | 
             
                        /* We can safely run iters iterations before running bounds checks */
         | 
| 726 758 | 
             
                        size_t const iters = MIN(oiters, iiters);
         | 
| 727 759 | 
             
                        size_t const symbols = iters * 5;
         | 
| @@ -732,8 +764,8 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* | |
| 732 764 | 
             
                         */
         | 
| 733 765 | 
             
                        olimit = op[3] + symbols;
         | 
| 734 766 |  | 
| 735 | 
            -
                        /* Exit fast decoding loop once we  | 
| 736 | 
            -
                        if (op[3]  | 
| 767 | 
            +
                        /* Exit fast decoding loop once we reach the end. */
         | 
| 768 | 
            +
                        if (op[3] == olimit)
         | 
| 737 769 | 
             
                            break;
         | 
| 738 770 |  | 
| 739 771 | 
             
                        /* Exit the decoding loop if any input pointer has crossed the
         | 
| @@ -752,27 +784,42 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* | |
| 752 784 | 
             
                    }
         | 
| 753 785 | 
             
            #endif
         | 
| 754 786 |  | 
| 787 | 
            +
            #define HUF_4X1_DECODE_SYMBOL(_stream, _symbol)                 \
         | 
| 788 | 
            +
                do {                                                        \
         | 
| 789 | 
            +
                    int const index = (int)(bits[(_stream)] >> 53);         \
         | 
| 790 | 
            +
                    int const entry = (int)dtable[index];                   \
         | 
| 791 | 
            +
                    bits[(_stream)] <<= (entry & 0x3F);                     \
         | 
| 792 | 
            +
                    op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
         | 
| 793 | 
            +
                } while (0)
         | 
| 794 | 
            +
             | 
| 795 | 
            +
            #define HUF_4X1_RELOAD_STREAM(_stream)                              \
         | 
| 796 | 
            +
                do {                                                            \
         | 
| 797 | 
            +
                    int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
         | 
| 798 | 
            +
                    int const nbBits = ctz & 7;                                 \
         | 
| 799 | 
            +
                    int const nbBytes = ctz >> 3;                               \
         | 
| 800 | 
            +
                    op[(_stream)] += 5;                                         \
         | 
| 801 | 
            +
                    ip[(_stream)] -= nbBytes;                                   \
         | 
| 802 | 
            +
                    bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
         | 
| 803 | 
            +
                    bits[(_stream)] <<= nbBits;                                 \
         | 
| 804 | 
            +
                } while (0)
         | 
| 805 | 
            +
             | 
| 806 | 
            +
                    /* Manually unroll the loop because compilers don't consistently
         | 
| 807 | 
            +
                     * unroll the inner loops, which destroys performance.
         | 
| 808 | 
            +
                     */
         | 
| 755 809 | 
             
                    do {
         | 
| 756 810 | 
             
                        /* Decode 5 symbols in each of the 4 streams */
         | 
| 757 | 
            -
                         | 
| 758 | 
            -
             | 
| 759 | 
            -
             | 
| 760 | 
            -
             | 
| 761 | 
            -
             | 
| 762 | 
            -
             | 
| 763 | 
            -
             | 
| 764 | 
            -
                         | 
| 765 | 
            -
                        /* Reload the bitstreams */
         | 
| 766 | 
            -
                        for (stream = 0; stream < 4; ++stream) {
         | 
| 767 | 
            -
                            int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
         | 
| 768 | 
            -
                            int const nbBits = ctz & 7;
         | 
| 769 | 
            -
                            int const nbBytes = ctz >> 3;
         | 
| 770 | 
            -
                            op[stream] += 5;
         | 
| 771 | 
            -
                            ip[stream] -= nbBytes;
         | 
| 772 | 
            -
                            bits[stream] = MEM_read64(ip[stream]) | 1;
         | 
| 773 | 
            -
                            bits[stream] <<= nbBits;
         | 
| 774 | 
            -
                        }
         | 
| 811 | 
            +
                        HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0);
         | 
| 812 | 
            +
                        HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1);
         | 
| 813 | 
            +
                        HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2);
         | 
| 814 | 
            +
                        HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3);
         | 
| 815 | 
            +
                        HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4);
         | 
| 816 | 
            +
             | 
| 817 | 
            +
                        /* Reload each of the 4 the bitstreams */
         | 
| 818 | 
            +
                        HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM);
         | 
| 775 819 | 
             
                    } while (op[3] < olimit);
         | 
| 820 | 
            +
             | 
| 821 | 
            +
            #undef HUF_4X1_DECODE_SYMBOL
         | 
| 822 | 
            +
            #undef HUF_4X1_RELOAD_STREAM
         | 
| 776 823 | 
             
                }
         | 
| 777 824 |  | 
| 778 825 | 
             
            _out:
         | 
| @@ -797,8 +844,8 @@ HUF_decompress4X1_usingDTable_internal_fast( | |
| 797 844 | 
             
                HUF_DecompressFastLoopFn loopFn)
         | 
| 798 845 | 
             
            {
         | 
| 799 846 | 
             
                void const* dt = DTable + 1;
         | 
| 800 | 
            -
                const | 
| 801 | 
            -
                BYTE* const oend = (BYTE*)dst  | 
| 847 | 
            +
                BYTE const* const ilowest = (BYTE const*)cSrc;
         | 
| 848 | 
            +
                BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
         | 
| 802 849 | 
             
                HUF_DecompressFastArgs args;
         | 
| 803 850 | 
             
                {   size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| 804 851 | 
             
                    FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
         | 
| @@ -806,18 +853,22 @@ HUF_decompress4X1_usingDTable_internal_fast( | |
| 806 853 | 
             
                        return 0;
         | 
| 807 854 | 
             
                }
         | 
| 808 855 |  | 
| 809 | 
            -
                assert(args.ip[0] >= args. | 
| 856 | 
            +
                assert(args.ip[0] >= args.ilowest);
         | 
| 810 857 | 
             
                loopFn(&args);
         | 
| 811 858 |  | 
| 812 | 
            -
                /* Our loop guarantees that ip[] >=  | 
| 859 | 
            +
                /* Our loop guarantees that ip[] >= ilowest and that we haven't
         | 
| 813 860 | 
             
                * overwritten any op[].
         | 
| 814 861 | 
             
                */
         | 
| 815 | 
            -
                assert(args.ip[0] >=  | 
| 816 | 
            -
                assert(args.ip[ | 
| 817 | 
            -
                assert(args.ip[ | 
| 818 | 
            -
                assert(args.ip[ | 
| 862 | 
            +
                assert(args.ip[0] >= ilowest);
         | 
| 863 | 
            +
                assert(args.ip[0] >= ilowest);
         | 
| 864 | 
            +
                assert(args.ip[1] >= ilowest);
         | 
| 865 | 
            +
                assert(args.ip[2] >= ilowest);
         | 
| 866 | 
            +
                assert(args.ip[3] >= ilowest);
         | 
| 819 867 | 
             
                assert(args.op[3] <= oend);
         | 
| 820 | 
            -
             | 
| 868 | 
            +
             | 
| 869 | 
            +
                assert(ilowest == args.ilowest);
         | 
| 870 | 
            +
                assert(ilowest + 6 == args.iend[0]);
         | 
| 871 | 
            +
                (void)ilowest;
         | 
| 821 872 |  | 
| 822 873 | 
             
                /* finish bit streams one by one. */
         | 
| 823 874 | 
             
                {   size_t const segmentSize = (dstSize+3) / 4;
         | 
| @@ -868,7 +919,7 @@ static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, | |
| 868 919 | 
             
                }
         | 
| 869 920 | 
             
            #endif
         | 
| 870 921 |  | 
| 871 | 
            -
                if (!(flags & HUF_flags_disableFast)) {
         | 
| 922 | 
            +
                if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
         | 
| 872 923 | 
             
                    size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
         | 
| 873 924 | 
             
                    if (ret != 0)
         | 
| 874 925 | 
             
                        return ret;
         | 
| @@ -1239,15 +1290,19 @@ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, c | |
| 1239 1290 | 
             
            }
         | 
| 1240 1291 |  | 
| 1241 1292 | 
             
            #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
         | 
| 1242 | 
            -
                ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
         | 
| 1293 | 
            +
                do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
         | 
| 1243 1294 |  | 
| 1244 | 
            -
            #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) | 
| 1245 | 
            -
                 | 
| 1246 | 
            -
                     | 
| 1295 | 
            +
            #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)                     \
         | 
| 1296 | 
            +
                do {                                                           \
         | 
| 1297 | 
            +
                    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12))                \
         | 
| 1298 | 
            +
                        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
         | 
| 1299 | 
            +
                } while (0)
         | 
| 1247 1300 |  | 
| 1248 | 
            -
            #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) | 
| 1249 | 
            -
                 | 
| 1250 | 
            -
                     | 
| 1301 | 
            +
            #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)                     \
         | 
| 1302 | 
            +
                do {                                                           \
         | 
| 1303 | 
            +
                    if (MEM_64bits())                                          \
         | 
| 1304 | 
            +
                        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
         | 
| 1305 | 
            +
                } while (0)
         | 
| 1251 1306 |  | 
| 1252 1307 | 
             
            HINT_INLINE size_t
         | 
| 1253 1308 | 
             
            HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
         | 
| @@ -1307,7 +1362,7 @@ HUF_decompress1X2_usingDTable_internal_body( | |
| 1307 1362 |  | 
| 1308 1363 | 
             
                /* decode */
         | 
| 1309 1364 | 
             
                {   BYTE* const ostart = (BYTE*) dst;
         | 
| 1310 | 
            -
                    BYTE* const oend = ostart  | 
| 1365 | 
            +
                    BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize);
         | 
| 1311 1366 | 
             
                    const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
         | 
| 1312 1367 | 
             
                    const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
         | 
| 1313 1368 | 
             
                    DTableDesc const dtd = HUF_getDTableDesc(DTable);
         | 
| @@ -1332,6 +1387,7 @@ HUF_decompress4X2_usingDTable_internal_body( | |
| 1332 1387 | 
             
                const HUF_DTable* DTable)
         | 
| 1333 1388 | 
             
            {
         | 
| 1334 1389 | 
             
                if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
         | 
| 1390 | 
            +
                if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
         | 
| 1335 1391 |  | 
| 1336 1392 | 
             
                {   const BYTE* const istart = (const BYTE*) cSrc;
         | 
| 1337 1393 | 
             
                    BYTE* const ostart = (BYTE*) dst;
         | 
| @@ -1367,7 +1423,7 @@ HUF_decompress4X2_usingDTable_internal_body( | |
| 1367 1423 |  | 
| 1368 1424 | 
             
                    if (length4 > cSrcSize) return ERROR(corruption_detected);  /* overflow */
         | 
| 1369 1425 | 
             
                    if (opStart4 > oend) return ERROR(corruption_detected);     /* overflow */
         | 
| 1370 | 
            -
                     | 
| 1426 | 
            +
                    assert(dstSize >= 6 /* validated above */);
         | 
| 1371 1427 | 
             
                    CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
         | 
| 1372 1428 | 
             
                    CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
         | 
| 1373 1429 | 
             
                    CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
         | 
| @@ -1472,7 +1528,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* | |
| 1472 1528 | 
             
                BYTE* op[4];
         | 
| 1473 1529 | 
             
                BYTE* oend[4];
         | 
| 1474 1530 | 
             
                HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
         | 
| 1475 | 
            -
                BYTE const* const  | 
| 1531 | 
            +
                BYTE const* const ilowest = args->ilowest;
         | 
| 1476 1532 |  | 
| 1477 1533 | 
             
                /* Copy the arguments to local registers. */
         | 
| 1478 1534 | 
             
                ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
         | 
| @@ -1490,13 +1546,12 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* | |
| 1490 1546 | 
             
                for (;;) {
         | 
| 1491 1547 | 
             
                    BYTE* olimit;
         | 
| 1492 1548 | 
             
                    int stream;
         | 
| 1493 | 
            -
                    int symbol;
         | 
| 1494 1549 |  | 
| 1495 1550 | 
             
                    /* Assert loop preconditions */
         | 
| 1496 1551 | 
             
            #ifndef NDEBUG
         | 
| 1497 1552 | 
             
                    for (stream = 0; stream < 4; ++stream) {
         | 
| 1498 1553 | 
             
                        assert(op[stream] <= oend[stream]);
         | 
| 1499 | 
            -
                        assert(ip[stream] >=  | 
| 1554 | 
            +
                        assert(ip[stream] >= ilowest);
         | 
| 1500 1555 | 
             
                    }
         | 
| 1501 1556 | 
             
            #endif
         | 
| 1502 1557 | 
             
                    /* Compute olimit */
         | 
| @@ -1509,7 +1564,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* | |
| 1509 1564 | 
             
                         * We also know that each input pointer is >= ip[0]. So we can run
         | 
| 1510 1565 | 
             
                         * iters loops before running out of input.
         | 
| 1511 1566 | 
             
                         */
         | 
| 1512 | 
            -
                        size_t iters = (size_t)(ip[0] -  | 
| 1567 | 
            +
                        size_t iters = (size_t)(ip[0] - ilowest) / 7;
         | 
| 1513 1568 | 
             
                        /* Each iteration can produce up to 10 bytes of output per stream.
         | 
| 1514 1569 | 
             
                         * Each output stream may advance at different rates. So take the
         | 
| 1515 1570 | 
             
                         * minimum number of safe iterations among all the output streams.
         | 
| @@ -1527,8 +1582,8 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* | |
| 1527 1582 | 
             
                         */
         | 
| 1528 1583 | 
             
                        olimit = op[3] + (iters * 5);
         | 
| 1529 1584 |  | 
| 1530 | 
            -
                        /* Exit the fast decoding loop  | 
| 1531 | 
            -
                        if (op[3]  | 
| 1585 | 
            +
                        /* Exit the fast decoding loop once we reach the end. */
         | 
| 1586 | 
            +
                        if (op[3] == olimit)
         | 
| 1532 1587 | 
             
                            break;
         | 
| 1533 1588 |  | 
| 1534 1589 | 
             
                        /* Exit the decoding loop if any input pointer has crossed the
         | 
| @@ -1547,54 +1602,58 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* | |
| 1547 1602 | 
             
                    }
         | 
| 1548 1603 | 
             
            #endif
         | 
| 1549 1604 |  | 
| 1605 | 
            +
            #define HUF_4X2_DECODE_SYMBOL(_stream, _decode3)                      \
         | 
| 1606 | 
            +
                do {                                                              \
         | 
| 1607 | 
            +
                    if ((_decode3) || (_stream) != 3) {                           \
         | 
| 1608 | 
            +
                        int const index = (int)(bits[(_stream)] >> 53);           \
         | 
| 1609 | 
            +
                        HUF_DEltX2 const entry = dtable[index];                   \
         | 
| 1610 | 
            +
                        MEM_write16(op[(_stream)], entry.sequence); \
         | 
| 1611 | 
            +
                        bits[(_stream)] <<= (entry.nbBits) & 0x3F;                \
         | 
| 1612 | 
            +
                        op[(_stream)] += (entry.length);                          \
         | 
| 1613 | 
            +
                    }                                                             \
         | 
| 1614 | 
            +
                } while (0)
         | 
| 1615 | 
            +
             | 
| 1616 | 
            +
            #define HUF_4X2_RELOAD_STREAM(_stream)                                  \
         | 
| 1617 | 
            +
                do {                                                                \
         | 
| 1618 | 
            +
                    HUF_4X2_DECODE_SYMBOL(3, 1);                                    \
         | 
| 1619 | 
            +
                    {                                                               \
         | 
| 1620 | 
            +
                        int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
         | 
| 1621 | 
            +
                        int const nbBits = ctz & 7;                                 \
         | 
| 1622 | 
            +
                        int const nbBytes = ctz >> 3;                               \
         | 
| 1623 | 
            +
                        ip[(_stream)] -= nbBytes;                                   \
         | 
| 1624 | 
            +
                        bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
         | 
| 1625 | 
            +
                        bits[(_stream)] <<= nbBits;                                 \
         | 
| 1626 | 
            +
                    }                                                               \
         | 
| 1627 | 
            +
                } while (0)
         | 
| 1628 | 
            +
             | 
| 1629 | 
            +
                    /* Manually unroll the loop because compilers don't consistently
         | 
| 1630 | 
            +
                     * unroll the inner loops, which destroys performance.
         | 
| 1631 | 
            +
                     */
         | 
| 1550 1632 | 
             
                    do {
         | 
| 1551 | 
            -
                        /*  | 
| 1552 | 
            -
             | 
| 1553 | 
            -
             | 
| 1554 | 
            -
             | 
| 1555 | 
            -
             | 
| 1556 | 
            -
             | 
| 1557 | 
            -
             | 
| 1558 | 
            -
             | 
| 1559 | 
            -
             | 
| 1560 | 
            -
             | 
| 1561 | 
            -
                        /*  | 
| 1562 | 
            -
                         | 
| 1563 | 
            -
             | 
| 1564 | 
            -
             | 
| 1565 | 
            -
             | 
| 1566 | 
            -
             | 
| 1567 | 
            -
             | 
| 1568 | 
            -
                         | 
| 1569 | 
            -
                        /* Do 4 table lookups from the final stream & reload bitstreams */
         | 
| 1570 | 
            -
                        for (stream = 0; stream < 4; ++stream) {
         | 
| 1571 | 
            -
                            /* Do a table lookup from the final stream.
         | 
| 1572 | 
            -
                             * This is interleaved with the reloading to reduce register
         | 
| 1573 | 
            -
                             * pressure. This shouldn't be necessary, but compilers can
         | 
| 1574 | 
            -
                             * struggle with codegen with high register pressure.
         | 
| 1575 | 
            -
                             */
         | 
| 1576 | 
            -
                            {
         | 
| 1577 | 
            -
                                int const index = (int)(bits[3] >> 53);
         | 
| 1578 | 
            -
                                HUF_DEltX2 const entry = dtable[index];
         | 
| 1579 | 
            -
                                MEM_write16(op[3], entry.sequence);
         | 
| 1580 | 
            -
                                bits[3] <<= (entry.nbBits);
         | 
| 1581 | 
            -
                                op[3] += (entry.length);
         | 
| 1582 | 
            -
                            }
         | 
| 1583 | 
            -
                            /* Reload the bitstreams. The final bitstream must be reloaded
         | 
| 1584 | 
            -
                             * after the 5th symbol was decoded.
         | 
| 1585 | 
            -
                             */
         | 
| 1586 | 
            -
                            {
         | 
| 1587 | 
            -
                                int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
         | 
| 1588 | 
            -
                                int const nbBits = ctz & 7;
         | 
| 1589 | 
            -
                                int const nbBytes = ctz >> 3;
         | 
| 1590 | 
            -
                                ip[stream] -= nbBytes;
         | 
| 1591 | 
            -
                                bits[stream] = MEM_read64(ip[stream]) | 1;
         | 
| 1592 | 
            -
                                bits[stream] <<= nbBits;
         | 
| 1593 | 
            -
                            }
         | 
| 1594 | 
            -
                        }
         | 
| 1633 | 
            +
                        /* Decode 5 symbols from each of the first 3 streams.
         | 
| 1634 | 
            +
                         * The final stream will be decoded during the reload phase
         | 
| 1635 | 
            +
                         * to reduce register pressure.
         | 
| 1636 | 
            +
                         */
         | 
| 1637 | 
            +
                        HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
         | 
| 1638 | 
            +
                        HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
         | 
| 1639 | 
            +
                        HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
         | 
| 1640 | 
            +
                        HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
         | 
| 1641 | 
            +
                        HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
         | 
| 1642 | 
            +
             | 
| 1643 | 
            +
                        /* Decode one symbol from the final stream */
         | 
| 1644 | 
            +
                        HUF_4X2_DECODE_SYMBOL(3, 1);
         | 
| 1645 | 
            +
             | 
| 1646 | 
            +
                        /* Decode 4 symbols from the final stream & reload bitstreams.
         | 
| 1647 | 
            +
                         * The final stream is reloaded last, meaning that all 5 symbols
         | 
| 1648 | 
            +
                         * are decoded from the final stream before it is reloaded.
         | 
| 1649 | 
            +
                         */
         | 
| 1650 | 
            +
                        HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM);
         | 
| 1595 1651 | 
             
                    } while (op[3] < olimit);
         | 
| 1596 1652 | 
             
                }
         | 
| 1597 1653 |  | 
| 1654 | 
            +
            #undef HUF_4X2_DECODE_SYMBOL
         | 
| 1655 | 
            +
            #undef HUF_4X2_RELOAD_STREAM
         | 
| 1656 | 
            +
             | 
| 1598 1657 | 
             
            _out:
         | 
| 1599 1658 |  | 
| 1600 1659 | 
             
                /* Save the final values of each of the state variables back to args. */
         | 
| @@ -1611,8 +1670,8 @@ HUF_decompress4X2_usingDTable_internal_fast( | |
| 1611 1670 | 
             
                const HUF_DTable* DTable,
         | 
| 1612 1671 | 
             
                HUF_DecompressFastLoopFn loopFn) {
         | 
| 1613 1672 | 
             
                void const* dt = DTable + 1;
         | 
| 1614 | 
            -
                const BYTE* const  | 
| 1615 | 
            -
                BYTE* const oend = (BYTE*)dst  | 
| 1673 | 
            +
                const BYTE* const ilowest = (const BYTE*)cSrc;
         | 
| 1674 | 
            +
                BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
         | 
| 1616 1675 | 
             
                HUF_DecompressFastArgs args;
         | 
| 1617 1676 | 
             
                {
         | 
| 1618 1677 | 
             
                    size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
         | 
| @@ -1621,16 +1680,19 @@ HUF_decompress4X2_usingDTable_internal_fast( | |
| 1621 1680 | 
             
                        return 0;
         | 
| 1622 1681 | 
             
                }
         | 
| 1623 1682 |  | 
| 1624 | 
            -
                assert(args.ip[0] >= args. | 
| 1683 | 
            +
                assert(args.ip[0] >= args.ilowest);
         | 
| 1625 1684 | 
             
                loopFn(&args);
         | 
| 1626 1685 |  | 
| 1627 1686 | 
             
                /* note : op4 already verified within main loop */
         | 
| 1628 | 
            -
                assert(args.ip[0] >=  | 
| 1629 | 
            -
                assert(args.ip[1] >=  | 
| 1630 | 
            -
                assert(args.ip[2] >=  | 
| 1631 | 
            -
                assert(args.ip[3] >=  | 
| 1687 | 
            +
                assert(args.ip[0] >= ilowest);
         | 
| 1688 | 
            +
                assert(args.ip[1] >= ilowest);
         | 
| 1689 | 
            +
                assert(args.ip[2] >= ilowest);
         | 
| 1690 | 
            +
                assert(args.ip[3] >= ilowest);
         | 
| 1632 1691 | 
             
                assert(args.op[3] <= oend);
         | 
| 1633 | 
            -
             | 
| 1692 | 
            +
             | 
| 1693 | 
            +
                assert(ilowest == args.ilowest);
         | 
| 1694 | 
            +
                assert(ilowest + 6 == args.iend[0]);
         | 
| 1695 | 
            +
                (void)ilowest;
         | 
| 1634 1696 |  | 
| 1635 1697 | 
             
                /* finish bitStreams one by one */
         | 
| 1636 1698 | 
             
                {
         | 
| @@ -1679,7 +1741,7 @@ static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, | |
| 1679 1741 | 
             
                }
         | 
| 1680 1742 | 
             
            #endif
         | 
| 1681 1743 |  | 
| 1682 | 
            -
                if (!(flags & HUF_flags_disableFast)) {
         | 
| 1744 | 
            +
                if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
         | 
| 1683 1745 | 
             
                    size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
         | 
| 1684 1746 | 
             
                    if (ret != 0)
         | 
| 1685 1747 | 
             
                        return ret;
         |