zstdlib 0.9.0-arm64-darwin → 0.11.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +13 -0
- data/ext/zstdlib_c/extconf.rb +3 -3
- data/ext/zstdlib_c/ruby/zlib-3.2/zstdlib.c +5090 -0
- data/ext/zstdlib_c/zlib-1.2.12/crc32.c +1116 -0
- data/ext/zstdlib_c/zlib-1.2.12/crc32.h +9446 -0
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/deflate.c +78 -30
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/deflate.h +12 -15
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/gzguts.h +3 -2
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/gzlib.c +5 -3
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/gzread.c +5 -7
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/gzwrite.c +25 -13
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/infback.c +2 -1
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inffast.c +14 -14
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inflate.c +39 -8
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inflate.h +3 -2
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inftrees.c +3 -3
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/trees.c +27 -48
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/zlib.h +123 -100
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/zutil.c +2 -2
- data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/zutil.h +12 -9
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/allocations.h +55 -0
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/bits.h +200 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/bitstream.h +19 -60
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/compiler.h +26 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/cpu.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/entropy_common.c +12 -40
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.c +9 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse.h +5 -83
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse_decompress.c +7 -99
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/huf.h +65 -156
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/mem.h +39 -46
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.c +26 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.h +7 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/portability_macros.h +22 -3
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/threading.c +176 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/threading.h +5 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.c +2 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.h +8 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_common.c +1 -36
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_deps.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_internal.h +17 -118
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_trace.h +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/clevels.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/fse_compress.c +7 -124
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/huf_compress.c +234 -169
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress.c +1243 -538
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_internal.h +225 -151
- data/ext/zstdlib_c/zstd-1.5.5/lib/compress/zstd_compress_literals.c +235 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_literals.h +16 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.c +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_cwksp.h +128 -62
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.c +95 -33
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.c +433 -148
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.c +398 -345
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.h +4 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.c +5 -5
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm_geartab.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.c +106 -80
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.c +17 -9
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress.c +434 -441
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress_amd64.S +30 -39
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.c +4 -4
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress.c +205 -80
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.c +201 -81
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.h +6 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_internal.h +4 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zdict.h +53 -31
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd.h +580 -135
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd_errors.h +27 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzcompatibility.h +8 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzguts.h +10 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzlib.c +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzread.c +10 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzwrite.c +5 -5
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.c +46 -44
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.h +4 -1
- data/lib/2.4/zstdlib_c.bundle +0 -0
- data/lib/2.5/zstdlib_c.bundle +0 -0
- data/lib/2.6/zstdlib_c.bundle +0 -0
- data/lib/2.7/zstdlib_c.bundle +0 -0
- data/lib/3.0/zstdlib_c.bundle +0 -0
- data/lib/3.1/zstdlib_c.bundle +0 -0
- data/lib/3.2/zstdlib_c.bundle +0 -0
- metadata +108 -104
- data/ext/zstdlib_c/zlib-1.2.11/crc32.c +0 -442
- data/ext/zstdlib_c/zlib-1.2.11/crc32.h +0 -441
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/threading.c +0 -122
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_literals.c +0 -159
- /data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/adler32.c +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/compress.c +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/gzclose.c +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inffast.h +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inffixed.h +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inftrees.h +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/trees.h +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/uncompr.c +0 -0
- /data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/zconf.h +0 -0
| @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            /*
         | 
| 2 | 
            -
             * Copyright (c)  | 
| 2 | 
            +
             * Copyright (c) Meta Platforms, Inc. and affiliates.
         | 
| 3 3 | 
             
             * All rights reserved.
         | 
| 4 4 | 
             
             *
         | 
| 5 5 | 
             
             * This source code is licensed under both the BSD-style license (found in the
         | 
| @@ -10,6 +10,9 @@ | |
| 10 10 |  | 
| 11 11 | 
             
            #include "zstd_compress_internal.h"
         | 
| 12 12 | 
             
            #include "zstd_lazy.h"
         | 
| 13 | 
            +
            #include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            #define kLazySkippingStep 8
         | 
| 13 16 |  | 
| 14 17 |  | 
| 15 18 | 
             
            /*-*************************************
         | 
| @@ -197,8 +200,8 @@ ZSTD_DUBT_findBetterDictMatch ( | |
| 197 200 | 
             
                        U32 matchIndex = dictMatchIndex + dictIndexDelta;
         | 
| 198 201 | 
             
                        if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
         | 
| 199 202 | 
             
                            DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
         | 
| 200 | 
            -
                                curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr,  | 
| 201 | 
            -
                            bestLength = matchLength, *offsetPtr =  | 
| 203 | 
            +
                                curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex);
         | 
| 204 | 
            +
                            bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
         | 
| 202 205 | 
             
                        }
         | 
| 203 206 | 
             
                        if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
         | 
| 204 207 | 
             
                            break;   /* drop, to guarantee consistency (miss a little bit of compression) */
         | 
| @@ -218,7 +221,7 @@ ZSTD_DUBT_findBetterDictMatch ( | |
| 218 221 | 
             
                }
         | 
| 219 222 |  | 
| 220 223 | 
             
                if (bestLength >= MINMATCH) {
         | 
| 221 | 
            -
                    U32 const mIndex = curr - (U32) | 
| 224 | 
            +
                    U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex;
         | 
| 222 225 | 
             
                    DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
         | 
| 223 226 | 
             
                                curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
         | 
| 224 227 | 
             
                }
         | 
| @@ -230,7 +233,7 @@ ZSTD_DUBT_findBetterDictMatch ( | |
| 230 233 | 
             
            static size_t
         | 
| 231 234 | 
             
            ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
         | 
| 232 235 | 
             
                                    const BYTE* const ip, const BYTE* const iend,
         | 
| 233 | 
            -
                                    size_t*  | 
| 236 | 
            +
                                    size_t* offBasePtr,
         | 
| 234 237 | 
             
                                    U32 const mls,
         | 
| 235 238 | 
             
                                    const ZSTD_dictMode_e dictMode)
         | 
| 236 239 | 
             
            {
         | 
| @@ -327,8 +330,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, | |
| 327 330 | 
             
                        if (matchLength > bestLength) {
         | 
| 328 331 | 
             
                            if (matchLength > matchEndIdx - matchIndex)
         | 
| 329 332 | 
             
                                matchEndIdx = matchIndex + (U32)matchLength;
         | 
| 330 | 
            -
                            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32) | 
| 331 | 
            -
                                bestLength = matchLength, * | 
| 333 | 
            +
                            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) )
         | 
| 334 | 
            +
                                bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex);
         | 
| 332 335 | 
             
                            if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
         | 
| 333 336 | 
             
                                if (dictMode == ZSTD_dictMatchState) {
         | 
| 334 337 | 
             
                                    nbCompares = 0; /* in addition to avoiding checking any
         | 
| @@ -361,16 +364,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, | |
| 361 364 | 
             
                    if (dictMode == ZSTD_dictMatchState && nbCompares) {
         | 
| 362 365 | 
             
                        bestLength = ZSTD_DUBT_findBetterDictMatch(
         | 
| 363 366 | 
             
                                ms, ip, iend,
         | 
| 364 | 
            -
                                 | 
| 367 | 
            +
                                offBasePtr, bestLength, nbCompares,
         | 
| 365 368 | 
             
                                mls, dictMode);
         | 
| 366 369 | 
             
                    }
         | 
| 367 370 |  | 
| 368 371 | 
             
                    assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
         | 
| 369 372 | 
             
                    ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
         | 
| 370 373 | 
             
                    if (bestLength >= MINMATCH) {
         | 
| 371 | 
            -
                        U32 const mIndex = curr - (U32) | 
| 374 | 
            +
                        U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex;
         | 
| 372 375 | 
             
                        DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
         | 
| 373 | 
            -
                                    curr, (U32)bestLength, (U32)* | 
| 376 | 
            +
                                    curr, (U32)bestLength, (U32)*offBasePtr, mIndex);
         | 
| 374 377 | 
             
                    }
         | 
| 375 378 | 
             
                    return bestLength;
         | 
| 376 379 | 
             
                }
         | 
| @@ -381,14 +384,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, | |
| 381 384 | 
             
            FORCE_INLINE_TEMPLATE size_t
         | 
| 382 385 | 
             
            ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
         | 
| 383 386 | 
             
                            const BYTE* const ip, const BYTE* const iLimit,
         | 
| 384 | 
            -
                                  size_t*  | 
| 387 | 
            +
                                  size_t* offBasePtr,
         | 
| 385 388 | 
             
                            const U32 mls /* template */,
         | 
| 386 389 | 
             
                            const ZSTD_dictMode_e dictMode)
         | 
| 387 390 | 
             
            {
         | 
| 388 391 | 
             
                DEBUGLOG(7, "ZSTD_BtFindBestMatch");
         | 
| 389 392 | 
             
                if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
         | 
| 390 393 | 
             
                ZSTD_updateDUBT(ms, ip, iLimit, mls);
         | 
| 391 | 
            -
                return ZSTD_DUBT_findBestMatch(ms, ip, iLimit,  | 
| 394 | 
            +
                return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode);
         | 
| 392 395 | 
             
            }
         | 
| 393 396 |  | 
| 394 397 | 
             
            /***********************************
         | 
| @@ -561,7 +564,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb | |
| 561 564 | 
             
                    /* save best solution */
         | 
| 562 565 | 
             
                    if (currentMl > ml) {
         | 
| 563 566 | 
             
                        ml = currentMl;
         | 
| 564 | 
            -
                        *offsetPtr =  | 
| 567 | 
            +
                        *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
         | 
| 565 568 | 
             
                        if (ip+currentMl == iLimit) {
         | 
| 566 569 | 
             
                            /* best possible, avoids read overflow on next attempt */
         | 
| 567 570 | 
             
                            return ml;
         | 
| @@ -598,7 +601,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb | |
| 598 601 | 
             
                        /* save best solution */
         | 
| 599 602 | 
             
                        if (currentMl > ml) {
         | 
| 600 603 | 
             
                            ml = currentMl;
         | 
| 601 | 
            -
                            *offsetPtr =  | 
| 604 | 
            +
                            *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
         | 
| 602 605 | 
             
                            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
         | 
| 603 606 | 
             
                        }
         | 
| 604 607 | 
             
                    }
         | 
| @@ -617,7 +620,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb | |
| 617 620 | 
             
            FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
         | 
| 618 621 | 
             
                                    ZSTD_matchState_t* ms,
         | 
| 619 622 | 
             
                                    const ZSTD_compressionParameters* const cParams,
         | 
| 620 | 
            -
                                    const BYTE* ip, U32 const mls)
         | 
| 623 | 
            +
                                    const BYTE* ip, U32 const mls, U32 const lazySkipping)
         | 
| 621 624 | 
             
            {
         | 
| 622 625 | 
             
                U32* const hashTable  = ms->hashTable;
         | 
| 623 626 | 
             
                const U32 hashLog = cParams->hashLog;
         | 
| @@ -632,6 +635,9 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( | |
| 632 635 | 
             
                    NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
         | 
| 633 636 | 
             
                    hashTable[h] = idx;
         | 
| 634 637 | 
             
                    idx++;
         | 
| 638 | 
            +
                    /* Stop inserting every position when in the lazy skipping mode. */
         | 
| 639 | 
            +
                    if (lazySkipping)
         | 
| 640 | 
            +
                        break;
         | 
| 635 641 | 
             
                }
         | 
| 636 642 |  | 
| 637 643 | 
             
                ms->nextToUpdate = target;
         | 
| @@ -640,7 +646,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( | |
| 640 646 |  | 
| 641 647 | 
             
            U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
         | 
| 642 648 | 
             
                const ZSTD_compressionParameters* const cParams = &ms->cParams;
         | 
| 643 | 
            -
                return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
         | 
| 649 | 
            +
                return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
         | 
| 644 650 | 
             
            }
         | 
| 645 651 |  | 
| 646 652 | 
             
            /* inlining is important to hardwire a hot branch (template emulation) */
         | 
| @@ -684,14 +690,15 @@ size_t ZSTD_HcFindBestMatch( | |
| 684 690 | 
             
                }
         | 
| 685 691 |  | 
| 686 692 | 
             
                /* HC4 match finder */
         | 
| 687 | 
            -
                matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
         | 
| 693 | 
            +
                matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
         | 
| 688 694 |  | 
| 689 695 | 
             
                for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
         | 
| 690 696 | 
             
                    size_t currentMl=0;
         | 
| 691 697 | 
             
                    if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
         | 
| 692 698 | 
             
                        const BYTE* const match = base + matchIndex;
         | 
| 693 699 | 
             
                        assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
         | 
| 694 | 
            -
                         | 
| 700 | 
            +
                        /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
         | 
| 701 | 
            +
                        if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3))   /* potentially better */
         | 
| 695 702 | 
             
                            currentMl = ZSTD_count(ip, match, iLimit);
         | 
| 696 703 | 
             
                    } else {
         | 
| 697 704 | 
             
                        const BYTE* const match = dictBase + matchIndex;
         | 
| @@ -703,7 +710,7 @@ size_t ZSTD_HcFindBestMatch( | |
| 703 710 | 
             
                    /* save best solution */
         | 
| 704 711 | 
             
                    if (currentMl > ml) {
         | 
| 705 712 | 
             
                        ml = currentMl;
         | 
| 706 | 
            -
                        *offsetPtr =  | 
| 713 | 
            +
                        *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
         | 
| 707 714 | 
             
                        if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
         | 
| 708 715 | 
             
                    }
         | 
| 709 716 |  | 
| @@ -739,7 +746,7 @@ size_t ZSTD_HcFindBestMatch( | |
| 739 746 | 
             
                        if (currentMl > ml) {
         | 
| 740 747 | 
             
                            ml = currentMl;
         | 
| 741 748 | 
             
                            assert(curr > matchIndex + dmsIndexDelta);
         | 
| 742 | 
            -
                            *offsetPtr =  | 
| 749 | 
            +
                            *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
         | 
| 743 750 | 
             
                            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
         | 
| 744 751 | 
             
                        }
         | 
| 745 752 |  | 
| @@ -756,8 +763,6 @@ size_t ZSTD_HcFindBestMatch( | |
| 756 763 | 
             
            * (SIMD) Row-based matchfinder
         | 
| 757 764 | 
             
            ***********************************/
         | 
| 758 765 | 
             
            /* Constants for row-based hash */
         | 
| 759 | 
            -
            #define ZSTD_ROW_HASH_TAG_OFFSET 16     /* byte offset of hashes in the match state's tagTable from the beginning of a row */
         | 
| 760 | 
            -
            #define ZSTD_ROW_HASH_TAG_BITS 8        /* nb bits to use for the tag */
         | 
| 761 766 | 
             
            #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
         | 
| 762 767 | 
             
            #define ZSTD_ROW_HASH_MAX_ENTRIES 64    /* absolute maximum number of entries per row, for all configurations */
         | 
| 763 768 |  | 
| @@ -769,73 +774,19 @@ typedef U64 ZSTD_VecMask;   /* Clarifies when we are interacting with a U64 repr | |
| 769 774 | 
             
             * Starting from the LSB, returns the idx of the next non-zero bit.
         | 
| 770 775 | 
             
             * Basically counting the nb of trailing zeroes.
         | 
| 771 776 | 
             
             */
         | 
| 772 | 
            -
             | 
| 773 | 
            -
                 | 
| 774 | 
            -
            #   if defined(_MSC_VER) && defined(_WIN64)
         | 
| 775 | 
            -
                    if (val != 0) {
         | 
| 776 | 
            -
                        unsigned long r;
         | 
| 777 | 
            -
                        _BitScanForward64(&r, val);
         | 
| 778 | 
            -
                        return (U32)(r);
         | 
| 779 | 
            -
                    } else {
         | 
| 780 | 
            -
                        /* Should not reach this code path */
         | 
| 781 | 
            -
                        __assume(0);
         | 
| 782 | 
            -
                    }
         | 
| 783 | 
            -
            #   elif (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))
         | 
| 784 | 
            -
                if (sizeof(size_t) == 4) {
         | 
| 785 | 
            -
                    U32 mostSignificantWord = (U32)(val >> 32);
         | 
| 786 | 
            -
                    U32 leastSignificantWord = (U32)val;
         | 
| 787 | 
            -
                    if (leastSignificantWord == 0) {
         | 
| 788 | 
            -
                        return 32 + (U32)__builtin_ctz(mostSignificantWord);
         | 
| 789 | 
            -
                    } else {
         | 
| 790 | 
            -
                        return (U32)__builtin_ctz(leastSignificantWord);
         | 
| 791 | 
            -
                    }
         | 
| 792 | 
            -
                } else {
         | 
| 793 | 
            -
                    return (U32)__builtin_ctzll(val);
         | 
| 794 | 
            -
                }
         | 
| 795 | 
            -
            #   else
         | 
| 796 | 
            -
                /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count
         | 
| 797 | 
            -
                 * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer
         | 
| 798 | 
            -
                 */
         | 
| 799 | 
            -
                val = ~val & (val - 1ULL); /* Lowest set bit mask */
         | 
| 800 | 
            -
                val = val - ((val >> 1) & 0x5555555555555555);
         | 
| 801 | 
            -
                val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
         | 
| 802 | 
            -
                return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
         | 
| 803 | 
            -
            #   endif
         | 
| 804 | 
            -
            }
         | 
| 805 | 
            -
             | 
| 806 | 
            -
            /* ZSTD_rotateRight_*():
         | 
| 807 | 
            -
             * Rotates a bitfield to the right by "count" bits.
         | 
| 808 | 
            -
             * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
         | 
| 809 | 
            -
             */
         | 
| 810 | 
            -
            FORCE_INLINE_TEMPLATE
         | 
| 811 | 
            -
            U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
         | 
| 812 | 
            -
                assert(count < 64);
         | 
| 813 | 
            -
                count &= 0x3F; /* for fickle pattern recognition */
         | 
| 814 | 
            -
                return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
         | 
| 815 | 
            -
            }
         | 
| 816 | 
            -
             | 
| 817 | 
            -
            FORCE_INLINE_TEMPLATE
         | 
| 818 | 
            -
            U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
         | 
| 819 | 
            -
                assert(count < 32);
         | 
| 820 | 
            -
                count &= 0x1F; /* for fickle pattern recognition */
         | 
| 821 | 
            -
                return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
         | 
| 822 | 
            -
            }
         | 
| 823 | 
            -
             | 
| 824 | 
            -
            FORCE_INLINE_TEMPLATE
         | 
| 825 | 
            -
            U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
         | 
| 826 | 
            -
                assert(count < 16);
         | 
| 827 | 
            -
                count &= 0x0F; /* for fickle pattern recognition */
         | 
| 828 | 
            -
                return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
         | 
| 777 | 
            +
            MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
         | 
| 778 | 
            +
                return ZSTD_countTrailingZeros64(val);
         | 
| 829 779 | 
             
            }
         | 
| 830 780 |  | 
| 831 781 | 
             
            /* ZSTD_row_nextIndex():
         | 
| 832 782 | 
             
             * Returns the next index to insert at within a tagTable row, and updates the "head"
         | 
| 833 | 
            -
             * value to reflect the update. Essentially cycles backwards from [ | 
| 783 | 
            +
             * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
         | 
| 834 784 | 
             
             */
         | 
| 835 785 | 
             
            FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
         | 
| 836 | 
            -
             | 
| 837 | 
            -
             | 
| 838 | 
            -
             | 
| 786 | 
            +
                U32 next = (*tagRow-1) & rowMask;
         | 
| 787 | 
            +
                next += (next == 0) ? rowMask : 0; /* skip first position */
         | 
| 788 | 
            +
                *tagRow = (BYTE)next;
         | 
| 789 | 
            +
                return next;
         | 
| 839 790 | 
             
            }
         | 
| 840 791 |  | 
| 841 792 | 
             
            /* ZSTD_isAligned():
         | 
| @@ -849,7 +800,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) { | |
| 849 800 | 
             
            /* ZSTD_row_prefetch():
         | 
| 850 801 | 
             
             * Performs prefetching for the hashTable and tagTable at a given row.
         | 
| 851 802 | 
             
             */
         | 
| 852 | 
            -
            FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable,  | 
| 803 | 
            +
            FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
         | 
| 853 804 | 
             
                PREFETCH_L1(hashTable + relRow);
         | 
| 854 805 | 
             
                if (rowLog >= 5) {
         | 
| 855 806 | 
             
                    PREFETCH_L1(hashTable + relRow + 16);
         | 
| @@ -873,13 +824,13 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B | |
| 873 824 | 
             
                                               U32 idx, const BYTE* const iLimit)
         | 
| 874 825 | 
             
            {
         | 
| 875 826 | 
             
                U32 const* const hashTable = ms->hashTable;
         | 
| 876 | 
            -
                 | 
| 827 | 
            +
                BYTE const* const tagTable = ms->tagTable;
         | 
| 877 828 | 
             
                U32 const hashLog = ms->rowHashLog;
         | 
| 878 829 | 
             
                U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
         | 
| 879 830 | 
             
                U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
         | 
| 880 831 |  | 
| 881 832 | 
             
                for (; idx < lim; ++idx) {
         | 
| 882 | 
            -
                    U32 const hash = (U32) | 
| 833 | 
            +
                    U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
         | 
| 883 834 | 
             
                    U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         | 
| 884 835 | 
             
                    ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
         | 
| 885 836 | 
             
                    ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
         | 
| @@ -895,11 +846,12 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B | |
| 895 846 | 
             
             * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
         | 
| 896 847 | 
             
             */
         | 
| 897 848 | 
             
            FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
         | 
| 898 | 
            -
                                                               | 
| 849 | 
            +
                                                              BYTE const* tagTable, BYTE const* base,
         | 
| 899 850 | 
             
                                                              U32 idx, U32 const hashLog,
         | 
| 900 | 
            -
                                                              U32 const rowLog, U32 const mls | 
| 851 | 
            +
                                                              U32 const rowLog, U32 const mls,
         | 
| 852 | 
            +
                                                              U64 const hashSalt)
         | 
| 901 853 | 
             
            {
         | 
| 902 | 
            -
                U32 const newHash = (U32) | 
| 854 | 
            +
                U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
         | 
| 903 855 | 
             
                U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         | 
| 904 856 | 
             
                ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
         | 
| 905 857 | 
             
                {   U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
         | 
| @@ -917,22 +869,21 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms, | |
| 917 869 | 
             
                                                                    U32 const rowMask, U32 const useCache)
         | 
| 918 870 | 
             
            {
         | 
| 919 871 | 
             
                U32* const hashTable = ms->hashTable;
         | 
| 920 | 
            -
                 | 
| 872 | 
            +
                BYTE* const tagTable = ms->tagTable;
         | 
| 921 873 | 
             
                U32 const hashLog = ms->rowHashLog;
         | 
| 922 874 | 
             
                const BYTE* const base = ms->window.base;
         | 
| 923 875 |  | 
| 924 876 | 
             
                DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
         | 
| 925 877 | 
             
                for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
         | 
| 926 | 
            -
                    U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
         | 
| 927 | 
            -
                                              : (U32) | 
| 878 | 
            +
                    U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
         | 
| 879 | 
            +
                                              : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
         | 
| 928 880 | 
             
                    U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         | 
| 929 881 | 
             
                    U32* const row = hashTable + relRow;
         | 
| 930 | 
            -
                    BYTE* tagRow =  | 
| 931 | 
            -
                                                                   Explicit cast allows us to get exact desired position within each row */
         | 
| 882 | 
            +
                    BYTE* tagRow = tagTable + relRow;
         | 
| 932 883 | 
             
                    U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
         | 
| 933 884 |  | 
| 934 | 
            -
                    assert(hash ==  | 
| 935 | 
            -
                     | 
| 885 | 
            +
                    assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
         | 
| 886 | 
            +
                    tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
         | 
| 936 887 | 
             
                    row[pos] = updateStartIdx;
         | 
| 937 888 | 
             
                }
         | 
| 938 889 | 
             
            }
         | 
| @@ -980,7 +931,35 @@ void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) { | |
| 980 931 | 
             
                const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
         | 
| 981 932 |  | 
| 982 933 | 
             
                DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
         | 
| 983 | 
            -
                ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /*  | 
| 934 | 
            +
                ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */);
         | 
| 935 | 
            +
            }
         | 
| 936 | 
            +
             | 
| 937 | 
            +
            /* Returns the mask width of bits group of which will be set to 1. Given not all
         | 
| 938 | 
            +
             * architectures have easy movemask instruction, this helps to iterate over
         | 
| 939 | 
            +
             * groups of bits easier and faster.
         | 
| 940 | 
            +
             */
         | 
| 941 | 
            +
            FORCE_INLINE_TEMPLATE U32
         | 
| 942 | 
            +
            ZSTD_row_matchMaskGroupWidth(const U32 rowEntries)
         | 
| 943 | 
            +
            {
         | 
| 944 | 
            +
                assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
         | 
| 945 | 
            +
                assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
         | 
| 946 | 
            +
                (void)rowEntries;
         | 
| 947 | 
            +
            #if defined(ZSTD_ARCH_ARM_NEON)
         | 
| 948 | 
            +
                /* NEON path only works for little endian */
         | 
| 949 | 
            +
                if (!MEM_isLittleEndian()) {
         | 
| 950 | 
            +
                    return 1;
         | 
| 951 | 
            +
                }
         | 
| 952 | 
            +
                if (rowEntries == 16) {
         | 
| 953 | 
            +
                    return 4;
         | 
| 954 | 
            +
                }
         | 
| 955 | 
            +
                if (rowEntries == 32) {
         | 
| 956 | 
            +
                    return 2;
         | 
| 957 | 
            +
                }
         | 
| 958 | 
            +
                if (rowEntries == 64) {
         | 
| 959 | 
            +
                    return 1;
         | 
| 960 | 
            +
                }
         | 
| 961 | 
            +
            #endif
         | 
| 962 | 
            +
                return 1;
         | 
| 984 963 | 
             
            }
         | 
| 985 964 |  | 
| 986 965 | 
             
            #if defined(ZSTD_ARCH_X86_SSE2)
         | 
| @@ -1003,71 +982,82 @@ ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U | |
| 1003 982 | 
             
            }
         | 
| 1004 983 | 
             
            #endif
         | 
| 1005 984 |  | 
| 1006 | 
            -
             | 
| 1007 | 
            -
              | 
| 1008 | 
            -
              | 
| 1009 | 
            -
             | 
| 985 | 
            +
            #if defined(ZSTD_ARCH_ARM_NEON)
         | 
| 986 | 
            +
            FORCE_INLINE_TEMPLATE ZSTD_VecMask
         | 
| 987 | 
            +
            ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped)
         | 
| 988 | 
            +
            {
         | 
| 989 | 
            +
                assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
         | 
| 990 | 
            +
                if (rowEntries == 16) {
         | 
| 991 | 
            +
                    /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits.
         | 
| 992 | 
            +
                     * After that groups of 4 bits represent the equalMask. We lower
         | 
| 993 | 
            +
                     * all bits except the highest in these groups by doing AND with
         | 
| 994 | 
            +
                     * 0x88 = 0b10001000.
         | 
| 995 | 
            +
                     */
         | 
| 996 | 
            +
                    const uint8x16_t chunk = vld1q_u8(src);
         | 
| 997 | 
            +
                    const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
         | 
| 998 | 
            +
                    const uint8x8_t res = vshrn_n_u16(equalMask, 4);
         | 
| 999 | 
            +
                    const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
         | 
| 1000 | 
            +
                    return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull;
         | 
| 1001 | 
            +
                } else if (rowEntries == 32) {
         | 
| 1002 | 
            +
                    /* Same idea as with rowEntries == 16 but doing AND with
         | 
| 1003 | 
            +
                     * 0x55 = 0b01010101.
         | 
| 1004 | 
            +
                     */
         | 
| 1005 | 
            +
                    const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src);
         | 
| 1006 | 
            +
                    const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
         | 
| 1007 | 
            +
                    const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
         | 
| 1008 | 
            +
                    const uint8x16_t dup = vdupq_n_u8(tag);
         | 
| 1009 | 
            +
                    const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6);
         | 
| 1010 | 
            +
                    const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6);
         | 
| 1011 | 
            +
                    const uint8x8_t res = vsli_n_u8(t0, t1, 4);
         | 
| 1012 | 
            +
                    const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ;
         | 
| 1013 | 
            +
                    return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull;
         | 
| 1014 | 
            +
                } else { /* rowEntries == 64 */
         | 
| 1015 | 
            +
                    const uint8x16x4_t chunk = vld4q_u8(src);
         | 
| 1016 | 
            +
                    const uint8x16_t dup = vdupq_n_u8(tag);
         | 
| 1017 | 
            +
                    const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
         | 
| 1018 | 
            +
                    const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
         | 
| 1019 | 
            +
                    const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
         | 
| 1020 | 
            +
                    const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
         | 
| 1021 | 
            +
             | 
| 1022 | 
            +
                    const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
         | 
| 1023 | 
            +
                    const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
         | 
| 1024 | 
            +
                    const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
         | 
| 1025 | 
            +
                    const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
         | 
| 1026 | 
            +
                    const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
         | 
| 1027 | 
            +
                    const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
         | 
| 1028 | 
            +
                    return ZSTD_rotateRight_U64(matches, headGrouped);
         | 
| 1029 | 
            +
                }
         | 
| 1030 | 
            +
            }
         | 
| 1031 | 
            +
            #endif
         | 
| 1032 | 
            +
             | 
| 1033 | 
            +
            /* Returns a ZSTD_VecMask (U64) that has the nth group (determined by
         | 
| 1034 | 
            +
             * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag"
         | 
| 1035 | 
            +
             * matches the hash at the nth position in a row of the tagTable.
         | 
| 1036 | 
            +
             * Each row is a circular buffer beginning at the value of "headGrouped". So we
         | 
| 1037 | 
            +
             * must rotate the "matches" bitfield to match up with the actual layout of the
         | 
| 1038 | 
            +
             * entries within the hashTable */
         | 
| 1010 1039 | 
             
            FORCE_INLINE_TEMPLATE ZSTD_VecMask
         | 
| 1011 | 
            -
            ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32  | 
| 1040 | 
            +
            ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
         | 
| 1012 1041 | 
             
            {
         | 
| 1013 | 
            -
                const BYTE* const src = tagRow | 
| 1042 | 
            +
                const BYTE* const src = tagRow;
         | 
| 1014 1043 | 
             
                assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
         | 
| 1015 1044 | 
             
                assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
         | 
| 1045 | 
            +
                assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
         | 
| 1016 1046 |  | 
| 1017 1047 | 
             
            #if defined(ZSTD_ARCH_X86_SSE2)
         | 
| 1018 1048 |  | 
| 1019 | 
            -
                return ZSTD_row_getSSEMask(rowEntries / 16, src, tag,  | 
| 1049 | 
            +
                return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped);
         | 
| 1020 1050 |  | 
| 1021 1051 | 
             
            #else /* SW or NEON-LE */
         | 
| 1022 1052 |  | 
| 1023 1053 | 
             
            # if defined(ZSTD_ARCH_ARM_NEON)
         | 
| 1024 1054 | 
             
              /* This NEON path only works for little endian - otherwise use SWAR below */
         | 
| 1025 1055 | 
             
                if (MEM_isLittleEndian()) {
         | 
| 1026 | 
            -
                     | 
| 1027 | 
            -
                        const uint8x16_t chunk = vld1q_u8(src);
         | 
| 1028 | 
            -
                        const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
         | 
| 1029 | 
            -
                        const uint16x8_t t0 = vshlq_n_u16(equalMask, 7);
         | 
| 1030 | 
            -
                        const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14));
         | 
| 1031 | 
            -
                        const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14));
         | 
| 1032 | 
            -
                        const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28));
         | 
| 1033 | 
            -
                        const U16 hi = (U16)vgetq_lane_u8(t3, 8);
         | 
| 1034 | 
            -
                        const U16 lo = (U16)vgetq_lane_u8(t3, 0);
         | 
| 1035 | 
            -
                        return ZSTD_rotateRight_U16((hi << 8) | lo, head);
         | 
| 1036 | 
            -
                    } else if (rowEntries == 32) {
         | 
| 1037 | 
            -
                        const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src);
         | 
| 1038 | 
            -
                        const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
         | 
| 1039 | 
            -
                        const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
         | 
| 1040 | 
            -
                        const uint8x16_t equalMask0 = vceqq_u8(chunk0, vdupq_n_u8(tag));
         | 
| 1041 | 
            -
                        const uint8x16_t equalMask1 = vceqq_u8(chunk1, vdupq_n_u8(tag));
         | 
| 1042 | 
            -
                        const int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0));
         | 
| 1043 | 
            -
                        const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1));
         | 
| 1044 | 
            -
                        const uint8x8_t t0 = vreinterpret_u8_s8(pack0);
         | 
| 1045 | 
            -
                        const uint8x8_t t1 = vreinterpret_u8_s8(pack1);
         | 
| 1046 | 
            -
                        const uint8x8_t t2 = vsri_n_u8(t1, t0, 2);
         | 
| 1047 | 
            -
                        const uint8x8x2_t t3 = vuzp_u8(t2, t0);
         | 
| 1048 | 
            -
                        const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4);
         | 
| 1049 | 
            -
                        const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0);
         | 
| 1050 | 
            -
                        return ZSTD_rotateRight_U32(matches, head);
         | 
| 1051 | 
            -
                    } else { /* rowEntries == 64 */
         | 
| 1052 | 
            -
                        const uint8x16x4_t chunk = vld4q_u8(src);
         | 
| 1053 | 
            -
                        const uint8x16_t dup = vdupq_n_u8(tag);
         | 
| 1054 | 
            -
                        const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
         | 
| 1055 | 
            -
                        const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
         | 
| 1056 | 
            -
                        const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
         | 
| 1057 | 
            -
                        const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
         | 
| 1058 | 
            -
             | 
| 1059 | 
            -
                        const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
         | 
| 1060 | 
            -
                        const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
         | 
| 1061 | 
            -
                        const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
         | 
| 1062 | 
            -
                        const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
         | 
| 1063 | 
            -
                        const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
         | 
| 1064 | 
            -
                        const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
         | 
| 1065 | 
            -
                        return ZSTD_rotateRight_U64(matches, head);
         | 
| 1066 | 
            -
                    }
         | 
| 1056 | 
            +
                    return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
         | 
| 1067 1057 | 
             
                }
         | 
| 1068 1058 | 
             
            # endif /* ZSTD_ARCH_ARM_NEON */
         | 
| 1069 1059 | 
             
                /* SWAR */
         | 
| 1070 | 
            -
                {   const  | 
| 1060 | 
            +
                {   const int chunkSize = sizeof(size_t);
         | 
| 1071 1061 | 
             
                    const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
         | 
| 1072 1062 | 
             
                    const size_t xFF = ~((size_t)0);
         | 
| 1073 1063 | 
             
                    const size_t x01 = xFF / 0xFF;
         | 
| @@ -1100,11 +1090,11 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, | |
| 1100 1090 | 
             
                    }
         | 
| 1101 1091 | 
             
                    matches = ~matches;
         | 
| 1102 1092 | 
             
                    if (rowEntries == 16) {
         | 
| 1103 | 
            -
                        return ZSTD_rotateRight_U16((U16)matches,  | 
| 1093 | 
            +
                        return ZSTD_rotateRight_U16((U16)matches, headGrouped);
         | 
| 1104 1094 | 
             
                    } else if (rowEntries == 32) {
         | 
| 1105 | 
            -
                        return ZSTD_rotateRight_U32((U32)matches,  | 
| 1095 | 
            +
                        return ZSTD_rotateRight_U32((U32)matches, headGrouped);
         | 
| 1106 1096 | 
             
                    } else {
         | 
| 1107 | 
            -
                        return ZSTD_rotateRight_U64((U64)matches,  | 
| 1097 | 
            +
                        return ZSTD_rotateRight_U64((U64)matches, headGrouped);
         | 
| 1108 1098 | 
             
                    }
         | 
| 1109 1099 | 
             
                }
         | 
| 1110 1100 | 
             
            #endif
         | 
| @@ -1134,7 +1124,7 @@ size_t ZSTD_RowFindBestMatch( | |
| 1134 1124 | 
             
                                    const U32 rowLog)
         | 
| 1135 1125 | 
             
            {
         | 
| 1136 1126 | 
             
                U32* const hashTable = ms->hashTable;
         | 
| 1137 | 
            -
                 | 
| 1127 | 
            +
                BYTE* const tagTable = ms->tagTable;
         | 
| 1138 1128 | 
             
                U32* const hashCache = ms->hashCache;
         | 
| 1139 1129 | 
             
                const U32 hashLog = ms->rowHashLog;
         | 
| 1140 1130 | 
             
                const ZSTD_compressionParameters* const cParams = &ms->cParams;
         | 
| @@ -1152,8 +1142,11 @@ size_t ZSTD_RowFindBestMatch( | |
| 1152 1142 | 
             
                const U32 rowEntries = (1U << rowLog);
         | 
| 1153 1143 | 
             
                const U32 rowMask = rowEntries - 1;
         | 
| 1154 1144 | 
             
                const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
         | 
| 1145 | 
            +
                const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
         | 
| 1146 | 
            +
                const U64 hashSalt = ms->hashSalt;
         | 
| 1155 1147 | 
             
                U32 nbAttempts = 1U << cappedSearchLog;
         | 
| 1156 1148 | 
             
                size_t ml=4-1;
         | 
| 1149 | 
            +
                U32 hash;
         | 
| 1157 1150 |  | 
| 1158 1151 | 
             
                /* DMS/DDS variables that may be referenced laster */
         | 
| 1159 1152 | 
             
                const ZSTD_matchState_t* const dms = ms->dictMatchState;
         | 
| @@ -1177,7 +1170,7 @@ size_t ZSTD_RowFindBestMatch( | |
| 1177 1170 | 
             
                if (dictMode == ZSTD_dictMatchState) {
         | 
| 1178 1171 | 
             
                    /* Prefetch DMS rows */
         | 
| 1179 1172 | 
             
                    U32* const dmsHashTable = dms->hashTable;
         | 
| 1180 | 
            -
                     | 
| 1173 | 
            +
                    BYTE* const dmsTagTable = dms->tagTable;
         | 
| 1181 1174 | 
             
                    U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
         | 
| 1182 1175 | 
             
                    U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         | 
| 1183 1176 | 
             
                    dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
         | 
| @@ -1187,23 +1180,34 @@ size_t ZSTD_RowFindBestMatch( | |
| 1187 1180 | 
             
                }
         | 
| 1188 1181 |  | 
| 1189 1182 | 
             
                /* Update the hashTable and tagTable up to (but not including) ip */
         | 
| 1190 | 
            -
                 | 
| 1183 | 
            +
                if (!ms->lazySkipping) {
         | 
| 1184 | 
            +
                    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
         | 
| 1185 | 
            +
                    hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
         | 
| 1186 | 
            +
                } else {
         | 
| 1187 | 
            +
                    /* Stop inserting every position when in the lazy skipping mode.
         | 
| 1188 | 
            +
                     * The hash cache is also not kept up to date in this mode.
         | 
| 1189 | 
            +
                     */
         | 
| 1190 | 
            +
                    hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
         | 
| 1191 | 
            +
                    ms->nextToUpdate = curr;
         | 
| 1192 | 
            +
                }
         | 
| 1193 | 
            +
                ms->hashSaltEntropy += hash; /* collect salt entropy */
         | 
| 1194 | 
            +
             | 
| 1191 1195 | 
             
                {   /* Get the hash for ip, compute the appropriate row */
         | 
| 1192 | 
            -
                    U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
         | 
| 1193 1196 | 
             
                    U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         | 
| 1194 1197 | 
             
                    U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
         | 
| 1195 1198 | 
             
                    U32* const row = hashTable + relRow;
         | 
| 1196 1199 | 
             
                    BYTE* tagRow = (BYTE*)(tagTable + relRow);
         | 
| 1197 | 
            -
                    U32 const  | 
| 1200 | 
            +
                    U32 const headGrouped = (*tagRow & rowMask) * groupWidth;
         | 
| 1198 1201 | 
             
                    U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
         | 
| 1199 1202 | 
             
                    size_t numMatches = 0;
         | 
| 1200 1203 | 
             
                    size_t currMatch = 0;
         | 
| 1201 | 
            -
                    ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag,  | 
| 1204 | 
            +
                    ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
         | 
| 1202 1205 |  | 
| 1203 1206 | 
             
                    /* Cycle through the matches and prefetch */
         | 
| 1204 | 
            -
                    for (; (matches > 0) && (nbAttempts > 0);  | 
| 1205 | 
            -
                        U32 const matchPos = ( | 
| 1207 | 
            +
                    for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
         | 
| 1208 | 
            +
                        U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
         | 
| 1206 1209 | 
             
                        U32 const matchIndex = row[matchPos];
         | 
| 1210 | 
            +
                        if(matchPos == 0) continue;
         | 
| 1207 1211 | 
             
                        assert(numMatches < rowEntries);
         | 
| 1208 1212 | 
             
                        if (matchIndex < lowLimit)
         | 
| 1209 1213 | 
             
                            break;
         | 
| @@ -1213,13 +1217,14 @@ size_t ZSTD_RowFindBestMatch( | |
| 1213 1217 | 
             
                            PREFETCH_L1(dictBase + matchIndex);
         | 
| 1214 1218 | 
             
                        }
         | 
| 1215 1219 | 
             
                        matchBuffer[numMatches++] = matchIndex;
         | 
| 1220 | 
            +
                        --nbAttempts;
         | 
| 1216 1221 | 
             
                    }
         | 
| 1217 1222 |  | 
| 1218 1223 | 
             
                    /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
         | 
| 1219 1224 | 
             
                       in ZSTD_row_update_internal() at the next search. */
         | 
| 1220 1225 | 
             
                    {
         | 
| 1221 1226 | 
             
                        U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
         | 
| 1222 | 
            -
                        tagRow[pos | 
| 1227 | 
            +
                        tagRow[pos] = (BYTE)tag;
         | 
| 1223 1228 | 
             
                        row[pos] = ms->nextToUpdate++;
         | 
| 1224 1229 | 
             
                    }
         | 
| 1225 1230 |  | 
| @@ -1233,7 +1238,8 @@ size_t ZSTD_RowFindBestMatch( | |
| 1233 1238 | 
             
                        if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
         | 
| 1234 1239 | 
             
                            const BYTE* const match = base + matchIndex;
         | 
| 1235 1240 | 
             
                            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
         | 
| 1236 | 
            -
                             | 
| 1241 | 
            +
                            /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
         | 
| 1242 | 
            +
                            if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3))   /* potentially better */
         | 
| 1237 1243 | 
             
                                currentMl = ZSTD_count(ip, match, iLimit);
         | 
| 1238 1244 | 
             
                        } else {
         | 
| 1239 1245 | 
             
                            const BYTE* const match = dictBase + matchIndex;
         | 
| @@ -1245,7 +1251,7 @@ size_t ZSTD_RowFindBestMatch( | |
| 1245 1251 | 
             
                        /* Save best solution */
         | 
| 1246 1252 | 
             
                        if (currentMl > ml) {
         | 
| 1247 1253 | 
             
                            ml = currentMl;
         | 
| 1248 | 
            -
                            *offsetPtr =  | 
| 1254 | 
            +
                            *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
         | 
| 1249 1255 | 
             
                            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
         | 
| 1250 1256 | 
             
                        }
         | 
| 1251 1257 | 
             
                    }
         | 
| @@ -1263,19 +1269,21 @@ size_t ZSTD_RowFindBestMatch( | |
| 1263 1269 | 
             
                    const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
         | 
| 1264 1270 | 
             
                    const U32 dmsIndexDelta        = dictLimit - dmsSize;
         | 
| 1265 1271 |  | 
| 1266 | 
            -
                    {   U32 const  | 
| 1272 | 
            +
                    {   U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth;
         | 
| 1267 1273 | 
             
                        U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
         | 
| 1268 1274 | 
             
                        size_t numMatches = 0;
         | 
| 1269 1275 | 
             
                        size_t currMatch = 0;
         | 
| 1270 | 
            -
                        ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag,  | 
| 1276 | 
            +
                        ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
         | 
| 1271 1277 |  | 
| 1272 | 
            -
                        for (; (matches > 0) && (nbAttempts > 0);  | 
| 1273 | 
            -
                            U32 const matchPos = ( | 
| 1278 | 
            +
                        for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
         | 
| 1279 | 
            +
                            U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
         | 
| 1274 1280 | 
             
                            U32 const matchIndex = dmsRow[matchPos];
         | 
| 1281 | 
            +
                            if(matchPos == 0) continue;
         | 
| 1275 1282 | 
             
                            if (matchIndex < dmsLowestIndex)
         | 
| 1276 1283 | 
             
                                break;
         | 
| 1277 1284 | 
             
                            PREFETCH_L1(dmsBase + matchIndex);
         | 
| 1278 1285 | 
             
                            matchBuffer[numMatches++] = matchIndex;
         | 
| 1286 | 
            +
                            --nbAttempts;
         | 
| 1279 1287 | 
             
                        }
         | 
| 1280 1288 |  | 
| 1281 1289 | 
             
                        /* Return the longest match */
         | 
| @@ -1294,7 +1302,7 @@ size_t ZSTD_RowFindBestMatch( | |
| 1294 1302 | 
             
                            if (currentMl > ml) {
         | 
| 1295 1303 | 
             
                                ml = currentMl;
         | 
| 1296 1304 | 
             
                                assert(curr > matchIndex + dmsIndexDelta);
         | 
| 1297 | 
            -
                                *offsetPtr =  | 
| 1305 | 
            +
                                *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
         | 
| 1298 1306 | 
             
                                if (ip+currentMl == iLimit) break;
         | 
| 1299 1307 | 
             
                            }
         | 
| 1300 1308 | 
             
                        }
         | 
| @@ -1304,14 +1312,10 @@ size_t ZSTD_RowFindBestMatch( | |
| 1304 1312 | 
             
            }
         | 
| 1305 1313 |  | 
| 1306 1314 |  | 
| 1307 | 
            -
            typedef size_t (*searchMax_f)(
         | 
| 1308 | 
            -
                                ZSTD_matchState_t* ms,
         | 
| 1309 | 
            -
                                const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
         | 
| 1310 | 
            -
             | 
| 1311 1315 | 
             
            /**
         | 
| 1312 | 
            -
             *  | 
| 1313 | 
            -
             *  | 
| 1314 | 
            -
             *  | 
| 1316 | 
            +
             * Generate search functions templated on (dictMode, mls, rowLog).
         | 
| 1317 | 
            +
             * These functions are outlined for code size & compilation time.
         | 
| 1318 | 
            +
             * ZSTD_searchMax() dispatches to the correct implementation function.
         | 
| 1315 1319 | 
             
             *
         | 
| 1316 1320 | 
             
             * TODO: The start of the search function involves loading and calculating a
         | 
| 1317 1321 | 
             
             * bunch of constants from the ZSTD_matchState_t. These computations could be
         | 
| @@ -1329,25 +1333,25 @@ typedef size_t (*searchMax_f)( | |
| 1329 1333 | 
             
             * the single segment loop. It should go in searchMax instead of its own
         | 
| 1330 1334 | 
             
             * function to avoid having multiple virtual function calls per search.
         | 
| 1331 1335 | 
             
             */
         | 
| 1332 | 
            -
            typedef struct {
         | 
| 1333 | 
            -
                searchMax_f searchMax;
         | 
| 1334 | 
            -
            } ZSTD_LazyVTable;
         | 
| 1335 1336 |  | 
| 1336 | 
            -
            #define  | 
| 1337 | 
            -
             | 
| 1338 | 
            -
             | 
| 1339 | 
            -
                        const BYTE* ip, const BYTE* const iLimit,                                 \
         | 
| 1340 | 
            -
                        size_t* offsetPtr)                                                        \
         | 
| 1341 | 
            -
                {                                                                                 \
         | 
| 1342 | 
            -
                    assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls);                          \
         | 
| 1343 | 
            -
                    return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
         | 
| 1344 | 
            -
                }                                                                                 \
         | 
| 1345 | 
            -
                static const ZSTD_LazyVTable ZSTD_BtVTable_##dictMode##_##mls = {                 \
         | 
| 1346 | 
            -
                    ZSTD_BtFindBestMatch_##dictMode##_##mls                                       \
         | 
| 1347 | 
            -
                };
         | 
| 1337 | 
            +
            #define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls
         | 
| 1338 | 
            +
            #define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls
         | 
| 1339 | 
            +
            #define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog
         | 
| 1348 1340 |  | 
| 1349 | 
            -
            #define  | 
| 1350 | 
            -
             | 
| 1341 | 
            +
            #define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE
         | 
| 1342 | 
            +
             | 
| 1343 | 
            +
            #define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls)                                           \
         | 
| 1344 | 
            +
                ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)(                      \
         | 
| 1345 | 
            +
                        ZSTD_matchState_t* ms,                                                     \
         | 
| 1346 | 
            +
                        const BYTE* ip, const BYTE* const iLimit,                                  \
         | 
| 1347 | 
            +
                        size_t* offBasePtr)                                                        \
         | 
| 1348 | 
            +
                {                                                                                  \
         | 
| 1349 | 
            +
                    assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls);                           \
         | 
| 1350 | 
            +
                    return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \
         | 
| 1351 | 
            +
                }                                                                                  \
         | 
| 1352 | 
            +
             | 
| 1353 | 
            +
            #define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls)                                          \
         | 
| 1354 | 
            +
                ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)(                     \
         | 
| 1351 1355 | 
             
                        ZSTD_matchState_t* ms,                                                    \
         | 
| 1352 1356 | 
             
                        const BYTE* ip, const BYTE* const iLimit,                                 \
         | 
| 1353 1357 | 
             
                        size_t* offsetPtr)                                                        \
         | 
| @@ -1355,12 +1359,9 @@ typedef struct { | |
| 1355 1359 | 
             
                    assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls);                          \
         | 
| 1356 1360 | 
             
                    return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
         | 
| 1357 1361 | 
             
                }                                                                                 \
         | 
| 1358 | 
            -
                static const ZSTD_LazyVTable ZSTD_HcVTable_##dictMode##_##mls = {                 \
         | 
| 1359 | 
            -
                    ZSTD_HcFindBestMatch_##dictMode##_##mls                                       \
         | 
| 1360 | 
            -
                };
         | 
| 1361 1362 |  | 
| 1362 | 
            -
            #define  | 
| 1363 | 
            -
                 | 
| 1363 | 
            +
            #define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)                                          \
         | 
| 1364 | 
            +
                ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(                     \
         | 
| 1364 1365 | 
             
                        ZSTD_matchState_t* ms,                                                             \
         | 
| 1365 1366 | 
             
                        const BYTE* ip, const BYTE* const iLimit,                                          \
         | 
| 1366 1367 | 
             
                        size_t* offsetPtr)                                                                 \
         | 
| @@ -1369,9 +1370,6 @@ typedef struct { | |
| 1369 1370 | 
             
                    assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog);                               \
         | 
| 1370 1371 | 
             
                    return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \
         | 
| 1371 1372 | 
             
                }                                                                                          \
         | 
| 1372 | 
            -
                static const ZSTD_LazyVTable ZSTD_RowVTable_##dictMode##_##mls##_##rowLog = {              \
         | 
| 1373 | 
            -
                    ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog                                    \
         | 
| 1374 | 
            -
                };
         | 
| 1375 1373 |  | 
| 1376 1374 | 
             
            #define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \
         | 
| 1377 1375 | 
             
                X(dictMode, mls, 4)                        \
         | 
| @@ -1394,84 +1392,103 @@ typedef struct { | |
| 1394 1392 | 
             
                X(__VA_ARGS__, dictMatchState)      \
         | 
| 1395 1393 | 
             
                X(__VA_ARGS__, dedicatedDictSearch)
         | 
| 1396 1394 |  | 
| 1397 | 
            -
            /* Generate  | 
| 1398 | 
            -
            ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG,  | 
| 1399 | 
            -
            /* Generate  | 
| 1400 | 
            -
            ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS,  | 
| 1401 | 
            -
            /* Generate  | 
| 1402 | 
            -
            ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS,  | 
| 1403 | 
            -
             | 
| 1404 | 
            -
            #define GEN_ZSTD_BT_VTABLE_ARRAY(dictMode) \
         | 
| 1405 | 
            -
                {                                      \
         | 
| 1406 | 
            -
                    &ZSTD_BtVTable_##dictMode##_4,     \
         | 
| 1407 | 
            -
                    &ZSTD_BtVTable_##dictMode##_5,     \
         | 
| 1408 | 
            -
                    &ZSTD_BtVTable_##dictMode##_6      \
         | 
| 1409 | 
            -
                }
         | 
| 1395 | 
            +
            /* Generate row search fns for each combination of (dictMode, mls, rowLog) */
         | 
| 1396 | 
            +
            ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN)
         | 
| 1397 | 
            +
            /* Generate binary Tree search fns for each combination of (dictMode, mls) */
         | 
| 1398 | 
            +
            ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN)
         | 
| 1399 | 
            +
            /* Generate hash chain search fns for each combination of (dictMode, mls) */
         | 
| 1400 | 
            +
            ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN)
         | 
| 1410 1401 |  | 
| 1411 | 
            -
             | 
| 1412 | 
            -
                {                                      \
         | 
| 1413 | 
            -
                    &ZSTD_HcVTable_##dictMode##_4,     \
         | 
| 1414 | 
            -
                    &ZSTD_HcVTable_##dictMode##_5,     \
         | 
| 1415 | 
            -
                    &ZSTD_HcVTable_##dictMode##_6      \
         | 
| 1416 | 
            -
                }
         | 
| 1417 | 
            -
             | 
| 1418 | 
            -
            #define GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, mls) \
         | 
| 1419 | 
            -
                {                                             \
         | 
| 1420 | 
            -
                    &ZSTD_RowVTable_##dictMode##_##mls##_4,   \
         | 
| 1421 | 
            -
                    &ZSTD_RowVTable_##dictMode##_##mls##_5,   \
         | 
| 1422 | 
            -
                    &ZSTD_RowVTable_##dictMode##_##mls##_6    \
         | 
| 1423 | 
            -
                }
         | 
| 1402 | 
            +
            typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
         | 
| 1424 1403 |  | 
| 1425 | 
            -
            #define  | 
| 1426 | 
            -
                 | 
| 1427 | 
            -
                     | 
| 1428 | 
            -
             | 
| 1429 | 
            -
             | 
| 1404 | 
            +
            #define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls)                         \
         | 
| 1405 | 
            +
                case mls:                                                             \
         | 
| 1406 | 
            +
                    return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
         | 
| 1407 | 
            +
            #define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls)                         \
         | 
| 1408 | 
            +
                case mls:                                                             \
         | 
| 1409 | 
            +
                    return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
         | 
| 1410 | 
            +
            #define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog)                         \
         | 
| 1411 | 
            +
                case rowLog:                                                                   \
         | 
| 1412 | 
            +
                    return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr);
         | 
| 1413 | 
            +
             | 
| 1414 | 
            +
            #define ZSTD_SWITCH_MLS(X, dictMode)   \
         | 
| 1415 | 
            +
                switch (mls) {                     \
         | 
| 1416 | 
            +
                    ZSTD_FOR_EACH_MLS(X, dictMode) \
         | 
| 1430 1417 | 
             
                }
         | 
| 1431 1418 |  | 
| 1432 | 
            -
            #define  | 
| 1433 | 
            -
                 | 
| 1434 | 
            -
                     | 
| 1435 | 
            -
             | 
| 1436 | 
            -
                     | 
| 1437 | 
            -
                     | 
| 1438 | 
            -
             | 
| 1439 | 
            -
             | 
| 1440 | 
            -
             | 
| 1441 | 
            -
             | 
| 1442 | 
            -
             | 
| 1443 | 
            -
             | 
| 1419 | 
            +
            #define ZSTD_SWITCH_ROWLOG(dictMode, mls)                                    \
         | 
| 1420 | 
            +
                case mls:                                                                \
         | 
| 1421 | 
            +
                    switch (rowLog) {                                                    \
         | 
| 1422 | 
            +
                        ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \
         | 
| 1423 | 
            +
                    }                                                                    \
         | 
| 1424 | 
            +
                    ZSTD_UNREACHABLE;                                                    \
         | 
| 1425 | 
            +
                    break;
         | 
| 1426 | 
            +
             | 
| 1427 | 
            +
            #define ZSTD_SWITCH_SEARCH_METHOD(dictMode)                       \
         | 
| 1428 | 
            +
                switch (searchMethod) {                                       \
         | 
| 1429 | 
            +
                    case search_hashChain:                                    \
         | 
| 1430 | 
            +
                        ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \
         | 
| 1431 | 
            +
                        break;                                                \
         | 
| 1432 | 
            +
                    case search_binaryTree:                                   \
         | 
| 1433 | 
            +
                        ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \
         | 
| 1434 | 
            +
                        break;                                                \
         | 
| 1435 | 
            +
                    case search_rowHash:                                      \
         | 
| 1436 | 
            +
                        ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode)         \
         | 
| 1437 | 
            +
                        break;                                                \
         | 
| 1438 | 
            +
                }                                                             \
         | 
| 1439 | 
            +
                ZSTD_UNREACHABLE;
         | 
| 1444 1440 |  | 
| 1445 1441 | 
             
            /**
         | 
| 1446 | 
            -
             *  | 
| 1447 | 
            -
             *  | 
| 1448 | 
            -
             *  | 
| 1449 | 
            -
             *  | 
| 1442 | 
            +
             * Searches for the longest match at @p ip.
         | 
| 1443 | 
            +
             * Dispatches to the correct implementation function based on the
         | 
| 1444 | 
            +
             * (searchMethod, dictMode, mls, rowLog). We use switch statements
         | 
| 1445 | 
            +
             * here instead of using an indirect function call through a function
         | 
| 1446 | 
            +
             * pointer because after Spectre and Meltdown mitigations, indirect
         | 
| 1447 | 
            +
             * function calls can be very costly, especially in the kernel.
         | 
| 1448 | 
            +
             *
         | 
| 1449 | 
            +
             * NOTE: dictMode and searchMethod should be templated, so those switch
         | 
| 1450 | 
            +
             * statements should be optimized out. Only the mls & rowLog switches
         | 
| 1451 | 
            +
             * should be left.
         | 
| 1452 | 
            +
             *
         | 
| 1453 | 
            +
             * @param ms The match state.
         | 
| 1454 | 
            +
             * @param ip The position to search at.
         | 
| 1455 | 
            +
             * @param iend The end of the input data.
         | 
| 1456 | 
            +
             * @param[out] offsetPtr Stores the match offset into this pointer.
         | 
| 1457 | 
            +
             * @param mls The minimum search length, in the range [4, 6].
         | 
| 1458 | 
            +
             * @param rowLog The row log (if applicable), in the range [4, 6].
         | 
| 1459 | 
            +
             * @param searchMethod The search method to use (templated).
         | 
| 1460 | 
            +
             * @param dictMode The dictMode (templated).
         | 
| 1461 | 
            +
             *
         | 
| 1462 | 
            +
             * @returns The length of the longest match found, or < mls if no match is found.
         | 
| 1463 | 
            +
             * If a match is found its offset is stored in @p offsetPtr.
         | 
| 1450 1464 | 
             
             */
         | 
| 1451 | 
            -
             | 
| 1452 | 
            -
             | 
| 1453 | 
            -
             | 
| 1465 | 
            +
            FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
         | 
| 1466 | 
            +
                ZSTD_matchState_t* ms,
         | 
| 1467 | 
            +
                const BYTE* ip,
         | 
| 1468 | 
            +
                const BYTE* iend,
         | 
| 1469 | 
            +
                size_t* offsetPtr,
         | 
| 1470 | 
            +
                U32 const mls,
         | 
| 1471 | 
            +
                U32 const rowLog,
         | 
| 1472 | 
            +
                searchMethod_e const searchMethod,
         | 
| 1473 | 
            +
                ZSTD_dictMode_e const dictMode)
         | 
| 1454 1474 | 
             
            {
         | 
| 1455 | 
            -
                 | 
| 1456 | 
            -
             | 
| 1457 | 
            -
                 | 
| 1458 | 
            -
             | 
| 1459 | 
            -
                 | 
| 1460 | 
            -
             | 
| 1461 | 
            -
                 | 
| 1462 | 
            -
             | 
| 1463 | 
            -
                switch (searchMethod) {
         | 
| 1464 | 
            -
                    case search_hashChain:
         | 
| 1465 | 
            -
                        return hcVTables[dictMode][mls - 4];
         | 
| 1466 | 
            -
                    case search_binaryTree:
         | 
| 1467 | 
            -
                        return btVTables[dictMode][mls - 4];
         | 
| 1468 | 
            -
                    case search_rowHash:
         | 
| 1469 | 
            -
                        return rowVTables[dictMode][mls - 4][rowLog - 4];
         | 
| 1470 | 
            -
                    default:
         | 
| 1471 | 
            -
                        return NULL;
         | 
| 1475 | 
            +
                if (dictMode == ZSTD_noDict) {
         | 
| 1476 | 
            +
                    ZSTD_SWITCH_SEARCH_METHOD(noDict)
         | 
| 1477 | 
            +
                } else if (dictMode == ZSTD_extDict) {
         | 
| 1478 | 
            +
                    ZSTD_SWITCH_SEARCH_METHOD(extDict)
         | 
| 1479 | 
            +
                } else if (dictMode == ZSTD_dictMatchState) {
         | 
| 1480 | 
            +
                    ZSTD_SWITCH_SEARCH_METHOD(dictMatchState)
         | 
| 1481 | 
            +
                } else if (dictMode == ZSTD_dedicatedDictSearch) {
         | 
| 1482 | 
            +
                    ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch)
         | 
| 1472 1483 | 
             
                }
         | 
| 1484 | 
            +
                ZSTD_UNREACHABLE;
         | 
| 1485 | 
            +
                return 0;
         | 
| 1473 1486 | 
             
            }
         | 
| 1474 1487 |  | 
| 1488 | 
            +
            /* *******************************
         | 
| 1489 | 
            +
            *  Common parser - lazy strategy
         | 
| 1490 | 
            +
            *********************************/
         | 
| 1491 | 
            +
             | 
| 1475 1492 | 
             
            FORCE_INLINE_TEMPLATE size_t
         | 
| 1476 1493 | 
             
            ZSTD_compressBlock_lazy_generic(
         | 
| 1477 1494 | 
             
                                    ZSTD_matchState_t* ms, seqStore_t* seqStore,
         | 
| @@ -1488,9 +1505,11 @@ ZSTD_compressBlock_lazy_generic( | |
| 1488 1505 | 
             
                const BYTE* const base = ms->window.base;
         | 
| 1489 1506 | 
             
                const U32 prefixLowestIndex = ms->window.dictLimit;
         | 
| 1490 1507 | 
             
                const BYTE* const prefixLowest = base + prefixLowestIndex;
         | 
| 1508 | 
            +
                const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
         | 
| 1509 | 
            +
                const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
         | 
| 1491 1510 |  | 
| 1492 | 
            -
                 | 
| 1493 | 
            -
                U32  | 
| 1511 | 
            +
                U32 offset_1 = rep[0], offset_2 = rep[1];
         | 
| 1512 | 
            +
                U32 offsetSaved1 = 0, offsetSaved2 = 0;
         | 
| 1494 1513 |  | 
| 1495 1514 | 
             
                const int isDMS = dictMode == ZSTD_dictMatchState;
         | 
| 1496 1515 | 
             
                const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
         | 
| @@ -1505,16 +1524,14 @@ ZSTD_compressBlock_lazy_generic( | |
| 1505 1524 | 
             
                                                 0;
         | 
| 1506 1525 | 
             
                const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
         | 
| 1507 1526 |  | 
| 1508 | 
            -
                assert(searchMax != NULL);
         | 
| 1509 | 
            -
             | 
| 1510 1527 | 
             
                DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
         | 
| 1511 1528 | 
             
                ip += (dictAndPrefixLength == 0);
         | 
| 1512 1529 | 
             
                if (dictMode == ZSTD_noDict) {
         | 
| 1513 1530 | 
             
                    U32 const curr = (U32)(ip - base);
         | 
| 1514 1531 | 
             
                    U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
         | 
| 1515 1532 | 
             
                    U32 const maxRep = curr - windowLow;
         | 
| 1516 | 
            -
                    if (offset_2 > maxRep)  | 
| 1517 | 
            -
                    if (offset_1 > maxRep)  | 
| 1533 | 
            +
                    if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
         | 
| 1534 | 
            +
                    if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
         | 
| 1518 1535 | 
             
                }
         | 
| 1519 1536 | 
             
                if (isDxS) {
         | 
| 1520 1537 | 
             
                    /* dictMatchState repCode checks don't currently handle repCode == 0
         | 
| @@ -1523,11 +1540,11 @@ ZSTD_compressBlock_lazy_generic( | |
| 1523 1540 | 
             
                    assert(offset_2 <= dictAndPrefixLength);
         | 
| 1524 1541 | 
             
                }
         | 
| 1525 1542 |  | 
| 1543 | 
            +
                /* Reset the lazy skipping state */
         | 
| 1544 | 
            +
                ms->lazySkipping = 0;
         | 
| 1545 | 
            +
             | 
| 1526 1546 | 
             
                if (searchMethod == search_rowHash) {
         | 
| 1527 | 
            -
                     | 
| 1528 | 
            -
                    ZSTD_row_fillHashCache(ms, base, rowLog,
         | 
| 1529 | 
            -
                                        MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
         | 
| 1530 | 
            -
                                        ms->nextToUpdate, ilimit);
         | 
| 1547 | 
            +
                    ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
         | 
| 1531 1548 | 
             
                }
         | 
| 1532 1549 |  | 
| 1533 1550 | 
             
                /* Match Loop */
         | 
| @@ -1539,7 +1556,7 @@ ZSTD_compressBlock_lazy_generic( | |
| 1539 1556 | 
             
            #endif
         | 
| 1540 1557 | 
             
                while (ip < ilimit) {
         | 
| 1541 1558 | 
             
                    size_t matchLength=0;
         | 
| 1542 | 
            -
                    size_t  | 
| 1559 | 
            +
                    size_t offBase = REPCODE1_TO_OFFBASE;
         | 
| 1543 1560 | 
             
                    const BYTE* start=ip+1;
         | 
| 1544 1561 | 
             
                    DEBUGLOG(7, "search baseline (depth 0)");
         | 
| 1545 1562 |  | 
| @@ -1564,14 +1581,23 @@ ZSTD_compressBlock_lazy_generic( | |
| 1564 1581 | 
             
                    }
         | 
| 1565 1582 |  | 
| 1566 1583 | 
             
                    /* first search (depth 0) */
         | 
| 1567 | 
            -
                    {   size_t  | 
| 1568 | 
            -
                        size_t const ml2 =  | 
| 1584 | 
            +
                    {   size_t offbaseFound = 999999999;
         | 
| 1585 | 
            +
                        size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode);
         | 
| 1569 1586 | 
             
                        if (ml2 > matchLength)
         | 
| 1570 | 
            -
                            matchLength = ml2, start = ip,  | 
| 1587 | 
            +
                            matchLength = ml2, start = ip, offBase = offbaseFound;
         | 
| 1571 1588 | 
             
                    }
         | 
| 1572 1589 |  | 
| 1573 1590 | 
             
                    if (matchLength < 4) {
         | 
| 1574 | 
            -
                         | 
| 1591 | 
            +
                        size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */;
         | 
| 1592 | 
            +
                        ip += step;
         | 
| 1593 | 
            +
                        /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
         | 
| 1594 | 
            +
                         * In this mode we stop inserting every position into our tables, and only insert
         | 
| 1595 | 
            +
                         * positions that we search, which is one in step positions.
         | 
| 1596 | 
            +
                         * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
         | 
| 1597 | 
            +
                         * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
         | 
| 1598 | 
            +
                         * triggered once we've gone 2KB without finding any matches.
         | 
| 1599 | 
            +
                         */
         | 
| 1600 | 
            +
                        ms->lazySkipping = step > kLazySkippingStep;
         | 
| 1575 1601 | 
             
                        continue;
         | 
| 1576 1602 | 
             
                    }
         | 
| 1577 1603 |  | 
| @@ -1581,12 +1607,12 @@ ZSTD_compressBlock_lazy_generic( | |
| 1581 1607 | 
             
                        DEBUGLOG(7, "search depth 1");
         | 
| 1582 1608 | 
             
                        ip ++;
         | 
| 1583 1609 | 
             
                        if ( (dictMode == ZSTD_noDict)
         | 
| 1584 | 
            -
                          && ( | 
| 1610 | 
            +
                          && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
         | 
| 1585 1611 | 
             
                            size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
         | 
| 1586 1612 | 
             
                            int const gain2 = (int)(mlRep * 3);
         | 
| 1587 | 
            -
                            int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32) | 
| 1613 | 
            +
                            int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
         | 
| 1588 1614 | 
             
                            if ((mlRep >= 4) && (gain2 > gain1))
         | 
| 1589 | 
            -
                                matchLength = mlRep,  | 
| 1615 | 
            +
                                matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
         | 
| 1590 1616 | 
             
                        }
         | 
| 1591 1617 | 
             
                        if (isDxS) {
         | 
| 1592 1618 | 
             
                            const U32 repIndex = (U32)(ip - base) - offset_1;
         | 
| @@ -1598,17 +1624,17 @@ ZSTD_compressBlock_lazy_generic( | |
| 1598 1624 | 
             
                                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
         | 
| 1599 1625 | 
             
                                size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
         | 
| 1600 1626 | 
             
                                int const gain2 = (int)(mlRep * 3);
         | 
| 1601 | 
            -
                                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32) | 
| 1627 | 
            +
                                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
         | 
| 1602 1628 | 
             
                                if ((mlRep >= 4) && (gain2 > gain1))
         | 
| 1603 | 
            -
                                    matchLength = mlRep,  | 
| 1629 | 
            +
                                    matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
         | 
| 1604 1630 | 
             
                            }
         | 
| 1605 1631 | 
             
                        }
         | 
| 1606 | 
            -
                        {   size_t  | 
| 1607 | 
            -
                            size_t const ml2 =  | 
| 1608 | 
            -
                            int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32) | 
| 1609 | 
            -
                            int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32) | 
| 1632 | 
            +
                        {   size_t ofbCandidate=999999999;
         | 
| 1633 | 
            +
                            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
         | 
| 1634 | 
            +
                            int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
         | 
| 1635 | 
            +
                            int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
         | 
| 1610 1636 | 
             
                            if ((ml2 >= 4) && (gain2 > gain1)) {
         | 
| 1611 | 
            -
                                matchLength = ml2,  | 
| 1637 | 
            +
                                matchLength = ml2, offBase = ofbCandidate, start = ip;
         | 
| 1612 1638 | 
             
                                continue;   /* search a better one */
         | 
| 1613 1639 | 
             
                        }   }
         | 
| 1614 1640 |  | 
| @@ -1617,12 +1643,12 @@ ZSTD_compressBlock_lazy_generic( | |
| 1617 1643 | 
             
                            DEBUGLOG(7, "search depth 2");
         | 
| 1618 1644 | 
             
                            ip ++;
         | 
| 1619 1645 | 
             
                            if ( (dictMode == ZSTD_noDict)
         | 
| 1620 | 
            -
                              && ( | 
| 1646 | 
            +
                              && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
         | 
| 1621 1647 | 
             
                                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
         | 
| 1622 1648 | 
             
                                int const gain2 = (int)(mlRep * 4);
         | 
| 1623 | 
            -
                                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32) | 
| 1649 | 
            +
                                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
         | 
| 1624 1650 | 
             
                                if ((mlRep >= 4) && (gain2 > gain1))
         | 
| 1625 | 
            -
                                    matchLength = mlRep,  | 
| 1651 | 
            +
                                    matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
         | 
| 1626 1652 | 
             
                            }
         | 
| 1627 1653 | 
             
                            if (isDxS) {
         | 
| 1628 1654 | 
             
                                const U32 repIndex = (U32)(ip - base) - offset_1;
         | 
| @@ -1634,17 +1660,17 @@ ZSTD_compressBlock_lazy_generic( | |
| 1634 1660 | 
             
                                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
         | 
| 1635 1661 | 
             
                                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
         | 
| 1636 1662 | 
             
                                    int const gain2 = (int)(mlRep * 4);
         | 
| 1637 | 
            -
                                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32) | 
| 1663 | 
            +
                                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
         | 
| 1638 1664 | 
             
                                    if ((mlRep >= 4) && (gain2 > gain1))
         | 
| 1639 | 
            -
                                        matchLength = mlRep,  | 
| 1665 | 
            +
                                        matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
         | 
| 1640 1666 | 
             
                                }
         | 
| 1641 1667 | 
             
                            }
         | 
| 1642 | 
            -
                            {   size_t  | 
| 1643 | 
            -
                                size_t const ml2 =  | 
| 1644 | 
            -
                                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32) | 
| 1645 | 
            -
                                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32) | 
| 1668 | 
            +
                            {   size_t ofbCandidate=999999999;
         | 
| 1669 | 
            +
                                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
         | 
| 1670 | 
            +
                                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
         | 
| 1671 | 
            +
                                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
         | 
| 1646 1672 | 
             
                                if ((ml2 >= 4) && (gain2 > gain1)) {
         | 
| 1647 | 
            -
                                    matchLength = ml2,  | 
| 1673 | 
            +
                                    matchLength = ml2, offBase = ofbCandidate, start = ip;
         | 
| 1648 1674 | 
             
                                    continue;
         | 
| 1649 1675 | 
             
                        }   }   }
         | 
| 1650 1676 | 
             
                        break;  /* nothing found : store previous solution */
         | 
| @@ -1655,26 +1681,33 @@ ZSTD_compressBlock_lazy_generic( | |
| 1655 1681 | 
             
                     * notably if `value` is unsigned, resulting in a large positive `-value`.
         | 
| 1656 1682 | 
             
                     */
         | 
| 1657 1683 | 
             
                    /* catch up */
         | 
| 1658 | 
            -
                    if ( | 
| 1684 | 
            +
                    if (OFFBASE_IS_OFFSET(offBase)) {
         | 
| 1659 1685 | 
             
                        if (dictMode == ZSTD_noDict) {
         | 
| 1660 | 
            -
                            while ( ((start > anchor) & (start -  | 
| 1661 | 
            -
                                 && (start[-1] == (start- | 
| 1686 | 
            +
                            while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest))
         | 
| 1687 | 
            +
                                 && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) )  /* only search for offset within prefix */
         | 
| 1662 1688 | 
             
                                { start--; matchLength++; }
         | 
| 1663 1689 | 
             
                        }
         | 
| 1664 1690 | 
             
                        if (isDxS) {
         | 
| 1665 | 
            -
                            U32 const matchIndex = (U32)((size_t)(start-base) -  | 
| 1691 | 
            +
                            U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
         | 
| 1666 1692 | 
             
                            const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
         | 
| 1667 1693 | 
             
                            const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
         | 
| 1668 1694 | 
             
                            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
         | 
| 1669 1695 | 
             
                        }
         | 
| 1670 | 
            -
                        offset_2 = offset_1; offset_1 = (U32) | 
| 1696 | 
            +
                        offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
         | 
| 1671 1697 | 
             
                    }
         | 
| 1672 1698 | 
             
                    /* store sequence */
         | 
| 1673 1699 | 
             
            _storeSequence:
         | 
| 1674 1700 | 
             
                    {   size_t const litLength = (size_t)(start - anchor);
         | 
| 1675 | 
            -
                        ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32) | 
| 1701 | 
            +
                        ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
         | 
| 1676 1702 | 
             
                        anchor = ip = start + matchLength;
         | 
| 1677 1703 | 
             
                    }
         | 
| 1704 | 
            +
                    if (ms->lazySkipping) {
         | 
| 1705 | 
            +
                        /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
         | 
| 1706 | 
            +
                        if (searchMethod == search_rowHash) {
         | 
| 1707 | 
            +
                            ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
         | 
| 1708 | 
            +
                        }
         | 
| 1709 | 
            +
                        ms->lazySkipping = 0;
         | 
| 1710 | 
            +
                    }
         | 
| 1678 1711 |  | 
| 1679 1712 | 
             
                    /* check immediate repcode */
         | 
| 1680 1713 | 
             
                    if (isDxS) {
         | 
| @@ -1688,8 +1721,8 @@ _storeSequence: | |
| 1688 1721 | 
             
                               && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
         | 
| 1689 1722 | 
             
                                const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
         | 
| 1690 1723 | 
             
                                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
         | 
| 1691 | 
            -
                                 | 
| 1692 | 
            -
                                ZSTD_storeSeq(seqStore, 0, anchor, iend,  | 
| 1724 | 
            +
                                offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;   /* swap offset_2 <=> offset_1 */
         | 
| 1725 | 
            +
                                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
         | 
| 1693 1726 | 
             
                                ip += matchLength;
         | 
| 1694 1727 | 
             
                                anchor = ip;
         | 
| 1695 1728 | 
             
                                continue;
         | 
| @@ -1703,16 +1736,20 @@ _storeSequence: | |
| 1703 1736 | 
             
                             && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
         | 
| 1704 1737 | 
             
                            /* store sequence */
         | 
| 1705 1738 | 
             
                            matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
         | 
| 1706 | 
            -
                             | 
| 1707 | 
            -
                            ZSTD_storeSeq(seqStore, 0, anchor, iend,  | 
| 1739 | 
            +
                            offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */
         | 
| 1740 | 
            +
                            ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
         | 
| 1708 1741 | 
             
                            ip += matchLength;
         | 
| 1709 1742 | 
             
                            anchor = ip;
         | 
| 1710 1743 | 
             
                            continue;   /* faster when present ... (?) */
         | 
| 1711 1744 | 
             
                }   }   }
         | 
| 1712 1745 |  | 
| 1713 | 
            -
                /*  | 
| 1714 | 
            -
             | 
| 1715 | 
            -
                 | 
| 1746 | 
            +
                /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
         | 
| 1747 | 
            +
                 * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
         | 
| 1748 | 
            +
                offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
         | 
| 1749 | 
            +
             | 
| 1750 | 
            +
                /* save reps for next block */
         | 
| 1751 | 
            +
                rep[0] = offset_1 ? offset_1 : offsetSaved1;
         | 
| 1752 | 
            +
                rep[1] = offset_2 ? offset_2 : offsetSaved2;
         | 
| 1716 1753 |  | 
| 1717 1754 | 
             
                /* Return the last literals size */
         | 
| 1718 1755 | 
             
                return (size_t)(iend - anchor);
         | 
| @@ -1881,19 +1918,20 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| 1881 1918 | 
             
                const BYTE* const dictEnd  = dictBase + dictLimit;
         | 
| 1882 1919 | 
             
                const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
         | 
| 1883 1920 | 
             
                const U32 windowLog = ms->cParams.windowLog;
         | 
| 1884 | 
            -
                const U32  | 
| 1921 | 
            +
                const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
         | 
| 1922 | 
            +
                const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
         | 
| 1885 1923 |  | 
| 1886 | 
            -
                searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, ZSTD_extDict)->searchMax;
         | 
| 1887 1924 | 
             
                U32 offset_1 = rep[0], offset_2 = rep[1];
         | 
| 1888 1925 |  | 
| 1889 1926 | 
             
                DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
         | 
| 1890 1927 |  | 
| 1928 | 
            +
                /* Reset the lazy skipping state */
         | 
| 1929 | 
            +
                ms->lazySkipping = 0;
         | 
| 1930 | 
            +
             | 
| 1891 1931 | 
             
                /* init */
         | 
| 1892 1932 | 
             
                ip += (ip == prefixStart);
         | 
| 1893 1933 | 
             
                if (searchMethod == search_rowHash) {
         | 
| 1894 | 
            -
                    ZSTD_row_fillHashCache(ms, base, rowLog,
         | 
| 1895 | 
            -
                                           MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
         | 
| 1896 | 
            -
                                           ms->nextToUpdate, ilimit);
         | 
| 1934 | 
            +
                    ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
         | 
| 1897 1935 | 
             
                }
         | 
| 1898 1936 |  | 
| 1899 1937 | 
             
                /* Match Loop */
         | 
| @@ -1905,7 +1943,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| 1905 1943 | 
             
            #endif
         | 
| 1906 1944 | 
             
                while (ip < ilimit) {
         | 
| 1907 1945 | 
             
                    size_t matchLength=0;
         | 
| 1908 | 
            -
                    size_t  | 
| 1946 | 
            +
                    size_t offBase = REPCODE1_TO_OFFBASE;
         | 
| 1909 1947 | 
             
                    const BYTE* start=ip+1;
         | 
| 1910 1948 | 
             
                    U32 curr = (U32)(ip-base);
         | 
| 1911 1949 |  | 
| @@ -1924,14 +1962,23 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| 1924 1962 | 
             
                    }   }
         | 
| 1925 1963 |  | 
| 1926 1964 | 
             
                    /* first search (depth 0) */
         | 
| 1927 | 
            -
                    {   size_t  | 
| 1928 | 
            -
                        size_t const ml2 =  | 
| 1965 | 
            +
                    {   size_t ofbCandidate = 999999999;
         | 
| 1966 | 
            +
                        size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
         | 
| 1929 1967 | 
             
                        if (ml2 > matchLength)
         | 
| 1930 | 
            -
                            matchLength = ml2, start = ip,  | 
| 1968 | 
            +
                            matchLength = ml2, start = ip, offBase = ofbCandidate;
         | 
| 1931 1969 | 
             
                    }
         | 
| 1932 1970 |  | 
| 1933 1971 | 
             
                    if (matchLength < 4) {
         | 
| 1934 | 
            -
                         | 
| 1972 | 
            +
                        size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
         | 
| 1973 | 
            +
                        ip += step + 1;   /* jump faster over incompressible sections */
         | 
| 1974 | 
            +
                        /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
         | 
| 1975 | 
            +
                         * In this mode we stop inserting every position into our tables, and only insert
         | 
| 1976 | 
            +
                         * positions that we search, which is one in step positions.
         | 
| 1977 | 
            +
                         * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
         | 
| 1978 | 
            +
                         * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
         | 
| 1979 | 
            +
                         * triggered once we've gone 2KB without finding any matches.
         | 
| 1980 | 
            +
                         */
         | 
| 1981 | 
            +
                        ms->lazySkipping = step > kLazySkippingStep;
         | 
| 1935 1982 | 
             
                        continue;
         | 
| 1936 1983 | 
             
                    }
         | 
| 1937 1984 |  | 
| @@ -1941,7 +1988,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| 1941 1988 | 
             
                        ip ++;
         | 
| 1942 1989 | 
             
                        curr++;
         | 
| 1943 1990 | 
             
                        /* check repCode */
         | 
| 1944 | 
            -
                        if ( | 
| 1991 | 
            +
                        if (offBase) {
         | 
| 1945 1992 | 
             
                            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
         | 
| 1946 1993 | 
             
                            const U32 repIndex = (U32)(curr - offset_1);
         | 
| 1947 1994 | 
             
                            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
         | 
| @@ -1953,18 +2000,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| 1953 2000 | 
             
                                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
         | 
| 1954 2001 | 
             
                                size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
         | 
| 1955 2002 | 
             
                                int const gain2 = (int)(repLength * 3);
         | 
| 1956 | 
            -
                                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32) | 
| 2003 | 
            +
                                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
         | 
| 1957 2004 | 
             
                                if ((repLength >= 4) && (gain2 > gain1))
         | 
| 1958 | 
            -
                                    matchLength = repLength,  | 
| 2005 | 
            +
                                    matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
         | 
| 1959 2006 | 
             
                        }   }
         | 
| 1960 2007 |  | 
| 1961 2008 | 
             
                        /* search match, depth 1 */
         | 
| 1962 | 
            -
                        {   size_t  | 
| 1963 | 
            -
                            size_t const ml2 =  | 
| 1964 | 
            -
                            int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32) | 
| 1965 | 
            -
                            int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32) | 
| 2009 | 
            +
                        {   size_t ofbCandidate = 999999999;
         | 
| 2010 | 
            +
                            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
         | 
| 2011 | 
            +
                            int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
         | 
| 2012 | 
            +
                            int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
         | 
| 1966 2013 | 
             
                            if ((ml2 >= 4) && (gain2 > gain1)) {
         | 
| 1967 | 
            -
                                matchLength = ml2,  | 
| 2014 | 
            +
                                matchLength = ml2, offBase = ofbCandidate, start = ip;
         | 
| 1968 2015 | 
             
                                continue;   /* search a better one */
         | 
| 1969 2016 | 
             
                        }   }
         | 
| 1970 2017 |  | 
| @@ -1973,7 +2020,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| 1973 2020 | 
             
                            ip ++;
         | 
| 1974 2021 | 
             
                            curr++;
         | 
| 1975 2022 | 
             
                            /* check repCode */
         | 
| 1976 | 
            -
                            if ( | 
| 2023 | 
            +
                            if (offBase) {
         | 
| 1977 2024 | 
             
                                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
         | 
| 1978 2025 | 
             
                                const U32 repIndex = (U32)(curr - offset_1);
         | 
| 1979 2026 | 
             
                                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
         | 
| @@ -1985,38 +2032,45 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( | |
| 1985 2032 | 
             
                                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
         | 
| 1986 2033 | 
             
                                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
         | 
| 1987 2034 | 
             
                                    int const gain2 = (int)(repLength * 4);
         | 
| 1988 | 
            -
                                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32) | 
| 2035 | 
            +
                                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
         | 
| 1989 2036 | 
             
                                    if ((repLength >= 4) && (gain2 > gain1))
         | 
| 1990 | 
            -
                                        matchLength = repLength,  | 
| 2037 | 
            +
                                        matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
         | 
| 1991 2038 | 
             
                            }   }
         | 
| 1992 2039 |  | 
| 1993 2040 | 
             
                            /* search match, depth 2 */
         | 
| 1994 | 
            -
                            {   size_t  | 
| 1995 | 
            -
                                size_t const ml2 =  | 
| 1996 | 
            -
                                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32) | 
| 1997 | 
            -
                                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32) | 
| 2041 | 
            +
                            {   size_t ofbCandidate = 999999999;
         | 
| 2042 | 
            +
                                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
         | 
| 2043 | 
            +
                                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
         | 
| 2044 | 
            +
                                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
         | 
| 1998 2045 | 
             
                                if ((ml2 >= 4) && (gain2 > gain1)) {
         | 
| 1999 | 
            -
                                    matchLength = ml2,  | 
| 2046 | 
            +
                                    matchLength = ml2, offBase = ofbCandidate, start = ip;
         | 
| 2000 2047 | 
             
                                    continue;
         | 
| 2001 2048 | 
             
                        }   }   }
         | 
| 2002 2049 | 
             
                        break;  /* nothing found : store previous solution */
         | 
| 2003 2050 | 
             
                    }
         | 
| 2004 2051 |  | 
| 2005 2052 | 
             
                    /* catch up */
         | 
| 2006 | 
            -
                    if ( | 
| 2007 | 
            -
                        U32 const matchIndex = (U32)((size_t)(start-base) -  | 
| 2053 | 
            +
                    if (OFFBASE_IS_OFFSET(offBase)) {
         | 
| 2054 | 
            +
                        U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
         | 
| 2008 2055 | 
             
                        const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
         | 
| 2009 2056 | 
             
                        const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
         | 
| 2010 2057 | 
             
                        while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
         | 
| 2011 | 
            -
                        offset_2 = offset_1; offset_1 = (U32) | 
| 2058 | 
            +
                        offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
         | 
| 2012 2059 | 
             
                    }
         | 
| 2013 2060 |  | 
| 2014 2061 | 
             
                    /* store sequence */
         | 
| 2015 2062 | 
             
            _storeSequence:
         | 
| 2016 2063 | 
             
                    {   size_t const litLength = (size_t)(start - anchor);
         | 
| 2017 | 
            -
                        ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32) | 
| 2064 | 
            +
                        ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
         | 
| 2018 2065 | 
             
                        anchor = ip = start + matchLength;
         | 
| 2019 2066 | 
             
                    }
         | 
| 2067 | 
            +
                    if (ms->lazySkipping) {
         | 
| 2068 | 
            +
                        /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
         | 
| 2069 | 
            +
                        if (searchMethod == search_rowHash) {
         | 
| 2070 | 
            +
                            ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
         | 
| 2071 | 
            +
                        }
         | 
| 2072 | 
            +
                        ms->lazySkipping = 0;
         | 
| 2073 | 
            +
                    }
         | 
| 2020 2074 |  | 
| 2021 2075 | 
             
                    /* check immediate repcode */
         | 
| 2022 2076 | 
             
                    while (ip <= ilimit) {
         | 
| @@ -2031,8 +2085,8 @@ _storeSequence: | |
| 2031 2085 | 
             
                            /* repcode detected we should take it */
         | 
| 2032 2086 | 
             
                            const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
         | 
| 2033 2087 | 
             
                            matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
         | 
| 2034 | 
            -
                             | 
| 2035 | 
            -
                            ZSTD_storeSeq(seqStore, 0, anchor, iend,  | 
| 2088 | 
            +
                            offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase;   /* swap offset history */
         | 
| 2089 | 
            +
                            ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
         | 
| 2036 2090 | 
             
                            ip += matchLength;
         | 
| 2037 2091 | 
             
                            anchor = ip;
         | 
| 2038 2092 | 
             
                            continue;   /* faster when present ... (?) */
         | 
| @@ -2098,7 +2152,6 @@ size_t ZSTD_compressBlock_lazy_extDict_row( | |
| 2098 2152 | 
             
            size_t ZSTD_compressBlock_lazy2_extDict_row(
         | 
| 2099 2153 | 
             
                    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         | 
| 2100 2154 | 
             
                    void const* src, size_t srcSize)
         | 
| 2101 | 
            -
             | 
| 2102 2155 | 
             
            {
         | 
| 2103 2156 | 
             
                return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
         | 
| 2104 2157 | 
             
            }
         |