zstdlib 0.10.0-x64-mingw32 → 0.11.0-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +8 -0
- data/ext/zstdlib_c/extconf.rb +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.2/zstdlib.c +5090 -0
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/allocations.h +55 -0
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/bits.h +200 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/bitstream.h +19 -60
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/compiler.h +26 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/cpu.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/entropy_common.c +12 -40
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.c +9 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse.h +5 -83
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse_decompress.c +7 -99
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/huf.h +65 -156
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/mem.h +39 -46
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.c +26 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.h +7 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/portability_macros.h +22 -3
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/threading.c +176 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/threading.h +5 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.c +2 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.h +8 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_common.c +1 -36
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_deps.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_internal.h +17 -118
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_trace.h +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/clevels.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/fse_compress.c +7 -124
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/huf_compress.c +234 -169
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress.c +1243 -538
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_internal.h +225 -151
- data/ext/zstdlib_c/zstd-1.5.5/lib/compress/zstd_compress_literals.c +235 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_literals.h +16 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.c +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_cwksp.h +128 -62
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.c +95 -33
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.c +433 -148
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.c +398 -345
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.h +4 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.c +5 -5
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm_geartab.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.c +106 -80
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.c +17 -9
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress.c +434 -441
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress_amd64.S +30 -39
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.c +4 -4
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress.c +205 -80
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.c +201 -81
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.h +6 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_internal.h +4 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zdict.h +53 -31
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd.h +580 -135
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd_errors.h +27 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzcompatibility.h +8 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzguts.h +10 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzlib.c +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzread.c +10 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzwrite.c +5 -5
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.c +46 -44
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.h +4 -1
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- metadata +80 -77
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/threading.c +0 -122
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_literals.c +0 -159
data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.c

@@ -1,5 +1,5 @@
- * Copyright (c)
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
@@ -10,6 +10,9 @@
+ #include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
+
+ #define kLazySkippingStep 8
@@ -197,8 +200,8 @@ ZSTD_DUBT_findBetterDictMatch (
- curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr,
- bestLength = matchLength, *offsetPtr =
+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex);
+ bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
@@ -218,7 +221,7 @@ ZSTD_DUBT_findBetterDictMatch (
- U32 const mIndex = curr - (U32)
+ U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex;
@@ -230,7 +233,7 @@ ZSTD_DUBT_findBetterDictMatch (
- size_t*
+ size_t* offBasePtr,
@@ -327,8 +330,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)
- bestLength = matchLength, *
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) )
+ bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex);
@@ -361,16 +364,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
-
+ offBasePtr, bestLength, nbCompares,
- U32 const mIndex = curr - (U32)
+ U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex;
- curr, (U32)bestLength, (U32)*
+ curr, (U32)bestLength, (U32)*offBasePtr, mIndex);
@@ -381,14 +384,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
- size_t*
+ size_t* offBasePtr,
- return ZSTD_DUBT_findBestMatch(ms, ip, iLimit,
+ return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode);
@@ -561,7 +564,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
- *offsetPtr =
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
@@ -598,7 +601,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
- *offsetPtr =
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
@@ -617,7 +620,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
- const BYTE* ip, U32 const mls)
+ const BYTE* ip, U32 const mls, U32 const lazySkipping)
@@ -632,6 +635,9 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+ /* Stop inserting every position when in the lazy skipping mode. */
+ if (lazySkipping)
+ break;
@@ -640,7 +646,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
- return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
@@ -684,14 +690,15 @@ size_t ZSTD_HcFindBestMatch(
- matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
-
+ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
+ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */
@@ -703,7 +710,7 @@ size_t ZSTD_HcFindBestMatch(
- *offsetPtr =
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
@@ -739,7 +746,7 @@ size_t ZSTD_HcFindBestMatch(
- *offsetPtr =
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
@@ -756,8 +763,6 @@ size_t ZSTD_HcFindBestMatch(
- #define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
- #define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
@@ -769,73 +774,19 @@ typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 repr
-
-
- # if defined(_MSC_VER) && defined(_WIN64)
- if (val != 0) {
- unsigned long r;
- _BitScanForward64(&r, val);
- return (U32)(r);
- } else {
- /* Should not reach this code path */
- __assume(0);
- }
- # elif (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))
- if (sizeof(size_t) == 4) {
- U32 mostSignificantWord = (U32)(val >> 32);
- U32 leastSignificantWord = (U32)val;
- if (leastSignificantWord == 0) {
- return 32 + (U32)__builtin_ctz(mostSignificantWord);
- } else {
- return (U32)__builtin_ctz(leastSignificantWord);
- }
- } else {
- return (U32)__builtin_ctzll(val);
- }
- # else
- /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count
- * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer
- */
- val = ~val & (val - 1ULL); /* Lowest set bit mask */
- val = val - ((val >> 1) & 0x5555555555555555);
- val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
- return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
- # endif
- }
-
- /* ZSTD_rotateRight_*():
- * Rotates a bitfield to the right by "count" bits.
- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
- */
- FORCE_INLINE_TEMPLATE
- U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
- assert(count < 64);
- count &= 0x3F; /* for fickle pattern recognition */
- return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
- }
-
- FORCE_INLINE_TEMPLATE
- U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
- assert(count < 32);
- count &= 0x1F; /* for fickle pattern recognition */
- return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
- }
-
- FORCE_INLINE_TEMPLATE
- U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
- assert(count < 16);
- count &= 0x0F; /* for fickle pattern recognition */
- return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
+ MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
+ return ZSTD_countTrailingZeros64(val);
- * value to reflect the update. Essentially cycles backwards from [
+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
-
-
-
+ U32 next = (*tagRow-1) & rowMask;
+ next += (next == 0) ? rowMask : 0; /* skip first position */
+ *tagRow = (BYTE)next;
+ return next;
@@ -849,7 +800,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
- FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable,
+ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
@@ -873,13 +824,13 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
-
+ BYTE const* const tagTable = ms->tagTable;
- U32 const hash = (U32)
+ U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
@@ -895,11 +846,12 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
-
+ BYTE const* tagTable, BYTE const* base,
- U32 const rowLog, U32 const mls
+ U32 const rowLog, U32 const mls,
+ U64 const hashSalt)
- U32 const newHash = (U32)
+ U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
@@ -917,22 +869,21 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
-
+ BYTE* const tagTable = ms->tagTable;
- U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
- : (U32)
+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
+ : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
- BYTE* tagRow =
- Explicit cast allows us to get exact desired position within each row */
+ BYTE* tagRow = tagTable + relRow;
- assert(hash ==
-
+ assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
+ tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
@@ -980,7 +931,35 @@ void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
- ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /*
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */);
+ }
+
+ /* Returns the mask width of bits group of which will be set to 1. Given not all
+ * architectures have easy movemask instruction, this helps to iterate over
+ * groups of bits easier and faster.
+ */
+ FORCE_INLINE_TEMPLATE U32
+ ZSTD_row_matchMaskGroupWidth(const U32 rowEntries)
+ {
+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
+ assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
+ (void)rowEntries;
+ #if defined(ZSTD_ARCH_ARM_NEON)
+ /* NEON path only works for little endian */
+ if (!MEM_isLittleEndian()) {
+ return 1;
+ }
+ if (rowEntries == 16) {
+ return 4;
+ }
+ if (rowEntries == 32) {
+ return 2;
+ }
+ if (rowEntries == 64) {
+ return 1;
+ }
+ #endif
+ return 1;
@@ -1003,71 +982,82 @@ ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U
-
-
-
-
+ #if defined(ZSTD_ARCH_ARM_NEON)
+ FORCE_INLINE_TEMPLATE ZSTD_VecMask
+ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped)
+ {
+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
+ if (rowEntries == 16) {
+ /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits.
+ * After that groups of 4 bits represent the equalMask. We lower
+ * all bits except the highest in these groups by doing AND with
+ * 0x88 = 0b10001000.
+ */
+ const uint8x16_t chunk = vld1q_u8(src);
+ const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
+ const uint8x8_t res = vshrn_n_u16(equalMask, 4);
+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
+ return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull;
+ } else if (rowEntries == 32) {
+ /* Same idea as with rowEntries == 16 but doing AND with
+ * 0x55 = 0b01010101.
+ */
+ const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src);
+ const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
+ const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
+ const uint8x16_t dup = vdupq_n_u8(tag);
+ const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6);
+ const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6);
+ const uint8x8_t res = vsli_n_u8(t0, t1, 4);
+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ;
+ return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull;
+ } else { /* rowEntries == 64 */
+ const uint8x16x4_t chunk = vld4q_u8(src);
+ const uint8x16_t dup = vdupq_n_u8(tag);
+ const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
+ const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
+ const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
+ const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
+
+ const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
+ const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
+ const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
+ const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
+ const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
+ return ZSTD_rotateRight_U64(matches, headGrouped);
+ }
+ }
+ #endif
+
+ /* Returns a ZSTD_VecMask (U64) that has the nth group (determined by
+ * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag"
+ * matches the hash at the nth position in a row of the tagTable.
+ * Each row is a circular buffer beginning at the value of "headGrouped". So we
+ * must rotate the "matches" bitfield to match up with the actual layout of the
+ * entries within the hashTable */
- ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32
+ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
- const BYTE* const src = tagRow
+ const BYTE* const src = tagRow;
+ assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
- return ZSTD_row_getSSEMask(rowEntries / 16, src, tag,
+ return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped);
-
- const uint8x16_t chunk = vld1q_u8(src);
- const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
- const uint16x8_t t0 = vshlq_n_u16(equalMask, 7);
- const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14));
- const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14));
- const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28));
- const U16 hi = (U16)vgetq_lane_u8(t3, 8);
- const U16 lo = (U16)vgetq_lane_u8(t3, 0);
- return ZSTD_rotateRight_U16((hi << 8) | lo, head);
- } else if (rowEntries == 32) {
- const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src);
- const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
- const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
- const uint8x16_t equalMask0 = vceqq_u8(chunk0, vdupq_n_u8(tag));
- const uint8x16_t equalMask1 = vceqq_u8(chunk1, vdupq_n_u8(tag));
- const int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0));
- const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1));
- const uint8x8_t t0 = vreinterpret_u8_s8(pack0);
- const uint8x8_t t1 = vreinterpret_u8_s8(pack1);
- const uint8x8_t t2 = vsri_n_u8(t1, t0, 2);
- const uint8x8x2_t t3 = vuzp_u8(t2, t0);
- const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4);
- const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0);
- return ZSTD_rotateRight_U32(matches, head);
- } else { /* rowEntries == 64 */
- const uint8x16x4_t chunk = vld4q_u8(src);
- const uint8x16_t dup = vdupq_n_u8(tag);
- const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
- const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
- const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
- const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
-
- const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
- const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
- const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
- const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
- const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
- const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
- return ZSTD_rotateRight_U64(matches, head);
- }
+ return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
- { const
+ { const int chunkSize = sizeof(size_t);
@@ -1100,11 +1090,11 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head,
- return ZSTD_rotateRight_U16((U16)matches,
+ return ZSTD_rotateRight_U16((U16)matches, headGrouped);
- return ZSTD_rotateRight_U32((U32)matches,
+ return ZSTD_rotateRight_U32((U32)matches, headGrouped);
- return ZSTD_rotateRight_U64((U64)matches,
+ return ZSTD_rotateRight_U64((U64)matches, headGrouped);
@@ -1134,7 +1124,7 @@ size_t ZSTD_RowFindBestMatch(
-
+ BYTE* const tagTable = ms->tagTable;
@@ -1152,8 +1142,11 @@ size_t ZSTD_RowFindBestMatch(
+ const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
+ const U64 hashSalt = ms->hashSalt;
+ U32 hash;
@@ -1177,7 +1170,7 @@ size_t ZSTD_RowFindBestMatch(
-
+ BYTE* const dmsTagTable = dms->tagTable;
@@ -1187,23 +1180,34 @@ size_t ZSTD_RowFindBestMatch(
-
+ if (!ms->lazySkipping) {
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+ hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
+ } else {
+ /* Stop inserting every position when in the lazy skipping mode.
+ * The hash cache is also not kept up to date in this mode.
+ */
+ hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
+ ms->nextToUpdate = curr;
+ }
+ ms->hashSaltEntropy += hash; /* collect salt entropy */
+
- U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
- U32 const
+ U32 const headGrouped = (*tagRow & rowMask) * groupWidth;
- ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag,
+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
- for (; (matches > 0) && (nbAttempts > 0);
- U32 const matchPos = (
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
+ U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
+ if(matchPos == 0) continue;
@@ -1213,13 +1217,14 @@ size_t ZSTD_RowFindBestMatch(
+ --nbAttempts;
- tagRow[pos
+ tagRow[pos] = (BYTE)tag;
@@ -1233,7 +1238,8 @@ size_t ZSTD_RowFindBestMatch(
-
+ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
+ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */
@@ -1245,7 +1251,7 @@ size_t ZSTD_RowFindBestMatch(
- *offsetPtr =
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
@@ -1263,19 +1269,21 @@ size_t ZSTD_RowFindBestMatch(
- { U32 const
+ { U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth;
- ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag,
+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
- for (; (matches > 0) && (nbAttempts > 0);
- U32 const matchPos = (
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
+ U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
+ if(matchPos == 0) continue;
+ --nbAttempts;
@@ -1294,7 +1302,7 @@ size_t ZSTD_RowFindBestMatch(
- *offsetPtr =
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
@@ -1304,14 +1312,10 @@ size_t ZSTD_RowFindBestMatch(
- typedef size_t (*searchMax_f)(
- ZSTD_matchState_t* ms,
- const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
-
- *
- *
- *
+ * Generate search functions templated on (dictMode, mls, rowLog).
+ * These functions are outlined for code size & compilation time.
+ * ZSTD_searchMax() dispatches to the correct implementation function.
@@ -1329,25 +1333,25 @@ typedef size_t (*searchMax_f)(
- typedef struct {
- searchMax_f searchMax;
- } ZSTD_LazyVTable;
- #define
-
-
- const BYTE* ip, const BYTE* const iLimit, \
- size_t* offsetPtr) \
- { \
- assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
- return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
- } \
- static const ZSTD_LazyVTable ZSTD_BtVTable_##dictMode##_##mls = { \
- ZSTD_BtFindBestMatch_##dictMode##_##mls \
- };
+ #define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls
+ #define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls
+ #define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog
- #define
-
+ #define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE
+
+ #define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \
+ ZSTD_matchState_t* ms, \
+ const BYTE* ip, const BYTE* const iLimit, \
+ size_t* offBasePtr) \
+ { \
+ assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
+ return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \
+ } \
+
+ #define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \
@@ -1355,12 +1359,9 @@ typedef struct {
- static const ZSTD_LazyVTable ZSTD_HcVTable_##dictMode##_##mls = { \
- ZSTD_HcFindBestMatch_##dictMode##_##mls \
- };
- #define
-
+ #define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \
@@ -1369,9 +1370,6 @@ typedef struct {
- static const ZSTD_LazyVTable ZSTD_RowVTable_##dictMode##_##mls##_##rowLog = { \
- ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog \
- };
@@ -1394,84 +1392,103 @@ typedef struct {
- /* Generate
- ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG,
- /* Generate
- ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS,
- /* Generate
- ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS,
-
- #define GEN_ZSTD_BT_VTABLE_ARRAY(dictMode) \
- { \
- &ZSTD_BtVTable_##dictMode##_4, \
- &ZSTD_BtVTable_##dictMode##_5, \
- &ZSTD_BtVTable_##dictMode##_6 \
- }
+ /* Generate row search fns for each combination of (dictMode, mls, rowLog) */
+ ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN)
+ /* Generate binary Tree search fns for each combination of (dictMode, mls) */
+ ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN)
+ /* Generate hash chain search fns for each combination of (dictMode, mls) */
+ ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN)
-
- { \
- &ZSTD_HcVTable_##dictMode##_4, \
- &ZSTD_HcVTable_##dictMode##_5, \
- &ZSTD_HcVTable_##dictMode##_6 \
- }
-
- #define GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, mls) \
- { \
- &ZSTD_RowVTable_##dictMode##_##mls##_4, \
- &ZSTD_RowVTable_##dictMode##_##mls##_5, \
- &ZSTD_RowVTable_##dictMode##_##mls##_6 \
- }
+ typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
- #define
-
-
-
-
+ #define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls) \
+ case mls: \
+ return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
+ #define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls) \
+ case mls: \
+ return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
+ #define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog) \
+ case rowLog: \
+ return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr);
+
+ #define ZSTD_SWITCH_MLS(X, dictMode) \
+ switch (mls) { \
+ ZSTD_FOR_EACH_MLS(X, dictMode) \
- #define
-
-
-
-
-
-
-
-
-
-
-
+ #define ZSTD_SWITCH_ROWLOG(dictMode, mls) \
+ case mls: \
+ switch (rowLog) { \
+ ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \
+ } \
+ ZSTD_UNREACHABLE; \
+ break;
+
+ #define ZSTD_SWITCH_SEARCH_METHOD(dictMode) \
+ switch (searchMethod) { \
+ case search_hashChain: \
+ ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \
+ break; \
+ case search_binaryTree: \
+ ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \
+ break; \
+ case search_rowHash: \
+ ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \
+ break; \
+ } \
+ ZSTD_UNREACHABLE;
- *
- *
- *
- *
+ * Searches for the longest match at @p ip.
+ * Dispatches to the correct implementation function based on the
+ * (searchMethod, dictMode, mls, rowLog). We use switch statements
+ * here instead of using an indirect function call through a function
+ * pointer because after Spectre and Meltdown mitigations, indirect
+ * function calls can be very costly, especially in the kernel.
+ *
+ * NOTE: dictMode and searchMethod should be templated, so those switch
+ * statements should be optimized out. Only the mls & rowLog switches
+ * should be left.
+ *
+ * @param ms The match state.
+ * @param ip The position to search at.
+ * @param iend The end of the input data.
+ * @param[out] offsetPtr Stores the match offset into this pointer.
+ * @param mls The minimum search length, in the range [4, 6].
+ * @param rowLog The row log (if applicable), in the range [4, 6].
+ * @param searchMethod The search method to use (templated).
+ * @param dictMode The dictMode (templated).
+ *
+ * @returns The length of the longest match found, or < mls if no match is found.
+ * If a match is found its offset is stored in @p offsetPtr.
-
-
-
+ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
+ ZSTD_matchState_t* ms,
+ const BYTE* ip,
+ const BYTE* iend,
+ size_t* offsetPtr,
+ U32 const mls,
+ U32 const rowLog,
+ searchMethod_e const searchMethod,
+ ZSTD_dictMode_e const dictMode)
-
-
-
-
-
-
-
-
- switch (searchMethod) {
- case search_hashChain:
- return hcVTables[dictMode][mls - 4];
- case search_binaryTree:
- return btVTables[dictMode][mls - 4];
- case search_rowHash:
- return rowVTables[dictMode][mls - 4][rowLog - 4];
- default:
- return NULL;
+ if (dictMode == ZSTD_noDict) {
+ ZSTD_SWITCH_SEARCH_METHOD(noDict)
+ } else if (dictMode == ZSTD_extDict) {
+ ZSTD_SWITCH_SEARCH_METHOD(extDict)
+ } else if (dictMode == ZSTD_dictMatchState) {
+ ZSTD_SWITCH_SEARCH_METHOD(dictMatchState)
+ } else if (dictMode == ZSTD_dedicatedDictSearch) {
+ ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch)
+ ZSTD_UNREACHABLE;
+ return 0;
+ /* *******************************
+ * Common parser - lazy strategy
+ *********************************/
+
@@ -1488,9 +1505,11 @@ ZSTD_compressBlock_lazy_generic(
+ const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
+ const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
-
- U32
+ U32 offset_1 = rep[0], offset_2 = rep[1];
+ U32 offsetSaved1 = 0, offsetSaved2 = 0;
@@ -1505,16 +1524,14 @@ ZSTD_compressBlock_lazy_generic(
- assert(searchMax != NULL);
-
- if (offset_2 > maxRep)
- if (offset_1 > maxRep)
+ if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
+ if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
@@ -1523,11 +1540,11 @@ ZSTD_compressBlock_lazy_generic(
+ /* Reset the lazy skipping state */
+ ms->lazySkipping = 0;
+
-
- ZSTD_row_fillHashCache(ms, base, rowLog,
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
- ms->nextToUpdate, ilimit);
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
@@ -1539,7 +1556,7 @@ ZSTD_compressBlock_lazy_generic(
- size_t
+ size_t offBase = REPCODE1_TO_OFFBASE;
@@ -1564,14 +1581,23 @@ ZSTD_compressBlock_lazy_generic(
|
|
1564
1581
|
}
|
1565
1582
|
|
1566
1583
|
/* first search (depth 0) */
|
1567
|
-
{ size_t
|
1568
|
-
size_t const ml2 =
|
1584
|
+
{ size_t offbaseFound = 999999999;
|
1585
|
+
size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode);
|
1569
1586
|
if (ml2 > matchLength)
|
1570
|
-
matchLength = ml2, start = ip,
|
1587
|
+
matchLength = ml2, start = ip, offBase = offbaseFound;
|
1571
1588
|
}
|
1572
1589
|
|
1573
1590
|
if (matchLength < 4) {
|
1574
|
-
|
1591
|
+
size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */;
|
1592
|
+
ip += step;
|
1593
|
+
/* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
|
1594
|
+
* In this mode we stop inserting every position into our tables, and only insert
|
1595
|
+
* positions that we search, which is one in step positions.
|
1596
|
+
* The exact cutoff is flexible, I've just chosen a number that is reasonably high,
|
1597
|
+
* so we minimize the compression ratio loss in "normal" scenarios. This mode gets
|
1598
|
+
* triggered once we've gone 2KB without finding any matches.
|
1599
|
+
*/
|
1600
|
+
ms->lazySkipping = step > kLazySkippingStep;
|
1575
1601
|
continue;
|
1576
1602
|
}
|
1577
1603
|
|
@@ -1581,12 +1607,12 @@ ZSTD_compressBlock_lazy_generic(
|
|
1581
1607
|
DEBUGLOG(7, "search depth 1");
|
1582
1608
|
ip ++;
|
1583
1609
|
if ( (dictMode == ZSTD_noDict)
|
1584
|
-
&& (
|
1610
|
+
&& (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
|
1585
1611
|
size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
|
1586
1612
|
int const gain2 = (int)(mlRep * 3);
|
1587
|
-
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)
|
1613
|
+
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
|
1588
1614
|
if ((mlRep >= 4) && (gain2 > gain1))
|
1589
|
-
matchLength = mlRep,
|
1615
|
+
matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
|
1590
1616
|
}
|
1591
1617
|
if (isDxS) {
|
1592
1618
|
const U32 repIndex = (U32)(ip - base) - offset_1;
|
@@ -1598,17 +1624,17 @@ ZSTD_compressBlock_lazy_generic(
|
|
1598
1624
|
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
1599
1625
|
size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
1600
1626
|
int const gain2 = (int)(mlRep * 3);
|
1601
|
-
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)
|
1627
|
+
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
|
1602
1628
|
if ((mlRep >= 4) && (gain2 > gain1))
|
1603
|
-
matchLength = mlRep,
|
1629
|
+
matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
|
1604
1630
|
}
|
1605
1631
|
}
|
1606
|
-
{ size_t
|
1607
|
-
size_t const ml2 =
|
1608
|
-
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
|
1609
|
-
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
|
1632
|
+
{ size_t ofbCandidate=999999999;
|
1633
|
+
size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
|
1634
|
+
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
|
1635
|
+
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
|
1610
1636
|
if ((ml2 >= 4) && (gain2 > gain1)) {
|
1611
|
-
matchLength = ml2,
|
1637
|
+
matchLength = ml2, offBase = ofbCandidate, start = ip;
|
1612
1638
|
continue; /* search a better one */
|
1613
1639
|
} }
|
1614
1640
|
|
@@ -1617,12 +1643,12 @@ ZSTD_compressBlock_lazy_generic(
  DEBUGLOG(7, "search depth 2");
  ip ++;
  if ( (dictMode == ZSTD_noDict)
- && (
+ && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  int const gain2 = (int)(mlRep * 4);
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
  if ((mlRep >= 4) && (gain2 > gain1))
- matchLength = mlRep,
+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
  }
  if (isDxS) {
  const U32 repIndex = (U32)(ip - base) - offset_1;
@@ -1634,17 +1660,17 @@ ZSTD_compressBlock_lazy_generic(
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  int const gain2 = (int)(mlRep * 4);
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
  if ((mlRep >= 4) && (gain2 > gain1))
- matchLength = mlRep,
+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
  }
  }
- { size_t
- size_t const ml2 =
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+ { size_t ofbCandidate=999999999;
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
  if ((ml2 >= 4) && (gain2 > gain1)) {
- matchLength = ml2,
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
  continue;
  } } }
  break; /* nothing found : store previous solution */
@@ -1655,26 +1681,33 @@ ZSTD_compressBlock_lazy_generic(
  * notably if `value` is unsigned, resulting in a large positive `-value`.
  */
  /* catch up */
- if (
+ if (OFFBASE_IS_OFFSET(offBase)) {
  if (dictMode == ZSTD_noDict) {
- while ( ((start > anchor) & (start -
- && (start[-1] == (start-
+ while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest))
+ && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) ) /* only search for offset within prefix */
  { start--; matchLength++; }
  }
  if (isDxS) {
- U32 const matchIndex = (U32)((size_t)(start-base) -
+ U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
  const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
  const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
  while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
  }
- offset_2 = offset_1; offset_1 = (U32)
+ offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
  }
  /* store sequence */
 _storeSequence:
  { size_t const litLength = (size_t)(start - anchor);
- ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
  anchor = ip = start + matchLength;
  }
+ if (ms->lazySkipping) {
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
+ if (searchMethod == search_rowHash) {
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+ }
+ ms->lazySkipping = 0;
+ }

  /* check immediate repcode */
  if (isDxS) {
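The OFFBASE_IS_OFFSET / OFFBASE_TO_OFFSET / REPCODE1_TO_OFFBASE macros used above suggest a single "offBase" value space that can hold either a repcode or a real offset. One plausible encoding, assumed here purely for illustration rather than copied from zstd_compress_internal.h, keeps repcodes at 1..3 and shifts real offsets up by 3, so the catch-up branch only decodes values that represent real offsets.

```c
#include <assert.h>
#include <stdio.h>

typedef unsigned U32;

/* Assumed offBase encoding (illustration only): repcodes 1..REP_NUM share the
 * value space with real offsets shifted up by REP_NUM. */
#define REP_NUM                 3
#define REPCODE1_TO_OFFBASE_    1u
#define OFFSET_TO_OFFBASE_(o)   ((o) + REP_NUM)   /* requires o > 0 */
#define OFFBASE_IS_OFFSET_(ob)  ((ob) > REP_NUM)
#define OFFBASE_TO_OFFSET_(ob)  ((ob) - REP_NUM)

int main(void)
{
    U32 const offBaseRep  = REPCODE1_TO_OFFBASE_;    /* "reuse repcode 1" */
    U32 const offBaseReal = OFFSET_TO_OFFBASE_(100); /* a real 100-byte offset */

    assert(!OFFBASE_IS_OFFSET_(offBaseRep));
    assert(OFFBASE_IS_OFFSET_(offBaseReal));
    assert(OFFBASE_TO_OFFSET_(offBaseReal) == 100);

    /* Mirrors the "catch up" branch above: only real offsets are decoded back
     * into offset_1; a repcode leaves the offset history untouched. */
    if (OFFBASE_IS_OFFSET_(offBaseReal))
        printf("offset_1 becomes %u\n", OFFBASE_TO_OFFSET_(offBaseReal));
    return 0;
}
```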
@@ -1688,8 +1721,8 @@ _storeSequence:
  && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
  const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
-
- ZSTD_storeSeq(seqStore, 0, anchor, iend,
+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset_2 <=> offset_1 */
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
  ip += matchLength;
  anchor = ip;
  continue;
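The two-line swap above rotates the repcode history when the second repcode matches: offset_2 is promoted to offset_1 and the old offset_1 is demoted, with offBase reused as scratch. A tiny standalone sketch of that rotation, with invented values:

```c
#include <stdio.h>

typedef unsigned U32;

int main(void)
{
    U32 offset_1 = 64, offset_2 = 32, offBase;
    /* repcode 2 matched: promote it, demote the previous offset_1 */
    offBase = offset_2; offset_2 = offset_1; offset_1 = offBase;
    printf("offset_1=%u offset_2=%u\n", offset_1, offset_2); /* 32 64 */
    return 0;
}
```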
@@ -1703,16 +1736,20 @@ _storeSequence:
  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
  /* store sequence */
  matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
-
- ZSTD_storeSeq(seqStore, 0, anchor, iend,
+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
  ip += matchLength;
  anchor = ip;
  continue; /* faster when present ... (?) */
  } } }

- /*
-
-
+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
+
+ /* save reps for next block */
+ rep[0] = offset_1 ? offset_1 : offsetSaved1;
+ rep[1] = offset_2 ? offset_2 : offsetSaved2;

  /* Return the last literals size */
  return (size_t)(iend - anchor);
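The new "save reps" block above writes the repcode history back into rep[] at the end of the block, falling back to the saved values when the live offsets never became valid. The sketch below restates that logic as a standalone function, under the assumption (taken from the comment's cross-reference to ZSTD_compressBlock_fast_noDict) that offsetSaved1/offsetSaved2 hold repcodes parked at block start and that zero means "never became valid"; the numeric inputs are invented.

```c
#include <stdio.h>

typedef unsigned U32;

/* Sketch of the "save reps for next block" step. */
static void save_reps(U32 rep[2], U32 offset_1, U32 offset_2,
                      U32 offsetSaved1, U32 offsetSaved2)
{
    /* If offset_1 started invalid and became valid, rotate the saved offsets. */
    offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
    rep[0] = offset_1 ? offset_1 : offsetSaved1;
    rep[1] = offset_2 ? offset_2 : offsetSaved2;
}

int main(void)
{
    U32 rep[2];
    /* offset_1 became valid during the block, offset_2 never did. */
    save_reps(rep, 100, 0, 8, 4);
    printf("rep[0]=%u rep[1]=%u\n", rep[0], rep[1]); /* 100 8 */
    return 0;
}
```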
@@ -1881,19 +1918,20 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  const BYTE* const dictEnd = dictBase + dictLimit;
  const BYTE* const dictStart = dictBase + ms->window.lowLimit;
  const U32 windowLog = ms->cParams.windowLog;
- const U32
+ const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
+ const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);

- searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, ZSTD_extDict)->searchMax;
  U32 offset_1 = rep[0], offset_2 = rep[1];

  DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);

+ /* Reset the lazy skipping state */
+ ms->lazySkipping = 0;
+
  /* init */
  ip += (ip == prefixStart);
  if (searchMethod == search_rowHash) {
- ZSTD_row_fillHashCache(ms, base, rowLog,
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
- ms->nextToUpdate, ilimit);
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
  }

  /* Match Loop */
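From its usage above, BOUNDED(4, x, 6) reads as a clamp of x into the range [4, 6], which is how mls and rowLog are now derived from minMatch and searchLog. A minimal sketch of that reading follows; the macro name and semantics are assumed from usage here, not taken from the zstd headers.

```c
#include <stdio.h>

typedef unsigned U32;

/* Assumed semantics of BOUNDED(lo, v, hi): clamp v into [lo, hi]. */
#define MIN_(a,b)          ((a) < (b) ? (a) : (b))
#define MAX_(a,b)          ((a) > (b) ? (a) : (b))
#define BOUNDED_(lo,v,hi)  MAX_((lo), MIN_((v), (hi)))

int main(void)
{
    /* e.g. minMatch=3 and searchLog=9 both get pulled into the 4..6 range */
    U32 const mls    = BOUNDED_(4u, 3u, 6u);
    U32 const rowLog = BOUNDED_(4u, 9u, 6u);
    printf("mls=%u rowLog=%u\n", mls, rowLog); /* 4 6 */
    return 0;
}
```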
@@ -1905,7 +1943,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 #endif
  while (ip < ilimit) {
  size_t matchLength=0;
- size_t
+ size_t offBase = REPCODE1_TO_OFFBASE;
  const BYTE* start=ip+1;
  U32 curr = (U32)(ip-base);

@@ -1924,14 +1962,23 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  } }

  /* first search (depth 0) */
- { size_t
- size_t const ml2 =
+ { size_t ofbCandidate = 999999999;
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
  if (ml2 > matchLength)
- matchLength = ml2, start = ip,
+ matchLength = ml2, start = ip, offBase = ofbCandidate;
  }

  if (matchLength < 4) {
-
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
+ ip += step + 1; /* jump faster over incompressible sections */
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+ * In this mode we stop inserting every position into our tables, and only insert
+ * positions that we search, which is one in step positions.
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+ * triggered once we've gone 2KB without finding any matches.
+ */
+ ms->lazySkipping = step > kLazySkippingStep;
  continue;
  }

@@ -1941,7 +1988,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  ip ++;
  curr++;
  /* check repCode */
- if (
+ if (offBase) {
  const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
  const U32 repIndex = (U32)(curr - offset_1);
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
@@ -1953,18 +2000,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  int const gain2 = (int)(repLength * 3);
- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
  if ((repLength >= 4) && (gain2 > gain1))
- matchLength = repLength,
+ matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
  } }

  /* search match, depth 1 */
- { size_t
- size_t const ml2 =
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+ { size_t ofbCandidate = 999999999;
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
  if ((ml2 >= 4) && (gain2 > gain1)) {
- matchLength = ml2,
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
  continue; /* search a better one */
  } }

@@ -1973,7 +2020,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  ip ++;
  curr++;
  /* check repCode */
- if (
+ if (offBase) {
  const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
  const U32 repIndex = (U32)(curr - offset_1);
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
@@ -1985,38 +2032,45 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  int const gain2 = (int)(repLength * 4);
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
  if ((repLength >= 4) && (gain2 > gain1))
- matchLength = repLength,
+ matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
  } }

  /* search match, depth 2 */
- { size_t
- size_t const ml2 =
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
+ { size_t ofbCandidate = 999999999;
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
  if ((ml2 >= 4) && (gain2 > gain1)) {
- matchLength = ml2,
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
  continue;
  } } }
  break; /* nothing found : store previous solution */
  }

  /* catch up */
- if (
- U32 const matchIndex = (U32)((size_t)(start-base) -
+ if (OFFBASE_IS_OFFSET(offBase)) {
+ U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
  const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
  const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
  while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
- offset_2 = offset_1; offset_1 = (U32)
+ offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
  }

  /* store sequence */
 _storeSequence:
  { size_t const litLength = (size_t)(start - anchor);
- ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
  anchor = ip = start + matchLength;
  }
+ if (ms->lazySkipping) {
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
+ if (searchMethod == search_rowHash) {
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+ }
+ ms->lazySkipping = 0;
+ }

  /* check immediate repcode */
  while (ip <= ilimit) {
@@ -2031,8 +2085,8 @@ _storeSequence:
  /* repcode detected we should take it */
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
-
- ZSTD_storeSeq(seqStore, 0, anchor, iend,
+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset history */
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
  ip += matchLength;
  anchor = ip;
  continue; /* faster when present ... (?) */
@@ -2098,7 +2152,6 @@ size_t ZSTD_compressBlock_lazy_extDict_row(
 size_t ZSTD_compressBlock_lazy2_extDict_row(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  void const* src, size_t srcSize)
-
 {
  return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
 }