zstd-ruby 1.5.4.1 → 1.5.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +3 -3
- data/ext/zstdruby/extconf.rb +2 -0
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +30 -5
- data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
- data/ext/zstdruby/libzstd/common/compiler.h +4 -0
- data/ext/zstdruby/libzstd/common/pool.c +1 -1
- data/ext/zstdruby/libzstd/common/threading.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_common.c +0 -35
- data/ext/zstdruby/libzstd/common/zstd_internal.h +0 -5
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +209 -104
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +67 -13
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +123 -59
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +92 -62
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +2 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +5 -4
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +4 -4
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +1 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +41 -38
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +16 -17
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +5 -0
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +2 -2
- data/ext/zstdruby/libzstd/zstd.h +258 -212
- data/ext/zstdruby/streaming_compress.c +23 -3
- data/ext/zstdruby/streaming_decompress.c +23 -3
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +3 -2
data/ext/zstdruby/libzstd/compress/zstd_lazy.c:

```diff
@@ -12,6 +12,8 @@
 #include "zstd_lazy.h"
 #include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
 
+#define kLazySkippingStep 8
+
 
 /*-*************************************
 *  Binary Tree search
@@ -618,7 +620,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
 FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
                         ZSTD_matchState_t* ms,
                         const ZSTD_compressionParameters* const cParams,
-                        const BYTE* ip, U32 const mls)
+                        const BYTE* ip, U32 const mls, U32 const lazySkipping)
 {
     U32* const hashTable = ms->hashTable;
     const U32 hashLog = cParams->hashLog;
@@ -633,6 +635,9 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
         NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
         hashTable[h] = idx;
         idx++;
+        /* Stop inserting every position when in the lazy skipping mode. */
+        if (lazySkipping)
+            break;
     }
 
     ms->nextToUpdate = target;
@@ -641,7 +646,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
 
 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
 }
 
 /* inlining is important to hardwire a hot branch (template emulation) */
@@ -685,7 +690,7 @@ size_t ZSTD_HcFindBestMatch(
     }
 
     /* HC4 match finder */
-    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
 
     for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
         size_t currentMl=0;
@@ -758,7 +763,6 @@ size_t ZSTD_HcFindBestMatch(
 * (SIMD) Row-based matchfinder
 ***********************************/
 /* Constants for row-based hash */
-#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
 #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
 #define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
 
@@ -774,39 +778,15 @@ MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
     return ZSTD_countTrailingZeros64(val);
 }
 
-/* ZSTD_rotateRight_*():
- * Rotates a bitfield to the right by "count" bits.
- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
- */
-FORCE_INLINE_TEMPLATE
-U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
-    assert(count < 64);
-    count &= 0x3F; /* for fickle pattern recognition */
-    return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
-}
-
-FORCE_INLINE_TEMPLATE
-U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
-    assert(count < 32);
-    count &= 0x1F; /* for fickle pattern recognition */
-    return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
-}
-
-FORCE_INLINE_TEMPLATE
-U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
-    assert(count < 16);
-    count &= 0x0F; /* for fickle pattern recognition */
-    return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
-}
-
 /* ZSTD_row_nextIndex():
  * Returns the next index to insert at within a tagTable row, and updates the "head"
- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
  */
 FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
-    U32 const next = (*tagRow - 1) & rowMask;
-    *tagRow = (BYTE)next;
-    return next;
+    U32 next = (*tagRow-1) & rowMask;
+    next += (next == 0) ? rowMask : 0; /* skip first position */
+    *tagRow = (BYTE)next;
+    return next;
 }
 
 /* ZSTD_isAligned():
```
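The hunk above drops the `ZSTD_rotateRight_*` helpers and the tag offset, and reserves slot 0 of each tag row for the "head" byte, so `ZSTD_row_nextIndex` now cycles through [1, entries per row) instead of [0, entries per row). The sketch below is illustrative only (plain C, not library code); the typedefs and the demo loop in `main` are assumptions, but the index update mirrors the new function shown above.

```c
#include <stdio.h>

typedef unsigned char BYTE;
typedef unsigned int  U32;

/* Mirrors the updated ZSTD_row_nextIndex() above: the head index cycles
 * backwards through [1, rowEntries) and never returns 0, which the new
 * row layout reserves. rowMask must be (rowEntries - 1), rowEntries a
 * power of two. */
static U32 row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
    U32 next = (U32)(*tagRow - 1) & rowMask;
    next += (next == 0) ? rowMask : 0; /* skip first position */
    *tagRow = (BYTE)next;
    return next;
}

int main(void) {
    BYTE head = 0;               /* head value stored in byte 0 of the tag row */
    U32 const rowMask = 16 - 1;  /* 16 entries per row */
    for (int i = 0; i < 20; i++)
        printf("%u ", row_nextIndex(&head, rowMask));
    printf("\n"); /* prints 15 14 ... 1 15 14 ... : index 0 is never handed out */
    return 0;
}
```

Reserving index 0 for the head is also why the match loops later in this diff gain an `if(matchPos == 0) continue;` guard.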
data/ext/zstdruby/libzstd/compress/zstd_lazy.c:

```diff
@@ -820,7 +800,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
 /* ZSTD_row_prefetch():
  * Performs prefetching for the hashTable and tagTable at a given row.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
     PREFETCH_L1(hashTable + relRow);
     if (rowLog >= 5) {
         PREFETCH_L1(hashTable + relRow + 16);
@@ -844,13 +824,13 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
                                    U32 idx, const BYTE* const iLimit)
 {
     U32 const* const hashTable = ms->hashTable;
-    U16 const* const tagTable = ms->tagTable;
+    BYTE const* const tagTable = ms->tagTable;
     U32 const hashLog = ms->rowHashLog;
     U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
     U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
 
     for (; idx < lim; ++idx) {
-        U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
         U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
         ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
@@ -866,11 +846,12 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
  * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
  */
 FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
-                                                  U16 const* tagTable, BYTE const* base,
+                                                  BYTE const* tagTable, BYTE const* base,
                                                   U32 idx, U32 const hashLog,
-                                                  U32 const rowLog, U32 const mls)
+                                                  U32 const rowLog, U32 const mls,
+                                                  U64 const hashSalt)
 {
-    U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+    U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
     U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
     ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
     { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
@@ -888,22 +869,21 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
                                         U32 const rowMask, U32 const useCache)
 {
     U32* const hashTable = ms->hashTable;
-    U16* const tagTable = ms->tagTable;
+    BYTE* const tagTable = ms->tagTable;
     U32 const hashLog = ms->rowHashLog;
     const BYTE* const base = ms->window.base;
 
     DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
     for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
-        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
-                                  : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
+                                  : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
         U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         U32* const row = hashTable + relRow;
-        BYTE* tagRow = (BYTE*)(tagTable + relRow);  /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
-                                                       Explicit cast allows us to get exact desired position within each row */
+        BYTE* tagRow = tagTable + relRow;
         U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
 
-        assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
-        ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
+        assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
+        tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
         row[pos] = updateStartIdx;
     }
 }
@@ -1059,7 +1039,7 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
 FORCE_INLINE_TEMPLATE ZSTD_VecMask
 ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
 {
-    const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
+    const BYTE* const src = tagRow;
     assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
     assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
     assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
@@ -1144,7 +1124,7 @@ size_t ZSTD_RowFindBestMatch(
                         const U32 rowLog)
 {
     U32* const hashTable = ms->hashTable;
-    U16* const tagTable = ms->tagTable;
+    BYTE* const tagTable = ms->tagTable;
     U32* const hashCache = ms->hashCache;
     const U32 hashLog = ms->rowHashLog;
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -1163,8 +1143,10 @@ size_t ZSTD_RowFindBestMatch(
     const U32 rowMask = rowEntries - 1;
     const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
     const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
+    const U64 hashSalt = ms->hashSalt;
     U32 nbAttempts = 1U << cappedSearchLog;
     size_t ml=4-1;
+    U32 hash;
 
     /* DMS/DDS variables that may be referenced laster */
     const ZSTD_matchState_t* const dms = ms->dictMatchState;
@@ -1188,7 +1170,7 @@ size_t ZSTD_RowFindBestMatch(
     if (dictMode == ZSTD_dictMatchState) {
         /* Prefetch DMS rows */
         U32* const dmsHashTable = dms->hashTable;
-        U16* const dmsTagTable = dms->tagTable;
+        BYTE* const dmsTagTable = dms->tagTable;
         U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
         U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
@@ -1198,9 +1180,19 @@ size_t ZSTD_RowFindBestMatch(
     }
 
     /* Update the hashTable and tagTable up to (but not including) ip */
-    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+    if (!ms->lazySkipping) {
+        ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+        hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
+    } else {
+        /* Stop inserting every position when in the lazy skipping mode.
+         * The hash cache is also not kept up to date in this mode.
+         */
+        hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
+        ms->nextToUpdate = curr;
+    }
+    ms->hashSaltEntropy += hash; /* collect salt entropy */
+
     { /* Get the hash for ip, compute the appropriate row */
-        U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
         U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
         U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
         U32* const row = hashTable + relRow;
@@ -1212,9 +1204,10 @@ size_t ZSTD_RowFindBestMatch(
         ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
 
         /* Cycle through the matches and prefetch */
-        for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+        for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
            U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
            U32 const matchIndex = row[matchPos];
+           if(matchPos == 0) continue;
            assert(numMatches < rowEntries);
            if (matchIndex < lowLimit)
                break;
@@ -1224,13 +1217,14 @@ size_t ZSTD_RowFindBestMatch(
                PREFETCH_L1(dictBase + matchIndex);
            }
            matchBuffer[numMatches++] = matchIndex;
+           --nbAttempts;
        }
 
        /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
           in ZSTD_row_update_internal() at the next search. */
        {
            U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
-           tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
+           tagRow[pos] = (BYTE)tag;
            row[pos] = ms->nextToUpdate++;
        }
 
@@ -1281,13 +1275,15 @@ size_t ZSTD_RowFindBestMatch(
        size_t currMatch = 0;
        ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
 
-       for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+       for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
           U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
          U32 const matchIndex = dmsRow[matchPos];
+          if(matchPos == 0) continue;
          if (matchIndex < dmsLowestIndex)
             break;
          PREFETCH_L1(dmsBase + matchIndex);
          matchBuffer[numMatches++] = matchIndex;
+          --nbAttempts;
       }
 
       /* Return the longest match */
@@ -1544,10 +1540,11 @@ ZSTD_compressBlock_lazy_generic(
         assert(offset_2 <= dictAndPrefixLength);
     }
 
+    /* Reset the lazy skipping state */
+    ms->lazySkipping = 0;
+
     if (searchMethod == search_rowHash) {
-        ZSTD_row_fillHashCache(ms, base, rowLog,
-                               MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
-                               ms->nextToUpdate, ilimit);
+        ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
     }
 
     /* Match Loop */
@@ -1591,7 +1588,16 @@ ZSTD_compressBlock_lazy_generic(
         }
 
         if (matchLength < 4) {
-            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */;
+            ip += step;
+            /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+             * In this mode we stop inserting every position into our tables, and only insert
+             * positions that we search, which is one in step positions.
+             * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+             * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+             * triggered once we've gone 2KB without finding any matches.
+             */
+            ms->lazySkipping = step > kLazySkippingStep;
             continue;
         }
 
```
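The comment in the hunk above explains the new lazy-skipping heuristic: on a miss, the search skips `step = ((ip - anchor) >> kSearchStrength) + 1` bytes, and once that step exceeds `kLazySkippingStep` (8) the matchfinder stops inserting every position into its tables. The following standalone sketch only illustrates that arithmetic; it assumes `kSearchStrength` is 8, as in upstream zstd, and is not part of the patch.

```c
#include <stddef.h>
#include <stdio.h>

/* Constants assumed to match upstream zstd: kSearchStrength is 8 in
 * zstd_compress_internal.h, kLazySkippingStep is the new #define above. */
#define kSearchStrength   8
#define kLazySkippingStep 8

int main(void) {
    size_t const sinceLastMatch[] = { 64, 512, 2047, 2048, 4096 };
    size_t i;
    for (i = 0; i < sizeof(sinceLastMatch) / sizeof(sinceLastMatch[0]); i++) {
        size_t const d = sinceLastMatch[i];                 /* ip - anchor */
        size_t const step = (d >> kSearchStrength) + 1;     /* bytes skipped on a miss */
        int const lazySkipping = step > kLazySkippingStep;  /* flips on after a long run of misses */
        printf("ip-anchor=%4zu  step=%2zu  lazySkipping=%d\n", d, step, lazySkipping);
    }
    return 0;
}
```

With `kSearchStrength` = 8, the step first exceeds 8 once `ip - anchor` reaches 2048 bytes, which matches the "2KB without finding any matches" figure in the comment.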
data/ext/zstdruby/libzstd/compress/zstd_lazy.c:

```diff
@@ -1695,6 +1701,13 @@ _storeSequence:
         ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
         anchor = ip = start + matchLength;
     }
+    if (ms->lazySkipping) {
+        /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
+        if (searchMethod == search_rowHash) {
+            ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+        }
+        ms->lazySkipping = 0;
+    }
 
     /* check immediate repcode */
     if (isDxS) {
@@ -1912,12 +1925,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 
     DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
 
+    /* Reset the lazy skipping state */
+    ms->lazySkipping = 0;
+
     /* init */
     ip += (ip == prefixStart);
     if (searchMethod == search_rowHash) {
-        ZSTD_row_fillHashCache(ms, base, rowLog,
-                               MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
-                               ms->nextToUpdate, ilimit);
+        ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
     }
 
     /* Match Loop */
@@ -1955,7 +1969,16 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
         }
 
         if (matchLength < 4) {
-            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
+            ip += step + 1;   /* jump faster over incompressible sections */
+            /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+             * In this mode we stop inserting every position into our tables, and only insert
+             * positions that we search, which is one in step positions.
+             * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+             * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+             * triggered once we've gone 2KB without finding any matches.
+             */
+            ms->lazySkipping = step > kLazySkippingStep;
             continue;
         }
 
@@ -2041,6 +2064,13 @@ _storeSequence:
         ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
         anchor = ip = start + matchLength;
     }
+    if (ms->lazySkipping) {
+        /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
+        if (searchMethod == search_rowHash) {
+            ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+        }
+        ms->lazySkipping = 0;
+    }
 
     /* check immediate repcode */
     while (ip <= ilimit) {
```

data/ext/zstdruby/libzstd/compress/zstd_opt.c:

```diff
@@ -1086,6 +1086,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
     ZSTD_optimal_t lastSequence;
     ZSTD_optLdm_t optLdm;
 
+    ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
+
     optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
     optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
     ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
```

data/ext/zstdruby/libzstd/compress/zstdmt_compress.c:

```diff
@@ -20,6 +20,7 @@
 
 
 /* ====== Dependencies ====== */
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
 #include "../common/mem.h" /* MEM_STATIC */
 #include "../common/pool.h" /* threadpool */
@@ -719,7 +720,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
     ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
 
     if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
-        size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
+        size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
         if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
         DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
         ZSTD_invalidateRepCodes(cctx);
@@ -737,7 +738,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
         DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
         assert(job->cSize == 0);
         for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
-            size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
+            size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
             if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
             ip += chunkSize;
             op += cSize; assert(op < oend);
@@ -757,8 +758,8 @@ static void ZSTDMT_compressionJob(void* jobDescription)
             size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
             size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
             size_t const cSize = (job->lastJob) ?
-                 ZSTD_compressEnd     (cctx, op, oend-op, ip, lastBlockSize) :
-                 ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
+                 ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
+                 ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
             if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
             lastCBlockSize = cSize;
     } }
```

data/ext/zstdruby/libzstd/decompress/huf_decompress.c:

```diff
@@ -696,7 +696,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
 
     /* Copy the arguments to local variables */
     ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
-    ZSTD_memcpy(&ip, &args->ip, sizeof(ip));
+    ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
     ZSTD_memcpy(&op, &args->op, sizeof(op));
 
     assert(MEM_isLittleEndian());
@@ -779,7 +779,7 @@ _out:
 
     /* Save the final values of each of the state variables back to args. */
     ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
-    ZSTD_memcpy(&args->ip, &ip, sizeof(ip));
+    ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
     ZSTD_memcpy(&args->op, &op, sizeof(op));
 }
 
@@ -1476,7 +1476,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
 
     /* Copy the arguments to local registers. */
     ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
-    ZSTD_memcpy(&ip, &args->ip, sizeof(ip));
+    ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
     ZSTD_memcpy(&op, &args->op, sizeof(op));
 
     oend[0] = op[1];
@@ -1599,7 +1599,7 @@ _out:
 
     /* Save the final values of each of the state variables back to args. */
     ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
-    ZSTD_memcpy(&args->ip, &ip, sizeof(ip));
+    ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
     ZSTD_memcpy(&args->op, &op, sizeof(op));
 }
 
```

data/ext/zstdruby/libzstd/decompress/zstd_ddict.c:

```diff
@@ -14,6 +14,7 @@
 /*-*******************************************************
 *  Dependencies
 *********************************************************/
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
 #include "../common/cpu.h" /* bmi2 */
 #include "../common/mem.h" /* low level memory routines */
```

data/ext/zstdruby/libzstd/decompress/zstd_decompress.c:

```diff
@@ -55,6 +55,7 @@
 /*-*******************************************************
 *  Dependencies
 *********************************************************/
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
 #include "../common/mem.h" /* low level memory routines */
 #define FSE_STATIC_LINKING_ONLY
@@ -588,49 +589,52 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
     sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
     RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
                     frameParameter_unsupported, "");
-    {
-        size_t const skippableSize = skippableHeaderSize + sizeU32;
+    { size_t const skippableSize = skippableHeaderSize + sizeU32;
         RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
         return skippableSize;
     }
 }
 
 /*! ZSTD_readSkippableFrame() :
- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ * Retrieves content of a skippable frame, and writes it to dst buffer.
  *
 * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
 * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
 * in the magicVariant.
 *
- * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
+ * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
 *
 * @return : number of bytes written or a ZSTD error.
 */
-ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
-                                            const void* src, size_t srcSize)
+size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
+                               unsigned* magicVariant,  /* optional, can be NULL */
+                               const void* src, size_t srcSize)
 {
-    U32 const magicNumber = MEM_readLE32(src);
-    size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
-    size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
-
-    /* check input validity */
-    RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
-    RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
-    RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
+    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
 
-    /* deliver payload */
-    if (skippableContentSize > 0 && dst != NULL)
-        ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
-    if (magicVariant != NULL)
-        *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
-    return skippableContentSize;
+    { U32 const magicNumber = MEM_readLE32(src);
+      size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
+      size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
+
+      /* check input validity */
+      RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
+      RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
+      RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
+
+      /* deliver payload */
+      if (skippableContentSize > 0 && dst != NULL)
+          ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
+      if (magicVariant != NULL)
+          *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
+      return skippableContentSize;
+    }
 }
 
 /** ZSTD_findDecompressedSize() :
- *  compatible with legacy mode
 *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
 *      skippable frames
- *      @return : decompressed size of the frames contained */
+ *  note: compatible with legacy mode
+ * @return : decompressed size of the frames contained */
 unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
 {
     unsigned long long totalDstSize = 0;
```
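For context, here is a hedged usage sketch of the reader rewritten above. `ZSTD_writeSkippableFrame`, `ZSTD_readSkippableFrame`, and `ZSTD_isSkippableFrame` sit in zstd's experimental section (ZSTD_STATIC_LINKING_ONLY); the buffer sizes and the magic variant value below are illustrative assumptions, not taken from this package.

```c
/* Assumes the experimental (static-linking-only) skippable-frame API and a
 * hypothetical magic variant of 7; error handling is intentionally minimal. */
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#include <stdio.h>

int main(void) {
    const char payload[] = "user metadata carried in a skippable frame";
    char frame[128];
    char out[128];
    unsigned magicVariant = 0;

    /* wrap the payload in a skippable frame */
    size_t const frameSize = ZSTD_writeSkippableFrame(frame, sizeof(frame),
                                                      payload, sizeof(payload), 7);
    if (ZSTD_isError(frameSize)) { fprintf(stderr, "write failed\n"); return 1; }

    /* the rewritten reader checks the header length first, then copies the content */
    size_t const contentSize = ZSTD_readSkippableFrame(out, sizeof(out), &magicVariant,
                                                       frame, frameSize);
    if (ZSTD_isError(contentSize)) { fprintf(stderr, "read failed\n"); return 1; }

    printf("skippable=%u variant=%u content=%zu bytes: %s\n",
           ZSTD_isSkippableFrame(frame, frameSize), magicVariant, contentSize, out);
    return 0;
}
```

The visible behavioural change in the hunk is that the reader now rejects inputs shorter than ZSTD_SKIPPABLEHEADERSIZE before reading the magic number.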
data/ext/zstdruby/libzstd/decompress/zstd_decompress.c:

```diff
@@ -640,9 +644,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
 
         if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
             size_t const skippableSize = readSkippableFrameSize(src, srcSize);
-            if (ZSTD_isError(skippableSize)) {
-                return ZSTD_CONTENTSIZE_ERROR;
-            }
+            if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
             assert(skippableSize <= srcSize);
 
             src = (const BYTE *)src + skippableSize;
@@ -650,17 +652,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
             continue;
         }
 
-        { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
-            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
+        { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
+            if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
 
-            /* check for overflow */
-            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
-            totalDstSize += ret;
+            if (totalDstSize + fcs < totalDstSize)
+                return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
+            totalDstSize += fcs;
         }
+        /* skip to next frame */
         { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
-            if (ZSTD_isError(frameSrcSize)) {
-                return ZSTD_CONTENTSIZE_ERROR;
-            }
+            if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
+            assert(frameSrcSize <= srcSize);
 
             src = (const BYTE *)src + frameSrcSize;
             srcSize -= frameSrcSize;
@@ -1090,17 +1092,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
     }
 #endif
 
-
-
-
+        if (srcSize >= 4) {
+            U32 const magicNumber = MEM_readLE32(src);
+            DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
             if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+                /* skippable frame detected : skip it */
                 size_t const skippableSize = readSkippableFrameSize(src, srcSize);
-                FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
+                FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
                 assert(skippableSize <= srcSize);
 
                 src = (const BYTE *)src + skippableSize;
                 srcSize -= skippableSize;
-                continue;
+                continue; /* check next frame */
             } }
 
         if (ddict) {
```