zstd-ruby 1.5.4.1 → 1.5.5.0

@@ -12,6 +12,8 @@
12
12
  #include "zstd_lazy.h"
13
13
  #include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
14
14
 
15
+ #define kLazySkippingStep 8
16
+
15
17
 
16
18
  /*-*************************************
17
19
  * Binary Tree search
@@ -618,7 +620,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
618
620
  FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
619
621
  ZSTD_matchState_t* ms,
620
622
  const ZSTD_compressionParameters* const cParams,
621
- const BYTE* ip, U32 const mls)
623
+ const BYTE* ip, U32 const mls, U32 const lazySkipping)
622
624
  {
623
625
  U32* const hashTable = ms->hashTable;
624
626
  const U32 hashLog = cParams->hashLog;
@@ -633,6 +635,9 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
633
635
  NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
634
636
  hashTable[h] = idx;
635
637
  idx++;
638
+ /* Stop inserting every position when in the lazy skipping mode. */
639
+ if (lazySkipping)
640
+ break;
636
641
  }
637
642
 
638
643
  ms->nextToUpdate = target;
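
Taken together, the hunks above teach the hash-chain inserter about lazy skipping: ZSTD_insertAndFindFirstIndex_internal() gains a lazySkipping flag, and when it is set the insertion loop bails out after chaining a single position instead of every position up to the target. A minimal standalone sketch of that control flow, with toy tables and a toy hash standing in for ZSTD_matchState_t and ZSTD_hashPtr(), could look like this:

    #include <stdint.h>

    #define TOY_CHAIN_SIZE 16u
    #define TOY_CHAIN_MASK (TOY_CHAIN_SIZE - 1u)

    /* Toy stand-ins for the hash table and chain table in ZSTD_matchState_t. */
    static uint32_t hashTable[256];
    static uint32_t chainTable[TOY_CHAIN_SIZE];

    /* Not the zstd hash; just enough structure to drive the example. */
    static uint32_t toyHash(const uint8_t* p)
    {
        return (uint32_t)(p[0] ^ (p[1] << 3) ^ (p[2] << 6)) & 255u;
    }

    /* Insert positions [nextToUpdate, target) into the chains, unless
     * lazySkipping is set, in which case a single position is inserted and the
     * loop breaks out, mirroring the early `break` added in the hunk above. */
    uint32_t insertAndFindFirstIndex(const uint8_t* base, uint32_t nextToUpdate,
                                     uint32_t target, int lazySkipping)
    {
        uint32_t idx = nextToUpdate;
        while (idx < target) {
            uint32_t const h = toyHash(base + idx);
            chainTable[idx & TOY_CHAIN_MASK] = hashTable[h];  /* old head becomes the next link */
            hashTable[h] = idx;                               /* idx is the new chain head */
            idx++;
            if (lazySkipping)
                break;   /* stop inserting every position in lazy-skipping mode */
        }
        /* the real code then sets ms->nextToUpdate = target, so skipped
         * positions are simply never inserted */
        return hashTable[toyHash(base + target)];   /* chain head for the current position */
    }

The public ZSTD_insertAndFindFirstIndex() keeps its old behaviour by passing lazySkipping = 0, while the HC4 match finder forwards ms->lazySkipping, as the next two hunks show.
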
@@ -641,7 +646,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
641
646
 
642
647
  U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
643
648
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
644
- return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
649
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
645
650
  }
646
651
 
647
652
  /* inlining is important to hardwire a hot branch (template emulation) */
@@ -685,7 +690,7 @@ size_t ZSTD_HcFindBestMatch(
685
690
  }
686
691
 
687
692
  /* HC4 match finder */
688
- matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
693
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
689
694
 
690
695
  for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
691
696
  size_t currentMl=0;
@@ -758,7 +763,6 @@ size_t ZSTD_HcFindBestMatch(
758
763
  * (SIMD) Row-based matchfinder
759
764
  ***********************************/
760
765
  /* Constants for row-based hash */
761
- #define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
762
766
  #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
763
767
  #define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
764
768
 
@@ -774,39 +778,15 @@ MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
774
778
  return ZSTD_countTrailingZeros64(val);
775
779
  }
776
780
 
777
- /* ZSTD_rotateRight_*():
778
- * Rotates a bitfield to the right by "count" bits.
779
- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
780
- */
781
- FORCE_INLINE_TEMPLATE
782
- U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
783
- assert(count < 64);
784
- count &= 0x3F; /* for fickle pattern recognition */
785
- return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
786
- }
787
-
788
- FORCE_INLINE_TEMPLATE
789
- U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
790
- assert(count < 32);
791
- count &= 0x1F; /* for fickle pattern recognition */
792
- return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
793
- }
794
-
795
- FORCE_INLINE_TEMPLATE
796
- U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
797
- assert(count < 16);
798
- count &= 0x0F; /* for fickle pattern recognition */
799
- return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
800
- }
801
-
802
781
  /* ZSTD_row_nextIndex():
803
782
  * Returns the next index to insert at within a tagTable row, and updates the "head"
804
- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
783
+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
805
784
  */
806
785
  FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
807
- U32 const next = (*tagRow - 1) & rowMask;
808
- *tagRow = (BYTE)next;
809
- return next;
786
+ U32 next = (*tagRow-1) & rowMask;
787
+ next += (next == 0) ? rowMask : 0; /* skip first position */
788
+ *tagRow = (BYTE)next;
789
+ return next;
810
790
  }
811
791
 
812
792
  /* ZSTD_isAligned():
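
The rewritten ZSTD_row_nextIndex() above still walks the head counter backwards, but it now wraps into [1, rowEntries) instead of [0, rowEntries), so slot 0 is never handed out; the search loops later in this diff skip matchPos == 0 for the same reason. A small self-contained version of the wrapping rule, assuming 16 entries per row:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirror of the new wrapping rule: cycle backwards through [1, rowEntries),
     * never returning 0, which the new layout reserves for the head byte. */
    static uint32_t rowNextIndex(uint8_t* head, uint32_t rowMask)
    {
        uint32_t next = (uint32_t)(*head - 1) & rowMask;
        next += (next == 0) ? rowMask : 0;   /* skip position 0 */
        *head = (uint8_t)next;
        return next;
    }

    int main(void)
    {
        uint8_t head = 0;
        uint32_t const rowMask = 15;   /* 16 entries per row */
        int i;
        for (i = 0; i < 20; ++i)
            printf("%u ", rowNextIndex(&head, rowMask));
        printf("\n");   /* 15 14 ... 1 15 14 ... : index 0 never appears */
        return 0;
    }
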
@@ -820,7 +800,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
820
800
  /* ZSTD_row_prefetch():
821
801
  * Performs prefetching for the hashTable and tagTable at a given row.
822
802
  */
823
- FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
803
+ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
824
804
  PREFETCH_L1(hashTable + relRow);
825
805
  if (rowLog >= 5) {
826
806
  PREFETCH_L1(hashTable + relRow + 16);
@@ -844,13 +824,13 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
844
824
  U32 idx, const BYTE* const iLimit)
845
825
  {
846
826
  U32 const* const hashTable = ms->hashTable;
847
- U16 const* const tagTable = ms->tagTable;
827
+ BYTE const* const tagTable = ms->tagTable;
848
828
  U32 const hashLog = ms->rowHashLog;
849
829
  U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
850
830
  U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
851
831
 
852
832
  for (; idx < lim; ++idx) {
853
- U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
833
+ U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
854
834
  U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
855
835
  ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
856
836
  ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
@@ -866,11 +846,12 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
866
846
  * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
867
847
  */
868
848
  FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
869
- U16 const* tagTable, BYTE const* base,
849
+ BYTE const* tagTable, BYTE const* base,
870
850
  U32 idx, U32 const hashLog,
871
- U32 const rowLog, U32 const mls)
851
+ U32 const rowLog, U32 const mls,
852
+ U64 const hashSalt)
872
853
  {
873
- U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
854
+ U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
874
855
  U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
875
856
  ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
876
857
  { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
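
From here on the row matchfinder hashes through ZSTD_hashPtrSalted(), threading the new ms->hashSalt into every hash it computes or caches (and, a few hunks later, accumulating ms->hashSaltEntropy from the results). The real salted hash is internal to libzstd; the sketch below only illustrates the general idea of folding a 64-bit salt into an existing hash, and toyHash/toyHashSalted are names invented for this example:

    #include <stddef.h>
    #include <stdint.h>

    /* Stand-in for the unsalted hash; not the real zstd hash function. */
    static uint32_t toyHash(const uint8_t* p, size_t len)
    {
        uint32_t h = 2166136261u;            /* FNV-1a style, for illustration only */
        size_t i;
        for (i = 0; i < len; ++i) {
            h ^= p[i];
            h *= 16777619u;
        }
        return h;
    }

    /* Illustrative salted variant: mix a per-match-state 64-bit salt into the
     * result so two states hashing identical bytes land in different rows. */
    uint32_t toyHashSalted(const uint8_t* p, size_t len, uint64_t salt)
    {
        uint32_t h = toyHash(p, len);
        h ^= (uint32_t)salt;
        h ^= (uint32_t)(salt >> 32);
        return h;
    }
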
@@ -888,22 +869,21 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
888
869
  U32 const rowMask, U32 const useCache)
889
870
  {
890
871
  U32* const hashTable = ms->hashTable;
891
- U16* const tagTable = ms->tagTable;
872
+ BYTE* const tagTable = ms->tagTable;
892
873
  U32 const hashLog = ms->rowHashLog;
893
874
  const BYTE* const base = ms->window.base;
894
875
 
895
876
  DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
896
877
  for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
897
- U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
898
- : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
878
+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
879
+ : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
899
880
  U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
900
881
  U32* const row = hashTable + relRow;
901
- BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
902
- Explicit cast allows us to get exact desired position within each row */
882
+ BYTE* tagRow = tagTable + relRow;
903
883
  U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
904
884
 
905
- assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
906
- ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
885
+ assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
886
+ tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
907
887
  row[pos] = updateStartIdx;
908
888
  }
909
889
  }
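
With ms->tagTable now a plain BYTE array, a row is just rowEntries one-byte slots: slot 0 holds the head counter and slots [1, rowEntries) hold the tags, so ZSTD_ROW_HASH_TAG_OFFSET is gone and the update loop writes tagRow[pos] directly next to row[pos]. A compact model of that layout, assuming 16 entries per row and the 8-bit tags used by the row matchfinder:

    #include <stdint.h>

    #define ROW_ENTRIES 16u
    #define ROW_MASK    (ROW_ENTRIES - 1u)
    #define TAG_MASK    0xFFu                /* 8-bit tag per entry */

    typedef struct {
        uint8_t  tagRow[ROW_ENTRIES];        /* tagRow[0] doubles as the head counter */
        uint32_t indexRow[ROW_ENTRIES];      /* match indices, parallel to the tags */
    } ToyRow;

    /* Same wrapping rule as the new ZSTD_row_nextIndex(): the head cycles
     * backwards through [1, ROW_ENTRIES), leaving slot 0 for the head byte. */
    static uint32_t rowNextIndex(uint8_t* head)
    {
        uint32_t next = (uint32_t)(*head - 1) & ROW_MASK;
        next += (next == 0) ? ROW_MASK : 0;
        *head = (uint8_t)next;
        return next;
    }

    /* Record one position: the low tag bits of the hash go straight into
     * tagRow[pos] and the absolute index into indexRow[pos], the same
     * tagRow[pos] = ...; row[pos] = ...; pattern as in the hunk above. */
    void rowInsert(ToyRow* row, uint32_t hash, uint32_t index)
    {
        uint32_t const pos = rowNextIndex(&row->tagRow[0]);
        row->tagRow[pos]   = (uint8_t)(hash & TAG_MASK);
        row->indexRow[pos] = index;
    }
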
@@ -1059,7 +1039,7 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
1059
1039
  FORCE_INLINE_TEMPLATE ZSTD_VecMask
1060
1040
  ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
1061
1041
  {
1062
- const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
1042
+ const BYTE* const src = tagRow;
1063
1043
  assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
1064
1044
  assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
1065
1045
  assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
@@ -1144,7 +1124,7 @@ size_t ZSTD_RowFindBestMatch(
1144
1124
  const U32 rowLog)
1145
1125
  {
1146
1126
  U32* const hashTable = ms->hashTable;
1147
- U16* const tagTable = ms->tagTable;
1127
+ BYTE* const tagTable = ms->tagTable;
1148
1128
  U32* const hashCache = ms->hashCache;
1149
1129
  const U32 hashLog = ms->rowHashLog;
1150
1130
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -1163,8 +1143,10 @@ size_t ZSTD_RowFindBestMatch(
1163
1143
  const U32 rowMask = rowEntries - 1;
1164
1144
  const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
1165
1145
  const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
1146
+ const U64 hashSalt = ms->hashSalt;
1166
1147
  U32 nbAttempts = 1U << cappedSearchLog;
1167
1148
  size_t ml=4-1;
1149
+ U32 hash;
1168
1150
 
1169
1151
  /* DMS/DDS variables that may be referenced laster */
1170
1152
  const ZSTD_matchState_t* const dms = ms->dictMatchState;
@@ -1188,7 +1170,7 @@ size_t ZSTD_RowFindBestMatch(
1188
1170
  if (dictMode == ZSTD_dictMatchState) {
1189
1171
  /* Prefetch DMS rows */
1190
1172
  U32* const dmsHashTable = dms->hashTable;
1191
- U16* const dmsTagTable = dms->tagTable;
1173
+ BYTE* const dmsTagTable = dms->tagTable;
1192
1174
  U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
1193
1175
  U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1194
1176
  dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
@@ -1198,9 +1180,19 @@ size_t ZSTD_RowFindBestMatch(
1198
1180
  }
1199
1181
 
1200
1182
  /* Update the hashTable and tagTable up to (but not including) ip */
1201
- ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
1183
+ if (!ms->lazySkipping) {
1184
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
1185
+ hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
1186
+ } else {
1187
+ /* Stop inserting every position when in the lazy skipping mode.
1188
+ * The hash cache is also not kept up to date in this mode.
1189
+ */
1190
+ hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
1191
+ ms->nextToUpdate = curr;
1192
+ }
1193
+ ms->hashSaltEntropy += hash; /* collect salt entropy */
1194
+
1202
1195
  { /* Get the hash for ip, compute the appropriate row */
1203
- U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
1204
1196
  U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1205
1197
  U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
1206
1198
  U32* const row = hashTable + relRow;
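
This is the row-matchfinder counterpart of the hash-chain change: outside lazy-skipping mode the tables and the hash cache are kept fully up to date, while inside it the current position is hashed directly, nextToUpdate is pinned to curr, and the (now stale) cache is left alone; either way the hash feeds ms->hashSaltEntropy. A schematic, compilable restatement of that branch, with all zstd internals replaced by toy stand-ins:

    #include <stdint.h>

    typedef struct {
        uint32_t nextToUpdate;
        int      lazySkipping;
        uint64_t hashSalt;
        uint64_t hashSaltEntropy;
    } ToyMatchState;

    /* Stand-in for ZSTD_row_update_internal(): brings the tables up to date. */
    static void toyUpdateTables(ToyMatchState* ms, uint32_t curr)
    {
        ms->nextToUpdate = curr;
    }

    /* Stand-in for the (cached or direct) salted hash of position `curr`. */
    static uint32_t toyHashPosition(uint32_t curr, uint64_t salt)
    {
        return (uint32_t)(curr * 2654435761u) ^ (uint32_t)salt;
    }

    /* Decide how much bookkeeping to do before searching position `curr`,
     * following the if (!ms->lazySkipping) / else split in the hunk above. */
    uint32_t prepareSearch(ToyMatchState* ms, uint32_t curr)
    {
        uint32_t hash;
        if (!ms->lazySkipping) {
            toyUpdateTables(ms, curr);                    /* full update path */
            hash = toyHashPosition(curr, ms->hashSalt);   /* real code reads this from the cache */
        } else {
            hash = toyHashPosition(curr, ms->hashSalt);   /* hashed directly; the cache stays stale */
            ms->nextToUpdate = curr;                      /* skipped positions are never inserted */
        }
        ms->hashSaltEntropy += hash;                      /* collect entropy for future salts */
        return hash;
    }
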
@@ -1212,9 +1204,10 @@ size_t ZSTD_RowFindBestMatch(
1212
1204
  ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
1213
1205
 
1214
1206
  /* Cycle through the matches and prefetch */
1215
- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
1207
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
1216
1208
  U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
1217
1209
  U32 const matchIndex = row[matchPos];
1210
+ if(matchPos == 0) continue;
1218
1211
  assert(numMatches < rowEntries);
1219
1212
  if (matchIndex < lowLimit)
1220
1213
  break;
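
The candidate-gathering loop above (and its dictMatchState twin further down) changes in two small ways: the reserved slot 0 is skipped, and nbAttempts is now decremented only when a candidate is actually buffered rather than once per mask bit. A standalone version of the reworked loop, using the GCC/Clang __builtin_ctzll builtin in place of ZSTD_VecMask_next():

    #include <stdint.h>

    /* Stand-in for ZSTD_VecMask_next(); assumes a GCC/Clang builtin is available. */
    static uint32_t ctz64(uint64_t v) { return (uint32_t)__builtin_ctzll(v); }

    /* Walk a match bitmask the way the reworked loop does: clear one bit per
     * iteration, skip the reserved slot 0, and charge nbAttempts only for
     * candidates that are actually buffered. Returns the number buffered. */
    uint32_t collectCandidates(uint64_t matches, uint32_t headGrouped,
                               uint32_t groupWidth, uint32_t rowMask,
                               const uint32_t* row, uint32_t lowLimit,
                               uint32_t* outBuffer, uint32_t nbAttempts)
    {
        uint32_t numMatches = 0;
        for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
            uint32_t const matchPos   = ((headGrouped + ctz64(matches)) / groupWidth) & rowMask;
            uint32_t const matchIndex = row[matchPos];
            if (matchPos == 0) continue;        /* slot 0 now belongs to the head byte */
            if (matchIndex < lowLimit) break;   /* out of the current window */
            outBuffer[numMatches++] = matchIndex;
            --nbAttempts;                       /* counted only when a candidate is kept */
        }
        return numMatches;
    }
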
@@ -1224,13 +1217,14 @@ size_t ZSTD_RowFindBestMatch(
1224
1217
  PREFETCH_L1(dictBase + matchIndex);
1225
1218
  }
1226
1219
  matchBuffer[numMatches++] = matchIndex;
1220
+ --nbAttempts;
1227
1221
  }
1228
1222
 
1229
1223
  /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
1230
1224
  in ZSTD_row_update_internal() at the next search. */
1231
1225
  {
1232
1226
  U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
1233
- tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
1227
+ tagRow[pos] = (BYTE)tag;
1234
1228
  row[pos] = ms->nextToUpdate++;
1235
1229
  }
1236
1230
 
@@ -1281,13 +1275,15 @@ size_t ZSTD_RowFindBestMatch(
1281
1275
  size_t currMatch = 0;
1282
1276
  ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
1283
1277
 
1284
- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
1278
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
1285
1279
  U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
1286
1280
  U32 const matchIndex = dmsRow[matchPos];
1281
+ if(matchPos == 0) continue;
1287
1282
  if (matchIndex < dmsLowestIndex)
1288
1283
  break;
1289
1284
  PREFETCH_L1(dmsBase + matchIndex);
1290
1285
  matchBuffer[numMatches++] = matchIndex;
1286
+ --nbAttempts;
1291
1287
  }
1292
1288
 
1293
1289
  /* Return the longest match */
@@ -1544,10 +1540,11 @@ ZSTD_compressBlock_lazy_generic(
1544
1540
  assert(offset_2 <= dictAndPrefixLength);
1545
1541
  }
1546
1542
 
1543
+ /* Reset the lazy skipping state */
1544
+ ms->lazySkipping = 0;
1545
+
1547
1546
  if (searchMethod == search_rowHash) {
1548
- ZSTD_row_fillHashCache(ms, base, rowLog,
1549
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
1550
- ms->nextToUpdate, ilimit);
1547
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1551
1548
  }
1552
1549
 
1553
1550
  /* Match Loop */
@@ -1591,7 +1588,16 @@ ZSTD_compressBlock_lazy_generic(
1591
1588
  }
1592
1589
 
1593
1590
  if (matchLength < 4) {
1594
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
1591
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */;
1592
+ ip += step;
1593
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
1594
+ * In this mode we stop inserting every position into our tables, and only insert
1595
+ * positions that we search, which is one in step positions.
1596
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
1597
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
1598
+ * triggered once we've gone 2KB without finding any matches.
1599
+ */
1600
+ ms->lazySkipping = step > kLazySkippingStep;
1595
1601
  continue;
1596
1602
  }
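
The skip logic itself: once no match of length 4 is found, the search jumps ahead by step bytes, and lazy-skipping mode switches on as soon as step exceeds kLazySkippingStep. Taking kSearchStrength as 8 (its value in zstd_compress_internal.h), that threshold is crossed once ip - anchor reaches 2048 bytes, which is the 2KB mentioned in the comment. A tiny helper capturing just that arithmetic:

    #include <stddef.h>

    #define kSearchStrength   8   /* value used by the lazy matchfinders */
    #define kLazySkippingStep 8   /* threshold added at the top of this diff */

    /* Compute the forward jump after a failed search and decide whether to
     * enter lazy-skipping mode. With the constants above, step exceeds
     * kLazySkippingStep once distanceFromAnchor reaches 2048 bytes (2KB). */
    size_t nextStep(size_t distanceFromAnchor, int* lazySkipping)
    {
        size_t const step = (distanceFromAnchor >> kSearchStrength) + 1;
        *lazySkipping = (step > kLazySkippingStep);
        return step;
    }

The extDict variant later in the diff applies the same rule, and the hunks after _storeSequence clear ms->lazySkipping again (refilling the row hash cache when searchMethod == search_rowHash) as soon as a match is stored.
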
1597
1603
 
@@ -1695,6 +1701,13 @@ _storeSequence:
1695
1701
  ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
1696
1702
  anchor = ip = start + matchLength;
1697
1703
  }
1704
+ if (ms->lazySkipping) {
1705
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
1706
+ if (searchMethod == search_rowHash) {
1707
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1708
+ }
1709
+ ms->lazySkipping = 0;
1710
+ }
1698
1711
 
1699
1712
  /* check immediate repcode */
1700
1713
  if (isDxS) {
@@ -1912,12 +1925,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1912
1925
 
1913
1926
  DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
1914
1927
 
1928
+ /* Reset the lazy skipping state */
1929
+ ms->lazySkipping = 0;
1930
+
1915
1931
  /* init */
1916
1932
  ip += (ip == prefixStart);
1917
1933
  if (searchMethod == search_rowHash) {
1918
- ZSTD_row_fillHashCache(ms, base, rowLog,
1919
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
1920
- ms->nextToUpdate, ilimit);
1934
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1921
1935
  }
1922
1936
 
1923
1937
  /* Match Loop */
@@ -1955,7 +1969,16 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1955
1969
  }
1956
1970
 
1957
1971
  if (matchLength < 4) {
1958
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
1972
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
1973
+ ip += step + 1; /* jump faster over incompressible sections */
1974
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
1975
+ * In this mode we stop inserting every position into our tables, and only insert
1976
+ * positions that we search, which is one in step positions.
1977
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
1978
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
1979
+ * triggered once we've gone 2KB without finding any matches.
1980
+ */
1981
+ ms->lazySkipping = step > kLazySkippingStep;
1959
1982
  continue;
1960
1983
  }
1961
1984
 
@@ -2041,6 +2064,13 @@ _storeSequence:
2041
2064
  ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
2042
2065
  anchor = ip = start + matchLength;
2043
2066
  }
2067
+ if (ms->lazySkipping) {
2068
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
2069
+ if (searchMethod == search_rowHash) {
2070
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
2071
+ }
2072
+ ms->lazySkipping = 0;
2073
+ }
2044
2074
 
2045
2075
  /* check immediate repcode */
2046
2076
  while (ip <= ilimit) {
@@ -1086,6 +1086,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1086
1086
  ZSTD_optimal_t lastSequence;
1087
1087
  ZSTD_optLdm_t optLdm;
1088
1088
 
1089
+ ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
1090
+
1089
1091
  optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1090
1092
  optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1091
1093
  ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
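
The only functional change in this hunk is zero-filling lastSequence in ZSTD_compressBlock_opt_generic before anything reads it; zeroing an aggregate up front is a common way to keep every field defined on all paths and to quiet maybe-uninitialized diagnostics, though the diff itself does not say which compiler complained. The pattern in isolation, with a reduced stand-in for ZSTD_optimal_t:

    #include <string.h>

    /* Reduced stand-in for ZSTD_optimal_t; the field list is abbreviated. */
    typedef struct {
        unsigned off;
        unsigned mlen;
        unsigned litlen;
        int      price;
    } ToyOptimal;

    void example(void)
    {
        ToyOptimal lastSequence;
        memset(&lastSequence, 0, sizeof(lastSequence));  /* every field defined before any read */
        /* ... later code can now read any field on any path ... */
        (void)lastSequence;
    }
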
@@ -20,6 +20,7 @@
20
20
 
21
21
 
22
22
  /* ====== Dependencies ====== */
23
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
23
24
  #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
24
25
  #include "../common/mem.h" /* MEM_STATIC */
25
26
  #include "../common/pool.h" /* threadpool */
@@ -719,7 +720,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
719
720
  ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
720
721
 
721
722
  if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
722
- size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
723
+ size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
723
724
  if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
724
725
  DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
725
726
  ZSTD_invalidateRepCodes(cctx);
@@ -737,7 +738,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
737
738
  DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
738
739
  assert(job->cSize == 0);
739
740
  for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
740
- size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
741
+ size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
741
742
  if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
742
743
  ip += chunkSize;
743
744
  op += cSize; assert(op < oend);
@@ -757,8 +758,8 @@ static void ZSTDMT_compressionJob(void* jobDescription)
757
758
  size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
758
759
  size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
759
760
  size_t const cSize = (job->lastJob) ?
760
- ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
761
- ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
761
+ ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
762
+ ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
762
763
  if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
763
764
  lastCBlockSize = cSize;
764
765
  } }
@@ -696,7 +696,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
696
696
 
697
697
  /* Copy the arguments to local variables */
698
698
  ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
699
- ZSTD_memcpy(&ip, &args->ip, sizeof(ip));
699
+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
700
700
  ZSTD_memcpy(&op, &args->op, sizeof(op));
701
701
 
702
702
  assert(MEM_isLittleEndian());
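
All four huf_decompress hunks make the same change: the destination argument of ZSTD_memcpy gains an explicit (void*) cast when it is the address of the local input-pointer array. The copy is byte-for-byte identical; the cast only changes how the conversion is spelled, presumably to keep a picky toolchain from warning about it, though the diff does not name the diagnostic, so treat that motivation as an assumption. A small standalone illustration of the same copy, assuming ip is the 4-stream pointer array used by the fast decoder:

    #include <string.h>

    typedef unsigned char BYTE;

    /* Reduced model of HUF_DecompressFastArgs: four input stream pointers. */
    typedef struct { const BYTE* ip[4]; } ToyArgs;

    void restoreArgs(ToyArgs* args)
    {
        const BYTE* ip[4];

        /* Same shape as the hunk above: copy the four pointer objects themselves.
         * The (void*) cast on the destination is what the upstream diff adds;
         * it does not change what is copied. */
        memcpy((void*)(&ip), &args->ip, sizeof(ip));

        /* ... decode using ip[0..3] ... */
        (void)ip;
    }
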
@@ -779,7 +779,7 @@ _out:
779
779
 
780
780
  /* Save the final values of each of the state variables back to args. */
781
781
  ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
782
- ZSTD_memcpy(&args->ip, &ip, sizeof(ip));
782
+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
783
783
  ZSTD_memcpy(&args->op, &op, sizeof(op));
784
784
  }
785
785
 
@@ -1476,7 +1476,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
1476
1476
 
1477
1477
  /* Copy the arguments to local registers. */
1478
1478
  ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
1479
- ZSTD_memcpy(&ip, &args->ip, sizeof(ip));
1479
+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
1480
1480
  ZSTD_memcpy(&op, &args->op, sizeof(op));
1481
1481
 
1482
1482
  oend[0] = op[1];
@@ -1599,7 +1599,7 @@ _out:
1599
1599
 
1600
1600
  /* Save the final values of each of the state variables back to args. */
1601
1601
  ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
1602
- ZSTD_memcpy(&args->ip, &ip, sizeof(ip));
1602
+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
1603
1603
  ZSTD_memcpy(&args->op, &op, sizeof(op));
1604
1604
  }
1605
1605
 
@@ -14,6 +14,7 @@
14
14
  /*-*******************************************************
15
15
  * Dependencies
16
16
  *********************************************************/
17
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
17
18
  #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18
19
  #include "../common/cpu.h" /* bmi2 */
19
20
  #include "../common/mem.h" /* low level memory routines */
@@ -55,6 +55,7 @@
55
55
  /*-*******************************************************
56
56
  * Dependencies
57
57
  *********************************************************/
58
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
58
59
  #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
59
60
  #include "../common/mem.h" /* low level memory routines */
60
61
  #define FSE_STATIC_LINKING_ONLY
@@ -588,49 +589,52 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
588
589
  sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
589
590
  RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
590
591
  frameParameter_unsupported, "");
591
- {
592
- size_t const skippableSize = skippableHeaderSize + sizeU32;
592
+ { size_t const skippableSize = skippableHeaderSize + sizeU32;
593
593
  RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
594
594
  return skippableSize;
595
595
  }
596
596
  }
597
597
 
598
598
  /*! ZSTD_readSkippableFrame() :
599
- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
599
+ * Retrieves content of a skippable frame, and writes it to dst buffer.
600
600
  *
601
601
  * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
602
602
  * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
603
603
  * in the magicVariant.
604
604
  *
605
- * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
605
+ * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
606
606
  *
607
607
  * @return : number of bytes written or a ZSTD error.
608
608
  */
609
- ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
610
- const void* src, size_t srcSize)
609
+ size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
610
+ unsigned* magicVariant, /* optional, can be NULL */
611
+ const void* src, size_t srcSize)
611
612
  {
612
- U32 const magicNumber = MEM_readLE32(src);
613
- size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
614
- size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
615
-
616
- /* check input validity */
617
- RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
618
- RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
619
- RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
613
+ RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
620
614
 
621
- /* deliver payload */
622
- if (skippableContentSize > 0 && dst != NULL)
623
- ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
624
- if (magicVariant != NULL)
625
- *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
626
- return skippableContentSize;
615
+ { U32 const magicNumber = MEM_readLE32(src);
616
+ size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
617
+ size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
618
+
619
+ /* check input validity */
620
+ RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
621
+ RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
622
+ RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
623
+
624
+ /* deliver payload */
625
+ if (skippableContentSize > 0 && dst != NULL)
626
+ ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
627
+ if (magicVariant != NULL)
628
+ *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
629
+ return skippableContentSize;
630
+ }
627
631
  }
628
632
 
629
633
  /** ZSTD_findDecompressedSize() :
630
- * compatible with legacy mode
631
634
  * `srcSize` must be the exact length of some number of ZSTD compressed and/or
632
635
  * skippable frames
633
- * @return : decompressed size of the frames contained */
636
+ * note: compatible with legacy mode
637
+ * @return : decompressed size of the frames contained */
634
638
  unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
635
639
  {
636
640
  unsigned long long totalDstSize = 0;
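
The rewritten ZSTD_readSkippableFrame() now checks srcSize against ZSTD_SKIPPABLEHEADERSIZE before touching the input, then performs the same validation and payload copy inside a block; its signature and return convention are unchanged. A minimal caller based on the signature shown above (the helper name and buffers are ours; the function lives behind ZSTD_STATIC_LINKING_ONLY):

    #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_readSkippableFrame() is experimental API */
    #include <zstd.h>
    #include <stdio.h>

    /* Extract the payload of one skippable frame into dst. Returns 0 on success. */
    int readOneSkippable(const void* src, size_t srcSize,
                         void* dst, size_t dstCapacity)
    {
        unsigned magicVariant = 0;
        size_t const contentSize =
            ZSTD_readSkippableFrame(dst, dstCapacity, &magicVariant, src, srcSize);
        if (ZSTD_isError(contentSize)) {
            fprintf(stderr, "not a readable skippable frame: %s\n",
                    ZSTD_getErrorName(contentSize));
            return -1;
        }
        printf("skippable payload: %zu bytes, magic variant %u\n",
               contentSize, magicVariant);
        return 0;
    }
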
@@ -640,9 +644,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
640
644
 
641
645
  if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
642
646
  size_t const skippableSize = readSkippableFrameSize(src, srcSize);
643
- if (ZSTD_isError(skippableSize)) {
644
- return ZSTD_CONTENTSIZE_ERROR;
645
- }
647
+ if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
646
648
  assert(skippableSize <= srcSize);
647
649
 
648
650
  src = (const BYTE *)src + skippableSize;
@@ -650,17 +652,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
650
652
  continue;
651
653
  }
652
654
 
653
- { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
654
- if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
655
+ { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
656
+ if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
655
657
 
656
- /* check for overflow */
657
- if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
658
- totalDstSize += ret;
658
+ if (totalDstSize + fcs < totalDstSize)
659
+ return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
660
+ totalDstSize += fcs;
659
661
  }
662
+ /* skip to next frame */
660
663
  { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
661
- if (ZSTD_isError(frameSrcSize)) {
662
- return ZSTD_CONTENTSIZE_ERROR;
663
- }
664
+ if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
665
+ assert(frameSrcSize <= srcSize);
664
666
 
665
667
  src = (const BYTE *)src + frameSrcSize;
666
668
  srcSize -= frameSrcSize;
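
The ZSTD_findDecompressedSize() cleanup renames ret to fcs and tightens the comments, but the walk is unchanged: add up each frame's content size, treat skippable frames as zero, and advance by the compressed frame size. The same traversal can be written against the stable public API; the sketch below condenses the error handling and ignores any trailing bytes shorter than a magic number:

    #include <zstd.h>

    /* Sum the decompressed sizes of concatenated zstd and/or skippable frames,
     * mirroring what ZSTD_findDecompressedSize() does internally but using only
     * stable public calls. */
    unsigned long long totalDecompressedSize(const void* src, size_t srcSize)
    {
        unsigned long long total = 0;
        while (srcSize >= 4) {
            unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
            if (fcs == ZSTD_CONTENTSIZE_ERROR)   return ZSTD_CONTENTSIZE_ERROR;
            if (fcs == ZSTD_CONTENTSIZE_UNKNOWN) return ZSTD_CONTENTSIZE_UNKNOWN;
            if (total + fcs < total) return ZSTD_CONTENTSIZE_ERROR;   /* overflow */
            total += fcs;   /* skippable frames report a content size of 0 */
            {   size_t const frameSize = ZSTD_findFrameCompressedSize(src, srcSize);
                if (ZSTD_isError(frameSize)) return ZSTD_CONTENTSIZE_ERROR;
                src = (const char*)src + frameSize;
                srcSize -= frameSize;
            }
        }
        return total;
    }
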
@@ -1090,17 +1092,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
1090
1092
  }
1091
1093
  #endif
1092
1094
 
1093
- { U32 const magicNumber = MEM_readLE32(src);
1094
- DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
1095
- (unsigned)magicNumber, ZSTD_MAGICNUMBER);
1095
+ if (srcSize >= 4) {
1096
+ U32 const magicNumber = MEM_readLE32(src);
1097
+ DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
1096
1098
  if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
1099
+ /* skippable frame detected : skip it */
1097
1100
  size_t const skippableSize = readSkippableFrameSize(src, srcSize);
1098
- FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
1101
+ FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
1099
1102
  assert(skippableSize <= srcSize);
1100
1103
 
1101
1104
  src = (const BYTE *)src + skippableSize;
1102
1105
  srcSize -= skippableSize;
1103
- continue;
1106
+ continue; /* check next frame */
1104
1107
  } }
1105
1108
 
1106
1109
  if (ddict) {
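
The final hunk guards the magic-number probe in the multi-frame decoder: the 32-bit read now happens only when at least 4 bytes remain, so a truncated tail can no longer be read past its end while checking for a skippable frame. The guard in isolation, with a portable little-endian read and the skippable-magic constants from zstd.h:

    #include <stddef.h>
    #include <stdint.h>

    #define TOY_MAGIC_SKIPPABLE_MASK  0xFFFFFFF0u
    #define TOY_MAGIC_SKIPPABLE_START 0x184D2A50u   /* same values as zstd.h */

    /* Read a little-endian U32 without assuming alignment or host endianness. */
    static uint32_t readLE32(const void* p)
    {
        const uint8_t* b = (const uint8_t*)p;
        return (uint32_t)b[0] | ((uint32_t)b[1] << 8)
             | ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
    }

    /* Probe for a skippable frame only when a full magic number is present,
     * mirroring the `if (srcSize >= 4)` guard added in the hunk above. */
    int looksSkippable(const void* src, size_t srcSize)
    {
        if (srcSize < 4) return 0;   /* too short to hold a magic number */
        return (readLE32(src) & TOY_MAGIC_SKIPPABLE_MASK) == TOY_MAGIC_SKIPPABLE_START;
    }
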