zstd-ruby 1.5.4.1 → 1.5.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,8 @@
  #include "zstd_lazy.h"
  #include "../common/bits.h" /* ZSTD_countTrailingZeros64 */

+ #define kLazySkippingStep 8
+

  /*-*************************************
  * Binary Tree search
@@ -618,7 +620,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
  FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
  ZSTD_matchState_t* ms,
  const ZSTD_compressionParameters* const cParams,
- const BYTE* ip, U32 const mls)
+ const BYTE* ip, U32 const mls, U32 const lazySkipping)
  {
  U32* const hashTable = ms->hashTable;
  const U32 hashLog = cParams->hashLog;
@@ -633,6 +635,9 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
  NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
  hashTable[h] = idx;
  idx++;
+ /* Stop inserting every position when in the lazy skipping mode. */
+ if (lazySkipping)
+ break;
  }

  ms->nextToUpdate = target;
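Note: with this change, the hash-chain insertion loop indexes only the position actually being searched while the lazy matchfinder is in skipping mode, instead of every position since the last update. A simplified standalone sketch of that pattern (toy hash, simplified types and table sizes, not zstd's actual data structures):

    #include <stdint.h>

    /* Illustrative only: link positions [idx, target) into a chained hash table,
     * but stop after the first insertion when lazy skipping is active.
     * hashTable must have at least 4096 entries for this toy hash. */
    static uint32_t toy_hash(const uint8_t* p) {
        return (p[0] * 2654435761u) >> 20;               /* 12-bit toy hash */
    }

    static void insert_range(uint32_t* hashTable, uint32_t* chainTable, uint32_t chainMask,
                             const uint8_t* base, uint32_t idx, uint32_t target,
                             int lazySkipping)
    {
        while (idx < target) {
            uint32_t const h = toy_hash(base + idx);
            chainTable[idx & chainMask] = hashTable[h];  /* previous head becomes the next link */
            hashTable[h] = idx;                          /* idx is the new head of bucket h */
            idx++;
            if (lazySkipping)
                break;    /* skipping mode: only the searched position gets indexed */
        }
    }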
@@ -641,7 +646,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(

  U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
- return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
  }

  /* inlining is important to hardwire a hot branch (template emulation) */
@@ -685,7 +690,7 @@ size_t ZSTD_HcFindBestMatch(
  }

  /* HC4 match finder */
- matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);

  for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
  size_t currentMl=0;
@@ -758,7 +763,6 @@ size_t ZSTD_HcFindBestMatch(
  * (SIMD) Row-based matchfinder
  ***********************************/
  /* Constants for row-based hash */
- #define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
  #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
  #define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */

@@ -774,39 +778,15 @@ MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
  return ZSTD_countTrailingZeros64(val);
  }

- /* ZSTD_rotateRight_*():
- * Rotates a bitfield to the right by "count" bits.
- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
- */
- FORCE_INLINE_TEMPLATE
- U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
- assert(count < 64);
- count &= 0x3F; /* for fickle pattern recognition */
- return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
- }
-
- FORCE_INLINE_TEMPLATE
- U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
- assert(count < 32);
- count &= 0x1F; /* for fickle pattern recognition */
- return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
- }
-
- FORCE_INLINE_TEMPLATE
- U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
- assert(count < 16);
- count &= 0x0F; /* for fickle pattern recognition */
- return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
- }
-
  /* ZSTD_row_nextIndex():
  * Returns the next index to insert at within a tagTable row, and updates the "head"
- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
  */
  FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
- U32 const next = (*tagRow - 1) & rowMask;
- *tagRow = (BYTE)next;
- return next;
+ U32 next = (*tagRow-1) & rowMask;
+ next += (next == 0) ? rowMask : 0; /* skip first position */
+ *tagRow = (BYTE)next;
+ return next;
  }

  /* ZSTD_isAligned():
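Note: the rewritten ZSTD_row_nextIndex() cycles backwards through rowMask, rowMask-1, ..., 1 and never returns 0; combined with the matchPos == 0 skip added further down, this indicates that entry 0 of each (now byte-sized) tag row is reserved, presumably for the row's head counter. A small self-contained demo of the new cycling behaviour, using the same logic with plain C types:

    #include <stdio.h>
    #include <stdint.h>

    /* Same arithmetic as the new ZSTD_row_nextIndex() above. */
    static uint32_t row_next_index(uint8_t* tagRow, uint32_t rowMask)
    {
        uint32_t next = (uint32_t)(*tagRow - 1) & rowMask;
        next += (next == 0) ? rowMask : 0;   /* skip first position */
        *tagRow = (uint8_t)next;
        return next;
    }

    int main(void)
    {
        uint8_t head = 0;                    /* head counter stored in the row itself */
        uint32_t const rowMask = 15;         /* 16 entries per row */
        for (int i = 0; i < 20; i++)
            printf("%u ", row_next_index(&head, rowMask));
        printf("\n");                        /* prints 15 14 ... 2 1 15 14 ... : slot 0 is never used */
        return 0;
    }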
@@ -820,7 +800,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
  /* ZSTD_row_prefetch():
  * Performs prefetching for the hashTable and tagTable at a given row.
  */
- FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
+ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
  PREFETCH_L1(hashTable + relRow);
  if (rowLog >= 5) {
  PREFETCH_L1(hashTable + relRow + 16);
@@ -844,13 +824,13 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
  U32 idx, const BYTE* const iLimit)
  {
  U32 const* const hashTable = ms->hashTable;
- U16 const* const tagTable = ms->tagTable;
+ BYTE const* const tagTable = ms->tagTable;
  U32 const hashLog = ms->rowHashLog;
  U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
  U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);

  for (; idx < lim; ++idx) {
- U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
  U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
  ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
  ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
@@ -866,11 +846,12 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
  * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
  */
  FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
- U16 const* tagTable, BYTE const* base,
+ BYTE const* tagTable, BYTE const* base,
  U32 idx, U32 const hashLog,
- U32 const rowLog, U32 const mls)
+ U32 const rowLog, U32 const mls,
+ U64 const hashSalt)
  {
- U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
  U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
  ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
  { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
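Note: the row matchfinder now derives its hashes from ZSTD_hashPtrSalted() with a per-matchstate ms->hashSalt (and, further down, accumulates hashSaltEntropy). The salt's derivation and the exact hash function are not visible in this diff; conceptually, salting just perturbs the hash so that reusing the same tables doesn't keep producing the same bucket pattern. A purely conceptual sketch, not zstd's implementation:

    #include <stdint.h>
    #include <string.h>

    /* Conceptual illustration only. Assumes 1 <= hBits <= 32 and at least
     * 8 readable bytes at p; zstd's real ZSTD_hashPtrSalted() differs. */
    static uint32_t toy_hash_salted(const void* p, uint32_t hBits, uint64_t salt)
    {
        uint64_t v;
        memcpy(&v, p, sizeof(v));
        v ^= salt;                              /* the salt perturbs the hashed value */
        v *= 0x9E3779B185EBCA87ULL;             /* multiplicative mixing */
        return (uint32_t)(v >> (64 - hBits));   /* keep the top hBits bits */
    }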
@@ -888,22 +869,21 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
  U32 const rowMask, U32 const useCache)
  {
  U32* const hashTable = ms->hashTable;
- U16* const tagTable = ms->tagTable;
+ BYTE* const tagTable = ms->tagTable;
  U32 const hashLog = ms->rowHashLog;
  const BYTE* const base = ms->window.base;

  DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
  for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
- U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
- : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
+ : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
  U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
  U32* const row = hashTable + relRow;
- BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
- Explicit cast allows us to get exact desired position within each row */
+ BYTE* tagRow = tagTable + relRow;
  U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);

- assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
- ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
+ assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
+ tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
  row[pos] = updateStartIdx;
  }
  }
@@ -1059,7 +1039,7 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
  FORCE_INLINE_TEMPLATE ZSTD_VecMask
  ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
  {
- const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
+ const BYTE* const src = tagRow;
  assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
  assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
  assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
@@ -1144,7 +1124,7 @@ size_t ZSTD_RowFindBestMatch(
  const U32 rowLog)
  {
  U32* const hashTable = ms->hashTable;
- U16* const tagTable = ms->tagTable;
+ BYTE* const tagTable = ms->tagTable;
  U32* const hashCache = ms->hashCache;
  const U32 hashLog = ms->rowHashLog;
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -1163,8 +1143,10 @@ size_t ZSTD_RowFindBestMatch(
  const U32 rowMask = rowEntries - 1;
  const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
  const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
+ const U64 hashSalt = ms->hashSalt;
  U32 nbAttempts = 1U << cappedSearchLog;
  size_t ml=4-1;
+ U32 hash;

  /* DMS/DDS variables that may be referenced laster */
  const ZSTD_matchState_t* const dms = ms->dictMatchState;
@@ -1188,7 +1170,7 @@ size_t ZSTD_RowFindBestMatch(
  if (dictMode == ZSTD_dictMatchState) {
  /* Prefetch DMS rows */
  U32* const dmsHashTable = dms->hashTable;
- U16* const dmsTagTable = dms->tagTable;
+ BYTE* const dmsTagTable = dms->tagTable;
  U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
  U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
  dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
@@ -1198,9 +1180,19 @@ size_t ZSTD_RowFindBestMatch(
  }

  /* Update the hashTable and tagTable up to (but not including) ip */
- ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+ if (!ms->lazySkipping) {
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+ hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
+ } else {
+ /* Stop inserting every position when in the lazy skipping mode.
+ * The hash cache is also not kept up to date in this mode.
+ */
+ hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
+ ms->nextToUpdate = curr;
+ }
+ ms->hashSaltEntropy += hash; /* collect salt entropy */
+
  { /* Get the hash for ip, compute the appropriate row */
- U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
  U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
  U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
  U32* const row = hashTable + relRow;
@@ -1212,9 +1204,10 @@ size_t ZSTD_RowFindBestMatch(
  ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);

  /* Cycle through the matches and prefetch */
- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
  U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
  U32 const matchIndex = row[matchPos];
+ if(matchPos == 0) continue;
  assert(numMatches < rowEntries);
  if (matchIndex < lowLimit)
  break;
@@ -1224,13 +1217,14 @@ size_t ZSTD_RowFindBestMatch(
  PREFETCH_L1(dictBase + matchIndex);
  }
  matchBuffer[numMatches++] = matchIndex;
+ --nbAttempts;
  }

  /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
  in ZSTD_row_update_internal() at the next search. */
  {
  U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
- tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
+ tagRow[pos] = (BYTE)tag;
  row[pos] = ms->nextToUpdate++;
  }

@@ -1281,13 +1275,15 @@ size_t ZSTD_RowFindBestMatch(
  size_t currMatch = 0;
  ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);

- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
  U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
  U32 const matchIndex = dmsRow[matchPos];
+ if(matchPos == 0) continue;
  if (matchIndex < dmsLowestIndex)
  break;
  PREFETCH_L1(dmsBase + matchIndex);
  matchBuffer[numMatches++] = matchIndex;
+ --nbAttempts;
  }

  /* Return the longest match */
@@ -1544,10 +1540,11 @@ ZSTD_compressBlock_lazy_generic(
  assert(offset_2 <= dictAndPrefixLength);
  }

+ /* Reset the lazy skipping state */
+ ms->lazySkipping = 0;
+
  if (searchMethod == search_rowHash) {
- ZSTD_row_fillHashCache(ms, base, rowLog,
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
- ms->nextToUpdate, ilimit);
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
  }

  /* Match Loop */
@@ -1591,7 +1588,16 @@ ZSTD_compressBlock_lazy_generic(
  }

  if (matchLength < 4) {
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */;
+ ip += step;
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+ * In this mode we stop inserting every position into our tables, and only insert
+ * positions that we search, which is one in step positions.
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+ * triggered once we've gone 2KB without finding any matches.
+ */
+ ms->lazySkipping = step > kLazySkippingStep;
  continue;
  }
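Note: the "2KB" figure in the comment above follows from the step formula. Assuming kSearchStrength is 8 (its usual value in zstd_lazy.c), step = ((ip - anchor) >> 8) + 1, which first exceeds kLazySkippingStep (8, defined at the top of this diff) once ip - anchor reaches 2048 bytes. A small check of that arithmetic under the same assumption:

    #include <stdio.h>
    #include <stddef.h>

    /* Assumed constants: kSearchStrength == 8 (zstd_lazy.c) and
     * kLazySkippingStep == 8 (added earlier in this diff). */
    #define K_SEARCH_STRENGTH    8
    #define K_LAZY_SKIPPING_STEP 8

    int main(void)
    {
        size_t const distances[] = { 256, 1024, 2047, 2048, 4096 };
        for (size_t i = 0; i < sizeof(distances)/sizeof(distances[0]); i++) {
            size_t const d = distances[i];                      /* ip - anchor */
            size_t const step = (d >> K_SEARCH_STRENGTH) + 1;   /* bytes skipped this iteration */
            printf("distance %4zu -> step %2zu, lazySkipping=%d\n",
                   d, step, step > K_LAZY_SKIPPING_STEP);
        }
        return 0;
    }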
1597
1603
 
@@ -1695,6 +1701,13 @@ _storeSequence:
1695
1701
  ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
1696
1702
  anchor = ip = start + matchLength;
1697
1703
  }
1704
+ if (ms->lazySkipping) {
1705
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
1706
+ if (searchMethod == search_rowHash) {
1707
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1708
+ }
1709
+ ms->lazySkipping = 0;
1710
+ }
1698
1711
 
1699
1712
  /* check immediate repcode */
1700
1713
  if (isDxS) {
@@ -1912,12 +1925,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1912
1925
 
1913
1926
  DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
1914
1927
 
1928
+ /* Reset the lazy skipping state */
1929
+ ms->lazySkipping = 0;
1930
+
1915
1931
  /* init */
1916
1932
  ip += (ip == prefixStart);
1917
1933
  if (searchMethod == search_rowHash) {
1918
- ZSTD_row_fillHashCache(ms, base, rowLog,
1919
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
1920
- ms->nextToUpdate, ilimit);
1934
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1921
1935
  }
1922
1936
 
1923
1937
  /* Match Loop */
@@ -1955,7 +1969,16 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1955
1969
  }
1956
1970
 
1957
1971
  if (matchLength < 4) {
1958
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
1972
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
1973
+ ip += step + 1; /* jump faster over incompressible sections */
1974
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
1975
+ * In this mode we stop inserting every position into our tables, and only insert
1976
+ * positions that we search, which is one in step positions.
1977
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
1978
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
1979
+ * triggered once we've gone 2KB without finding any matches.
1980
+ */
1981
+ ms->lazySkipping = step > kLazySkippingStep;
1959
1982
  continue;
1960
1983
  }
1961
1984
 
@@ -2041,6 +2064,13 @@ _storeSequence:
2041
2064
  ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
2042
2065
  anchor = ip = start + matchLength;
2043
2066
  }
2067
+ if (ms->lazySkipping) {
2068
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
2069
+ if (searchMethod == search_rowHash) {
2070
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
2071
+ }
2072
+ ms->lazySkipping = 0;
2073
+ }
2044
2074
 
2045
2075
  /* check immediate repcode */
2046
2076
  while (ip <= ilimit) {
@@ -1086,6 +1086,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
  ZSTD_optimal_t lastSequence;
  ZSTD_optLdm_t optLdm;

+ ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
+
  optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
  optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
  ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
@@ -20,6 +20,7 @@


  /* ====== Dependencies ====== */
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
  #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
  #include "../common/mem.h" /* MEM_STATIC */
  #include "../common/pool.h" /* threadpool */
@@ -719,7 +720,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
  ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);

  if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
- size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
+ size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
  if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
  DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
  ZSTD_invalidateRepCodes(cctx);
@@ -737,7 +738,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
  DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
  assert(job->cSize == 0);
  for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
- size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
+ size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
  if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
  ip += chunkSize;
  op += cSize; assert(op < oend);
@@ -757,8 +758,8 @@ static void ZSTDMT_compressionJob(void* jobDescription)
  size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
  size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
  size_t const cSize = (job->lastJob) ?
- ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
- ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
+ ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
+ ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
  if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
  lastCBlockSize = cSize;
  } }
@@ -696,7 +696,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*

  /* Copy the arguments to local variables */
  ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
- ZSTD_memcpy(&ip, &args->ip, sizeof(ip));
+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
  ZSTD_memcpy(&op, &args->op, sizeof(op));

  assert(MEM_isLittleEndian());
@@ -779,7 +779,7 @@ _out:

  /* Save the final values of each of the state variables back to args. */
  ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
- ZSTD_memcpy(&args->ip, &ip, sizeof(ip));
+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
  ZSTD_memcpy(&args->op, &op, sizeof(op));
  }

@@ -1476,7 +1476,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*

  /* Copy the arguments to local registers. */
  ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
- ZSTD_memcpy(&ip, &args->ip, sizeof(ip));
+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
  ZSTD_memcpy(&op, &args->op, sizeof(op));

  oend[0] = op[1];
@@ -1599,7 +1599,7 @@ _out:

  /* Save the final values of each of the state variables back to args. */
  ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
- ZSTD_memcpy(&args->ip, &ip, sizeof(ip));
+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
  ZSTD_memcpy(&args->op, &op, sizeof(op));
  }

@@ -14,6 +14,7 @@
  /*-*******************************************************
  * Dependencies
  *********************************************************/
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
  #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
  #include "../common/cpu.h" /* bmi2 */
  #include "../common/mem.h" /* low level memory routines */
@@ -55,6 +55,7 @@
  /*-*******************************************************
  * Dependencies
  *********************************************************/
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
  #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
  #include "../common/mem.h" /* low level memory routines */
  #define FSE_STATIC_LINKING_ONLY
@@ -588,49 +589,52 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
  sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
  RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
  frameParameter_unsupported, "");
- {
- size_t const skippableSize = skippableHeaderSize + sizeU32;
+ { size_t const skippableSize = skippableHeaderSize + sizeU32;
  RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
  return skippableSize;
  }
  }

  /*! ZSTD_readSkippableFrame() :
- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ * Retrieves content of a skippable frame, and writes it to dst buffer.
  *
  * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
  * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
  * in the magicVariant.
  *
- * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
+ * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
  *
  * @return : number of bytes written or a ZSTD error.
  */
- ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
- const void* src, size_t srcSize)
+ size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
+ unsigned* magicVariant, /* optional, can be NULL */
+ const void* src, size_t srcSize)
  {
- U32 const magicNumber = MEM_readLE32(src);
- size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
- size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
-
- /* check input validity */
- RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
- RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
- RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
+ RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");

- /* deliver payload */
- if (skippableContentSize > 0 && dst != NULL)
- ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
- if (magicVariant != NULL)
- *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
- return skippableContentSize;
+ { U32 const magicNumber = MEM_readLE32(src);
+ size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
+ size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
+
+ /* check input validity */
+ RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
+ RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
+ RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
+
+ /* deliver payload */
+ if (skippableContentSize > 0 && dst != NULL)
+ ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
+ if (magicVariant != NULL)
+ *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
+ return skippableContentSize;
+ }
  }

  /** ZSTD_findDecompressedSize() :
- * compatible with legacy mode
  * `srcSize` must be the exact length of some number of ZSTD compressed and/or
  * skippable frames
- * @return : decompressed size of the frames contained */
+ * note: compatible with legacy mode
+ * @return : decompressed size of the frames contained */
  unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
  {
  unsigned long long totalDstSize = 0;
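Note: ZSTD_readSkippableFrame() keeps its prototype; only the internal layout and the doc comment changed. For reference, a minimal usage sketch; the function lives in zstd's experimental section, so it assumes ZSTD_STATIC_LINKING_ONLY and static linking against libzstd:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Extract the payload and magic variant of a skippable frame held in src. */
    size_t read_skippable(const void* src, size_t srcSize)
    {
        unsigned magicVariant = 0;
        void* const payload = malloc(srcSize ? srcSize : 1);   /* always large enough for the content */
        size_t contentSize;
        if (payload == NULL) return 0;
        contentSize = ZSTD_readSkippableFrame(payload, srcSize, &magicVariant, src, srcSize);
        if (ZSTD_isError(contentSize)) {
            fprintf(stderr, "not a valid skippable frame: %s\n", ZSTD_getErrorName(contentSize));
        } else {
            printf("skippable frame: %zu payload bytes, magic variant %u\n",
                   contentSize, magicVariant);
        }
        free(payload);
        return contentSize;
    }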
@@ -640,9 +644,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)

  if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
  size_t const skippableSize = readSkippableFrameSize(src, srcSize);
- if (ZSTD_isError(skippableSize)) {
- return ZSTD_CONTENTSIZE_ERROR;
- }
+ if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
  assert(skippableSize <= srcSize);

  src = (const BYTE *)src + skippableSize;
@@ -650,17 +652,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
  continue;
  }

- { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
- if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
+ { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
+ if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;

- /* check for overflow */
- if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
- totalDstSize += ret;
+ if (totalDstSize + fcs < totalDstSize)
+ return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
+ totalDstSize += fcs;
  }
+ /* skip to next frame */
  { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
- if (ZSTD_isError(frameSrcSize)) {
- return ZSTD_CONTENTSIZE_ERROR;
- }
+ if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
+ assert(frameSrcSize <= srcSize);

  src = (const BYTE *)src + frameSrcSize;
  srcSize -= frameSrcSize;
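Note: ZSTD_findDecompressedSize() walks every frame in the buffer, adds up the declared content sizes, and reports ZSTD_CONTENTSIZE_UNKNOWN or ZSTD_CONTENTSIZE_ERROR when it cannot. It is also part of the experimental API. A brief usage sketch under that assumption:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>

    /* src/srcSize must cover the exact concatenation of zstd and/or skippable frames. */
    void report_total_decompressed_size(const void* src, size_t srcSize)
    {
        unsigned long long const total = ZSTD_findDecompressedSize(src, srcSize);
        if (total == ZSTD_CONTENTSIZE_ERROR)
            fprintf(stderr, "invalid or truncated frame data\n");
        else if (total == ZSTD_CONTENTSIZE_UNKNOWN)
            fprintf(stderr, "at least one frame does not declare its content size\n");
        else
            printf("total decompressed size: %llu bytes\n", total);
    }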
@@ -1090,17 +1092,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
  }
  #endif

- { U32 const magicNumber = MEM_readLE32(src);
- DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
- (unsigned)magicNumber, ZSTD_MAGICNUMBER);
+ if (srcSize >= 4) {
+ U32 const magicNumber = MEM_readLE32(src);
+ DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
  if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+ /* skippable frame detected : skip it */
  size_t const skippableSize = readSkippableFrameSize(src, srcSize);
- FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
+ FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
  assert(skippableSize <= srcSize);

  src = (const BYTE *)src + skippableSize;
  srcSize -= skippableSize;
- continue;
+ continue; /* check next frame */
  } }

  if (ddict) {
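Note: the final hunk guards the 4-byte magic-number read with an `if (srcSize >= 4)` check, so a trailing fragment shorter than a magic number is no longer read past the end of the buffer while looping over concatenated frames. The same defensive pattern in isolation (a portable stand-in for MEM_readLE32 is used here; it is not zstd's helper):

    #include <stdint.h>
    #include <string.h>
    #include <stddef.h>

    /* Stand-in for a little-endian 32-bit read; assumes a little-endian host. */
    static uint32_t read_le32(const void* p)
    {
        uint32_t v;
        memcpy(&v, p, sizeof(v));
        return v;
    }

    /* Only inspect a frame magic when at least 4 bytes remain; returns 0 and
     * leaves *magic untouched when the remaining input is too short. */
    static int try_read_magic(const void* src, size_t srcSize, uint32_t* magic)
    {
        if (srcSize < 4) return 0;
        *magic = read_le32(src);
        return 1;
    }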