zstdlib 0.10.0-x64-mingw-ucrt → 0.11.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +8 -0
  3. data/ext/zstdlib_c/extconf.rb +2 -2
  4. data/ext/zstdlib_c/ruby/zlib-3.2/zstdlib.c +5090 -0
  5. data/ext/zstdlib_c/zstd-1.5.5/lib/common/allocations.h +55 -0
  6. data/ext/zstdlib_c/zstd-1.5.5/lib/common/bits.h +200 -0
  7. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/bitstream.h +19 -60
  8. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/compiler.h +26 -3
  9. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/cpu.h +1 -1
  10. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.c +1 -1
  11. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.h +1 -1
  12. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/entropy_common.c +12 -40
  13. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.c +9 -2
  14. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.h +1 -1
  15. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse.h +5 -83
  16. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse_decompress.c +7 -99
  17. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/huf.h +65 -156
  18. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/mem.h +39 -46
  19. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.c +26 -10
  20. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.h +7 -1
  21. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/portability_macros.h +22 -3
  22. data/ext/zstdlib_c/zstd-1.5.5/lib/common/threading.c +176 -0
  23. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/threading.h +5 -10
  24. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.c +2 -2
  25. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.h +8 -8
  26. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_common.c +1 -36
  27. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_deps.h +1 -1
  28. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_internal.h +17 -118
  29. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_trace.h +3 -3
  30. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/clevels.h +1 -1
  31. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/fse_compress.c +7 -124
  32. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.c +1 -1
  33. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.h +1 -1
  34. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/huf_compress.c +234 -169
  35. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress.c +1243 -538
  36. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_internal.h +225 -151
  37. data/ext/zstdlib_c/zstd-1.5.5/lib/compress/zstd_compress_literals.c +235 -0
  38. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_literals.h +16 -8
  39. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.c +3 -3
  40. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.h +1 -1
  41. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.c +25 -21
  42. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.h +1 -1
  43. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_cwksp.h +128 -62
  44. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.c +95 -33
  45. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.h +3 -2
  46. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.c +433 -148
  47. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.h +3 -2
  48. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.c +398 -345
  49. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.h +4 -2
  50. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.c +5 -5
  51. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.h +1 -1
  52. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm_geartab.h +1 -1
  53. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.c +106 -80
  54. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.h +1 -1
  55. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.c +17 -9
  56. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.h +1 -1
  57. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress.c +434 -441
  58. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress_amd64.S +30 -39
  59. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.c +4 -4
  60. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.h +1 -1
  61. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress.c +205 -80
  62. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.c +201 -81
  63. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.h +6 -1
  64. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_internal.h +4 -2
  65. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zdict.h +53 -31
  66. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd.h +580 -135
  67. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd_errors.h +27 -8
  68. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzclose.c +1 -1
  69. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzcompatibility.h +8 -8
  70. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzguts.h +10 -10
  71. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzlib.c +3 -3
  72. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzread.c +10 -10
  73. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzwrite.c +5 -5
  74. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.c +46 -44
  75. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.h +4 -1
  76. data/lib/3.1/zstdlib_c.so +0 -0
  77. data/lib/3.2/zstdlib_c.so +0 -0
  78. metadata +82 -78
  79. data/ext/zstdlib_c/zstd-1.5.2/lib/common/threading.c +0 -122
  80. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_literals.c +0 -159
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -20,12 +20,12 @@
20
20
  #include "../common/mem.h" /* low level memory routines */
21
21
  #define FSE_STATIC_LINKING_ONLY
22
22
  #include "../common/fse.h"
23
- #define HUF_STATIC_LINKING_ONLY
24
23
  #include "../common/huf.h"
25
24
  #include "../common/zstd_internal.h"
26
25
  #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27
26
  #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28
27
  #include "zstd_decompress_block.h"
28
+ #include "../common/bits.h" /* ZSTD_highbit32 */
29
29
 
30
30
  /*_*******************************************************
31
31
  * Macros
@@ -89,7 +89,7 @@ static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const
89
89
  dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
90
90
  }
91
91
  else {
92
- /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
92
+ /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
93
93
  dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
94
94
  dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
95
95
  }
@@ -134,13 +134,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
134
134
  ZSTD_FALLTHROUGH;
135
135
 
136
136
  case set_compressed:
137
- RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
137
+ RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
138
138
  { size_t lhSize, litSize, litCSize;
139
139
  U32 singleStream=0;
140
140
  U32 const lhlCode = (istart[0] >> 2) & 3;
141
141
  U32 const lhc = MEM_readLE32(istart);
142
142
  size_t hufSuccess;
143
143
  size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
144
+ int const flags = 0
145
+ | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
146
+ | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
144
147
  switch(lhlCode)
145
148
  {
146
149
  case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
@@ -165,6 +168,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
165
168
  }
166
169
  RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
167
170
  RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
171
+ if (!singleStream)
172
+ RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
173
+ "Not enough literals (%zu) for the 4-streams mode (min %u)",
174
+ litSize, MIN_LITERALS_FOR_4_STREAMS);
168
175
  RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
169
176
  RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
170
177
  ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
@@ -176,13 +183,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
176
183
 
177
184
  if (litEncType==set_repeat) {
178
185
  if (singleStream) {
179
- hufSuccess = HUF_decompress1X_usingDTable_bmi2(
186
+ hufSuccess = HUF_decompress1X_usingDTable(
180
187
  dctx->litBuffer, litSize, istart+lhSize, litCSize,
181
- dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
188
+ dctx->HUFptr, flags);
182
189
  } else {
183
- hufSuccess = HUF_decompress4X_usingDTable_bmi2(
190
+ assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
191
+ hufSuccess = HUF_decompress4X_usingDTable(
184
192
  dctx->litBuffer, litSize, istart+lhSize, litCSize,
185
- dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
193
+ dctx->HUFptr, flags);
186
194
  }
187
195
  } else {
188
196
  if (singleStream) {
@@ -190,18 +198,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
190
198
  hufSuccess = HUF_decompress1X_DCtx_wksp(
191
199
  dctx->entropy.hufTable, dctx->litBuffer, litSize,
192
200
  istart+lhSize, litCSize, dctx->workspace,
193
- sizeof(dctx->workspace));
201
+ sizeof(dctx->workspace), flags);
194
202
  #else
195
- hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
203
+ hufSuccess = HUF_decompress1X1_DCtx_wksp(
196
204
  dctx->entropy.hufTable, dctx->litBuffer, litSize,
197
205
  istart+lhSize, litCSize, dctx->workspace,
198
- sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
206
+ sizeof(dctx->workspace), flags);
199
207
  #endif
200
208
  } else {
201
- hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
209
+ hufSuccess = HUF_decompress4X_hufOnly_wksp(
202
210
  dctx->entropy.hufTable, dctx->litBuffer, litSize,
203
211
  istart+lhSize, litCSize, dctx->workspace,
204
- sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
212
+ sizeof(dctx->workspace), flags);
205
213
  }
206
214
  }
207
215
  if (dctx->litBufferLocation == ZSTD_split)
@@ -237,6 +245,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
237
245
  break;
238
246
  case 3:
239
247
  lhSize = 3;
248
+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
240
249
  litSize = MEM_readLE24(istart) >> 4;
241
250
  break;
242
251
  }
@@ -279,12 +288,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
279
288
  break;
280
289
  case 1:
281
290
  lhSize = 2;
291
+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
282
292
  litSize = MEM_readLE16(istart) >> 4;
283
293
  break;
284
294
  case 3:
285
295
  lhSize = 3;
296
+ RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
286
297
  litSize = MEM_readLE24(istart) >> 4;
287
- RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
288
298
  break;
289
299
  }
290
300
  RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
@@ -506,14 +516,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
506
516
  for (i = 8; i < n; i += 8) {
507
517
  MEM_write64(spread + pos + i, sv);
508
518
  }
509
- pos += n;
519
+ assert(n>=0);
520
+ pos += (size_t)n;
510
521
  }
511
522
  }
512
523
  /* Now we spread those positions across the table.
513
- * The benefit of doing it in two stages is that we avoid the the
524
+ * The benefit of doing it in two stages is that we avoid the
514
525
  * variable size inner loop, which caused lots of branch misses.
515
526
  * Now we can run through all the positions without any branch misses.
516
- * We unroll the loop twice, since that is what emperically worked best.
527
+ * We unroll the loop twice, since that is what empirically worked best.
517
528
  */
518
529
  {
519
530
  size_t position = 0;
@@ -540,7 +551,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
540
551
  for (i=0; i<n; i++) {
541
552
  tableDecode[position].baseValue = s;
542
553
  position = (position + step) & tableMask;
543
- while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
554
+ while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
544
555
  } }
545
556
  assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
546
557
  }
@@ -551,7 +562,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
551
562
  for (u=0; u<tableSize; u++) {
552
563
  U32 const symbol = tableDecode[u].baseValue;
553
564
  U32 const nextState = symbolNext[symbol]++;
554
- tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
565
+ tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
555
566
  tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
556
567
  assert(nbAdditionalBits[symbol] < 255);
557
568
  tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
@@ -964,6 +975,11 @@ size_t ZSTD_execSequence(BYTE* op,
964
975
 
965
976
  assert(op != NULL /* Precondition */);
966
977
  assert(oend_w < oend /* No underflow */);
978
+
979
+ #if defined(__aarch64__)
980
+ /* prefetch sequence starting from match that will be used for copy later */
981
+ PREFETCH_L1(match);
982
+ #endif
967
983
  /* Handle edge cases in a slow path:
968
984
  * - Read beyond end of literals
969
985
  * - Match end is within WILDCOPY_OVERLIMIT of oend
@@ -1154,7 +1170,7 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
1154
1170
  }
1155
1171
 
1156
1172
  /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
1157
- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
1173
+ * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
1158
1174
  * bits before reloading. This value is the maximum number of bytes we read
1159
1175
  * after reloading when we are decoding long offsets.
1160
1176
  */
@@ -1169,9 +1185,27 @@ FORCE_INLINE_TEMPLATE seq_t
1169
1185
  ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1170
1186
  {
1171
1187
  seq_t seq;
1188
+ /*
1189
+ * ZSTD_seqSymbol is a structure that is 64 bits wide in total. So it can be
1190
+ * loaded in one operation and its fields extracted by simply shifting or
1191
+ * bit-extracting on aarch64.
1192
+ * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
1193
+ * operations that cause performance drop. This can be avoided by using this
1194
+ * ZSTD_memcpy hack.
1195
+ */
1196
+ #if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
1197
+ ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
1198
+ ZSTD_seqSymbol* const llDInfo = &llDInfoS;
1199
+ ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
1200
+ ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
1201
+ ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
1202
+ ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
1203
+ ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
1204
+ #else
1172
1205
  const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1173
1206
  const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1174
1207
  const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1208
+ #endif
1175
1209
  seq.matchLength = mlDInfo->baseValue;
1176
1210
  seq.litLength = llDInfo->baseValue;
1177
1211
  { U32 const ofBase = ofDInfo->baseValue;
@@ -1186,28 +1220,31 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1186
1220
  U32 const llnbBits = llDInfo->nbBits;
1187
1221
  U32 const mlnbBits = mlDInfo->nbBits;
1188
1222
  U32 const ofnbBits = ofDInfo->nbBits;
1223
+
1224
+ assert(llBits <= MaxLLBits);
1225
+ assert(mlBits <= MaxMLBits);
1226
+ assert(ofBits <= MaxOff);
1189
1227
  /*
1190
1228
  * As gcc has better branch and block analyzers, sometimes it is only
1191
- * valuable to mark likelyness for clang, it gives around 3-4% of
1229
+ * valuable to mark likeliness for clang, it gives around 3-4% of
1192
1230
  * performance.
1193
1231
  */
1194
1232
 
1195
1233
  /* sequence */
1196
1234
  { size_t offset;
1197
- #if defined(__clang__)
1198
- if (LIKELY(ofBits > 1)) {
1199
- #else
1200
1235
  if (ofBits > 1) {
1201
- #endif
1202
1236
  ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1203
1237
  ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1204
- assert(ofBits <= MaxOff);
1238
+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
1239
+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
1205
1240
  if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1206
- U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
1241
+ /* Always read extra bits, this keeps the logic simple,
1242
+ * avoids branches, and avoids accidentally reading 0 bits.
1243
+ */
1244
+ U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
1207
1245
  offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1208
1246
  BIT_reloadDStream(&seqState->DStream);
1209
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1210
- assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
1247
+ offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1211
1248
  } else {
1212
1249
  offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1213
1250
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
@@ -1232,11 +1269,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1232
1269
  seq.offset = offset;
1233
1270
  }
1234
1271
 
1235
- #if defined(__clang__)
1236
- if (UNLIKELY(mlBits > 0))
1237
- #else
1238
1272
  if (mlBits > 0)
1239
- #endif
1240
1273
  seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
1241
1274
 
1242
1275
  if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
@@ -1246,11 +1279,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1246
1279
  /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1247
1280
  ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1248
1281
 
1249
- #if defined(__clang__)
1250
- if (UNLIKELY(llBits > 0))
1251
- #else
1252
1282
  if (llBits > 0)
1253
- #endif
1254
1283
  seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
1255
1284
 
1256
1285
  if (MEM_32bits())
@@ -1552,7 +1581,7 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1552
1581
  const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1553
1582
  const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1554
1583
  const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1555
- DEBUGLOG(5, "ZSTD_decompressSequences_body");
1584
+ DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
1556
1585
  (void)frame;
1557
1586
 
1558
1587
  /* Regen sequences */
@@ -1945,34 +1974,79 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1945
1974
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1946
1975
 
1947
1976
 
1977
+ /**
1978
+ * @returns The total size of the history referenceable by zstd, including
1979
+ * both the prefix and the extDict. At @p op any offset larger than this
1980
+ * is invalid.
1981
+ */
1982
+ static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
1983
+ {
1984
+ return (size_t)(op - virtualStart);
1985
+ }
1948
1986
 
1949
- #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1950
- !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1951
- /* ZSTD_getLongOffsetsShare() :
1987
+ typedef struct {
1988
+ unsigned longOffsetShare;
1989
+ unsigned maxNbAdditionalBits;
1990
+ } ZSTD_OffsetInfo;
1991
+
1992
+ /* ZSTD_getOffsetInfo() :
1952
1993
  * condition : offTable must be valid
1953
1994
  * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1954
- * compared to maximum possible of (1<<OffFSELog) */
1955
- static unsigned
1956
- ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1995
+ * compared to maximum possible of (1<<OffFSELog),
1996
+ * as well as the maximum number of additional bits required.
1997
+ */
1998
+ static ZSTD_OffsetInfo
1999
+ ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
1957
2000
  {
1958
- const void* ptr = offTable;
1959
- U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1960
- const ZSTD_seqSymbol* table = offTable + 1;
1961
- U32 const max = 1 << tableLog;
1962
- U32 u, total = 0;
1963
- DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1964
-
1965
- assert(max <= (1 << OffFSELog)); /* max not too large */
1966
- for (u=0; u<max; u++) {
1967
- if (table[u].nbAdditionalBits > 22) total += 1;
2001
+ ZSTD_OffsetInfo info = {0, 0};
2002
+ /* If nbSeq == 0, then the offTable is uninitialized, but we have
2003
+ * no sequences, so both values should be 0.
2004
+ */
2005
+ if (nbSeq != 0) {
2006
+ const void* ptr = offTable;
2007
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
2008
+ const ZSTD_seqSymbol* table = offTable + 1;
2009
+ U32 const max = 1 << tableLog;
2010
+ U32 u;
2011
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
2012
+
2013
+ assert(max <= (1 << OffFSELog)); /* max not too large */
2014
+ for (u=0; u<max; u++) {
2015
+ info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
2016
+ if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
2017
+ }
2018
+
2019
+ assert(tableLog <= OffFSELog);
2020
+ info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
1968
2021
  }
1969
2022
 
1970
- assert(tableLog <= OffFSELog);
1971
- total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
2023
+ return info;
2024
+ }
1972
2025
 
1973
- return total;
2026
+ /**
2027
+ * @returns The maximum offset we can decode in one read of our bitstream, without
2028
+ * reloading more bits in the middle of the offset bits read. Any offsets larger
2029
+ * than this must use the long offset decoder.
2030
+ */
2031
+ static size_t ZSTD_maxShortOffset(void)
2032
+ {
2033
+ if (MEM_64bits()) {
2034
+ /* We can decode any offset without reloading bits.
2035
+ * This might change if the max window size grows.
2036
+ */
2037
+ ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2038
+ return (size_t)-1;
2039
+ } else {
2040
+ /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
2041
+ * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
2042
+ * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
2043
+ */
2044
+ size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
2045
+ size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
2046
+ assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
2047
+ return maxOffset;
2048
+ }
1974
2049
  }
1975
- #endif
1976
2050
 
1977
2051
  size_t
1978
2052
  ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
@@ -1980,20 +2054,21 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1980
2054
  const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
1981
2055
  { /* blockType == blockCompressed */
1982
2056
  const BYTE* ip = (const BYTE*)src;
1983
- /* isLongOffset must be true if there are long offsets.
1984
- * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1985
- * We don't expect that to be the case in 64-bit mode.
1986
- * In block mode, window size is not known, so we have to be conservative.
1987
- * (note: but it could be evaluated from current-lowLimit)
1988
- */
1989
- ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1990
2057
  DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1991
2058
 
1992
- RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
2059
+ /* Note : the wording of the specification
2060
+ * allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
2061
+ * This generally does not happen, as it makes little sense,
2062
+ * since an uncompressed block would feature the same size and have no decompression cost.
2063
+ * Also, note that the decoder from reference libzstd before v1.5.4
2064
+ * would consider this edge case as an error.
2065
+ * As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
2066
+ * for broader compatibility with the deployed ecosystem of zstd decoders */
2067
+ RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
1993
2068
 
1994
2069
  /* Decode literals section */
1995
2070
  { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
1996
- DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
2071
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
1997
2072
  if (ZSTD_isError(litCSize)) return litCSize;
1998
2073
  ip += litCSize;
1999
2074
  srcSize -= litCSize;
@@ -2001,6 +2076,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2001
2076
 
2002
2077
  /* Build Decoding Tables */
2003
2078
  {
2079
+ /* Compute the maximum block size, which must also work when !frame and fParams are unset.
2080
+ * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
2081
+ */
2082
+ size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
2083
+ size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
2084
+ /* isLongOffset must be true if there are long offsets.
2085
+ * Offsets are long if they are larger than ZSTD_maxShortOffset().
2086
+ * We don't expect that to be the case in 64-bit mode.
2087
+ *
2088
+ * We check here to see if our history is large enough to allow long offsets.
2089
+ * If it isn't, then we can't possibly have (valid) long offsets. If the offset
2090
+ * is invalid, then it is okay to read it incorrectly.
2091
+ *
2092
+ * If isLongOffset is true, then we will later check our decoding table to see
2093
+ * if it is even possible to generate long offsets.
2094
+ */
2095
+ ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
2004
2096
  /* These macros control at build-time which decompressor implementation
2005
2097
  * we use. If neither is defined, we do some inspection and dispatch at
2006
2098
  * runtime.
@@ -2008,6 +2100,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2008
2100
  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2009
2101
  !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2010
2102
  int usePrefetchDecoder = dctx->ddictIsCold;
2103
+ #else
2104
+ /* Set to 1 to avoid computing offset info if we don't need to.
2105
+ * Otherwise this value is ignored.
2106
+ */
2107
+ int usePrefetchDecoder = 1;
2011
2108
  #endif
2012
2109
  int nbSeq;
2013
2110
  size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
@@ -2015,28 +2112,42 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2015
2112
  ip += seqHSize;
2016
2113
  srcSize -= seqHSize;
2017
2114
 
2018
- RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2115
+ RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2116
+ RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
2117
+ "invalid dst");
2019
2118
 
2020
- #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2021
- !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2022
- if ( !usePrefetchDecoder
2023
- && (!frame || (dctx->fParams.windowSize > (1<<24)))
2024
- && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
2025
- U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
2026
- U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2027
- usePrefetchDecoder = (shareLongOffsets >= minShare);
2119
+ /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
2120
+ * compute information about the share of long offsets, and the maximum nbAdditionalBits.
2121
+ * NOTE: could probably use a larger nbSeq limit
2122
+ */
2123
+ if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
2124
+ ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
2125
+ if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
2126
+ /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
2127
+ * enough, then we know it is impossible to have too long an offset in this block, so we can
2128
+ * use the regular offset decoder.
2129
+ */
2130
+ isLongOffset = ZSTD_lo_isRegularOffset;
2131
+ }
2132
+ if (!usePrefetchDecoder) {
2133
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2134
+ usePrefetchDecoder = (info.longOffsetShare >= minShare);
2135
+ }
2028
2136
  }
2029
- #endif
2030
2137
 
2031
2138
  dctx->ddictIsCold = 0;
2032
2139
 
2033
2140
  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2034
2141
  !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2035
- if (usePrefetchDecoder)
2142
+ if (usePrefetchDecoder) {
2143
+ #else
2144
+ (void)usePrefetchDecoder;
2145
+ {
2036
2146
  #endif
2037
2147
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
2038
2148
  return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2039
2149
  #endif
2150
+ }
2040
2151
 
2041
2152
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2042
2153
  /* else */
@@ -2060,9 +2171,9 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2060
2171
  }
2061
2172
 
2062
2173
 
2063
- size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2064
- void* dst, size_t dstCapacity,
2065
- const void* src, size_t srcSize)
2174
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
2175
+ void* dst, size_t dstCapacity,
2176
+ const void* src, size_t srcSize)
2066
2177
  {
2067
2178
  size_t dSize;
2068
2179
  ZSTD_checkContinuity(dctx, dst, dstCapacity);
@@ -2070,3 +2181,12 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2070
2181
  dctx->previousDstEnd = (char*)dst + dSize;
2071
2182
  return dSize;
2072
2183
  }
2184
+
2185
+
2186
+ /* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
2187
+ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2188
+ void* dst, size_t dstCapacity,
2189
+ const void* src, size_t srcSize)
2190
+ {
2191
+ return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
2192
+ }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -64,5 +64,10 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
64
64
  unsigned tableLog, void* wksp, size_t wkspSize,
65
65
  int bmi2);
66
66
 
67
+ /* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
68
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
69
+ void* dst, size_t dstCapacity,
70
+ const void* src, size_t srcSize);
71
+
67
72
 
68
73
  #endif /* ZSTD_DEC_BLOCK_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -75,12 +75,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
75
75
 
76
76
  #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
77
77
  #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
78
+ #define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
78
79
 
79
80
  typedef struct {
80
81
  ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
81
82
  ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
82
83
  ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
83
- HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
84
+ HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
84
85
  U32 rep[ZSTD_REP_NUM];
85
86
  U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
86
87
  } ZSTD_entropyDTables_t;
@@ -164,6 +165,7 @@ struct ZSTD_DCtx_s
164
165
  ZSTD_dictUses_e dictUses;
165
166
  ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
166
167
  ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
168
+ int disableHufAsm;
167
169
 
168
170
  /* streaming */
169
171
  ZSTD_dStreamStage streamStage;