zstd-ruby 1.5.2.3 → 1.5.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +13 -5
  3. data/ext/zstdruby/extconf.rb +1 -1
  4. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  5. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  6. data/ext/zstdruby/libzstd/common/bitstream.h +19 -60
  7. data/ext/zstdruby/libzstd/common/compiler.h +26 -3
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  9. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  10. data/ext/zstdruby/libzstd/common/debug.h +1 -1
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +12 -40
  12. data/ext/zstdruby/libzstd/common/error_private.c +9 -2
  13. data/ext/zstdruby/libzstd/common/error_private.h +1 -1
  14. data/ext/zstdruby/libzstd/common/fse.h +5 -83
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +7 -99
  16. data/ext/zstdruby/libzstd/common/huf.h +65 -156
  17. data/ext/zstdruby/libzstd/common/mem.h +39 -46
  18. data/ext/zstdruby/libzstd/common/pool.c +26 -10
  19. data/ext/zstdruby/libzstd/common/pool.h +7 -1
  20. data/ext/zstdruby/libzstd/common/portability_macros.h +22 -3
  21. data/ext/zstdruby/libzstd/common/threading.c +68 -14
  22. data/ext/zstdruby/libzstd/common/threading.h +5 -10
  23. data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
  24. data/ext/zstdruby/libzstd/common/xxhash.h +8 -8
  25. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -36
  26. data/ext/zstdruby/libzstd/common/zstd_deps.h +1 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +17 -118
  28. data/ext/zstdruby/libzstd/common/zstd_trace.h +3 -3
  29. data/ext/zstdruby/libzstd/compress/clevels.h +1 -1
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +7 -124
  31. data/ext/zstdruby/libzstd/compress/hist.c +1 -1
  32. data/ext/zstdruby/libzstd/compress/hist.h +1 -1
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +234 -169
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1243 -538
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +225 -151
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +115 -39
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -8
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +128 -62
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +95 -33
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +433 -148
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +398 -345
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +4 -2
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +5 -5
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +1 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +106 -80
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +17 -9
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +1 -1
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +434 -441
  57. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +30 -39
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +4 -4
  59. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +1 -1
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +205 -80
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +201 -81
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -1
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +4 -2
  64. data/ext/zstdruby/libzstd/dictBuilder/cover.c +19 -15
  65. data/ext/zstdruby/libzstd/dictBuilder/cover.h +1 -1
  66. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
  67. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +11 -89
  68. data/ext/zstdruby/libzstd/zdict.h +53 -31
  69. data/ext/zstdruby/libzstd/zstd.h +580 -135
  70. data/ext/zstdruby/libzstd/zstd_errors.h +27 -8
  71. data/ext/zstdruby/main.c +6 -0
  72. data/ext/zstdruby/skippable_frame.c +63 -0
  73. data/lib/zstd-ruby/version.rb +1 -1
  74. metadata +9 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -20,12 +20,12 @@
20
20
  #include "../common/mem.h" /* low level memory routines */
21
21
  #define FSE_STATIC_LINKING_ONLY
22
22
  #include "../common/fse.h"
23
- #define HUF_STATIC_LINKING_ONLY
24
23
  #include "../common/huf.h"
25
24
  #include "../common/zstd_internal.h"
26
25
  #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27
26
  #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28
27
  #include "zstd_decompress_block.h"
28
+ #include "../common/bits.h" /* ZSTD_highbit32 */
29
29
 
30
30
  /*_*******************************************************
31
31
  * Macros
@@ -89,7 +89,7 @@ static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const
89
89
  dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
90
90
  }
91
91
  else {
92
- /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
92
+ /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
93
93
  dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
94
94
  dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
95
95
  }
@@ -134,13 +134,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
134
134
  ZSTD_FALLTHROUGH;
135
135
 
136
136
  case set_compressed:
137
- RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
137
+ RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
138
138
  { size_t lhSize, litSize, litCSize;
139
139
  U32 singleStream=0;
140
140
  U32 const lhlCode = (istart[0] >> 2) & 3;
141
141
  U32 const lhc = MEM_readLE32(istart);
142
142
  size_t hufSuccess;
143
143
  size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
144
+ int const flags = 0
145
+ | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
146
+ | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
144
147
  switch(lhlCode)
145
148
  {
146
149
  case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
@@ -165,6 +168,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
165
168
  }
166
169
  RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
167
170
  RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
171
+ if (!singleStream)
172
+ RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
173
+ "Not enough literals (%zu) for the 4-streams mode (min %u)",
174
+ litSize, MIN_LITERALS_FOR_4_STREAMS);
168
175
  RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
169
176
  RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
170
177
  ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
@@ -176,13 +183,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
176
183
 
177
184
  if (litEncType==set_repeat) {
178
185
  if (singleStream) {
179
- hufSuccess = HUF_decompress1X_usingDTable_bmi2(
186
+ hufSuccess = HUF_decompress1X_usingDTable(
180
187
  dctx->litBuffer, litSize, istart+lhSize, litCSize,
181
- dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
188
+ dctx->HUFptr, flags);
182
189
  } else {
183
- hufSuccess = HUF_decompress4X_usingDTable_bmi2(
190
+ assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
191
+ hufSuccess = HUF_decompress4X_usingDTable(
184
192
  dctx->litBuffer, litSize, istart+lhSize, litCSize,
185
- dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
193
+ dctx->HUFptr, flags);
186
194
  }
187
195
  } else {
188
196
  if (singleStream) {
@@ -190,18 +198,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
190
198
  hufSuccess = HUF_decompress1X_DCtx_wksp(
191
199
  dctx->entropy.hufTable, dctx->litBuffer, litSize,
192
200
  istart+lhSize, litCSize, dctx->workspace,
193
- sizeof(dctx->workspace));
201
+ sizeof(dctx->workspace), flags);
194
202
  #else
195
- hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
203
+ hufSuccess = HUF_decompress1X1_DCtx_wksp(
196
204
  dctx->entropy.hufTable, dctx->litBuffer, litSize,
197
205
  istart+lhSize, litCSize, dctx->workspace,
198
- sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
206
+ sizeof(dctx->workspace), flags);
199
207
  #endif
200
208
  } else {
201
- hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
209
+ hufSuccess = HUF_decompress4X_hufOnly_wksp(
202
210
  dctx->entropy.hufTable, dctx->litBuffer, litSize,
203
211
  istart+lhSize, litCSize, dctx->workspace,
204
- sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
212
+ sizeof(dctx->workspace), flags);
205
213
  }
206
214
  }
207
215
  if (dctx->litBufferLocation == ZSTD_split)
@@ -237,6 +245,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
237
245
  break;
238
246
  case 3:
239
247
  lhSize = 3;
248
+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
240
249
  litSize = MEM_readLE24(istart) >> 4;
241
250
  break;
242
251
  }
@@ -279,12 +288,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
279
288
  break;
280
289
  case 1:
281
290
  lhSize = 2;
291
+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
282
292
  litSize = MEM_readLE16(istart) >> 4;
283
293
  break;
284
294
  case 3:
285
295
  lhSize = 3;
296
+ RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
286
297
  litSize = MEM_readLE24(istart) >> 4;
287
- RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
288
298
  break;
289
299
  }
290
300
  RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
@@ -506,14 +516,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
506
516
  for (i = 8; i < n; i += 8) {
507
517
  MEM_write64(spread + pos + i, sv);
508
518
  }
509
- pos += n;
519
+ assert(n>=0);
520
+ pos += (size_t)n;
510
521
  }
511
522
  }
512
523
  /* Now we spread those positions across the table.
513
- * The benefit of doing it in two stages is that we avoid the the
524
+ * The benefit of doing it in two stages is that we avoid the
514
525
  * variable size inner loop, which caused lots of branch misses.
515
526
  * Now we can run through all the positions without any branch misses.
516
- * We unroll the loop twice, since that is what emperically worked best.
527
+ * We unroll the loop twice, since that is what empirically worked best.
517
528
  */
518
529
  {
519
530
  size_t position = 0;
@@ -540,7 +551,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
540
551
  for (i=0; i<n; i++) {
541
552
  tableDecode[position].baseValue = s;
542
553
  position = (position + step) & tableMask;
543
- while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
554
+ while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
544
555
  } }
545
556
  assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
546
557
  }
@@ -551,7 +562,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
551
562
  for (u=0; u<tableSize; u++) {
552
563
  U32 const symbol = tableDecode[u].baseValue;
553
564
  U32 const nextState = symbolNext[symbol]++;
554
- tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
565
+ tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
555
566
  tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
556
567
  assert(nbAdditionalBits[symbol] < 255);
557
568
  tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
@@ -964,6 +975,11 @@ size_t ZSTD_execSequence(BYTE* op,
964
975
 
965
976
  assert(op != NULL /* Precondition */);
966
977
  assert(oend_w < oend /* No underflow */);
978
+
979
+ #if defined(__aarch64__)
980
+ /* prefetch sequence starting from match that will be used for copy later */
981
+ PREFETCH_L1(match);
982
+ #endif
967
983
  /* Handle edge cases in a slow path:
968
984
  * - Read beyond end of literals
969
985
  * - Match end is within WILDCOPY_OVERLIMIT of oend
@@ -1154,7 +1170,7 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
1154
1170
  }
1155
1171
 
1156
1172
  /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
1157
- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
1173
+ * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
1158
1174
  * bits before reloading. This value is the maximum number of bytes we read
1159
1175
  * after reloading when we are decoding long offsets.
1160
1176
  */
@@ -1169,9 +1185,27 @@ FORCE_INLINE_TEMPLATE seq_t
1169
1185
  ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1170
1186
  {
1171
1187
  seq_t seq;
1188
+ /*
1189
+ * ZSTD_seqSymbol is a structure that is 64 bits wide in total, so it can be
1190
+ * loaded in one operation and its fields extracted by simply shifting or
1191
+ * bit-extracting on aarch64.
1192
+ * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
1193
+ * operations that cause performance drop. This can be avoided by using this
1194
+ * ZSTD_memcpy hack.
1195
+ */
1196
+ #if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
1197
+ ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
1198
+ ZSTD_seqSymbol* const llDInfo = &llDInfoS;
1199
+ ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
1200
+ ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
1201
+ ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
1202
+ ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
1203
+ ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
1204
+ #else
1172
1205
  const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1173
1206
  const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1174
1207
  const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1208
+ #endif
1175
1209
  seq.matchLength = mlDInfo->baseValue;
1176
1210
  seq.litLength = llDInfo->baseValue;
1177
1211
  { U32 const ofBase = ofDInfo->baseValue;
@@ -1186,28 +1220,31 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1186
1220
  U32 const llnbBits = llDInfo->nbBits;
1187
1221
  U32 const mlnbBits = mlDInfo->nbBits;
1188
1222
  U32 const ofnbBits = ofDInfo->nbBits;
1223
+
1224
+ assert(llBits <= MaxLLBits);
1225
+ assert(mlBits <= MaxMLBits);
1226
+ assert(ofBits <= MaxOff);
1189
1227
  /*
1190
1228
  * As gcc has better branch and block analyzers, sometimes it is only
1191
- * valuable to mark likelyness for clang, it gives around 3-4% of
1229
+ * valuable to mark likeliness for clang, it gives around 3-4% of
1192
1230
  * performance.
1193
1231
  */
1194
1232
 
1195
1233
  /* sequence */
1196
1234
  { size_t offset;
1197
- #if defined(__clang__)
1198
- if (LIKELY(ofBits > 1)) {
1199
- #else
1200
1235
  if (ofBits > 1) {
1201
- #endif
1202
1236
  ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1203
1237
  ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1204
- assert(ofBits <= MaxOff);
1238
+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
1239
+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
1205
1240
  if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1206
- U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
1241
+ /* Always read extra bits, this keeps the logic simple,
1242
+ * avoids branches, and avoids accidentally reading 0 bits.
1243
+ */
1244
+ U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
1207
1245
  offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1208
1246
  BIT_reloadDStream(&seqState->DStream);
1209
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1210
- assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
1247
+ offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1211
1248
  } else {
1212
1249
  offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1213
1250
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
@@ -1232,11 +1269,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1232
1269
  seq.offset = offset;
1233
1270
  }
1234
1271
 
1235
- #if defined(__clang__)
1236
- if (UNLIKELY(mlBits > 0))
1237
- #else
1238
1272
  if (mlBits > 0)
1239
- #endif
1240
1273
  seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
1241
1274
 
1242
1275
  if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
@@ -1246,11 +1279,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1246
1279
  /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1247
1280
  ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1248
1281
 
1249
- #if defined(__clang__)
1250
- if (UNLIKELY(llBits > 0))
1251
- #else
1252
1282
  if (llBits > 0)
1253
- #endif
1254
1283
  seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
1255
1284
 
1256
1285
  if (MEM_32bits())
@@ -1552,7 +1581,7 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1552
1581
  const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1553
1582
  const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1554
1583
  const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1555
- DEBUGLOG(5, "ZSTD_decompressSequences_body");
1584
+ DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
1556
1585
  (void)frame;
1557
1586
 
1558
1587
  /* Regen sequences */
@@ -1945,34 +1974,79 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1945
1974
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1946
1975
 
1947
1976
 
1977
+ /**
1978
+ * @returns The total size of the history referenceable by zstd, including
1979
+ * both the prefix and the extDict. At @p op any offset larger than this
1980
+ * is invalid.
1981
+ */
1982
+ static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
1983
+ {
1984
+ return (size_t)(op - virtualStart);
1985
+ }
1948
1986
 
1949
- #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1950
- !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1951
- /* ZSTD_getLongOffsetsShare() :
1987
+ typedef struct {
1988
+ unsigned longOffsetShare;
1989
+ unsigned maxNbAdditionalBits;
1990
+ } ZSTD_OffsetInfo;
1991
+
1992
+ /* ZSTD_getOffsetInfo() :
1952
1993
  * condition : offTable must be valid
1953
1994
  * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1954
- * compared to maximum possible of (1<<OffFSELog) */
1955
- static unsigned
1956
- ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1995
+ * compared to maximum possible of (1<<OffFSELog),
1996
+ * as well as the maximum number of additional bits required.
1997
+ */
1998
+ static ZSTD_OffsetInfo
1999
+ ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
1957
2000
  {
1958
- const void* ptr = offTable;
1959
- U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1960
- const ZSTD_seqSymbol* table = offTable + 1;
1961
- U32 const max = 1 << tableLog;
1962
- U32 u, total = 0;
1963
- DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1964
-
1965
- assert(max <= (1 << OffFSELog)); /* max not too large */
1966
- for (u=0; u<max; u++) {
1967
- if (table[u].nbAdditionalBits > 22) total += 1;
2001
+ ZSTD_OffsetInfo info = {0, 0};
2002
+ /* If nbSeq == 0, then the offTable is uninitialized, but we have
2003
+ * no sequences, so both values should be 0.
2004
+ */
2005
+ if (nbSeq != 0) {
2006
+ const void* ptr = offTable;
2007
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
2008
+ const ZSTD_seqSymbol* table = offTable + 1;
2009
+ U32 const max = 1 << tableLog;
2010
+ U32 u;
2011
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
2012
+
2013
+ assert(max <= (1 << OffFSELog)); /* max not too large */
2014
+ for (u=0; u<max; u++) {
2015
+ info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
2016
+ if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
2017
+ }
2018
+
2019
+ assert(tableLog <= OffFSELog);
2020
+ info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
1968
2021
  }
1969
2022
 
1970
- assert(tableLog <= OffFSELog);
1971
- total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
2023
+ return info;
2024
+ }
1972
2025
 
1973
- return total;
2026
+ /**
2027
+ * @returns The maximum offset we can decode in one read of our bitstream, without
2028
+ * reloading more bits in the middle of the offset bits read. Any offsets larger
2029
+ * than this must use the long offset decoder.
2030
+ */
2031
+ static size_t ZSTD_maxShortOffset(void)
2032
+ {
2033
+ if (MEM_64bits()) {
2034
+ /* We can decode any offset without reloading bits.
2035
+ * This might change if the max window size grows.
2036
+ */
2037
+ ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2038
+ return (size_t)-1;
2039
+ } else {
2040
+ /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
2041
+ * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
2042
+ * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
2043
+ */
2044
+ size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
2045
+ size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
2046
+ assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
2047
+ return maxOffset;
2048
+ }
1974
2049
  }
1975
- #endif
1976
2050
 
1977
2051
  size_t
1978
2052
  ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
@@ -1980,20 +2054,21 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1980
2054
  const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
1981
2055
  { /* blockType == blockCompressed */
1982
2056
  const BYTE* ip = (const BYTE*)src;
1983
- /* isLongOffset must be true if there are long offsets.
1984
- * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1985
- * We don't expect that to be the case in 64-bit mode.
1986
- * In block mode, window size is not known, so we have to be conservative.
1987
- * (note: but it could be evaluated from current-lowLimit)
1988
- */
1989
- ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1990
2057
  DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1991
2058
 
1992
- RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
2059
+ /* Note : the wording of the specification
2060
+ * allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
2061
+ * This generally does not happen, as it makes little sense,
2062
+ * since an uncompressed block would feature the same size and have no decompression cost.
2063
+ * Also, note that decoders from reference libzstd before v1.5.4
2064
+ * would consider this edge case as an error.
2065
+ * As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
2066
+ * for broader compatibility with the deployed ecosystem of zstd decoders */
2067
+ RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
1993
2068
 
1994
2069
  /* Decode literals section */
1995
2070
  { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
1996
- DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
2071
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
1997
2072
  if (ZSTD_isError(litCSize)) return litCSize;
1998
2073
  ip += litCSize;
1999
2074
  srcSize -= litCSize;
@@ -2001,6 +2076,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2001
2076
 
2002
2077
  /* Build Decoding Tables */
2003
2078
  {
2079
+ /* Compute the maximum block size, which must also work when !frame and fParams are unset.
2080
+ * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
2081
+ */
2082
+ size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
2083
+ size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
2084
+ /* isLongOffset must be true if there are long offsets.
2085
+ * Offsets are long if they are larger than ZSTD_maxShortOffset().
2086
+ * We don't expect that to be the case in 64-bit mode.
2087
+ *
2088
+ * We check here to see if our history is large enough to allow long offsets.
2089
+ * If it isn't, then we can't possible have (valid) long offsets. If the offset
2090
+ * is invalid, then it is okay to read it incorrectly.
2091
+ *
2092
+ * If isLongOffset is true, then we will later check our decoding table to see
2093
+ * if it is even possible to generate long offsets.
2094
+ */
2095
+ ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
2004
2096
  /* These macros control at build-time which decompressor implementation
2005
2097
  * we use. If neither is defined, we do some inspection and dispatch at
2006
2098
  * runtime.
@@ -2008,6 +2100,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2008
2100
  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2009
2101
  !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2010
2102
  int usePrefetchDecoder = dctx->ddictIsCold;
2103
+ #else
2104
+ /* Set to 1 to avoid computing offset info if we don't need to.
2105
+ * Otherwise this value is ignored.
2106
+ */
2107
+ int usePrefetchDecoder = 1;
2011
2108
  #endif
2012
2109
  int nbSeq;
2013
2110
  size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
@@ -2015,28 +2112,42 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2015
2112
  ip += seqHSize;
2016
2113
  srcSize -= seqHSize;
2017
2114
 
2018
- RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2115
+ RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2116
+ RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
2117
+ "invalid dst");
2019
2118
 
2020
- #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2021
- !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2022
- if ( !usePrefetchDecoder
2023
- && (!frame || (dctx->fParams.windowSize > (1<<24)))
2024
- && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
2025
- U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
2026
- U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2027
- usePrefetchDecoder = (shareLongOffsets >= minShare);
2119
+ /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
2120
+ * compute information about the share of long offsets, and the maximum nbAdditionalBits.
2121
+ * NOTE: could probably use a larger nbSeq limit
2122
+ */
2123
+ if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
2124
+ ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
2125
+ if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
2126
+ /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
2127
+ * enough, then we know it is impossible to have too long an offset in this block, so we can
2128
+ * use the regular offset decoder.
2129
+ */
2130
+ isLongOffset = ZSTD_lo_isRegularOffset;
2131
+ }
2132
+ if (!usePrefetchDecoder) {
2133
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2134
+ usePrefetchDecoder = (info.longOffsetShare >= minShare);
2135
+ }
2028
2136
  }
2029
- #endif
2030
2137
 
2031
2138
  dctx->ddictIsCold = 0;
2032
2139
 
2033
2140
  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2034
2141
  !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2035
- if (usePrefetchDecoder)
2142
+ if (usePrefetchDecoder) {
2143
+ #else
2144
+ (void)usePrefetchDecoder;
2145
+ {
2036
2146
  #endif
2037
2147
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
2038
2148
  return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2039
2149
  #endif
2150
+ }
2040
2151
 
2041
2152
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2042
2153
  /* else */
@@ -2060,9 +2171,9 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2060
2171
  }
2061
2172
 
2062
2173
 
2063
- size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2064
- void* dst, size_t dstCapacity,
2065
- const void* src, size_t srcSize)
2174
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
2175
+ void* dst, size_t dstCapacity,
2176
+ const void* src, size_t srcSize)
2066
2177
  {
2067
2178
  size_t dSize;
2068
2179
  ZSTD_checkContinuity(dctx, dst, dstCapacity);
@@ -2070,3 +2181,12 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2070
2181
  dctx->previousDstEnd = (char*)dst + dSize;
2071
2182
  return dSize;
2072
2183
  }
2184
+
2185
+
2186
+ /* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
2187
+ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2188
+ void* dst, size_t dstCapacity,
2189
+ const void* src, size_t srcSize)
2190
+ {
2191
+ return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
2192
+ }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -64,5 +64,10 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
64
64
  unsigned tableLog, void* wksp, size_t wkspSize,
65
65
  int bmi2);
66
66
 
67
+ /* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
68
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
69
+ void* dst, size_t dstCapacity,
70
+ const void* src, size_t srcSize);
71
+
67
72
 
68
73
  #endif /* ZSTD_DEC_BLOCK_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -75,12 +75,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
75
75
 
76
76
  #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
77
77
  #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
78
+ #define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
78
79
 
79
80
  typedef struct {
80
81
  ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
81
82
  ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
82
83
  ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
83
- HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
84
+ HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
84
85
  U32 rep[ZSTD_REP_NUM];
85
86
  U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
86
87
  } ZSTD_entropyDTables_t;
@@ -164,6 +165,7 @@ struct ZSTD_DCtx_s
164
165
  ZSTD_dictUses_e dictUses;
165
166
  ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
166
167
  ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
168
+ int disableHufAsm;
167
169
 
168
170
  /* streaming */
169
171
  ZSTD_dStreamStage streamStage;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -34,6 +34,7 @@
34
34
  #include "../common/pool.h"
35
35
  #include "../common/threading.h"
36
36
  #include "../common/zstd_internal.h" /* includes zstd.h */
37
+ #include "../common/bits.h" /* ZSTD_highbit32 */
37
38
  #include "../zdict.h"
38
39
  #include "cover.h"
39
40
 
@@ -541,7 +542,7 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
541
542
 
542
543
  /**
543
544
  * Prepare a context for dictionary building.
544
- * The context is only dependent on the parameter `d` and can used multiple
545
+ * The context is only dependent on the parameter `d` and can be used multiple
545
546
  * times.
546
547
  * Returns 0 on success or error code on error.
547
548
  * The context must be destroyed with `COVER_ctx_destroy()`.
@@ -646,7 +647,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
646
647
 
647
648
  void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
648
649
  {
649
- const double ratio = (double)nbDmers / maxDictSize;
650
+ const double ratio = (double)nbDmers / (double)maxDictSize;
650
651
  if (ratio >= 10) {
651
652
  return;
652
653
  }
@@ -950,9 +951,17 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
950
951
  }
951
952
  }
952
953
 
954
+ static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz)
955
+ {
956
+ COVER_dictSelection_t ds;
957
+ ds.dictContent = buf;
958
+ ds.dictSize = s;
959
+ ds.totalCompressedSize = csz;
960
+ return ds;
961
+ }
962
+
953
963
  COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
954
- COVER_dictSelection_t selection = { NULL, 0, error };
955
- return selection;
964
+ return setDictSelection(NULL, 0, error);
956
965
  }
957
966
 
958
967
  unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
@@ -1005,9 +1014,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
1005
1014
  }
1006
1015
 
1007
1016
  if (params.shrinkDict == 0) {
1008
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1009
1017
  free(candidateDictBuffer);
1010
- return selection;
1018
+ return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize);
1011
1019
  }
1012
1020
 
1013
1021
  largestDict = dictContentSize;
@@ -1039,20 +1047,16 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe
1039
1047
  return COVER_dictSelectionError(totalCompressedSize);
1040
1048
  }
1041
1049
 
1042
- if (totalCompressedSize <= largestCompressed * regressionTolerance) {
1043
- COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
1050
+ if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
1044
1051
  free(largestDictbuffer);
1045
- return selection;
1052
+ return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize );
1046
1053
  }
1047
1054
  dictContentSize *= 2;
1048
1055
  }
1049
1056
  dictContentSize = largestDict;
1050
1057
  totalCompressedSize = largestCompressed;
1051
- {
1052
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1053
- free(candidateDictBuffer);
1054
- return selection;
1055
- }
1058
+ free(candidateDictBuffer);
1059
+ return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize );
1056
1060
  }
1057
1061
 
1058
1062
  /**