zstd-ruby 1.4.0.0 → 1.4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +274 -107
  5. data/ext/zstdruby/libzstd/README.md +75 -16
  6. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  7. data/ext/zstdruby/libzstd/common/compiler.h +154 -5
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  12. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +7 -3
  14. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
  16. data/ext/zstdruby/libzstd/common/huf.h +41 -38
  17. data/ext/zstdruby/libzstd/common/mem.h +68 -22
  18. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  19. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
  23. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  26. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
  28. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
  72. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
  73. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  74. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  75. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
  77. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
  79. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
  81. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
  83. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
  85. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
  87. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
  89. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  90. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  91. data/ext/zstdruby/libzstd/zstd.h +655 -118
  92. data/lib/zstd-ruby/version.rb +1 -1
  93. data/zstd-ruby.gemspec +1 -1
  94. metadata +20 -10
  95. data/.travis.yml +0 -14
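Note: the single-file diff reproduced below is not labelled in this extract; judging by the functions it touches (ZSTD_getcBlockSize, ZSTD_decodeLiteralsBlock, ZSTD_buildFSETable, ZSTD_decodeSeqHeaders, ZSTD_execSequence), it appears to be item 60 above, data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c.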
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,15 +14,15 @@
14
14
  /*-*******************************************************
15
15
  * Dependencies
16
16
  *********************************************************/
17
- #include <string.h> /* memcpy, memmove, memset */
18
- #include "compiler.h" /* prefetch */
19
- #include "cpu.h" /* bmi2 */
20
- #include "mem.h" /* low level memory routines */
17
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18
+ #include "../common/compiler.h" /* prefetch */
19
+ #include "../common/cpu.h" /* bmi2 */
20
+ #include "../common/mem.h" /* low level memory routines */
21
21
  #define FSE_STATIC_LINKING_ONLY
22
- #include "fse.h"
22
+ #include "../common/fse.h"
23
23
  #define HUF_STATIC_LINKING_ONLY
24
- #include "huf.h"
25
- #include "zstd_internal.h"
24
+ #include "../common/huf.h"
25
+ #include "../common/zstd_internal.h"
26
26
  #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27
27
  #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28
28
  #include "zstd_decompress_block.h"
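The include changes above replace direct <string.h> usage with the new ../common/zstd_deps.h wrapper (file 25 in the list), so that ZSTD_memcpy/ZSTD_memmove/ZSTD_memset can be remapped when libzstd is built without the C standard library. A minimal sketch of that indirection, assuming a made-up ZSTD_SKETCH_NO_LIBC switch (the real header uses its own guard names and also covers allocation and other dependencies):

```c
/* Sketch of the zstd_deps.h idea: ZSTD_memcpy/ZSTD_memset map onto libc by
 * default, while a freestanding build can define ZSTD_SKETCH_NO_LIBC (name
 * invented here) and supply its own versions instead. */
#include <stddef.h>
#include <stdio.h>

#ifndef ZSTD_SKETCH_NO_LIBC
#  include <string.h>
#  define ZSTD_memcpy(d, s, n) memcpy((d), (s), (n))
#  define ZSTD_memset(p, v, n) memset((p), (v), (n))
#else
static void* ZSTD_memcpy(void* dst, const void* src, size_t n)
{   /* trivially correct byte loop for builds without libc */
    char* d = (char*)dst; const char* s = (const char*)src;
    while (n--) *d++ = *s++;
    return dst;
}
static void* ZSTD_memset(void* dst, int c, size_t n)
{   char* d = (char*)dst;
    while (n--) *d++ = (char)c;
    return dst;
}
#endif

int main(void)
{
    char buf[8];
    ZSTD_memset(buf, 0, sizeof(buf));
    ZSTD_memcpy(buf, "zstd", 4);
    printf("%s\n", buf);   /* prints "zstd" with either build mode */
    return 0;
}
```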
@@ -44,7 +44,7 @@
44
44
  /*_*******************************************************
45
45
  * Memory operations
46
46
  **********************************************************/
47
- static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
47
+ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
48
48
 
49
49
 
50
50
  /*-*************************************************************
@@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
56
56
  size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57
57
  blockProperties_t* bpPtr)
58
58
  {
59
- RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
59
+ RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
60
60
 
61
61
  { U32 const cBlockHeader = MEM_readLE24(src);
62
62
  U32 const cSize = cBlockHeader >> 3;
@@ -64,7 +64,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
64
64
  bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
65
65
  bpPtr->origSize = cSize; /* only useful for RLE */
66
66
  if (bpPtr->blockType == bt_rle) return 1;
67
- RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
67
+ RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
68
68
  return cSize;
69
69
  }
70
70
  }
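For reference, the block header that ZSTD_getcBlockSize() parses in the hunks above is 3 little-endian bytes: bit 0 is the last-block flag, bits 1-2 the block type, and bits 3-23 the block size (hence cBlockHeader >> 3). A self-contained sketch of that unpacking, not the library function itself:

```c
#include <stdint.h>
#include <stdio.h>

/* Decode a zstd block header from 3 little-endian bytes, mirroring the
 * shifts used in ZSTD_getcBlockSize() above. */
int main(void)
{
    const uint8_t hdr[3] = { 0x21, 0x00, 0x00 };     /* example input bytes */
    uint32_t const h = (uint32_t)hdr[0]
                     | ((uint32_t)hdr[1] << 8)
                     | ((uint32_t)hdr[2] << 16);     /* MEM_readLE24 equivalent */
    unsigned const lastBlock = h & 1;                /* bit 0 */
    unsigned const blockType = (h >> 1) & 3;         /* 0=raw, 1=rle, 2=compressed, 3=reserved */
    unsigned const blockSize = h >> 3;               /* bits 3..23 */
    printf("last=%u type=%u size=%u\n", lastBlock, blockType, blockSize);
    /* 0x000021 -> last=1, type=0 (raw), size=4 */
    return 0;
}
```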
@@ -79,7 +79,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
79
79
  size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
80
80
  const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
81
81
  {
82
- RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
82
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
83
+ RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
83
84
 
84
85
  { const BYTE* const istart = (const BYTE*) src;
85
86
  symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
@@ -87,7 +88,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
87
88
  switch(litEncType)
88
89
  {
89
90
  case set_repeat:
90
- RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
91
+ DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
92
+ RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
91
93
  /* fall-through */
92
94
 
93
95
  case set_compressed:
@@ -116,11 +118,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
116
118
  /* 2 - 2 - 18 - 18 */
117
119
  lhSize = 5;
118
120
  litSize = (lhc >> 4) & 0x3FFFF;
119
- litCSize = (lhc >> 22) + (istart[4] << 10);
121
+ litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
120
122
  break;
121
123
  }
122
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
123
- RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
124
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
125
+ RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
124
126
 
125
127
  /* prefetch huffman table if cold */
126
128
  if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
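The lhSize == 5 case fixed above uses the 2-2-18-18 bit layout of the compressed-literals header: after 2 bits of literal block type and 2 bits of size format, 18 bits of regenerated size start at bit 4, followed by 18 bits of compressed size whose top 8 bits live in the fifth byte (read separately as istart[4], now with an explicit (size_t) cast). A standalone illustration of that bit slicing, with header bytes chosen here so they decode to litSize = 1000 and litCSize = 300:

```c
#include <stdint.h>
#include <stdio.h>

/* Unpack a 5-byte compressed-literals header (size format 3: 2-2-18-18 bits),
 * mirroring the shifts in ZSTD_decodeLiteralsBlock() above. */
int main(void)
{
    /* Example bytes: low nibble 0xE = type 2 (compressed), size format 3. */
    const uint8_t istart[5] = { 0x8E, 0x3E, 0x00, 0x4B, 0x00 };
    uint32_t const lhc = (uint32_t)istart[0]
                       | ((uint32_t)istart[1] << 8)
                       | ((uint32_t)istart[2] << 16)
                       | ((uint32_t)istart[3] << 24);                /* MEM_readLE32 equivalent */
    size_t const litSize  = (lhc >> 4) & 0x3FFFF;                    /* 18 bits at bit 4 */
    size_t const litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); /* 10 low + 8 high bits */
    printf("litSize=%zu litCSize=%zu\n", litSize, litCSize);         /* 1000 and 300 */
    return 0;
}
```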
@@ -158,13 +160,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
158
160
  }
159
161
  }
160
162
 
161
- RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
163
+ RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
162
164
 
163
165
  dctx->litPtr = dctx->litBuffer;
164
166
  dctx->litSize = litSize;
165
167
  dctx->litEntropy = 1;
166
168
  if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
167
- memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
169
+ ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
168
170
  return litCSize + lhSize;
169
171
  }
170
172
 
@@ -188,11 +190,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
188
190
  }
189
191
 
190
192
  if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
191
- RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
192
- memcpy(dctx->litBuffer, istart+lhSize, litSize);
193
+ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
194
+ ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
193
195
  dctx->litPtr = dctx->litBuffer;
194
196
  dctx->litSize = litSize;
195
- memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
197
+ ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
196
198
  return lhSize+litSize;
197
199
  }
198
200
  /* direct reference into compressed stream */
@@ -220,8 +222,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
220
222
  RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
221
223
  break;
222
224
  }
223
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
224
- memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
225
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
226
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
225
227
  dctx->litPtr = dctx->litBuffer;
226
228
  dctx->litSize = litSize;
227
229
  return lhSize+1;
@@ -234,7 +236,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
234
236
 
235
237
  /* Default FSE distribution tables.
236
238
  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
237
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
239
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
238
240
  * They were generated programmatically with following method :
239
241
  * - start from default distributions, present in /lib/common/zstd_internal.h
240
242
  * - generate tables normally, using ZSTD_buildFSETable()
@@ -362,23 +364,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
362
364
  * generate FSE decoding table for one symbol (ll, ml or off)
363
365
  * cannot fail if input is valid =>
364
366
  * all inputs are presumed validated at this stage */
365
- void
366
- ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
367
+ FORCE_INLINE_TEMPLATE
368
+ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
367
369
  const short* normalizedCounter, unsigned maxSymbolValue,
368
370
  const U32* baseValue, const U32* nbAdditionalBits,
369
- unsigned tableLog)
371
+ unsigned tableLog, void* wksp, size_t wkspSize)
370
372
  {
371
373
  ZSTD_seqSymbol* const tableDecode = dt+1;
372
- U16 symbolNext[MaxSeq+1];
373
-
374
374
  U32 const maxSV1 = maxSymbolValue + 1;
375
375
  U32 const tableSize = 1 << tableLog;
376
- U32 highThreshold = tableSize-1;
376
+
377
+ U16* symbolNext = (U16*)wksp;
378
+ BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
379
+ U32 highThreshold = tableSize - 1;
380
+
377
381
 
378
382
  /* Sanity Checks */
379
383
  assert(maxSymbolValue <= MaxSeq);
380
384
  assert(tableLog <= MaxFSELog);
381
-
385
+ assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
386
+ (void)wkspSize;
382
387
  /* Init, lay down lowprob symbols */
383
388
  { ZSTD_seqSymbol_header DTableH;
384
389
  DTableH.tableLog = tableLog;
@@ -391,18 +396,72 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
391
396
  symbolNext[s] = 1;
392
397
  } else {
393
398
  if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
394
- symbolNext[s] = normalizedCounter[s];
399
+ assert(normalizedCounter[s]>=0);
400
+ symbolNext[s] = (U16)normalizedCounter[s];
395
401
  } } }
396
- memcpy(dt, &DTableH, sizeof(DTableH));
402
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
397
403
  }
398
404
 
399
405
  /* Spread symbols */
400
- { U32 const tableMask = tableSize-1;
406
+ assert(tableSize <= 512);
407
+ /* Specialized symbol spreading for the case when there are
408
+ * no low probability (-1 count) symbols. When compressing
409
+ * small blocks we avoid low probability symbols to hit this
410
+ * case, since header decoding speed matters more.
411
+ */
412
+ if (highThreshold == tableSize - 1) {
413
+ size_t const tableMask = tableSize-1;
414
+ size_t const step = FSE_TABLESTEP(tableSize);
415
+ /* First lay down the symbols in order.
416
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
417
+ * misses since small blocks generally have small table logs, so nearly
418
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
419
+ * our buffer to handle the over-write.
420
+ */
421
+ {
422
+ U64 const add = 0x0101010101010101ull;
423
+ size_t pos = 0;
424
+ U64 sv = 0;
425
+ U32 s;
426
+ for (s=0; s<maxSV1; ++s, sv += add) {
427
+ int i;
428
+ int const n = normalizedCounter[s];
429
+ MEM_write64(spread + pos, sv);
430
+ for (i = 8; i < n; i += 8) {
431
+ MEM_write64(spread + pos + i, sv);
432
+ }
433
+ pos += n;
434
+ }
435
+ }
436
+ /* Now we spread those positions across the table.
437
+ * The benefit of doing it in two stages is that we avoid the
438
+ * variable size inner loop, which caused lots of branch misses.
439
+ * Now we can run through all the positions without any branch misses.
440
+ * We unroll the loop twice, since that is what empirically worked best.
441
+ */
442
+ {
443
+ size_t position = 0;
444
+ size_t s;
445
+ size_t const unroll = 2;
446
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
447
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
448
+ size_t u;
449
+ for (u = 0; u < unroll; ++u) {
450
+ size_t const uPosition = (position + (u * step)) & tableMask;
451
+ tableDecode[uPosition].baseValue = spread[s + u];
452
+ }
453
+ position = (position + (unroll * step)) & tableMask;
454
+ }
455
+ assert(position == 0);
456
+ }
457
+ } else {
458
+ U32 const tableMask = tableSize-1;
401
459
  U32 const step = FSE_TABLESTEP(tableSize);
402
460
  U32 s, position = 0;
403
461
  for (s=0; s<maxSV1; s++) {
404
462
  int i;
405
- for (i=0; i<normalizedCounter[s]; i++) {
463
+ int const n = normalizedCounter[s];
464
+ for (i=0; i<n; i++) {
406
465
  tableDecode[position].baseValue = s;
407
466
  position = (position + step) & tableMask;
408
467
  while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
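The rewritten spreading step above works in two stages when there are no low-probability symbols: first lay every symbol down contiguously (8 bytes at a time in the real code), then scatter that buffer across the table with the usual FSE stride. The toy program below demonstrates the same two-stage idea on a 32-entry table; it is only a sketch (no 8-byte writes, no shared workspace), using the standard FSE step formula (tableSize>>1) + (tableSize>>3) + 3:

```c
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define TABLE_LOG  5
#define TABLE_SIZE (1u << TABLE_LOG)
/* Same step formula FSE uses so the stride visits every slot exactly once. */
#define TABLE_STEP ((TABLE_SIZE >> 1) + (TABLE_SIZE >> 3) + 3)

int main(void)
{
    /* Toy normalized counts (sum == TABLE_SIZE, no -1 "low prob" symbols),
     * i.e. the case the new fast path targets. */
    const short counts[] = { 12, 8, 6, 4, 2 };
    const size_t nbSymbols = sizeof(counts) / sizeof(counts[0]);

    unsigned char spread[TABLE_SIZE];  /* stage 1: symbols laid out in order   */
    unsigned char table[TABLE_SIZE];   /* stage 2: symbols scattered by stride */
    size_t pos = 0, position = 0, s, i;

    /* Stage 1: contiguous layout -- no table-mask logic, branch friendly. */
    for (s = 0; s < nbSymbols; ++s)
        for (i = 0; i < (size_t)counts[s]; ++i)
            spread[pos++] = (unsigned char)s;
    assert(pos == TABLE_SIZE);

    /* Stage 2: scatter with a fixed stride -- one predictable loop. */
    for (i = 0; i < TABLE_SIZE; ++i) {
        table[position] = spread[i];
        position = (position + TABLE_STEP) & (TABLE_SIZE - 1);
    }
    assert(position == 0);  /* the stride returns to slot 0 after a full pass */

    for (i = 0; i < TABLE_SIZE; ++i) printf("%u ", table[i]);
    printf("\n");
    return 0;
}
```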
@@ -411,7 +470,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
411
470
  }
412
471
 
413
472
  /* Build Decoding table */
414
- { U32 u;
473
+ {
474
+ U32 u;
415
475
  for (u=0; u<tableSize; u++) {
416
476
  U32 const symbol = tableDecode[u].baseValue;
417
477
  U32 const nextState = symbolNext[symbol]++;
@@ -420,7 +480,46 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
420
480
  assert(nbAdditionalBits[symbol] < 255);
421
481
  tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
422
482
  tableDecode[u].baseValue = baseValue[symbol];
423
- } }
483
+ }
484
+ }
485
+ }
486
+
487
+ /* Avoids the FORCE_INLINE of the _body() function. */
488
+ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
489
+ const short* normalizedCounter, unsigned maxSymbolValue,
490
+ const U32* baseValue, const U32* nbAdditionalBits,
491
+ unsigned tableLog, void* wksp, size_t wkspSize)
492
+ {
493
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
494
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
495
+ }
496
+
497
+ #if DYNAMIC_BMI2
498
+ TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
499
+ const short* normalizedCounter, unsigned maxSymbolValue,
500
+ const U32* baseValue, const U32* nbAdditionalBits,
501
+ unsigned tableLog, void* wksp, size_t wkspSize)
502
+ {
503
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
504
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
505
+ }
506
+ #endif
507
+
508
+ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
509
+ const short* normalizedCounter, unsigned maxSymbolValue,
510
+ const U32* baseValue, const U32* nbAdditionalBits,
511
+ unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
512
+ {
513
+ #if DYNAMIC_BMI2
514
+ if (bmi2) {
515
+ ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
516
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
517
+ return;
518
+ }
519
+ #endif
520
+ (void)bmi2;
521
+ ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
522
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
424
523
  }
425
524
 
426
525
 
@@ -432,13 +531,14 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
432
531
  const void* src, size_t srcSize,
433
532
  const U32* baseValue, const U32* nbAdditionalBits,
434
533
  const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
435
- int ddictIsCold, int nbSeq)
534
+ int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
535
+ int bmi2)
436
536
  {
437
537
  switch(type)
438
538
  {
439
539
  case set_rle :
440
- RETURN_ERROR_IF(!srcSize, srcSize_wrong);
441
- RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
540
+ RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
541
+ RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
442
542
  { U32 const symbol = *(const BYTE*)src;
443
543
  U32 const baseline = baseValue[symbol];
444
544
  U32 const nbBits = nbAdditionalBits[symbol];
@@ -450,7 +550,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
450
550
  *DTablePtr = defaultTable;
451
551
  return 0;
452
552
  case set_repeat:
453
- RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
553
+ RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
454
554
  /* prefetch FSE table if used */
455
555
  if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
456
556
  const void* const pStart = *DTablePtr;
@@ -462,9 +562,9 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
462
562
  { unsigned tableLog;
463
563
  S16 norm[MaxSeq+1];
464
564
  size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
465
- RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
466
- RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
467
- ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
565
+ RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
566
+ RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
567
+ ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
468
568
  *DTablePtr = DTableSpace;
469
569
  return headerSize;
470
570
  }
@@ -477,35 +577,36 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
477
577
  size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
478
578
  const void* src, size_t srcSize)
479
579
  {
480
- const BYTE* const istart = (const BYTE* const)src;
580
+ const BYTE* const istart = (const BYTE*)src;
481
581
  const BYTE* const iend = istart + srcSize;
482
582
  const BYTE* ip = istart;
483
583
  int nbSeq;
484
584
  DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
485
585
 
486
586
  /* check */
487
- RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
587
+ RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
488
588
 
489
589
  /* SeqHead */
490
590
  nbSeq = *ip++;
491
591
  if (!nbSeq) {
492
592
  *nbSeqPtr=0;
493
- RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
593
+ RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
494
594
  return 1;
495
595
  }
496
596
  if (nbSeq > 0x7F) {
497
597
  if (nbSeq == 0xFF) {
498
- RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
499
- nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
598
+ RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
599
+ nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
600
+ ip+=2;
500
601
  } else {
501
- RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
602
+ RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
502
603
  nbSeq = ((nbSeq-0x80)<<8) + *ip++;
503
604
  }
504
605
  }
505
606
  *nbSeqPtr = nbSeq;
506
607
 
507
608
  /* FSE table descriptors */
508
- RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
609
+ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
509
610
  { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
510
611
  symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
511
612
  symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
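The header parsed by ZSTD_decodeSeqHeaders() above encodes the sequence count in 1, 2, or 3 bytes; the 0xFF case adds LONGNBSEQ (0x7F00) to a little-endian 16-bit value. A small standalone decoder for just that field, mirroring the branches above (decode_nb_seq is a name invented here):

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define LONGNBSEQ 0x7F00  /* same constant the decoder adds in the 0xFF case */

/* Decode the variable-length sequence count used by ZSTD_decodeSeqHeaders().
 * Returns the number of header bytes consumed, or 0 if the input is too short. */
static size_t decode_nb_seq(const uint8_t* ip, size_t srcSize, int* nbSeqPtr)
{
    if (srcSize < 1) return 0;
    if (ip[0] < 0x80) { *nbSeqPtr = ip[0]; return 1; }            /* 1 byte  */
    if (ip[0] < 0xFF) {                                           /* 2 bytes */
        if (srcSize < 2) return 0;
        *nbSeqPtr = ((ip[0] - 0x80) << 8) + ip[1];
        return 2;
    }
    if (srcSize < 3) return 0;                                    /* 3 bytes */
    *nbSeqPtr = (ip[1] | (ip[2] << 8)) + LONGNBSEQ;               /* LE16 + LONGNBSEQ */
    return 3;
}

int main(void)
{
    const uint8_t a[] = { 0x05 };              /* -> 5 sequences            */
    const uint8_t b[] = { 0x81, 0x40 };        /* -> 0x140 = 320 sequences  */
    const uint8_t c[] = { 0xFF, 0x10, 0x00 };  /* -> 16 + 0x7F00 = 32528    */
    int n;
    decode_nb_seq(a, sizeof(a), &n); printf("%d\n", n);
    decode_nb_seq(b, sizeof(b), &n); printf("%d\n", n);
    decode_nb_seq(c, sizeof(c), &n); printf("%d\n", n);
    return 0;
}
```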
@@ -517,8 +618,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
517
618
  ip, iend-ip,
518
619
  LL_base, LL_bits,
519
620
  LL_defaultDTable, dctx->fseEntropy,
520
- dctx->ddictIsCold, nbSeq);
521
- RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
621
+ dctx->ddictIsCold, nbSeq,
622
+ dctx->workspace, sizeof(dctx->workspace),
623
+ dctx->bmi2);
624
+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
522
625
  ip += llhSize;
523
626
  }
524
627
 
@@ -527,8 +630,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
527
630
  ip, iend-ip,
528
631
  OF_base, OF_bits,
529
632
  OF_defaultDTable, dctx->fseEntropy,
530
- dctx->ddictIsCold, nbSeq);
531
- RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
633
+ dctx->ddictIsCold, nbSeq,
634
+ dctx->workspace, sizeof(dctx->workspace),
635
+ dctx->bmi2);
636
+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
532
637
  ip += ofhSize;
533
638
  }
534
639
 
@@ -537,8 +642,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
537
642
  ip, iend-ip,
538
643
  ML_base, ML_bits,
539
644
  ML_defaultDTable, dctx->fseEntropy,
540
- dctx->ddictIsCold, nbSeq);
541
- RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
645
+ dctx->ddictIsCold, nbSeq,
646
+ dctx->workspace, sizeof(dctx->workspace),
647
+ dctx->bmi2);
648
+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
542
649
  ip += mlhSize;
543
650
  }
544
651
  }
@@ -570,54 +677,133 @@ typedef struct {
570
677
  size_t pos;
571
678
  } seqState_t;
572
679
 
680
+ /*! ZSTD_overlapCopy8() :
681
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
682
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
683
+ *
684
+ * Precondition: *ip <= *op
685
+ * Postcondition: *op - *ip >= 8
686
+ */
687
+ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
688
+ assert(*ip <= *op);
689
+ if (offset < 8) {
690
+ /* close range match, overlap */
691
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
692
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
693
+ int const sub2 = dec64table[offset];
694
+ (*op)[0] = (*ip)[0];
695
+ (*op)[1] = (*ip)[1];
696
+ (*op)[2] = (*ip)[2];
697
+ (*op)[3] = (*ip)[3];
698
+ *ip += dec32table[offset];
699
+ ZSTD_copy4(*op+4, *ip);
700
+ *ip -= sub2;
701
+ } else {
702
+ ZSTD_copy8(*op, *ip);
703
+ }
704
+ *ip += 8;
705
+ *op += 8;
706
+ assert(*op - *ip >= 8);
707
+ }
708
+
709
+ /*! ZSTD_safecopy() :
710
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
711
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
712
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
713
+ * should be fast for a single long sequence, but can be slow for several short sequences.
714
+ *
715
+ * @param ovtype controls the overlap detection
716
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
717
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
718
+ * The src buffer must be before the dst buffer.
719
+ */
720
+ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
721
+ ptrdiff_t const diff = op - ip;
722
+ BYTE* const oend = op + length;
723
+
724
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
725
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
573
726
 
574
- /* ZSTD_execSequenceLast7():
575
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
576
- * requires more careful checks, to ensure there is no overflow.
577
- * performance does not matter though.
578
- * note : this case is supposed to be never generated "naturally" by reference encoder,
579
- * since in most cases it needs at least 8 bytes to look for a match.
580
- * but it's allowed by the specification. */
727
+ if (length < 8) {
728
+ /* Handle short lengths. */
729
+ while (op < oend) *op++ = *ip++;
730
+ return;
731
+ }
732
+ if (ovtype == ZSTD_overlap_src_before_dst) {
733
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
734
+ assert(length >= 8);
735
+ ZSTD_overlapCopy8(&op, &ip, diff);
736
+ assert(op - ip >= 8);
737
+ assert(op <= oend);
738
+ }
739
+
740
+ if (oend <= oend_w) {
741
+ /* No risk of overwrite. */
742
+ ZSTD_wildcopy(op, ip, length, ovtype);
743
+ return;
744
+ }
745
+ if (op <= oend_w) {
746
+ /* Wildcopy until we get close to the end. */
747
+ assert(oend > oend_w);
748
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
749
+ ip += oend_w - op;
750
+ op = oend_w;
751
+ }
752
+ /* Handle the leftovers. */
753
+ while (op < oend) *op++ = *ip++;
754
+ }
755
+
756
+ /* ZSTD_execSequenceEnd():
757
+ * This version handles cases that are near the end of the output buffer. It requires
758
+ * more careful checks to make sure there is no overflow. By separating out these hard
759
+ * and unlikely cases, we can speed up the common cases.
760
+ *
761
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
762
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
763
+ */
581
764
  FORCE_NOINLINE
582
- size_t ZSTD_execSequenceLast7(BYTE* op,
583
- BYTE* const oend, seq_t sequence,
584
- const BYTE** litPtr, const BYTE* const litLimit,
585
- const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
765
+ size_t ZSTD_execSequenceEnd(BYTE* op,
766
+ BYTE* const oend, seq_t sequence,
767
+ const BYTE** litPtr, const BYTE* const litLimit,
768
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
586
769
  {
587
770
  BYTE* const oLitEnd = op + sequence.litLength;
588
771
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
589
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
590
772
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
591
773
  const BYTE* match = oLitEnd - sequence.offset;
774
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
592
775
 
593
- /* check */
594
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
595
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
776
+ /* bounds checks : careful of address space overflow in 32-bit mode */
777
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
778
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
779
+ assert(op < op + sequenceLength);
780
+ assert(oLitEnd < op + sequenceLength);
596
781
 
597
782
  /* copy literals */
598
- while (op < oLitEnd) *op++ = *(*litPtr)++;
783
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
784
+ op = oLitEnd;
785
+ *litPtr = iLitEnd;
599
786
 
600
787
  /* copy Match */
601
- if (sequence.offset > (size_t)(oLitEnd - base)) {
788
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
602
789
  /* offset beyond prefix */
603
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
604
- match = dictEnd - (base-match);
790
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
791
+ match = dictEnd - (prefixStart-match);
605
792
  if (match + sequence.matchLength <= dictEnd) {
606
- memmove(oLitEnd, match, sequence.matchLength);
793
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
607
794
  return sequenceLength;
608
795
  }
609
796
  /* span extDict & currentPrefixSegment */
610
797
  { size_t const length1 = dictEnd - match;
611
- memmove(oLitEnd, match, length1);
798
+ ZSTD_memmove(oLitEnd, match, length1);
612
799
  op = oLitEnd + length1;
613
800
  sequence.matchLength -= length1;
614
- match = base;
801
+ match = prefixStart;
615
802
  } }
616
- while (op < oMatchEnd) *op++ = *match++;
803
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
617
804
  return sequenceLength;
618
805
  }
619
806
 
620
-
621
807
  HINT_INLINE
622
808
  size_t ZSTD_execSequence(BYTE* op,
623
809
  BYTE* const oend, seq_t sequence,
@@ -627,152 +813,85 @@ size_t ZSTD_execSequence(BYTE* op,
627
813
  BYTE* const oLitEnd = op + sequence.litLength;
628
814
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
629
815
  BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
630
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
816
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
631
817
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
632
818
  const BYTE* match = oLitEnd - sequence.offset;
633
819
 
634
- /* check */
635
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
636
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
637
- if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
638
-
639
- /* copy Literals */
640
- ZSTD_copy8(op, *litPtr);
641
- if (sequence.litLength > 8)
642
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
820
+ assert(op != NULL /* Precondition */);
821
+ assert(oend_w < oend /* No underflow */);
822
+ /* Handle edge cases in a slow path:
823
+ * - Read beyond end of literals
824
+ * - Match end is within WILDCOPY_OVERLENGTH of oend
825
+ * - 32-bit mode and the match length overflows
826
+ */
827
+ if (UNLIKELY(
828
+ iLitEnd > litLimit ||
829
+ oMatchEnd > oend_w ||
830
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
831
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
832
+
833
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
834
+ assert(op <= oLitEnd /* No overflow */);
835
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
836
+ assert(oMatchEnd <= oend /* No underflow */);
837
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
838
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
839
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
840
+
841
+ /* Copy Literals:
842
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
843
+ * We likely don't need the full 32-byte wildcopy.
844
+ */
845
+ assert(WILDCOPY_OVERLENGTH >= 16);
846
+ ZSTD_copy16(op, (*litPtr));
847
+ if (UNLIKELY(sequence.litLength > 16)) {
848
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
849
+ }
643
850
  op = oLitEnd;
644
851
  *litPtr = iLitEnd; /* update for next sequence */
645
852
 
646
- /* copy Match */
853
+ /* Copy Match */
647
854
  if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
648
855
  /* offset beyond prefix -> go into extDict */
649
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
856
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
650
857
  match = dictEnd + (match - prefixStart);
651
858
  if (match + sequence.matchLength <= dictEnd) {
652
- memmove(oLitEnd, match, sequence.matchLength);
859
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
653
860
  return sequenceLength;
654
861
  }
655
862
  /* span extDict & currentPrefixSegment */
656
863
  { size_t const length1 = dictEnd - match;
657
- memmove(oLitEnd, match, length1);
864
+ ZSTD_memmove(oLitEnd, match, length1);
658
865
  op = oLitEnd + length1;
659
866
  sequence.matchLength -= length1;
660
867
  match = prefixStart;
661
- if (op > oend_w || sequence.matchLength < MINMATCH) {
662
- U32 i;
663
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
664
- return sequenceLength;
665
- }
666
868
  } }
667
- /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
668
-
669
- /* match within prefix */
670
- if (sequence.offset < 8) {
671
- /* close range match, overlap */
672
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
673
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
674
- int const sub2 = dec64table[sequence.offset];
675
- op[0] = match[0];
676
- op[1] = match[1];
677
- op[2] = match[2];
678
- op[3] = match[3];
679
- match += dec32table[sequence.offset];
680
- ZSTD_copy4(op+4, match);
681
- match -= sub2;
682
- } else {
683
- ZSTD_copy8(op, match);
684
- }
685
- op += 8; match += 8;
686
-
687
- if (oMatchEnd > oend-(16-MINMATCH)) {
688
- if (op < oend_w) {
689
- ZSTD_wildcopy(op, match, oend_w - op);
690
- match += oend_w - op;
691
- op = oend_w;
692
- }
693
- while (op < oMatchEnd) *op++ = *match++;
694
- } else {
695
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
869
+ /* Match within prefix of 1 or more bytes */
870
+ assert(op <= oMatchEnd);
871
+ assert(oMatchEnd <= oend_w);
872
+ assert(match >= prefixStart);
873
+ assert(sequence.matchLength >= 1);
874
+
875
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
876
+ * without overlap checking.
877
+ */
878
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
879
+ /* We bet on a full wildcopy for matches, since we expect matches to be
880
+ * longer than literals (in general). In silesia, ~10% of matches are longer
881
+ * than 16 bytes.
882
+ */
883
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
884
+ return sequenceLength;
696
885
  }
697
- return sequenceLength;
698
- }
699
-
700
-
701
- HINT_INLINE
702
- size_t ZSTD_execSequenceLong(BYTE* op,
703
- BYTE* const oend, seq_t sequence,
704
- const BYTE** litPtr, const BYTE* const litLimit,
705
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
706
- {
707
- BYTE* const oLitEnd = op + sequence.litLength;
708
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
709
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
710
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
711
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
712
- const BYTE* match = sequence.match;
713
-
714
- /* check */
715
- RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
716
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
717
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
718
-
719
- /* copy Literals */
720
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
721
- if (sequence.litLength > 8)
722
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
723
- op = oLitEnd;
724
- *litPtr = iLitEnd; /* update for next sequence */
725
-
726
- /* copy Match */
727
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
728
- /* offset beyond prefix */
729
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
730
- if (match + sequence.matchLength <= dictEnd) {
731
- memmove(oLitEnd, match, sequence.matchLength);
732
- return sequenceLength;
733
- }
734
- /* span extDict & currentPrefixSegment */
735
- { size_t const length1 = dictEnd - match;
736
- memmove(oLitEnd, match, length1);
737
- op = oLitEnd + length1;
738
- sequence.matchLength -= length1;
739
- match = prefixStart;
740
- if (op > oend_w || sequence.matchLength < MINMATCH) {
741
- U32 i;
742
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
743
- return sequenceLength;
744
- }
745
- } }
746
- assert(op <= oend_w);
747
- assert(sequence.matchLength >= MINMATCH);
886
+ assert(sequence.offset < WILDCOPY_VECLEN);
748
887
 
749
- /* match within prefix */
750
- if (sequence.offset < 8) {
751
- /* close range match, overlap */
752
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
753
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
754
- int const sub2 = dec64table[sequence.offset];
755
- op[0] = match[0];
756
- op[1] = match[1];
757
- op[2] = match[2];
758
- op[3] = match[3];
759
- match += dec32table[sequence.offset];
760
- ZSTD_copy4(op+4, match);
761
- match -= sub2;
762
- } else {
763
- ZSTD_copy8(op, match);
764
- }
765
- op += 8; match += 8;
888
+ /* Copy 8 bytes and spread the offset to be >= 8. */
889
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
766
890
 
767
- if (oMatchEnd > oend-(16-MINMATCH)) {
768
- if (op < oend_w) {
769
- ZSTD_wildcopy(op, match, oend_w - op);
770
- match += oend_w - op;
771
- op = oend_w;
772
- }
773
- while (op < oMatchEnd) *op++ = *match++;
774
- } else {
775
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
891
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
892
+ if (sequence.matchLength > 8) {
893
+ assert(op < oMatchEnd);
894
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
776
895
  }
777
896
  return sequenceLength;
778
897
  }
@@ -798,6 +917,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
798
917
  DStatePtr->state = DInfo.nextState + lowBits;
799
918
  }
800
919
 
920
+ FORCE_INLINE_TEMPLATE void
921
+ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
922
+ {
923
+ U32 const nbBits = DInfo.nbBits;
924
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
925
+ DStatePtr->state = DInfo.nextState + lowBits;
926
+ }
927
+
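ZSTD_updateFseStateWithDInfo() added here performs the same transition as ZSTD_updateFseState() just above it, but reuses the already-loaded table entry instead of indexing the table again: newState = entry.nextState + the next entry.nbBits bits from the stream. A minimal model of that transition with invented table and bit values:

```c
#include <stdint.h>
#include <stdio.h>

/* Minimal model of one FSE/ANS state transition, as performed by
 * ZSTD_updateFseState() / ZSTD_updateFseStateWithDInfo() above. */
typedef struct { uint16_t nextState; uint8_t nbBits; } fse_entry;

/* Toy bit reader: returns the low nbBits of *bits and consumes them.
 * (The real BIT_readBits() reads from a shifting 64-bit accumulator.) */
static unsigned read_bits(uint64_t* bits, unsigned nbBits)
{
    unsigned const v = (unsigned)(*bits & ((1u << nbBits) - 1));
    *bits >>= nbBits;
    return v;
}

int main(void)
{
    fse_entry const dinfo = { 16, 3 };   /* hypothetical decode-table entry */
    uint64_t bits = 0x5;                 /* next bits in the stream */
    unsigned const state = dinfo.nextState + read_bits(&bits, dinfo.nbBits);
    printf("new state = %u\n", state);   /* 16 + 5 = 21 */
    return 0;
}
```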
801
928
  /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
802
929
  * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
803
930
  * bits before reloading. This value is the maximum number of bytes we read
@@ -809,25 +936,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
809
936
  : 0)
810
937
 
811
938
  typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
939
+ typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
812
940
 
813
- #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
814
941
  FORCE_INLINE_TEMPLATE seq_t
815
- ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
942
+ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
816
943
  {
817
944
  seq_t seq;
818
- U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
819
- U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
820
- U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
821
- U32 const totalBits = llBits+mlBits+ofBits;
822
- U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
823
- U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
824
- U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
945
+ ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
946
+ ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
947
+ ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
948
+ U32 const llBase = llDInfo.baseValue;
949
+ U32 const mlBase = mlDInfo.baseValue;
950
+ U32 const ofBase = ofDInfo.baseValue;
951
+ BYTE const llBits = llDInfo.nbAdditionalBits;
952
+ BYTE const mlBits = mlDInfo.nbAdditionalBits;
953
+ BYTE const ofBits = ofDInfo.nbAdditionalBits;
954
+ BYTE const totalBits = llBits+mlBits+ofBits;
825
955
 
826
956
  /* sequence */
827
957
  { size_t offset;
828
- if (!ofBits)
829
- offset = 0;
830
- else {
958
+ if (ofBits > 1) {
831
959
  ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
832
960
  ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
833
961
  assert(ofBits <= MaxOff);
@@ -841,62 +969,146 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
841
969
  offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
842
970
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
843
971
  }
844
- }
845
-
846
- if (ofBits <= 1) {
847
- offset += (llBase==0);
848
- if (offset) {
849
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
850
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
851
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
852
- seqState->prevOffset[1] = seqState->prevOffset[0];
853
- seqState->prevOffset[0] = offset = temp;
854
- } else { /* offset == 0 */
855
- offset = seqState->prevOffset[0];
856
- }
857
- } else {
858
972
  seqState->prevOffset[2] = seqState->prevOffset[1];
859
973
  seqState->prevOffset[1] = seqState->prevOffset[0];
860
974
  seqState->prevOffset[0] = offset;
861
- }
975
+ } else {
976
+ U32 const ll0 = (llBase == 0);
977
+ if (LIKELY((ofBits == 0))) {
978
+ if (LIKELY(!ll0))
979
+ offset = seqState->prevOffset[0];
980
+ else {
981
+ offset = seqState->prevOffset[1];
982
+ seqState->prevOffset[1] = seqState->prevOffset[0];
983
+ seqState->prevOffset[0] = offset;
984
+ }
985
+ } else {
986
+ offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
987
+ { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
988
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
989
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
990
+ seqState->prevOffset[1] = seqState->prevOffset[0];
991
+ seqState->prevOffset[0] = offset = temp;
992
+ } } }
862
993
  seq.offset = offset;
863
994
  }
864
995
 
865
- seq.matchLength = mlBase
866
- + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
996
+ seq.matchLength = mlBase;
997
+ if (mlBits > 0)
998
+ seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
999
+
867
1000
  if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
868
1001
  BIT_reloadDStream(&seqState->DStream);
869
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1002
+ if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
870
1003
  BIT_reloadDStream(&seqState->DStream);
871
1004
  /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
872
1005
  ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
873
1006
 
874
- seq.litLength = llBase
875
- + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
1007
+ seq.litLength = llBase;
1008
+ if (llBits > 0)
1009
+ seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
1010
+
876
1011
  if (MEM_32bits())
877
1012
  BIT_reloadDStream(&seqState->DStream);
878
1013
 
879
1014
  DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
880
1015
  (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
881
1016
 
882
- /* ANS state update */
883
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
884
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
885
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
886
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1017
+ if (prefetch == ZSTD_p_prefetch) {
1018
+ size_t const pos = seqState->pos + seq.litLength;
1019
+ const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
1020
+ seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1021
+ * No consequence though : no memory access will occur, offset is only used for prefetching */
1022
+ seqState->pos = pos + seq.matchLength;
1023
+ }
1024
+
1025
+ /* ANS state update
1026
+ * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
1027
+ * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
1028
+ * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
1029
+ * better option, so it is the default for other compilers. But, if you
1030
+ * measure that it is worse, please put up a pull request.
1031
+ */
1032
+ {
1033
+ #if defined(__GNUC__) && !defined(__clang__)
1034
+ const int kUseUpdateFseState = 1;
1035
+ #else
1036
+ const int kUseUpdateFseState = 0;
1037
+ #endif
1038
+ if (kUseUpdateFseState) {
1039
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1040
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1041
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1042
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1043
+ } else {
1044
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
1045
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
1046
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1047
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
1048
+ }
1049
+ }
887
1050
 
888
1051
  return seq;
889
1052
  }
890
1053
 
1054
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1055
+ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
1056
+ {
1057
+ size_t const windowSize = dctx->fParams.windowSize;
1058
+ /* No dictionary used. */
1059
+ if (dctx->dictContentEndForFuzzing == NULL) return 0;
1060
+ /* Dictionary is our prefix. */
1061
+ if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
1062
+ /* Dictionary is not our ext-dict. */
1063
+ if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
1064
+ /* Dictionary is not within our window size. */
1065
+ if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
1066
+ /* Dictionary is active. */
1067
+ return 1;
1068
+ }
1069
+
1070
+ MEM_STATIC void ZSTD_assertValidSequence(
1071
+ ZSTD_DCtx const* dctx,
1072
+ BYTE const* op, BYTE const* oend,
1073
+ seq_t const seq,
1074
+ BYTE const* prefixStart, BYTE const* virtualStart)
1075
+ {
1076
+ #if DEBUGLEVEL >= 1
1077
+ size_t const windowSize = dctx->fParams.windowSize;
1078
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
1079
+ BYTE const* const oLitEnd = op + seq.litLength;
1080
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
1081
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1082
+ assert(op <= oend);
1083
+ assert((size_t)(oend - op) >= sequenceSize);
1084
+ assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
1085
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1086
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
1087
+ /* Offset must be within the dictionary. */
1088
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1089
+ assert(seq.offset <= windowSize + dictSize);
1090
+ } else {
1091
+ /* Offset must be within our window. */
1092
+ assert(seq.offset <= windowSize);
1093
+ }
1094
+ #else
1095
+ (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1096
+ #endif
1097
+ }
1098
+ #endif
1099
+
1100
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
891
1101
  FORCE_INLINE_TEMPLATE size_t
1102
+ DONT_VECTORIZE
892
1103
  ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
893
1104
  void* dst, size_t maxDstSize,
894
1105
  const void* seqStart, size_t seqSize, int nbSeq,
895
- const ZSTD_longOffset_e isLongOffset)
1106
+ const ZSTD_longOffset_e isLongOffset,
1107
+ const int frame)
896
1108
  {
897
1109
  const BYTE* ip = (const BYTE*)seqStart;
898
1110
  const BYTE* const iend = ip + seqSize;
899
- BYTE* const ostart = (BYTE* const)dst;
1111
+ BYTE* const ostart = (BYTE*)dst;
900
1112
  BYTE* const oend = ostart + maxDstSize;
901
1113
  BYTE* op = ostart;
902
1114
  const BYTE* litPtr = dctx->litPtr;
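The restructured offset handling in the hunk above implements the zstd format's repeat-offset rules: an offset value of 1-3 refers to the recent-offsets history (shifted by one when the literal length is zero, with the special rep[0] - 1 case), larger values decode to value - 3, and the history is updated move-to-front style. The sketch below applies those rules at the Offset_Value level, separate from the FSE/bitstream handling above; resolve_offset is a name invented here:

```c
#include <stddef.h>
#include <stdio.h>

/* Repeat-offset resolution as in ZSTD_decodeSequence() above.
 * 'offsetValue' is the decoded Offset_Value, ll0 means "literal length == 0". */
static size_t resolve_offset(size_t rep[3], size_t offsetValue, int ll0)
{
    size_t offset;
    if (offsetValue > 3) {                      /* a fresh offset: Offset_Value - 3 */
        offset = offsetValue - 3;
        rep[2] = rep[1]; rep[1] = rep[0]; rep[0] = offset;
        return offset;
    }
    /* Offset_Value 1..3 are repeat codes; ll0 shifts their meaning by one. */
    {   size_t const idx = offsetValue - 1 + (ll0 ? 1 : 0);   /* 0..3 */
        if (idx == 0) return rep[0];            /* most recent offset, history unchanged */
        offset = (idx == 3) ? rep[0] - 1 : rep[idx];
        if (offset == 0) offset = 1;            /* 0 is invalid; corrupted input forced to 1 */
        if (idx != 1) rep[2] = rep[1];
        rep[1] = rep[0];
        rep[0] = offset;
        return offset;
    }
}

int main(void)
{
    size_t rep[3] = { 1, 4, 8 };                    /* zstd's initial recent offsets */
    printf("%zu\n", resolve_offset(rep, 1, 0));     /* repeat code 1, ll>0  -> 1 (rep[0])     */
    printf("%zu\n", resolve_offset(rep, 1, 1));     /* repeat code 1, ll==0 -> 4 (old rep[1]) */
    printf("%zu\n", resolve_offset(rep, 7, 0));     /* fresh offset: 7 - 3 = 4                */
    return 0;
}
```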
@@ -905,40 +1117,104 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
905
1117
  const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
906
1118
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
907
1119
  DEBUGLOG(5, "ZSTD_decompressSequences_body");
1120
+ (void)frame;
908
1121
 
909
1122
  /* Regen sequences */
910
1123
  if (nbSeq) {
911
1124
  seqState_t seqState;
1125
+ size_t error = 0;
912
1126
  dctx->fseEntropy = 1;
913
1127
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
914
1128
  RETURN_ERROR_IF(
915
1129
  ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
916
- corruption_detected);
1130
+ corruption_detected, "");
917
1131
  ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
918
1132
  ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
919
1133
  ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
920
-
921
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
922
- nbSeq--;
923
- { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
924
- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
925
- DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
926
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
927
- op += oneSeqSize;
928
- } }
1134
+ assert(dst != NULL);
1135
+
1136
+ ZSTD_STATIC_ASSERT(
1137
+ BIT_DStream_unfinished < BIT_DStream_completed &&
1138
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1139
+ BIT_DStream_completed < BIT_DStream_overflow);
1140
+
1141
+ #if defined(__GNUC__) && defined(__x86_64__)
1142
+ /* Align the decompression loop to 32 + 16 bytes.
1143
+ *
1144
+ * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1145
+ * speed swings based on the alignment of the decompression loop. This
1146
+ * performance swing is caused by parts of the decompression loop falling
1147
+ * out of the DSB. The entire decompression loop should fit in the DSB,
1148
+ * when it can't we get much worse performance. You can measure if you've
1149
+ * hit the good case or the bad case with this perf command for some
1150
+ * compressed file test.zst:
1151
+ *
1152
+ * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1153
+ * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1154
+ *
1155
+ * If you see most cycles served out of the MITE you've hit the bad case.
1156
+ * If you see most cycles served out of the DSB you've hit the good case.
1157
+ * If it is pretty even then you may be in an okay case.
1158
+ *
1159
+ * I've been able to reproduce this issue on the following CPUs:
1160
+ * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1161
+ * Use Instruments->Counters to get DSB/MITE cycles.
1162
+ * I never got performance swings, but I was able to
1163
+ * go from the good case of mostly DSB to half of the
1164
+ * cycles served from MITE.
1165
+ * - Coffeelake: Intel i9-9900k
1166
+ *
1167
+ * I haven't been able to reproduce the instability or DSB misses on any
1168
+ * of the following CPUS:
1169
+ * - Haswell
1170
+ * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
1171
+ * - Skylake
1172
+ *
1173
+ * If you are seeing performance stability this script can help test.
1174
+ * It tests on 4 commits in zstd where I saw performance change.
1175
+ *
1176
+ * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1177
+ */
1178
+ __asm__(".p2align 5");
1179
+ __asm__("nop");
1180
+ __asm__(".p2align 4");
1181
+ #endif
+ for ( ; ; ) {
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+ #endif
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+ BIT_reloadDStream(&(seqState.DStream));
+ op += oneSeqSize;
+ /* gcc and clang both don't like early returns in this loop.
+ * Instead break and check for an error at the end of the loop.
+ */
+ if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
+ error = oneSeqSize;
+ break;
+ }
+ if (UNLIKELY(!--nbSeq)) break;
+ }

  /* check if reached exact end */
  DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
- RETURN_ERROR_IF(nbSeq, corruption_detected);
+ if (ZSTD_isError(error)) return error;
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
  /* save reps for next block */
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
  }

  /* last literal segment */
  { size_t const lastLLSize = litEnd - litPtr;
- RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
- memcpy(op, litPtr, lastLLSize);
- op += lastLLSize;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ ZSTD_memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
  }

  return op-ostart;
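The rewritten loop above avoids early returns in the hot path: on failure it records `oneSeqSize` in `error`, breaks, and the check happens once after the loop, as the new comment explains. Below is a minimal, self-contained sketch of that break-and-check-after-the-loop shape, assuming nothing from libzstd; `produce_one()` and the buffer sizes are made up for illustration.

```c
#include <stdio.h>
#include <stddef.h>

/* Illustrative stand-in for a step that can fail; returns bytes produced,
 * or (size_t)-1 on failure (libzstd signals errors similarly via ZSTD_isError()). */
static size_t produce_one(unsigned char* dst, size_t cap, unsigned step)
{
    size_t i;
    if (step > cap) return (size_t)-1;   /* simulated failure */
    for (i = 0; i < step; i++) dst[i] = (unsigned char)step;
    return step;
}

int main(void)
{
    unsigned char out[64];
    size_t pos = 0;
    size_t error = 0;
    unsigned nbSteps = 10;

    /* Hot loop: no early return; record the error, break, and check once
     * after the loop, mirroring the shape of the rewritten decode loop. */
    for (;;) {
        size_t const oneSize = produce_one(out + pos, sizeof(out) - pos, nbSteps);
        if (oneSize == (size_t)-1) { error = oneSize; break; }
        pos += oneSize;
        if (!--nbSteps) break;
    }
    if (error != 0) { fprintf(stderr, "step failed\n"); return 1; }
    printf("produced %zu bytes\n", pos);
    return 0;
}
```

Keeping the error path out of the loop body keeps the generated loop small, which matters here because the whole loop is meant to fit in the decoded-uop cache (DSB) discussed in the comment above.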
@@ -948,103 +1224,25 @@ static size_t
  ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

-
-
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
- FORCE_INLINE_TEMPLATE seq_t
- ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
- {
- seq_t seq;
- U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
- U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
- U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
- U32 const totalBits = llBits+mlBits+ofBits;
- U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
- U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
- U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
-
- /* sequence */
- { size_t offset;
- if (!ofBits)
- offset = 0;
- else {
- ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
- ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
- assert(ofBits <= MaxOff);
- if (MEM_32bits() && longOffsets) {
- U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
- offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
- if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
- } else {
- offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
- }
- }
-
- if (ofBits <= 1) {
- offset += (llBase==0);
- if (offset) {
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset = temp;
- } else {
- offset = seqState->prevOffset[0];
- }
- } else {
- seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset;
- }
- seq.offset = offset;
- }
-
- seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
- if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
- BIT_reloadDStream(&seqState->DStream);
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
- BIT_reloadDStream(&seqState->DStream);
- /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
- ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
-
- seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
- if (MEM_32bits())
- BIT_reloadDStream(&seqState->DStream);
-
- { size_t const pos = seqState->pos + seq.litLength;
- const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
- seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
- * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
- seqState->pos = pos + seq.matchLength;
- }
-
- /* ANS state update */
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
-
- return seq;
- }
-
  FORCE_INLINE_TEMPLATE size_t
  ZSTD_decompressSequencesLong_body(
  ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
  const BYTE* ip = (const BYTE*)seqStart;
  const BYTE* const iend = ip + seqSize;
- BYTE* const ostart = (BYTE* const)dst;
+ BYTE* const ostart = (BYTE*)dst;
  BYTE* const oend = ostart + maxDstSize;
  BYTE* op = ostart;
  const BYTE* litPtr = dctx->litPtr;
@@ -1052,6 +1250,7 @@ ZSTD_decompressSequencesLong_body(
  const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
  const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+ (void)frame;

  /* Regen sequences */
  if (nbSeq) {
@@ -1067,36 +1266,45 @@ ZSTD_decompressSequencesLong_body(
  seqState.prefixStart = prefixStart;
  seqState.pos = (size_t)(op-prefixStart);
  seqState.dictEnd = dictEnd;
+ assert(dst != NULL);
  assert(iend >= ip);
  RETURN_ERROR_IF(
  ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
- corruption_detected);
+ corruption_detected, "");
  ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
  ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
  ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);

  /* prepare in advance */
  for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
- sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
+ sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
  PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
  }
- RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
+ RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");

  /* decode and decompress */
  for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
- seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+ #endif
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
  PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
  sequences[seqNb & STORED_SEQS_MASK] = sequence;
  op += oneSeqSize;
  }
- RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
+ RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");

  /* finish queue */
  seqNb -= seqAdvance;
  for ( ; seqNb<nbSeq ; seqNb++) {
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+ #endif
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
  op += oneSeqSize;
  }
@@ -1107,9 +1315,11 @@ ZSTD_decompressSequencesLong_body(

  /* last literal segment */
  { size_t const lastLLSize = litEnd - litPtr;
- RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
- memcpy(op, litPtr, lastLLSize);
- op += lastLLSize;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ ZSTD_memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
  }

  return op-ostart;
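ZSTD_decompressSequencesLong_body above keeps a small ring of pre-decoded sequences (STORED_SEQS entries), prefetches each match source as soon as it is decoded, and only executes a sequence several iterations later, once its source data has had time to reach the cache. The sketch below shows that decode-ahead/prefetch/execute pipeline in isolation; `item_t`, `decode_next()`, and the plain `memcpy` execute step are illustrative stand-ins, not libzstd code.

```c
#include <stdio.h>
#include <string.h>

#define STORED 4                       /* ring size, a power of two */
#define STORED_MASK (STORED - 1)
#define ADVANCE (STORED - 1)           /* how far decode runs ahead of execute */

typedef struct { const unsigned char* src; size_t len; } item_t;

/* Illustrative "decode": pick the next chunk to copy out of a 64-byte pool. */
static item_t decode_next(const unsigned char* base, size_t i)
{
    item_t it;
    it.src = base + (i * 8) % 64;
    it.len = 8;
    return it;
}

/* Decode ADVANCE items ahead, prefetch their sources, execute them later. */
static size_t copy_pipelined(unsigned char* dst, const unsigned char* base, size_t nb)
{
    item_t ring[STORED];
    unsigned char* op = dst;
    size_t const advance = (nb < ADVANCE) ? nb : ADVANCE;
    size_t i;

    for (i = 0; i < advance; i++) {              /* prepare in advance */
        ring[i] = decode_next(base, i);
#if defined(__GNUC__)
        __builtin_prefetch(ring[i].src);
#endif
    }
    for (; i < nb; i++) {                        /* decode one, execute an older one */
        item_t const next = decode_next(base, i);
        item_t const cur  = ring[(i - advance) & STORED_MASK];
        memcpy(op, cur.src, cur.len); op += cur.len;
#if defined(__GNUC__)
        __builtin_prefetch(next.src);
#endif
        ring[i & STORED_MASK] = next;
    }
    for (i -= advance; i < nb; i++) {            /* finish queue */
        item_t const cur = ring[i & STORED_MASK];
        memcpy(op, cur.src, cur.len); op += cur.len;
    }
    return (size_t)(op - dst);
}

int main(void)
{
    unsigned char pool[64], out[80];
    int b;
    for (b = 0; b < 64; b++) pool[b] = (unsigned char)b;
    printf("copied %zu bytes\n", copy_pipelined(out, pool, 10));
    return 0;
}
```

The ring stays valid because the execute stage trails the decode stage by fewer slots than the ring holds, which is the same relationship ADVANCED_SEQS has to STORED_SEQS in the code above.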
@@ -1119,9 +1329,10 @@ static size_t
  ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */

@@ -1131,12 +1342,14 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,

  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
  static TARGET_ATTRIBUTE("bmi2") size_t
+ DONT_VECTORIZE
  ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

@@ -1145,9 +1358,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t
  ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */

@@ -1157,21 +1371,23 @@ typedef size_t (*ZSTD_decompressSequences_t)(
  ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset);
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame);

  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
  static size_t
  ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
  DEBUGLOG(5, "ZSTD_decompressSequences");
  #if DYNAMIC_BMI2
  if (dctx->bmi2) {
- return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif
- return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

@@ -1186,15 +1402,16 @@ static size_t
  ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
  DEBUGLOG(5, "ZSTD_decompressSequencesLong");
  #if DYNAMIC_BMI2
  if (dctx->bmi2) {
- return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif
- return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
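The `_bmi2` wrappers above are the same function bodies compiled with TARGET_ATTRIBUTE("bmi2"), and the `ZSTD_decompressSequences*` entry points pick between them at runtime through the cached `dctx->bmi2` flag (DYNAMIC_BMI2). The sketch below shows that target-attribute-plus-runtime-dispatch pattern in isolation, assuming a GCC or Clang toolchain on x86-64; the `sum_*` functions are placeholders, not libzstd code.

```c
#include <stdio.h>
#include <stddef.h>

static unsigned long long sum_default(const unsigned long long* v, size_t n)
{
    unsigned long long s = 0;
    size_t i;
    for (i = 0; i < n; i++) s += v[i];
    return s;
}

#if defined(__GNUC__) && defined(__x86_64__)
/* Same body, but the compiler is allowed to emit BMI2 instructions here. */
__attribute__((target("bmi2")))
static unsigned long long sum_bmi2(const unsigned long long* v, size_t n)
{
    unsigned long long s = 0;
    size_t i;
    for (i = 0; i < n; i++) s += v[i];
    return s;
}
#endif

static unsigned long long sum_dispatch(const unsigned long long* v, size_t n)
{
#if defined(__GNUC__) && defined(__x86_64__)
    if (__builtin_cpu_supports("bmi2"))   /* libzstd caches this kind of check in dctx->bmi2 */
        return sum_bmi2(v, n);
#endif
    return sum_default(v, n);
}

int main(void)
{
    unsigned long long v[4] = {1, 2, 3, 4};
    printf("%llu\n", sum_dispatch(v, 4));
    return 0;
}
```

Caching the CPU check once per context, as the library does, keeps the per-call dispatch down to a single flag test.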

@@ -1228,7 +1445,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
  }
  #endif

-
  size_t
  ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  void* dst, size_t dstCapacity,
@@ -1244,7 +1460,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
  DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);

- RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);
+ RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");

  /* Decode literals section */
  { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
@@ -1270,6 +1486,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  ip += seqHSize;
  srcSize -= seqHSize;

+ RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+
  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
  !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
  if ( !usePrefetchDecoder
@@ -1288,23 +1506,34 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  if (usePrefetchDecoder)
  #endif
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
  #endif

  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
  /* else */
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
  #endif
  }
  }

+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
+ {
+ if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
+ dctx->dictEnd = dctx->previousDstEnd;
+ dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+ dctx->prefixStart = dst;
+ dctx->previousDstEnd = dst;
+ }
+ }
+
+
  size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
  void* dst, size_t dstCapacity,
  const void* src, size_t srcSize)
  {
  size_t dSize;
- ZSTD_checkContinuity(dctx, dst);
+ ZSTD_checkContinuity(dctx, dst, dstCapacity);
  dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
  dctx->previousDstEnd = (char*)dst + dSize;
  return dSize;
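The new three-argument ZSTD_checkContinuity() only resets the prefix window when the caller switches to a non-contiguous destination (and the `dstSize > 0` guard avoids doing so for empty blocks), so decompressing blocks back-to-back into one buffer keeps cross-block back-references valid. Below is a sketch of that calling pattern with the experimental block-level API (available under ZSTD_STATIC_LINKING_ONLY); the blocks[]/blockSizes[] inputs are assumed to come from the matching block-level compressor and are not produced here.

```c
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#include <stddef.h>

/* Decompress nbBlocks raw zstd blocks into one contiguous buffer.
 * Returns the total decompressed size, or a zstd error code. */
static size_t decompress_blocks(void* dst, size_t dstCapacity,
                                const void* const* blocks,
                                const size_t* blockSizes, size_t nbBlocks)
{
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    char* op = (char*)dst;
    char* const oend = op + dstCapacity;
    size_t err;
    size_t b;

    if (dctx == NULL) return (size_t)-1;  /* allocation failure */
    err = ZSTD_decompressBegin(dctx);
    for (b = 0; !ZSTD_isError(err) && b < nbBlocks; b++) {
        /* Appending right after the previous block keeps the output
         * contiguous, so the prefix window survives across blocks. */
        size_t const dSize = ZSTD_decompressBlock(dctx, op, (size_t)(oend - op),
                                                  blocks[b], blockSizes[b]);
        if (ZSTD_isError(dSize)) { err = dSize; break; }
        op += dSize;
    }
    ZSTD_freeDCtx(dctx);
    return ZSTD_isError(err) ? err : (size_t)(op - (char*)dst);
}
```

If a caller instead wrote each block to a separate buffer, ZSTD_checkContinuity() would treat the previous output as an external segment, which is exactly the fallback path the function above sets up.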