zstd-ruby 1.4.4.0 → 1.4.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +123 -58
  4. data/ext/zstdruby/libzstd/README.md +34 -14
  5. data/ext/zstdruby/libzstd/common/bitstream.h +31 -37
  6. data/ext/zstdruby/libzstd/common/compiler.h +19 -3
  7. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  8. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  9. data/ext/zstdruby/libzstd/common/debug.h +11 -31
  10. data/ext/zstdruby/libzstd/common/entropy_common.c +13 -33
  11. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  12. data/ext/zstdruby/libzstd/common/error_private.h +6 -2
  13. data/ext/zstdruby/libzstd/common/fse.h +11 -31
  14. data/ext/zstdruby/libzstd/common/fse_decompress.c +12 -37
  15. data/ext/zstdruby/libzstd/common/huf.h +15 -33
  16. data/ext/zstdruby/libzstd/common/mem.h +1 -1
  17. data/ext/zstdruby/libzstd/common/pool.c +1 -1
  18. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  19. data/ext/zstdruby/libzstd/common/threading.c +4 -3
  20. data/ext/zstdruby/libzstd/common/threading.h +4 -3
  21. data/ext/zstdruby/libzstd/common/xxhash.c +15 -33
  22. data/ext/zstdruby/libzstd/common/xxhash.h +11 -31
  23. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  24. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  25. data/ext/zstdruby/libzstd/common/zstd_internal.h +112 -15
  26. data/ext/zstdruby/libzstd/compress/fse_compress.c +17 -40
  27. data/ext/zstdruby/libzstd/compress/hist.c +15 -35
  28. data/ext/zstdruby/libzstd/compress/hist.h +12 -32
  29. data/ext/zstdruby/libzstd/compress/huf_compress.c +92 -92
  30. data/ext/zstdruby/libzstd/compress/zstd_compress.c +450 -275
  31. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +136 -14
  32. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +10 -6
  33. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +24 -20
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +845 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -13
  39. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +11 -8
  40. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  41. data/ext/zstdruby/libzstd/compress/zstd_fast.c +36 -24
  42. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  43. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +34 -11
  44. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +1 -1
  45. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +27 -5
  46. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +7 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_opt.c +38 -84
  48. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  49. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +48 -21
  50. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +2 -2
  51. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +76 -62
  52. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -8
  53. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  54. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +264 -148
  55. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +312 -203
  56. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +3 -3
  57. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +18 -4
  58. data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
  59. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  60. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  61. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  62. data/ext/zstdruby/libzstd/dictBuilder/cover.c +5 -5
  63. data/ext/zstdruby/libzstd/dictBuilder/cover.h +14 -4
  64. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +14 -4
  65. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +33 -9
  66. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +51 -28
  67. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  68. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  69. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +18 -12
  70. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  71. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +10 -6
  72. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  73. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +10 -6
  74. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  75. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +13 -7
  76. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +17 -13
  78. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  79. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +17 -13
  80. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +22 -14
  82. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  83. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -2
  84. data/ext/zstdruby/libzstd/zstd.h +62 -21
  85. data/lib/zstd-ruby/version.rb +1 -1
  86. metadata +7 -5
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,14 +15,14 @@
15
15
  * Dependencies
16
16
  *********************************************************/
17
17
  #include <string.h> /* memcpy, memmove, memset */
18
- #include "compiler.h" /* prefetch */
19
- #include "cpu.h" /* bmi2 */
20
- #include "mem.h" /* low level memory routines */
18
+ #include "../common/compiler.h" /* prefetch */
19
+ #include "../common/cpu.h" /* bmi2 */
20
+ #include "../common/mem.h" /* low level memory routines */
21
21
  #define FSE_STATIC_LINKING_ONLY
22
- #include "fse.h"
22
+ #include "../common/fse.h"
23
23
  #define HUF_STATIC_LINKING_ONLY
24
- #include "huf.h"
25
- #include "zstd_internal.h"
24
+ #include "../common/huf.h"
25
+ #include "../common/zstd_internal.h"
26
26
  #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27
27
  #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28
28
  #include "zstd_decompress_block.h"
@@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
56
56
  size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57
57
  blockProperties_t* bpPtr)
58
58
  {
59
- RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
59
+ RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
60
60
 
61
61
  { U32 const cBlockHeader = MEM_readLE24(src);
62
62
  U32 const cSize = cBlockHeader >> 3;
@@ -64,7 +64,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
64
64
  bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
65
65
  bpPtr->origSize = cSize; /* only useful for RLE */
66
66
  if (bpPtr->blockType == bt_rle) return 1;
67
- RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
67
+ RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
68
68
  return cSize;
69
69
  }
70
70
  }
@@ -80,7 +80,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
80
80
  const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
81
81
  {
82
82
  DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
83
- RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
83
+ RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
84
84
 
85
85
  { const BYTE* const istart = (const BYTE*) src;
86
86
  symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
@@ -89,7 +89,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
89
89
  {
90
90
  case set_repeat:
91
91
  DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
92
- RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
92
+ RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
93
93
  /* fall-through */
94
94
 
95
95
  case set_compressed:
@@ -121,8 +121,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
121
121
  litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
122
122
  break;
123
123
  }
124
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
125
- RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
124
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
125
+ RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
126
126
 
127
127
  /* prefetch huffman table if cold */
128
128
  if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
@@ -160,7 +160,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
160
160
  }
161
161
  }
162
162
 
163
- RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
163
+ RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
164
164
 
165
165
  dctx->litPtr = dctx->litBuffer;
166
166
  dctx->litSize = litSize;
@@ -190,7 +190,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
190
190
  }
191
191
 
192
192
  if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
193
- RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
193
+ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
194
194
  memcpy(dctx->litBuffer, istart+lhSize, litSize);
195
195
  dctx->litPtr = dctx->litBuffer;
196
196
  dctx->litSize = litSize;
@@ -222,7 +222,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
222
222
  RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
223
223
  break;
224
224
  }
225
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
225
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
226
226
  memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
227
227
  dctx->litPtr = dctx->litBuffer;
228
228
  dctx->litSize = litSize;
@@ -440,8 +440,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
440
440
  switch(type)
441
441
  {
442
442
  case set_rle :
443
- RETURN_ERROR_IF(!srcSize, srcSize_wrong);
444
- RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
443
+ RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
444
+ RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
445
445
  { U32 const symbol = *(const BYTE*)src;
446
446
  U32 const baseline = baseValue[symbol];
447
447
  U32 const nbBits = nbAdditionalBits[symbol];
@@ -453,7 +453,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
453
453
  *DTablePtr = defaultTable;
454
454
  return 0;
455
455
  case set_repeat:
456
- RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
456
+ RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
457
457
  /* prefetch FSE table if used */
458
458
  if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
459
459
  const void* const pStart = *DTablePtr;
@@ -465,8 +465,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
465
465
  { unsigned tableLog;
466
466
  S16 norm[MaxSeq+1];
467
467
  size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
468
- RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
469
- RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
468
+ RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
469
+ RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
470
470
  ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
471
471
  *DTablePtr = DTableSpace;
472
472
  return headerSize;
@@ -487,28 +487,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
487
487
  DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
488
488
 
489
489
  /* check */
490
- RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
490
+ RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
491
491
 
492
492
  /* SeqHead */
493
493
  nbSeq = *ip++;
494
494
  if (!nbSeq) {
495
495
  *nbSeqPtr=0;
496
- RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
496
+ RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
497
497
  return 1;
498
498
  }
499
499
  if (nbSeq > 0x7F) {
500
500
  if (nbSeq == 0xFF) {
501
- RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
501
+ RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
502
502
  nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
503
503
  } else {
504
- RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
504
+ RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
505
505
  nbSeq = ((nbSeq-0x80)<<8) + *ip++;
506
506
  }
507
507
  }
508
508
  *nbSeqPtr = nbSeq;
509
509
 
510
510
  /* FSE table descriptors */
511
- RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
511
+ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
512
512
  { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
513
513
  symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
514
514
  symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -521,7 +521,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
521
521
  LL_base, LL_bits,
522
522
  LL_defaultDTable, dctx->fseEntropy,
523
523
  dctx->ddictIsCold, nbSeq);
524
- RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
524
+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
525
525
  ip += llhSize;
526
526
  }
527
527
 
@@ -531,7 +531,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
531
531
  OF_base, OF_bits,
532
532
  OF_defaultDTable, dctx->fseEntropy,
533
533
  dctx->ddictIsCold, nbSeq);
534
- RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
534
+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
535
535
  ip += ofhSize;
536
536
  }
537
537
 
@@ -541,7 +541,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
541
541
  ML_base, ML_bits,
542
542
  ML_defaultDTable, dctx->fseEntropy,
543
543
  dctx->ddictIsCold, nbSeq);
544
- RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
544
+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
545
545
  ip += mlhSize;
546
546
  }
547
547
  }
@@ -580,7 +580,7 @@ typedef struct {
580
580
  * Precondition: *ip <= *op
581
581
  * Postcondition: *op - *ip >= 8
582
582
  */
583
- static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
583
+ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
584
584
  assert(*ip <= *op);
585
585
  if (offset < 8) {
586
586
  /* close range match, overlap */
@@ -665,15 +665,15 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
665
665
  {
666
666
  BYTE* const oLitEnd = op + sequence.litLength;
667
667
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
668
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
669
668
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
670
669
  const BYTE* match = oLitEnd - sequence.offset;
671
670
  BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
672
671
 
673
- /* bounds checks */
674
- assert(oLitEnd < oMatchEnd);
675
- RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
676
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
672
+ /* bounds checks : careful of address space overflow in 32-bit mode */
673
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
674
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
675
+ assert(op < op + sequenceLength);
676
+ assert(oLitEnd < op + sequenceLength);
677
677
 
678
678
  /* copy literals */
679
679
  ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
@@ -683,7 +683,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
683
683
  /* copy Match */
684
684
  if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
685
685
  /* offset beyond prefix */
686
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
686
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
687
687
  match = dictEnd - (prefixStart-match);
688
688
  if (match + sequence.matchLength <= dictEnd) {
689
689
  memmove(oLitEnd, match, sequence.matchLength);
@@ -709,16 +709,27 @@ size_t ZSTD_execSequence(BYTE* op,
709
709
  BYTE* const oLitEnd = op + sequence.litLength;
710
710
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
711
711
  BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
712
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
712
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
713
713
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
714
714
  const BYTE* match = oLitEnd - sequence.offset;
715
715
 
716
- /* Errors and uncommon cases handled here. */
717
- assert(oLitEnd < oMatchEnd);
718
- if (iLitEnd > litLimit || oMatchEnd > oend_w)
716
+ assert(op != NULL /* Precondition */);
717
+ assert(oend_w < oend /* No underflow */);
718
+ /* Handle edge cases in a slow path:
719
+ * - Read beyond end of literals
720
+ * - Match end is within WILDCOPY_OVERLENGTH of oend
721
+ * - 32-bit mode and the match length overflows
722
+ */
723
+ if (UNLIKELY(
724
+ iLitEnd > litLimit ||
725
+ oMatchEnd > oend_w ||
726
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
719
727
  return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
720
728
 
721
729
  /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
730
+ assert(op <= oLitEnd /* No overflow */);
731
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
732
+ assert(oMatchEnd <= oend /* No underflow */);
722
733
  assert(iLitEnd <= litLimit /* Literal length is in bounds */);
723
734
  assert(oLitEnd <= oend_w /* Can wildcopy literals */);
724
735
  assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
@@ -729,7 +740,7 @@ size_t ZSTD_execSequence(BYTE* op,
729
740
  */
730
741
  assert(WILDCOPY_OVERLENGTH >= 16);
731
742
  ZSTD_copy16(op, (*litPtr));
732
- if (sequence.litLength > 16) {
743
+ if (UNLIKELY(sequence.litLength > 16)) {
733
744
  ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
734
745
  }
735
746
  op = oLitEnd;
@@ -738,7 +749,7 @@ size_t ZSTD_execSequence(BYTE* op,
738
749
  /* Copy Match */
739
750
  if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
740
751
  /* offset beyond prefix -> go into extDict */
741
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
752
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
742
753
  match = dictEnd + (match - prefixStart);
743
754
  if (match + sequence.matchLength <= dictEnd) {
744
755
  memmove(oLitEnd, match, sequence.matchLength);
@@ -760,7 +771,7 @@ size_t ZSTD_execSequence(BYTE* op,
760
771
  /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
761
772
  * without overlap checking.
762
773
  */
763
- if (sequence.offset >= WILDCOPY_VECLEN) {
774
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
764
775
  /* We bet on a full wildcopy for matches, since we expect matches to be
765
776
  * longer than literals (in general). In silesia, ~10% of matches are longer
766
777
  * than 16 bytes.
@@ -802,6 +813,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
802
813
  DStatePtr->state = DInfo.nextState + lowBits;
803
814
  }
804
815
 
816
+ FORCE_INLINE_TEMPLATE void
817
+ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
818
+ {
819
+ U32 const nbBits = DInfo.nbBits;
820
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
821
+ DStatePtr->state = DInfo.nextState + lowBits;
822
+ }
823
+
805
824
  /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
806
825
  * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
807
826
  * bits before reloading. This value is the maximum number of bytes we read
@@ -813,25 +832,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
813
832
  : 0)
814
833
 
815
834
  typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
835
+ typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
816
836
 
817
- #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
818
837
  FORCE_INLINE_TEMPLATE seq_t
819
- ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
838
+ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
820
839
  {
821
840
  seq_t seq;
822
- U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
823
- U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
824
- U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
825
- U32 const totalBits = llBits+mlBits+ofBits;
826
- U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
827
- U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
828
- U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
841
+ ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
842
+ ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
843
+ ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
844
+ U32 const llBase = llDInfo.baseValue;
845
+ U32 const mlBase = mlDInfo.baseValue;
846
+ U32 const ofBase = ofDInfo.baseValue;
847
+ BYTE const llBits = llDInfo.nbAdditionalBits;
848
+ BYTE const mlBits = mlDInfo.nbAdditionalBits;
849
+ BYTE const ofBits = ofDInfo.nbAdditionalBits;
850
+ BYTE const totalBits = llBits+mlBits+ofBits;
829
851
 
830
852
  /* sequence */
831
853
  { size_t offset;
832
- if (!ofBits)
833
- offset = 0;
834
- else {
854
+ if (ofBits > 1) {
835
855
  ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
836
856
  ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
837
857
  assert(ofBits <= MaxOff);
@@ -845,59 +865,138 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
845
865
  offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
846
866
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
847
867
  }
848
- }
849
-
850
- if (ofBits <= 1) {
851
- offset += (llBase==0);
852
- if (offset) {
853
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
854
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
855
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
856
- seqState->prevOffset[1] = seqState->prevOffset[0];
857
- seqState->prevOffset[0] = offset = temp;
858
- } else { /* offset == 0 */
859
- offset = seqState->prevOffset[0];
860
- }
861
- } else {
862
868
  seqState->prevOffset[2] = seqState->prevOffset[1];
863
869
  seqState->prevOffset[1] = seqState->prevOffset[0];
864
870
  seqState->prevOffset[0] = offset;
865
- }
871
+ } else {
872
+ U32 const ll0 = (llBase == 0);
873
+ if (LIKELY((ofBits == 0))) {
874
+ if (LIKELY(!ll0))
875
+ offset = seqState->prevOffset[0];
876
+ else {
877
+ offset = seqState->prevOffset[1];
878
+ seqState->prevOffset[1] = seqState->prevOffset[0];
879
+ seqState->prevOffset[0] = offset;
880
+ }
881
+ } else {
882
+ offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
883
+ { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
884
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
885
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
886
+ seqState->prevOffset[1] = seqState->prevOffset[0];
887
+ seqState->prevOffset[0] = offset = temp;
888
+ } } }
866
889
  seq.offset = offset;
867
890
  }
868
891
 
869
- seq.matchLength = mlBase
870
- + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
892
+ seq.matchLength = mlBase;
893
+ if (mlBits > 0)
894
+ seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
895
+
871
896
  if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
872
897
  BIT_reloadDStream(&seqState->DStream);
873
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
898
+ if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
874
899
  BIT_reloadDStream(&seqState->DStream);
875
900
  /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
876
901
  ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
877
902
 
878
- seq.litLength = llBase
879
- + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
903
+ seq.litLength = llBase;
904
+ if (llBits > 0)
905
+ seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
906
+
880
907
  if (MEM_32bits())
881
908
  BIT_reloadDStream(&seqState->DStream);
882
909
 
883
910
  DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
884
911
  (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
885
912
 
886
- /* ANS state update */
887
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
888
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
889
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
890
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
913
+ if (prefetch == ZSTD_p_prefetch) {
914
+ size_t const pos = seqState->pos + seq.litLength;
915
+ const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
916
+ seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
917
+ * No consequence though : no memory access will occur, offset is only used for prefetching */
918
+ seqState->pos = pos + seq.matchLength;
919
+ }
920
+
921
+ /* ANS state update
922
+ * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
923
+ * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
924
+ * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
925
+ * better option, so it is the default for other compilers. But, if you
926
+ * measure that it is worse, please put up a pull request.
927
+ */
928
+ {
929
+ #if defined(__GNUC__) && !defined(__clang__)
930
+ const int kUseUpdateFseState = 1;
931
+ #else
932
+ const int kUseUpdateFseState = 0;
933
+ #endif
934
+ if (kUseUpdateFseState) {
935
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
936
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
937
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
938
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
939
+ } else {
940
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
941
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
942
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
943
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
944
+ }
945
+ }
891
946
 
892
947
  return seq;
893
948
  }
894
949
 
950
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
951
+ static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
952
+ {
953
+ size_t const windowSize = dctx->fParams.windowSize;
954
+ /* No dictionary used. */
955
+ if (dctx->dictContentEndForFuzzing == NULL) return 0;
956
+ /* Dictionary is our prefix. */
957
+ if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
958
+ /* Dictionary is not our ext-dict. */
959
+ if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
960
+ /* Dictionary is not within our window size. */
961
+ if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
962
+ /* Dictionary is active. */
963
+ return 1;
964
+ }
965
+
966
+ MEM_STATIC void ZSTD_assertValidSequence(
967
+ ZSTD_DCtx const* dctx,
968
+ BYTE const* op, BYTE const* oend,
969
+ seq_t const seq,
970
+ BYTE const* prefixStart, BYTE const* virtualStart)
971
+ {
972
+ size_t const windowSize = dctx->fParams.windowSize;
973
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
974
+ BYTE const* const oLitEnd = op + seq.litLength;
975
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
976
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
977
+ assert(op <= oend);
978
+ assert((size_t)(oend - op) >= sequenceSize);
979
+ assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
980
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
981
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
982
+ /* Offset must be within the dictionary. */
983
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
984
+ assert(seq.offset <= windowSize + dictSize);
985
+ } else {
986
+ /* Offset must be within our window. */
987
+ assert(seq.offset <= windowSize);
988
+ }
989
+ }
990
+ #endif
991
+
992
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
895
993
  FORCE_INLINE_TEMPLATE size_t
896
994
  DONT_VECTORIZE
897
995
  ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
898
996
  void* dst, size_t maxDstSize,
899
997
  const void* seqStart, size_t seqSize, int nbSeq,
900
- const ZSTD_longOffset_e isLongOffset)
998
+ const ZSTD_longOffset_e isLongOffset,
999
+ const int frame)
901
1000
  {
902
1001
  const BYTE* ip = (const BYTE*)seqStart;
903
1002
  const BYTE* const iend = ip + seqSize;
@@ -910,46 +1009,104 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
910
1009
  const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
911
1010
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
912
1011
  DEBUGLOG(5, "ZSTD_decompressSequences_body");
1012
+ (void)frame;
913
1013
 
914
1014
  /* Regen sequences */
915
1015
  if (nbSeq) {
916
1016
  seqState_t seqState;
1017
+ size_t error = 0;
917
1018
  dctx->fseEntropy = 1;
918
1019
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
919
1020
  RETURN_ERROR_IF(
920
1021
  ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
921
- corruption_detected);
1022
+ corruption_detected, "");
922
1023
  ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
923
1024
  ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
924
1025
  ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1026
+ assert(dst != NULL);
925
1027
 
926
1028
  ZSTD_STATIC_ASSERT(
927
1029
  BIT_DStream_unfinished < BIT_DStream_completed &&
928
1030
  BIT_DStream_endOfBuffer < BIT_DStream_completed &&
929
1031
  BIT_DStream_completed < BIT_DStream_overflow);
930
1032
 
931
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
932
- nbSeq--;
933
- { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
934
- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
935
- DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
936
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
937
- op += oneSeqSize;
938
- } }
1033
+ #if defined(__GNUC__) && defined(__x86_64__)
1034
+ /* Align the decompression loop to 32 + 16 bytes.
1035
+ *
1036
+ * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1037
+ * speed swings based on the alignment of the decompression loop. This
1038
+ * performance swing is caused by parts of the decompression loop falling
1039
+ * out of the DSB. The entire decompression loop should fit in the DSB;
1040
+ * when it can't we get much worse performance. You can measure if you've
1041
+ * hit the good case or the bad case with this perf command for some
1042
+ * compressed file test.zst:
1043
+ *
1044
+ * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1045
+ * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1046
+ *
1047
+ * If you see most cycles served out of the MITE you've hit the bad case.
1048
+ * If you see most cycles served out of the DSB you've hit the good case.
1049
+ * If it is pretty even then you may be in an okay case.
1050
+ *
1051
+ * I've been able to reproduce this issue on the following CPUs:
1052
+ * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1053
+ * Use Instruments->Counters to get DSB/MITE cycles.
1054
+ * I never got performance swings, but I was able to
1055
+ * go from the good case of mostly DSB to half of the
1056
+ * cycles served from MITE.
1057
+ * - Coffeelake: Intel i9-9900k
1058
+ *
1059
+ * I haven't been able to reproduce the instability or DSB misses on any
1060
+ * of the following CPUs:
1061
+ * - Haswell
1062
+ * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
1063
+ * - Skylake
1064
+ *
1065
+ * If you are seeing performance instability, this script can help test.
1066
+ * It tests on 4 commits in zstd where I saw performance change.
1067
+ *
1068
+ * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1069
+ */
1070
+ __asm__(".p2align 5");
1071
+ __asm__("nop");
1072
+ __asm__(".p2align 4");
1073
+ #endif
1074
+ for ( ; ; ) {
1075
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
1076
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1077
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1078
+ assert(!ZSTD_isError(oneSeqSize));
1079
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1080
+ #endif
1081
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1082
+ BIT_reloadDStream(&(seqState.DStream));
1083
+ /* gcc and clang both don't like early returns in this loop.
1084
+ * gcc doesn't like early breaks either.
1085
+ * Instead save an error and report it at the end.
1086
+ * When there is an error, don't increment op, so we don't
1087
+ * overwrite.
1088
+ */
1089
+ if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
1090
+ else op += oneSeqSize;
1091
+ if (UNLIKELY(!--nbSeq)) break;
1092
+ }
939
1093
 
940
1094
  /* check if reached exact end */
941
1095
  DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
942
- RETURN_ERROR_IF(nbSeq, corruption_detected);
943
- RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
1096
+ if (ZSTD_isError(error)) return error;
1097
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1098
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
944
1099
  /* save reps for next block */
945
1100
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
946
1101
  }
947
1102
 
948
1103
  /* last literal segment */
949
1104
  { size_t const lastLLSize = litEnd - litPtr;
950
- RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
951
- memcpy(op, litPtr, lastLLSize);
952
- op += lastLLSize;
1105
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1106
+ if (op != NULL) {
1107
+ memcpy(op, litPtr, lastLLSize);
1108
+ op += lastLLSize;
1109
+ }
953
1110
  }
954
1111
 
955
1112
  return op-ostart;
@@ -959,99 +1116,21 @@ static size_t
959
1116
  ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
960
1117
  void* dst, size_t maxDstSize,
961
1118
  const void* seqStart, size_t seqSize, int nbSeq,
962
- const ZSTD_longOffset_e isLongOffset)
1119
+ const ZSTD_longOffset_e isLongOffset,
1120
+ const int frame)
963
1121
  {
964
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1122
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
965
1123
  }
966
1124
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
967
1125
 
968
-
969
-
970
1126
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
971
- FORCE_INLINE_TEMPLATE seq_t
972
- ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
973
- {
974
- seq_t seq;
975
- U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
976
- U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
977
- U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
978
- U32 const totalBits = llBits+mlBits+ofBits;
979
- U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
980
- U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
981
- U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
982
-
983
- /* sequence */
984
- { size_t offset;
985
- if (!ofBits)
986
- offset = 0;
987
- else {
988
- ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
989
- ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
990
- assert(ofBits <= MaxOff);
991
- if (MEM_32bits() && longOffsets) {
992
- U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
993
- offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
994
- if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
995
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
996
- } else {
997
- offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
998
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
999
- }
1000
- }
1001
-
1002
- if (ofBits <= 1) {
1003
- offset += (llBase==0);
1004
- if (offset) {
1005
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1006
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
1007
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1008
- seqState->prevOffset[1] = seqState->prevOffset[0];
1009
- seqState->prevOffset[0] = offset = temp;
1010
- } else {
1011
- offset = seqState->prevOffset[0];
1012
- }
1013
- } else {
1014
- seqState->prevOffset[2] = seqState->prevOffset[1];
1015
- seqState->prevOffset[1] = seqState->prevOffset[0];
1016
- seqState->prevOffset[0] = offset;
1017
- }
1018
- seq.offset = offset;
1019
- }
1020
-
1021
- seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
1022
- if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1023
- BIT_reloadDStream(&seqState->DStream);
1024
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1025
- BIT_reloadDStream(&seqState->DStream);
1026
- /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
1027
- ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1028
-
1029
- seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
1030
- if (MEM_32bits())
1031
- BIT_reloadDStream(&seqState->DStream);
1032
-
1033
- { size_t const pos = seqState->pos + seq.litLength;
1034
- const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
1035
- seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1036
- * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
1037
- seqState->pos = pos + seq.matchLength;
1038
- }
1039
-
1040
- /* ANS state update */
1041
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1042
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1043
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1044
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1045
-
1046
- return seq;
1047
- }
1048
-
1049
1127
  FORCE_INLINE_TEMPLATE size_t
1050
1128
  ZSTD_decompressSequencesLong_body(
1051
1129
  ZSTD_DCtx* dctx,
1052
1130
  void* dst, size_t maxDstSize,
1053
1131
  const void* seqStart, size_t seqSize, int nbSeq,
1054
- const ZSTD_longOffset_e isLongOffset)
1132
+ const ZSTD_longOffset_e isLongOffset,
1133
+ const int frame)
1055
1134
  {
1056
1135
  const BYTE* ip = (const BYTE*)seqStart;
1057
1136
  const BYTE* const iend = ip + seqSize;
@@ -1063,6 +1142,7 @@ ZSTD_decompressSequencesLong_body(
1063
1142
  const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1064
1143
  const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1065
1144
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1145
+ (void)frame;
1066
1146
 
1067
1147
  /* Regen sequences */
1068
1148
  if (nbSeq) {
@@ -1078,36 +1158,45 @@ ZSTD_decompressSequencesLong_body(
1078
1158
  seqState.prefixStart = prefixStart;
1079
1159
  seqState.pos = (size_t)(op-prefixStart);
1080
1160
  seqState.dictEnd = dictEnd;
1161
+ assert(dst != NULL);
1081
1162
  assert(iend >= ip);
1082
1163
  RETURN_ERROR_IF(
1083
1164
  ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1084
- corruption_detected);
1165
+ corruption_detected, "");
1085
1166
  ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1086
1167
  ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1087
1168
  ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1088
1169
 
1089
1170
  /* prepare in advance */
1090
1171
  for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1091
- sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1172
+ sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
1092
1173
  PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1093
1174
  }
1094
- RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
1175
+ RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1095
1176
 
1096
1177
  /* decode and decompress */
1097
1178
  for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1098
- seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1179
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
1099
1180
  size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1181
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1182
+ assert(!ZSTD_isError(oneSeqSize));
1183
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1184
+ #endif
1100
1185
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1101
1186
  PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1102
1187
  sequences[seqNb & STORED_SEQS_MASK] = sequence;
1103
1188
  op += oneSeqSize;
1104
1189
  }
1105
- RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
1190
+ RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1106
1191
 
1107
1192
  /* finish queue */
1108
1193
  seqNb -= seqAdvance;
1109
1194
  for ( ; seqNb<nbSeq ; seqNb++) {
1110
1195
  size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1196
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1197
+ assert(!ZSTD_isError(oneSeqSize));
1198
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1199
+ #endif
1111
1200
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1112
1201
  op += oneSeqSize;
1113
1202
  }
@@ -1118,9 +1207,11 @@ ZSTD_decompressSequencesLong_body(
1118
1207
 
1119
1208
  /* last literal segment */
1120
1209
  { size_t const lastLLSize = litEnd - litPtr;
1121
- RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
1122
- memcpy(op, litPtr, lastLLSize);
1123
- op += lastLLSize;
1210
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1211
+ if (op != NULL) {
1212
+ memcpy(op, litPtr, lastLLSize);
1213
+ op += lastLLSize;
1214
+ }
1124
1215
  }
1125
1216
 
1126
1217
  return op-ostart;
@@ -1130,9 +1221,10 @@ static size_t
1130
1221
  ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1131
1222
  void* dst, size_t maxDstSize,
1132
1223
  const void* seqStart, size_t seqSize, int nbSeq,
1133
- const ZSTD_longOffset_e isLongOffset)
1224
+ const ZSTD_longOffset_e isLongOffset,
1225
+ const int frame)
1134
1226
  {
1135
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1227
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1136
1228
  }
1137
1229
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1138
1230
 
@@ -1146,9 +1238,10 @@ DONT_VECTORIZE
1146
1238
  ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1147
1239
  void* dst, size_t maxDstSize,
1148
1240
  const void* seqStart, size_t seqSize, int nbSeq,
1149
- const ZSTD_longOffset_e isLongOffset)
1241
+ const ZSTD_longOffset_e isLongOffset,
1242
+ const int frame)
1150
1243
  {
1151
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1244
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1152
1245
  }
1153
1246
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1154
1247
 
@@ -1157,9 +1250,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t
1157
1250
  ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1158
1251
  void* dst, size_t maxDstSize,
1159
1252
  const void* seqStart, size_t seqSize, int nbSeq,
1160
- const ZSTD_longOffset_e isLongOffset)
1253
+ const ZSTD_longOffset_e isLongOffset,
1254
+ const int frame)
1161
1255
  {
1162
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1256
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1163
1257
  }
1164
1258
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1165
1259
 
@@ -1169,21 +1263,23 @@ typedef size_t (*ZSTD_decompressSequences_t)(
1169
1263
  ZSTD_DCtx* dctx,
1170
1264
  void* dst, size_t maxDstSize,
1171
1265
  const void* seqStart, size_t seqSize, int nbSeq,
1172
- const ZSTD_longOffset_e isLongOffset);
1266
+ const ZSTD_longOffset_e isLongOffset,
1267
+ const int frame);
1173
1268
 
1174
1269
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1175
1270
  static size_t
1176
1271
  ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1177
1272
  const void* seqStart, size_t seqSize, int nbSeq,
1178
- const ZSTD_longOffset_e isLongOffset)
1273
+ const ZSTD_longOffset_e isLongOffset,
1274
+ const int frame)
1179
1275
  {
1180
1276
  DEBUGLOG(5, "ZSTD_decompressSequences");
1181
1277
  #if DYNAMIC_BMI2
1182
1278
  if (dctx->bmi2) {
1183
- return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1279
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1184
1280
  }
1185
1281
  #endif
1186
- return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1282
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1187
1283
  }
1188
1284
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1189
1285
 
@@ -1198,15 +1294,16 @@ static size_t
1198
1294
  ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1199
1295
  void* dst, size_t maxDstSize,
1200
1296
  const void* seqStart, size_t seqSize, int nbSeq,
1201
- const ZSTD_longOffset_e isLongOffset)
1297
+ const ZSTD_longOffset_e isLongOffset,
1298
+ const int frame)
1202
1299
  {
1203
1300
  DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1204
1301
  #if DYNAMIC_BMI2
1205
1302
  if (dctx->bmi2) {
1206
- return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1303
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1207
1304
  }
1208
1305
  #endif
1209
- return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1306
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1210
1307
  }
1211
1308
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1212
1309
 
@@ -1240,7 +1337,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1240
1337
  }
1241
1338
  #endif
1242
1339
 
1243
-
1244
1340
  size_t
1245
1341
  ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1246
1342
  void* dst, size_t dstCapacity,
@@ -1256,7 +1352,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1256
1352
  ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1257
1353
  DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1258
1354
 
1259
- RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);
1355
+ RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
1260
1356
 
1261
1357
  /* Decode literals section */
1262
1358
  { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
@@ -1282,6 +1378,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1282
1378
  ip += seqHSize;
1283
1379
  srcSize -= seqHSize;
1284
1380
 
1381
+ RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
1382
+
1285
1383
  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1286
1384
  !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1287
1385
  if ( !usePrefetchDecoder
@@ -1300,17 +1398,28 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1300
1398
  if (usePrefetchDecoder)
1301
1399
  #endif
1302
1400
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1303
- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1401
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
1304
1402
  #endif
1305
1403
 
1306
1404
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1307
1405
  /* else */
1308
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1406
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
1309
1407
  #endif
1310
1408
  }
1311
1409
  }
1312
1410
 
1313
1411
 
1412
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
1413
+ {
1414
+ if (dst != dctx->previousDstEnd) { /* not contiguous */
1415
+ dctx->dictEnd = dctx->previousDstEnd;
1416
+ dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
1417
+ dctx->prefixStart = dst;
1418
+ dctx->previousDstEnd = dst;
1419
+ }
1420
+ }
1421
+
1422
+
1314
1423
  size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
1315
1424
  void* dst, size_t dstCapacity,
1316
1425
  const void* src, size_t srcSize)