zstdlib 0.2.0 → 0.7.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +30 -1
  3. data/README.md +2 -2
  4. data/Rakefile +1 -1
  5. data/ext/zstdlib/extconf.rb +3 -3
  6. data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
  7. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/bitstream.h +38 -39
  8. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/compiler.h +40 -5
  9. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/cpu.h +1 -1
  10. data/ext/zstdlib/zstd-1.4.5/lib/common/debug.c +24 -0
  11. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/debug.h +11 -31
  12. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/entropy_common.c +13 -33
  13. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.c +2 -1
  14. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.h +6 -2
  15. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse.h +12 -32
  16. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse_decompress.c +12 -35
  17. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/huf.h +15 -33
  18. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/mem.h +75 -2
  19. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.c +8 -4
  20. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.h +2 -2
  21. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.c +50 -4
  22. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.h +36 -4
  23. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.c +23 -35
  24. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.h +11 -31
  25. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_common.c +1 -1
  26. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_errors.h +2 -1
  27. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_internal.h +154 -26
  28. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/fse_compress.c +17 -40
  29. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.c +15 -35
  30. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.h +12 -32
  31. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/huf_compress.c +92 -92
  32. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress.c +1191 -1330
  33. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress_internal.h +317 -55
  34. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.c +158 -0
  35. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.h +29 -0
  36. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.c +419 -0
  37. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.h +54 -0
  38. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.c +845 -0
  39. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.h +32 -0
  40. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_cwksp.h +525 -0
  41. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.c +65 -43
  42. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.h +2 -2
  43. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.c +92 -66
  44. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.h +2 -2
  45. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.c +74 -42
  46. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.h +1 -1
  47. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.c +32 -10
  48. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.h +7 -2
  49. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.c +81 -114
  50. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.h +1 -1
  51. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.c +95 -51
  52. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.h +3 -2
  53. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/huf_decompress.c +76 -60
  54. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.c +12 -8
  55. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.h +2 -2
  56. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress.c +292 -172
  57. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.c +459 -338
  58. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.h +3 -3
  59. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_internal.h +18 -4
  60. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/zstd.h +265 -88
  61. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzclose.c +1 -1
  62. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzcompatibility.h +1 -1
  63. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzguts.h +0 -0
  64. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzlib.c +9 -9
  65. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzread.c +16 -8
  66. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzwrite.c +8 -8
  67. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.c +16 -12
  68. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  69. metadata +69 -62
  70. data/ext/zstdlib/zstd-1.4.0/lib/common/debug.c +0 -44
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,14 +15,14 @@
  * Dependencies
  *********************************************************/
  #include <string.h> /* memcpy, memmove, memset */
- #include "compiler.h" /* prefetch */
- #include "cpu.h" /* bmi2 */
- #include "mem.h" /* low level memory routines */
+ #include "../common/compiler.h" /* prefetch */
+ #include "../common/cpu.h" /* bmi2 */
+ #include "../common/mem.h" /* low level memory routines */
  #define FSE_STATIC_LINKING_ONLY
- #include "fse.h"
+ #include "../common/fse.h"
  #define HUF_STATIC_LINKING_ONLY
- #include "huf.h"
- #include "zstd_internal.h"
+ #include "../common/huf.h"
+ #include "../common/zstd_internal.h"
  #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
  #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
  #include "zstd_decompress_block.h"
@@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
  size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
  blockProperties_t* bpPtr)
  {
- RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
+ RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");

  { U32 const cBlockHeader = MEM_readLE24(src);
  U32 const cSize = cBlockHeader >> 3;
@@ -64,7 +64,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
  bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
  bpPtr->origSize = cSize; /* only useful for RLE */
  if (bpPtr->blockType == bt_rle) return 1;
- RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
+ RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
  return cSize;
  }
  }
@@ -79,7 +79,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
  {
- RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
+ RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");

  { const BYTE* const istart = (const BYTE*) src;
  symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
@@ -87,7 +88,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  switch(litEncType)
  {
  case set_repeat:
- RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
+ DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
+ RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
  /* fall-through */

  case set_compressed:
@@ -116,11 +118,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  /* 2 - 2 - 18 - 18 */
  lhSize = 5;
  litSize = (lhc >> 4) & 0x3FFFF;
- litCSize = (lhc >> 22) + (istart[4] << 10);
+ litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
  break;
  }
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
- RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+ RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");

  /* prefetch huffman table if cold */
  if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
@@ -158,7 +160,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  }
  }

- RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
+ RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");

  dctx->litPtr = dctx->litBuffer;
  dctx->litSize = litSize;
@@ -188,7 +190,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  }

  if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
- RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
+ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
  memcpy(dctx->litBuffer, istart+lhSize, litSize);
  dctx->litPtr = dctx->litBuffer;
  dctx->litSize = litSize;
@@ -220,7 +222,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
  break;
  }
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
  memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
  dctx->litPtr = dctx->litBuffer;
  dctx->litSize = litSize;
@@ -391,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
  symbolNext[s] = 1;
  } else {
  if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
- symbolNext[s] = normalizedCounter[s];
+ assert(normalizedCounter[s]>=0);
+ symbolNext[s] = (U16)normalizedCounter[s];
  } } }
  memcpy(dt, &DTableH, sizeof(DTableH));
  }
@@ -437,8 +440,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
  switch(type)
  {
  case set_rle :
- RETURN_ERROR_IF(!srcSize, srcSize_wrong);
- RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
+ RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
+ RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
  { U32 const symbol = *(const BYTE*)src;
  U32 const baseline = baseValue[symbol];
  U32 const nbBits = nbAdditionalBits[symbol];
@@ -450,7 +453,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
  *DTablePtr = defaultTable;
  return 0;
  case set_repeat:
- RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
+ RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
  /* prefetch FSE table if used */
  if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
  const void* const pStart = *DTablePtr;
@@ -462,8 +465,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
  { unsigned tableLog;
  S16 norm[MaxSeq+1];
  size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
- RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
- RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
+ RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
+ RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
  ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
  *DTablePtr = DTableSpace;
  return headerSize;
@@ -484,28 +487,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
  DEBUGLOG(5, "ZSTD_decodeSeqHeaders");

  /* check */
- RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
+ RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");

  /* SeqHead */
  nbSeq = *ip++;
  if (!nbSeq) {
  *nbSeqPtr=0;
- RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
+ RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
  return 1;
  }
  if (nbSeq > 0x7F) {
  if (nbSeq == 0xFF) {
- RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
+ RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
  nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
  } else {
- RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
+ RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
  nbSeq = ((nbSeq-0x80)<<8) + *ip++;
  }
  }
  *nbSeqPtr = nbSeq;

  /* FSE table descriptors */
- RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
+ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
  { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
  symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
  symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -518,7 +521,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
  LL_base, LL_bits,
  LL_defaultDTable, dctx->fseEntropy,
  dctx->ddictIsCold, nbSeq);
- RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
  ip += llhSize;
  }

@@ -528,7 +531,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
  OF_base, OF_bits,
  OF_defaultDTable, dctx->fseEntropy,
  dctx->ddictIsCold, nbSeq);
- RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
  ip += ofhSize;
  }

@@ -538,7 +541,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
  ML_base, ML_bits,
  ML_defaultDTable, dctx->fseEntropy,
  dctx->ddictIsCold, nbSeq);
- RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
  ip += mlhSize;
  }
  }
@@ -570,38 +573,118 @@ typedef struct {
  size_t pos;
  } seqState_t;

+ /*! ZSTD_overlapCopy8() :
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ * Precondition: *ip <= *op
+ * Postcondition: *op - *op >= 8
+ */
+ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+ assert(*ip <= *op);
+ if (offset < 8) {
+ /* close range match, overlap */
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
+ int const sub2 = dec64table[offset];
+ (*op)[0] = (*ip)[0];
+ (*op)[1] = (*ip)[1];
+ (*op)[2] = (*ip)[2];
+ (*op)[3] = (*ip)[3];
+ *ip += dec32table[offset];
+ ZSTD_copy4(*op+4, *ip);
+ *ip -= sub2;
+ } else {
+ ZSTD_copy8(*op, *ip);
+ }
+ *ip += 8;
+ *op += 8;
+ assert(*op - *ip >= 8);
+ }
+
+ /*! ZSTD_safecopy() :
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
+ * should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ * @param ovtype controls the overlap detection
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ * The src buffer must be before the dst buffer.
+ */
+ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+ ptrdiff_t const diff = op - ip;
+ BYTE* const oend = op + length;
+
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));

- /* ZSTD_execSequenceLast7():
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
- * requires more careful checks, to ensure there is no overflow.
- * performance does not matter though.
- * note : this case is supposed to be never generated "naturally" by reference encoder,
- * since in most cases it needs at least 8 bytes to look for a match.
- * but it's allowed by the specification. */
+ if (length < 8) {
+ /* Handle short lengths. */
+ while (op < oend) *op++ = *ip++;
+ return;
+ }
+ if (ovtype == ZSTD_overlap_src_before_dst) {
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+ assert(length >= 8);
+ ZSTD_overlapCopy8(&op, &ip, diff);
+ assert(op - ip >= 8);
+ assert(op <= oend);
+ }
+
+ if (oend <= oend_w) {
+ /* No risk of overwrite. */
+ ZSTD_wildcopy(op, ip, length, ovtype);
+ return;
+ }
+ if (op <= oend_w) {
+ /* Wildcopy until we get close to the end. */
+ assert(oend > oend_w);
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+ ip += oend_w - op;
+ op = oend_w;
+ }
+ /* Handle the leftovers. */
+ while (op < oend) *op++ = *ip++;
+ }
+
+ /* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
  FORCE_NOINLINE
- size_t ZSTD_execSequenceLast7(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+ size_t ZSTD_execSequenceEnd(BYTE* op,
+ BYTE* const oend, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
  {
  BYTE* const oLitEnd = op + sequence.litLength;
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
  const BYTE* match = oLitEnd - sequence.offset;
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;

- /* check */
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
+ /* bounds checks : careful of address space overflow in 32-bit mode */
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+ assert(op < op + sequenceLength);
+ assert(oLitEnd < op + sequenceLength);

  /* copy literals */
- while (op < oLitEnd) *op++ = *(*litPtr)++;
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+ op = oLitEnd;
+ *litPtr = iLitEnd;

  /* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - base)) {
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
  /* offset beyond prefix */
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
- match = dictEnd - (base-match);
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+ match = dictEnd - (prefixStart-match);
  if (match + sequence.matchLength <= dictEnd) {
  memmove(oLitEnd, match, sequence.matchLength);
  return sequenceLength;
@@ -611,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
  memmove(oLitEnd, match, length1);
  op = oLitEnd + length1;
  sequence.matchLength -= length1;
- match = base;
+ match = prefixStart;
  } }
- while (op < oMatchEnd) *op++ = *match++;
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
  return sequenceLength;
  }

-
  HINT_INLINE
  size_t ZSTD_execSequence(BYTE* op,
  BYTE* const oend, seq_t sequence,
@@ -627,26 +709,47 @@ size_t ZSTD_execSequence(BYTE* op,
  BYTE* const oLitEnd = op + sequence.litLength;
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
  BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
  const BYTE* match = oLitEnd - sequence.offset;

- /* check */
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
- if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
-
- /* copy Literals */
- ZSTD_copy8(op, *litPtr);
- if (sequence.litLength > 8)
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+ assert(op != NULL /* Precondition */);
+ assert(oend_w < oend /* No underflow */);
+ /* Handle edge cases in a slow path:
+ * - Read beyond end of literals
+ * - Match end is within WILDCOPY_OVERLIMIT of oend
+ * - 32-bit mode and the match length overflows
+ */
+ if (UNLIKELY(
+ iLitEnd > litLimit ||
+ oMatchEnd > oend_w ||
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+ assert(op <= oLitEnd /* No overflow */);
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+ assert(oMatchEnd <= oend /* No underflow */);
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+ /* Copy Literals:
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+ * We likely don't need the full 32-byte wildcopy.
+ */
+ assert(WILDCOPY_OVERLENGTH >= 16);
+ ZSTD_copy16(op, (*litPtr));
+ if (UNLIKELY(sequence.litLength > 16)) {
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+ }
  op = oLitEnd;
  *litPtr = iLitEnd; /* update for next sequence */

- /* copy Match */
+ /* Copy Match */
  if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
  /* offset beyond prefix -> go into extDict */
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
  match = dictEnd + (match - prefixStart);
  if (match + sequence.matchLength <= dictEnd) {
  memmove(oLitEnd, match, sequence.matchLength);
@@ -658,121 +761,33 @@ size_t ZSTD_execSequence(BYTE* op,
  op = oLitEnd + length1;
  sequence.matchLength -= length1;
  match = prefixStart;
- if (op > oend_w || sequence.matchLength < MINMATCH) {
- U32 i;
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
- return sequenceLength;
- }
  } }
- /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
- /* match within prefix */
- if (sequence.offset < 8) {
- /* close range match, overlap */
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
- int const sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= sub2;
- } else {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-(16-MINMATCH)) {
- if (op < oend_w) {
- ZSTD_wildcopy(op, match, oend_w - op);
- match += oend_w - op;
- op = oend_w;
- }
- while (op < oMatchEnd) *op++ = *match++;
- } else {
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
+ /* Match within prefix of 1 or more bytes */
+ assert(op <= oMatchEnd);
+ assert(oMatchEnd <= oend_w);
+ assert(match >= prefixStart);
+ assert(sequence.matchLength >= 1);
+
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+ * without overlap checking.
+ */
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+ /* We bet on a full wildcopy for matches, since we expect matches to be
+ * longer than literals (in general). In silesia, ~10% of matches are longer
+ * than 16 bytes.
+ */
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+ return sequenceLength;
  }
- return sequenceLength;
- }
-
-
- HINT_INLINE
- size_t ZSTD_execSequenceLong(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
- {
- BYTE* const oLitEnd = op + sequence.litLength;
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
- const BYTE* match = sequence.match;
+ assert(sequence.offset < WILDCOPY_VECLEN);

- /* check */
- RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
- /* copy Literals */
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
- if (sequence.litLength > 8)
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
- op = oLitEnd;
- *litPtr = iLitEnd; /* update for next sequence */
+ /* Copy 8 bytes and spread the offset to be >= 8. */
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);

- /* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
- /* offset beyond prefix */
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
- if (match + sequence.matchLength <= dictEnd) {
- memmove(oLitEnd, match, sequence.matchLength);
- return sequenceLength;
- }
- /* span extDict & currentPrefixSegment */
- { size_t const length1 = dictEnd - match;
- memmove(oLitEnd, match, length1);
- op = oLitEnd + length1;
- sequence.matchLength -= length1;
- match = prefixStart;
- if (op > oend_w || sequence.matchLength < MINMATCH) {
- U32 i;
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
- return sequenceLength;
- }
- } }
- assert(op <= oend_w);
- assert(sequence.matchLength >= MINMATCH);
-
- /* match within prefix */
- if (sequence.offset < 8) {
- /* close range match, overlap */
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
- int const sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= sub2;
- } else {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-(16-MINMATCH)) {
- if (op < oend_w) {
- ZSTD_wildcopy(op, match, oend_w - op);
- match += oend_w - op;
- op = oend_w;
- }
- while (op < oMatchEnd) *op++ = *match++;
- } else {
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
+ if (sequence.matchLength > 8) {
+ assert(op < oMatchEnd);
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
  }
  return sequenceLength;
  }
@@ -798,6 +813,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
  DStatePtr->state = DInfo.nextState + lowBits;
  }

+ FORCE_INLINE_TEMPLATE void
+ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
+ {
+ U32 const nbBits = DInfo.nbBits;
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
+ DStatePtr->state = DInfo.nextState + lowBits;
+ }
+
  /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
  * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
  * bits before reloading. This value is the maximum number of bytes we read
@@ -809,25 +832,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
  : 0)

  typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
+ typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;

- #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
  FORCE_INLINE_TEMPLATE seq_t
- ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
  {
  seq_t seq;
- U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
- U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
- U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
- U32 const totalBits = llBits+mlBits+ofBits;
- U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
- U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
- U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
+ ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
+ ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
+ ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
+ U32 const llBase = llDInfo.baseValue;
+ U32 const mlBase = mlDInfo.baseValue;
+ U32 const ofBase = ofDInfo.baseValue;
+ BYTE const llBits = llDInfo.nbAdditionalBits;
+ BYTE const mlBits = mlDInfo.nbAdditionalBits;
+ BYTE const ofBits = ofDInfo.nbAdditionalBits;
+ BYTE const totalBits = llBits+mlBits+ofBits;

  /* sequence */
  { size_t offset;
- if (!ofBits)
- offset = 0;
- else {
+ if (ofBits > 1) {
  ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
  ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
  assert(ofBits <= MaxOff);
@@ -841,58 +865,138 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
  offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
  }
- }
-
- if (ofBits <= 1) {
- offset += (llBase==0);
- if (offset) {
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset = temp;
- } else { /* offset == 0 */
- offset = seqState->prevOffset[0];
- }
- } else {
  seqState->prevOffset[2] = seqState->prevOffset[1];
  seqState->prevOffset[1] = seqState->prevOffset[0];
  seqState->prevOffset[0] = offset;
- }
+ } else {
+ U32 const ll0 = (llBase == 0);
+ if (LIKELY((ofBits == 0))) {
+ if (LIKELY(!ll0))
+ offset = seqState->prevOffset[0];
+ else {
+ offset = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset;
+ }
+ } else {
+ offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+ { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset = temp;
+ } } }
  seq.offset = offset;
  }

- seq.matchLength = mlBase
- + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
+ seq.matchLength = mlBase;
+ if (mlBits > 0)
+ seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
  if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
  BIT_reloadDStream(&seqState->DStream);
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+ if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
  BIT_reloadDStream(&seqState->DStream);
  /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
  ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);

- seq.litLength = llBase
- + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
+ seq.litLength = llBase;
+ if (llBits > 0)
+ seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
  if (MEM_32bits())
  BIT_reloadDStream(&seqState->DStream);

  DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
  (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);

- /* ANS state update */
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
+ if (prefetch == ZSTD_p_prefetch) {
+ size_t const pos = seqState->pos + seq.litLength;
+ const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
+ seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+ * No consequence though : no memory access will occur, offset is only used for prefetching */
+ seqState->pos = pos + seq.matchLength;
+ }
+
+ /* ANS state update
+ * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
+ * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
+ * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
+ * better option, so it is the default for other compilers. But, if you
+ * measure that it is worse, please put up a pull request.
+ */
+ {
+ #if defined(__GNUC__) && !defined(__clang__)
+ const int kUseUpdateFseState = 1;
+ #else
+ const int kUseUpdateFseState = 0;
+ #endif
+ if (kUseUpdateFseState) {
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
+ } else {
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
+ }
+ }

  return seq;
  }

+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+ {
+ size_t const windowSize = dctx->fParams.windowSize;
+ /* No dictionary used. */
+ if (dctx->dictContentEndForFuzzing == NULL) return 0;
+ /* Dictionary is our prefix. */
+ if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
+ /* Dictionary is not our ext-dict. */
+ if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
+ /* Dictionary is not within our window size. */
+ if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
+ /* Dictionary is active. */
+ return 1;
+ }
+
+ MEM_STATIC void ZSTD_assertValidSequence(
+ ZSTD_DCtx const* dctx,
+ BYTE const* op, BYTE const* oend,
+ seq_t const seq,
+ BYTE const* prefixStart, BYTE const* virtualStart)
+ {
+ size_t const windowSize = dctx->fParams.windowSize;
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
+ BYTE const* const oLitEnd = op + seq.litLength;
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+ assert(op <= oend);
+ assert((size_t)(oend - op) >= sequenceSize);
+ assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+ /* Offset must be within the dictionary. */
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+ assert(seq.offset <= windowSize + dictSize);
+ } else {
+ /* Offset must be within our window. */
+ assert(seq.offset <= windowSize);
+ }
+ }
+ #endif
+
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
  FORCE_INLINE_TEMPLATE size_t
+ DONT_VECTORIZE
  ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
  const BYTE* ip = (const BYTE*)seqStart;
  const BYTE* const iend = ip + seqSize;
@@ -905,40 +1009,104 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
  const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
  DEBUGLOG(5, "ZSTD_decompressSequences_body");
+ (void)frame;

  /* Regen sequences */
  if (nbSeq) {
  seqState_t seqState;
+ size_t error = 0;
  dctx->fseEntropy = 1;
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
  RETURN_ERROR_IF(
  ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
- corruption_detected);
+ corruption_detected, "");
  ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
  ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
  ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
- nbSeq--;
- { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
- DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
- op += oneSeqSize;
- } }
+ assert(dst != NULL);
+
+ ZSTD_STATIC_ASSERT(
+ BIT_DStream_unfinished < BIT_DStream_completed &&
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+ BIT_DStream_completed < BIT_DStream_overflow);
+
+ #if defined(__GNUC__) && defined(__x86_64__)
+ /* Align the decompression loop to 32 + 16 bytes.
+ *
+ * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+ * speed swings based on the alignment of the decompression loop. This
+ * performance swing is caused by parts of the decompression loop falling
+ * out of the DSB. The entire decompression loop should fit in the DSB,
+ * when it can't we get much worse performance. You can measure if you've
+ * hit the good case or the bad case with this perf command for some
+ * compressed file test.zst:
+ *
+ * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+ * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+ *
+ * If you see most cycles served out of the MITE you've hit the bad case.
+ * If you see most cycles served out of the DSB you've hit the good case.
+ * If it is pretty even then you may be in an okay case.
+ *
+ * I've been able to reproduce this issue on the following CPUs:
+ * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+ * Use Instruments->Counters to get DSB/MITE cycles.
+ * I never got performance swings, but I was able to
+ * go from the good case of mostly DSB to half of the
+ * cycles served from MITE.
+ * - Coffeelake: Intel i9-9900k
+ *
+ * I haven't been able to reproduce the instability or DSB misses on any
+ * of the following CPUS:
+ * - Haswell
+ * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+ * - Skylake
+ *
+ * If you are seeing performance stability this script can help test.
+ * It tests on 4 commits in zstd where I saw performance change.
+ *
+ * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+ */
+ __asm__(".p2align 5");
+ __asm__("nop");
+ __asm__(".p2align 4");
+ #endif
+ for ( ; ; ) {
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+ #endif
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+ BIT_reloadDStream(&(seqState.DStream));
+ /* gcc and clang both don't like early returns in this loop.
+ * gcc doesn't like early breaks either.
+ * Instead save an error and report it at the end.
+ * When there is an error, don't increment op, so we don't
+ * overwrite.
+ */
+ if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
+ else op += oneSeqSize;
+ if (UNLIKELY(!--nbSeq)) break;
+ }

  /* check if reached exact end */
  DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
- RETURN_ERROR_IF(nbSeq, corruption_detected);
+ if (ZSTD_isError(error)) return error;
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
  /* save reps for next block */
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
  }

  /* last literal segment */
  { size_t const lastLLSize = litEnd - litPtr;
- RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
- memcpy(op, litPtr, lastLLSize);
- op += lastLLSize;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
  }

  return op-ostart;
@@ -948,99 +1116,21 @@ static size_t
948
1116
  ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
949
1117
  void* dst, size_t maxDstSize,
950
1118
  const void* seqStart, size_t seqSize, int nbSeq,
951
- const ZSTD_longOffset_e isLongOffset)
1119
+ const ZSTD_longOffset_e isLongOffset,
1120
+ const int frame)
952
1121
  {
953
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1122
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
954
1123
  }
955
1124
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
956
1125
 
957
-
958
-
959
1126
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
960
- FORCE_INLINE_TEMPLATE seq_t
961
- ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
962
- {
963
- seq_t seq;
964
- U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
965
- U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
966
- U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
967
- U32 const totalBits = llBits+mlBits+ofBits;
968
- U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
969
- U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
970
- U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
971
-
972
- /* sequence */
973
- { size_t offset;
974
- if (!ofBits)
975
- offset = 0;
976
- else {
977
- ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
978
- ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
979
- assert(ofBits <= MaxOff);
980
- if (MEM_32bits() && longOffsets) {
981
- U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
982
- offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
983
- if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
984
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
985
- } else {
986
- offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
987
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
988
- }
989
- }
990
-
991
- if (ofBits <= 1) {
992
- offset += (llBase==0);
993
- if (offset) {
994
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
995
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
996
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
997
- seqState->prevOffset[1] = seqState->prevOffset[0];
998
- seqState->prevOffset[0] = offset = temp;
999
- } else {
1000
- offset = seqState->prevOffset[0];
1001
- }
1002
- } else {
1003
- seqState->prevOffset[2] = seqState->prevOffset[1];
1004
- seqState->prevOffset[1] = seqState->prevOffset[0];
1005
- seqState->prevOffset[0] = offset;
1006
- }
1007
- seq.offset = offset;
1008
- }
1009
-
1010
- seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
1011
- if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1012
- BIT_reloadDStream(&seqState->DStream);
1013
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1014
- BIT_reloadDStream(&seqState->DStream);
1015
- /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
1016
- ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1017
-
1018
- seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
1019
- if (MEM_32bits())
1020
- BIT_reloadDStream(&seqState->DStream);
1021
-
1022
- { size_t const pos = seqState->pos + seq.litLength;
1023
- const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
1024
- seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1025
- * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
1026
- seqState->pos = pos + seq.matchLength;
1027
- }
1028
-
1029
- /* ANS state update */
1030
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1031
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1032
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1033
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1034
-
1035
- return seq;
1036
- }
1037
-
1038
1127
  FORCE_INLINE_TEMPLATE size_t
1039
1128
  ZSTD_decompressSequencesLong_body(
1040
1129
  ZSTD_DCtx* dctx,
1041
1130
  void* dst, size_t maxDstSize,
1042
1131
  const void* seqStart, size_t seqSize, int nbSeq,
1043
- const ZSTD_longOffset_e isLongOffset)
1132
+ const ZSTD_longOffset_e isLongOffset,
1133
+ const int frame)
1044
1134
  {
1045
1135
  const BYTE* ip = (const BYTE*)seqStart;
1046
1136
  const BYTE* const iend = ip + seqSize;
@@ -1052,6 +1142,7 @@ ZSTD_decompressSequencesLong_body(
1052
1142
  const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1053
1143
  const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1054
1144
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1145
+ (void)frame;
1055
1146
 
1056
1147
  /* Regen sequences */
1057
1148
  if (nbSeq) {
@@ -1067,36 +1158,45 @@ ZSTD_decompressSequencesLong_body(
  seqState.prefixStart = prefixStart;
  seqState.pos = (size_t)(op-prefixStart);
  seqState.dictEnd = dictEnd;
+ assert(dst != NULL);
  assert(iend >= ip);
  RETURN_ERROR_IF(
  ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
- corruption_detected);
+ corruption_detected, "");
  ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
  ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
  ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);

  /* prepare in advance */
  for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
- sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
+ sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
  PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
  }
- RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
+ RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");

  /* decode and decompress */
  for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
- seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+ #endif
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
  PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
  sequences[seqNb & STORED_SEQS_MASK] = sequence;
  op += oneSeqSize;
  }
- RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
+ RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");

  /* finish queue */
  seqNb -= seqAdvance;
  for ( ; seqNb<nbSeq ; seqNb++) {
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+ assert(!ZSTD_isError(oneSeqSize));
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+ #endif
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
  op += oneSeqSize;
  }
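Context for the hunk above: each sequence is now decoded a few iterations before it is executed, and the memory its match copy will read is prefetched in the meantime (in zstd builds on GCC/Clang, PREFETCH_L1 generally resolves to __builtin_prefetch). Below is a minimal, self-contained sketch of that decode-ahead-and-prefetch pattern, not zstd's code; the names Item, decode_next, execute and run are hypothetical stand-ins.

    /* Decode-ahead with software prefetch: items are decoded ADVANCE steps
     * before they are executed, so the prefetch has time to pull their data
     * into cache. GCC/Clang only (__builtin_prefetch). */
    #include <stddef.h>

    #define ADVANCE 4U                  /* decode this many items ahead (power of 2) */
    #define MASK    (ADVANCE - 1)

    typedef struct { const char* src; size_t len; } Item;

    extern Item decode_next(void);      /* produce the next work item */
    extern void execute(Item it);       /* consume an item (reads it.src) */

    void run(size_t nbItems)
    {
        Item ring[ADVANCE];
        size_t i;

        /* prepare in advance: decode and prefetch, do not execute yet */
        for (i = 0; i < ADVANCE && i < nbItems; i++) {
            ring[i & MASK] = decode_next();
            __builtin_prefetch(ring[i & MASK].src);
        }
        /* steady state: execute the item decoded ADVANCE steps ago,
         * decode and prefetch a fresh one into its slot */
        for (; i < nbItems; i++) {
            Item const next = decode_next();
            execute(ring[i & MASK]);
            __builtin_prefetch(next.src);
            ring[i & MASK] = next;
        }
        /* finish queue: drain the items still waiting in the ring */
        for (i = (nbItems > ADVANCE) ? nbItems - ADVANCE : 0; i < nbItems; i++)
            execute(ring[i & MASK]);
    }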
@@ -1107,9 +1207,11 @@ ZSTD_decompressSequencesLong_body(

  /* last literal segment */
  { size_t const lastLLSize = litEnd - litPtr;
- RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
- memcpy(op, litPtr, lastLLSize);
- op += lastLLSize;
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+ if (op != NULL) {
+ memcpy(op, litPtr, lastLLSize);
+ op += lastLLSize;
+ }
  }

  return op-ostart;
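The new null check around the last-literal copy in the hunk above is not cosmetic: passing a null pointer to memcpy is undefined behavior in C even when the length is zero, and op can legitimately be NULL when the block decodes to nothing. A minimal sketch of the same guard in isolation; append_tail is a hypothetical name, not a zstd function.

    #include <stddef.h>
    #include <string.h>

    /* Copy a possibly empty tail into dst. dst may be NULL only when the
     * caller guarantees tailSize is 0; skipping the call in that case avoids
     * the undefined behavior of memcpy(NULL, ..., 0). */
    static size_t append_tail(char* dst, size_t dstCapacity,
                              const char* tail, size_t tailSize)
    {
        if (tailSize > dstCapacity) return (size_t)-1;   /* destination too small */
        if (dst != NULL) {
            memcpy(dst, tail, tailSize);
        }
        return tailSize;
    }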
@@ -1119,9 +1221,10 @@ static size_t
  ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */

@@ -1131,12 +1234,14 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,

  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
  static TARGET_ATTRIBUTE("bmi2") size_t
+ DONT_VECTORIZE
  ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

@@ -1145,9 +1250,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t
  ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */

@@ -1157,21 +1263,23 @@ typedef size_t (*ZSTD_decompressSequences_t)(
  ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset);
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame);

  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
  static size_t
  ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
  DEBUGLOG(5, "ZSTD_decompressSequences");
  #if DYNAMIC_BMI2
  if (dctx->bmi2) {
- return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif
- return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

@@ -1186,15 +1294,16 @@ static size_t
  ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
  void* dst, size_t maxDstSize,
  const void* seqStart, size_t seqSize, int nbSeq,
- const ZSTD_longOffset_e isLongOffset)
+ const ZSTD_longOffset_e isLongOffset,
+ const int frame)
  {
  DEBUGLOG(5, "ZSTD_decompressSequencesLong");
  #if DYNAMIC_BMI2
  if (dctx->bmi2) {
- return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif
- return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
  }
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */

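In the two hunks above, ZSTD_decompressSequences and ZSTD_decompressSequencesLong select a BMI2-targeted body at run time when DYNAMIC_BMI2 is enabled and dctx->bmi2 is set; the _bmi2 variants are ordinary C compiled with a per-function target attribute. The following is a rough sketch of that dispatch pattern for GCC/Clang only, with hypothetical kernel* names (not zstd functions) and __builtin_cpu_supports standing in for zstd's own CPU-feature detection.

    #include <stddef.h>
    #include <stdint.h>

    /* Shared body: plain C, written once. When inlined into a caller compiled
     * with the "bmi2" target attribute, the compiler may emit BMI2 instructions. */
    static inline size_t kernel_body(const uint8_t* src, size_t n)
    {
        size_t acc = 0;
        for (size_t i = 0; i < n; i++) acc += src[i];   /* stand-in workload */
        return acc;
    }

    static size_t kernel_default(const uint8_t* src, size_t n)
    {
        return kernel_body(src, n);
    }

    __attribute__((target("bmi2")))
    static size_t kernel_bmi2(const uint8_t* src, size_t n)
    {
        return kernel_body(src, n);                     /* BMI2 codegen allowed here */
    }

    size_t kernel(const uint8_t* src, size_t n)
    {
        if (__builtin_cpu_supports("bmi2"))             /* runtime CPU check */
            return kernel_bmi2(src, n);
        return kernel_default(src, n);
    }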
@@ -1228,7 +1337,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
  }
  #endif

-
  size_t
  ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  void* dst, size_t dstCapacity,
@@ -1244,7 +1352,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
  DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);

- RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);
+ RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");

  /* Decode literals section */
  { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
@@ -1270,6 +1378,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  ip += seqHSize;
  srcSize -= seqHSize;

+ RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+
  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
  !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
  if ( !usePrefetchDecoder
@@ -1288,17 +1398,28 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  if (usePrefetchDecoder)
  #endif
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
  #endif

  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
  /* else */
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
  #endif
  }
  }


+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
+ {
+ if (dst != dctx->previousDstEnd) { /* not contiguous */
+ dctx->dictEnd = dctx->previousDstEnd;
+ dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+ dctx->prefixStart = dst;
+ dctx->previousDstEnd = dst;
+ }
+ }
+
+
  size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
  void* dst, size_t dstCapacity,
  const void* src, size_t srcSize)