zstd-ruby 1.3.8.0 → 1.4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -5
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/Makefile +133 -61
  5. data/ext/zstdruby/libzstd/README.md +51 -18
  6. data/ext/zstdruby/libzstd/common/bitstream.h +38 -39
  7. data/ext/zstdruby/libzstd/common/compiler.h +41 -6
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +11 -31
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +13 -33
  12. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +6 -2
  14. data/ext/zstdruby/libzstd/common/fse.h +13 -33
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +12 -35
  16. data/ext/zstdruby/libzstd/common/huf.h +15 -33
  17. data/ext/zstdruby/libzstd/common/mem.h +75 -2
  18. data/ext/zstdruby/libzstd/common/pool.c +8 -4
  19. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  20. data/ext/zstdruby/libzstd/common/threading.c +52 -6
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +25 -37
  23. data/ext/zstdruby/libzstd/common/xxhash.h +11 -31
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  25. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +203 -22
  27. data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -42
  28. data/ext/zstdruby/libzstd/compress/hist.c +15 -35
  29. data/ext/zstdruby/libzstd/compress/hist.h +12 -32
  30. data/ext/zstdruby/libzstd/compress/huf_compress.c +92 -92
  31. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1460 -1472
  32. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +330 -65
  33. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +419 -0
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +845 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +525 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +65 -43
  41. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  42. data/ext/zstdruby/libzstd/compress/zstd_fast.c +264 -159
  43. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  44. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +74 -42
  45. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +2 -2
  46. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +33 -11
  47. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +7 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_opt.c +108 -125
  49. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  50. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +129 -93
  51. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +46 -28
  52. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +76 -60
  53. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +14 -10
  54. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  55. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +471 -258
  56. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +471 -346
  57. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +3 -3
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +25 -4
  59. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  60. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  61. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  62. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  63. data/ext/zstdruby/libzstd/dictBuilder/cover.c +220 -65
  64. data/ext/zstdruby/libzstd/dictBuilder/cover.h +81 -7
  65. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +85 -56
  66. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +43 -19
  67. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +73 -35
  68. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  69. data/ext/zstdruby/libzstd/dll/example/build_package.bat +3 -2
  70. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +49 -15
  71. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +142 -117
  72. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +13 -8
  73. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +54 -25
  74. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +13 -8
  75. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +55 -25
  76. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +13 -8
  77. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +62 -29
  78. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +13 -8
  79. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +145 -109
  80. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +14 -9
  81. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +56 -26
  82. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +11 -6
  83. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +65 -28
  84. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +11 -6
  85. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
  86. data/ext/zstdruby/libzstd/zstd.h +921 -597
  87. data/lib/zstd-ruby/version.rb +1 -1
  88. data/zstd-ruby.gemspec +2 -2
  89. metadata +19 -14
  90. data/ext/zstdruby/libzstd/dll/libzstd.def +0 -87
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,14 +15,14 @@
15
15
  * Dependencies
16
16
  *********************************************************/
17
17
  #include <string.h> /* memcpy, memmove, memset */
18
- #include "compiler.h" /* prefetch */
19
- #include "cpu.h" /* bmi2 */
20
- #include "mem.h" /* low level memory routines */
18
+ #include "../common/compiler.h" /* prefetch */
19
+ #include "../common/cpu.h" /* bmi2 */
20
+ #include "../common/mem.h" /* low level memory routines */
21
21
  #define FSE_STATIC_LINKING_ONLY
22
- #include "fse.h"
22
+ #include "../common/fse.h"
23
23
  #define HUF_STATIC_LINKING_ONLY
24
- #include "huf.h"
25
- #include "zstd_internal.h"
24
+ #include "../common/huf.h"
25
+ #include "../common/zstd_internal.h"
26
26
  #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27
27
  #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28
28
  #include "zstd_decompress_block.h"
@@ -56,14 +56,15 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
56
56
  size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57
57
  blockProperties_t* bpPtr)
58
58
  {
59
- if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
59
+ RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
60
+
60
61
  { U32 const cBlockHeader = MEM_readLE24(src);
61
62
  U32 const cSize = cBlockHeader >> 3;
62
63
  bpPtr->lastBlock = cBlockHeader & 1;
63
64
  bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
64
65
  bpPtr->origSize = cSize; /* only useful for RLE */
65
66
  if (bpPtr->blockType == bt_rle) return 1;
66
- if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
67
+ RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
67
68
  return cSize;
68
69
  }
69
70
  }
@@ -78,7 +79,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
78
79
  size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
79
80
  const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
80
81
  {
81
- if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
82
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
83
+ RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
82
84
 
83
85
  { const BYTE* const istart = (const BYTE*) src;
84
86
  symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
@@ -86,11 +88,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
86
88
  switch(litEncType)
87
89
  {
88
90
  case set_repeat:
89
- if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
91
+ DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
92
+ RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
90
93
  /* fall-through */
91
94
 
92
95
  case set_compressed:
93
- if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
96
+ RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
94
97
  { size_t lhSize, litSize, litCSize;
95
98
  U32 singleStream=0;
96
99
  U32 const lhlCode = (istart[0] >> 2) & 3;
@@ -115,11 +118,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
115
118
  /* 2 - 2 - 18 - 18 */
116
119
  lhSize = 5;
117
120
  litSize = (lhc >> 4) & 0x3FFFF;
118
- litCSize = (lhc >> 22) + (istart[4] << 10);
121
+ litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
119
122
  break;
120
123
  }
121
- if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
122
- if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
124
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
125
+ RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
123
126
 
124
127
  /* prefetch huffman table if cold */
125
128
  if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
@@ -157,7 +160,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
157
160
  }
158
161
  }
159
162
 
160
- if (HUF_isError(hufSuccess)) return ERROR(corruption_detected);
163
+ RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
161
164
 
162
165
  dctx->litPtr = dctx->litBuffer;
163
166
  dctx->litSize = litSize;
@@ -187,7 +190,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
187
190
  }
188
191
 
189
192
  if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
190
- if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
193
+ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
191
194
  memcpy(dctx->litBuffer, istart+lhSize, litSize);
192
195
  dctx->litPtr = dctx->litBuffer;
193
196
  dctx->litSize = litSize;
@@ -216,17 +219,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
216
219
  case 3:
217
220
  lhSize = 3;
218
221
  litSize = MEM_readLE24(istart) >> 4;
219
- if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
222
+ RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
220
223
  break;
221
224
  }
222
- if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
225
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
223
226
  memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
224
227
  dctx->litPtr = dctx->litBuffer;
225
228
  dctx->litSize = litSize;
226
229
  return lhSize+1;
227
230
  }
228
231
  default:
229
- return ERROR(corruption_detected); /* impossible */
232
+ RETURN_ERROR(corruption_detected, "impossible");
230
233
  }
231
234
  }
232
235
  }
@@ -390,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
390
393
  symbolNext[s] = 1;
391
394
  } else {
392
395
  if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
393
- symbolNext[s] = normalizedCounter[s];
396
+ assert(normalizedCounter[s]>=0);
397
+ symbolNext[s] = (U16)normalizedCounter[s];
394
398
  } } }
395
399
  memcpy(dt, &DTableH, sizeof(DTableH));
396
400
  }
@@ -436,8 +440,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
436
440
  switch(type)
437
441
  {
438
442
  case set_rle :
439
- if (!srcSize) return ERROR(srcSize_wrong);
440
- if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
443
+ RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
444
+ RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
441
445
  { U32 const symbol = *(const BYTE*)src;
442
446
  U32 const baseline = baseValue[symbol];
443
447
  U32 const nbBits = nbAdditionalBits[symbol];
@@ -449,7 +453,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
449
453
  *DTablePtr = defaultTable;
450
454
  return 0;
451
455
  case set_repeat:
452
- if (!flagRepeatTable) return ERROR(corruption_detected);
456
+ RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
453
457
  /* prefetch FSE table if used */
454
458
  if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
455
459
  const void* const pStart = *DTablePtr;
@@ -461,15 +465,15 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
461
465
  { unsigned tableLog;
462
466
  S16 norm[MaxSeq+1];
463
467
  size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
464
- if (FSE_isError(headerSize)) return ERROR(corruption_detected);
465
- if (tableLog > maxLog) return ERROR(corruption_detected);
468
+ RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
469
+ RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
466
470
  ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
467
471
  *DTablePtr = DTableSpace;
468
472
  return headerSize;
469
473
  }
470
- default : /* impossible */
474
+ default :
471
475
  assert(0);
472
- return ERROR(GENERIC);
476
+ RETURN_ERROR(GENERIC, "impossible");
473
477
  }
474
478
  }
475
479
 
@@ -483,28 +487,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
483
487
  DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
484
488
 
485
489
  /* check */
486
- if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
490
+ RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
487
491
 
488
492
  /* SeqHead */
489
493
  nbSeq = *ip++;
490
494
  if (!nbSeq) {
491
495
  *nbSeqPtr=0;
492
- if (srcSize != 1) return ERROR(srcSize_wrong);
496
+ RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
493
497
  return 1;
494
498
  }
495
499
  if (nbSeq > 0x7F) {
496
500
  if (nbSeq == 0xFF) {
497
- if (ip+2 > iend) return ERROR(srcSize_wrong);
501
+ RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
498
502
  nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
499
503
  } else {
500
- if (ip >= iend) return ERROR(srcSize_wrong);
504
+ RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
501
505
  nbSeq = ((nbSeq-0x80)<<8) + *ip++;
502
506
  }
503
507
  }
504
508
  *nbSeqPtr = nbSeq;
505
509
 
506
510
  /* FSE table descriptors */
507
- if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
511
+ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
508
512
  { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
509
513
  symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
510
514
  symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -517,7 +521,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
517
521
  LL_base, LL_bits,
518
522
  LL_defaultDTable, dctx->fseEntropy,
519
523
  dctx->ddictIsCold, nbSeq);
520
- if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
524
+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
521
525
  ip += llhSize;
522
526
  }
523
527
 
@@ -527,7 +531,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
527
531
  OF_base, OF_bits,
528
532
  OF_defaultDTable, dctx->fseEntropy,
529
533
  dctx->ddictIsCold, nbSeq);
530
- if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
534
+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
531
535
  ip += ofhSize;
532
536
  }
533
537
 
@@ -537,7 +541,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
537
541
  ML_base, ML_bits,
538
542
  ML_defaultDTable, dctx->fseEntropy,
539
543
  dctx->ddictIsCold, nbSeq);
540
- if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
544
+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
541
545
  ip += mlhSize;
542
546
  }
543
547
  }
@@ -569,38 +573,118 @@ typedef struct {
569
573
  size_t pos;
570
574
  } seqState_t;
571
575
 
576
+ /*! ZSTD_overlapCopy8() :
577
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
578
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
579
+ *
580
+ * Precondition: *ip <= *op
581
+ * Postcondition: *op - *ip >= 8
582
+ */
583
+ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
584
+ assert(*ip <= *op);
585
+ if (offset < 8) {
586
+ /* close range match, overlap */
587
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
588
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
589
+ int const sub2 = dec64table[offset];
590
+ (*op)[0] = (*ip)[0];
591
+ (*op)[1] = (*ip)[1];
592
+ (*op)[2] = (*ip)[2];
593
+ (*op)[3] = (*ip)[3];
594
+ *ip += dec32table[offset];
595
+ ZSTD_copy4(*op+4, *ip);
596
+ *ip -= sub2;
597
+ } else {
598
+ ZSTD_copy8(*op, *ip);
599
+ }
600
+ *ip += 8;
601
+ *op += 8;
602
+ assert(*op - *ip >= 8);
603
+ }
604
+
605
+ /*! ZSTD_safecopy() :
606
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
607
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
608
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
609
+ * should be fast for a single long sequence, but can be slow for several short sequences.
610
+ *
611
+ * @param ovtype controls the overlap detection
612
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
613
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
614
+ * The src buffer must be before the dst buffer.
615
+ */
616
+ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
617
+ ptrdiff_t const diff = op - ip;
618
+ BYTE* const oend = op + length;
619
+
620
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
621
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
622
+
623
+ if (length < 8) {
624
+ /* Handle short lengths. */
625
+ while (op < oend) *op++ = *ip++;
626
+ return;
627
+ }
628
+ if (ovtype == ZSTD_overlap_src_before_dst) {
629
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
630
+ assert(length >= 8);
631
+ ZSTD_overlapCopy8(&op, &ip, diff);
632
+ assert(op - ip >= 8);
633
+ assert(op <= oend);
634
+ }
635
+
636
+ if (oend <= oend_w) {
637
+ /* No risk of overwrite. */
638
+ ZSTD_wildcopy(op, ip, length, ovtype);
639
+ return;
640
+ }
641
+ if (op <= oend_w) {
642
+ /* Wildcopy until we get close to the end. */
643
+ assert(oend > oend_w);
644
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
645
+ ip += oend_w - op;
646
+ op = oend_w;
647
+ }
648
+ /* Handle the leftovers. */
649
+ while (op < oend) *op++ = *ip++;
650
+ }
572
651
 
573
- /* ZSTD_execSequenceLast7():
574
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
575
- * requires more careful checks, to ensure there is no overflow.
576
- * performance does not matter though.
577
- * note : this case is supposed to be never generated "naturally" by reference encoder,
578
- * since in most cases it needs at least 8 bytes to look for a match.
579
- * but it's allowed by the specification. */
652
+ /* ZSTD_execSequenceEnd():
653
+ * This version handles cases that are near the end of the output buffer. It requires
654
+ * more careful checks to make sure there is no overflow. By separating out these hard
655
+ * and unlikely cases, we can speed up the common cases.
656
+ *
657
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
658
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
659
+ */
580
660
  FORCE_NOINLINE
581
- size_t ZSTD_execSequenceLast7(BYTE* op,
582
- BYTE* const oend, seq_t sequence,
583
- const BYTE** litPtr, const BYTE* const litLimit,
584
- const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
661
+ size_t ZSTD_execSequenceEnd(BYTE* op,
662
+ BYTE* const oend, seq_t sequence,
663
+ const BYTE** litPtr, const BYTE* const litLimit,
664
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
585
665
  {
586
666
  BYTE* const oLitEnd = op + sequence.litLength;
587
667
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
588
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
589
668
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
590
669
  const BYTE* match = oLitEnd - sequence.offset;
670
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
591
671
 
592
- /* check */
593
- if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must fit within dstBuffer */
594
- if (iLitEnd > litLimit) return ERROR(corruption_detected); /* try to read beyond literal buffer */
672
+ /* bounds checks : careful of address space overflow in 32-bit mode */
673
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
674
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
675
+ assert(op < op + sequenceLength);
676
+ assert(oLitEnd < op + sequenceLength);
595
677
 
596
678
  /* copy literals */
597
- while (op < oLitEnd) *op++ = *(*litPtr)++;
679
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
680
+ op = oLitEnd;
681
+ *litPtr = iLitEnd;
598
682
 
599
683
  /* copy Match */
600
- if (sequence.offset > (size_t)(oLitEnd - base)) {
684
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
601
685
  /* offset beyond prefix */
602
- if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
603
- match = dictEnd - (base-match);
686
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
687
+ match = dictEnd - (prefixStart-match);
604
688
  if (match + sequence.matchLength <= dictEnd) {
605
689
  memmove(oLitEnd, match, sequence.matchLength);
606
690
  return sequenceLength;
@@ -610,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
610
694
  memmove(oLitEnd, match, length1);
611
695
  op = oLitEnd + length1;
612
696
  sequence.matchLength -= length1;
613
- match = base;
697
+ match = prefixStart;
614
698
  } }
615
- while (op < oMatchEnd) *op++ = *match++;
699
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
616
700
  return sequenceLength;
617
701
  }
618
702
 
619
-
620
703
  HINT_INLINE
621
704
  size_t ZSTD_execSequence(BYTE* op,
622
705
  BYTE* const oend, seq_t sequence,
@@ -626,27 +709,47 @@ size_t ZSTD_execSequence(BYTE* op,
626
709
  BYTE* const oLitEnd = op + sequence.litLength;
627
710
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
628
711
  BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
629
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
712
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
630
713
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
631
714
  const BYTE* match = oLitEnd - sequence.offset;
632
715
 
633
- /* check */
634
- if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
635
- if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
636
- if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
637
-
638
- /* copy Literals */
639
- ZSTD_copy8(op, *litPtr);
640
- if (sequence.litLength > 8)
641
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
716
+ assert(op != NULL /* Precondition */);
717
+ assert(oend_w < oend /* No underflow */);
718
+ /* Handle edge cases in a slow path:
719
+ * - Read beyond end of literals
720
+ * - Match end is within WILDCOPY_OVERLENGTH of oend
721
+ * - 32-bit mode and the match length overflows
722
+ */
723
+ if (UNLIKELY(
724
+ iLitEnd > litLimit ||
725
+ oMatchEnd > oend_w ||
726
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
727
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
728
+
729
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
730
+ assert(op <= oLitEnd /* No overflow */);
731
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
732
+ assert(oMatchEnd <= oend /* No underflow */);
733
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
734
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
735
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
736
+
737
+ /* Copy Literals:
738
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
739
+ * We likely don't need the full 32-byte wildcopy.
740
+ */
741
+ assert(WILDCOPY_OVERLENGTH >= 16);
742
+ ZSTD_copy16(op, (*litPtr));
743
+ if (UNLIKELY(sequence.litLength > 16)) {
744
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
745
+ }
642
746
  op = oLitEnd;
643
747
  *litPtr = iLitEnd; /* update for next sequence */
644
748
 
645
- /* copy Match */
749
+ /* Copy Match */
646
750
  if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
647
751
  /* offset beyond prefix -> go into extDict */
648
- if (sequence.offset > (size_t)(oLitEnd - virtualStart))
649
- return ERROR(corruption_detected);
752
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
650
753
  match = dictEnd + (match - prefixStart);
651
754
  if (match + sequence.matchLength <= dictEnd) {
652
755
  memmove(oLitEnd, match, sequence.matchLength);
@@ -658,121 +761,33 @@ size_t ZSTD_execSequence(BYTE* op,
658
761
  op = oLitEnd + length1;
659
762
  sequence.matchLength -= length1;
660
763
  match = prefixStart;
661
- if (op > oend_w || sequence.matchLength < MINMATCH) {
662
- U32 i;
663
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
664
- return sequenceLength;
665
- }
666
764
  } }
667
- /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
668
-
669
- /* match within prefix */
670
- if (sequence.offset < 8) {
671
- /* close range match, overlap */
672
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
673
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
674
- int const sub2 = dec64table[sequence.offset];
675
- op[0] = match[0];
676
- op[1] = match[1];
677
- op[2] = match[2];
678
- op[3] = match[3];
679
- match += dec32table[sequence.offset];
680
- ZSTD_copy4(op+4, match);
681
- match -= sub2;
682
- } else {
683
- ZSTD_copy8(op, match);
684
- }
685
- op += 8; match += 8;
686
-
687
- if (oMatchEnd > oend-(16-MINMATCH)) {
688
- if (op < oend_w) {
689
- ZSTD_wildcopy(op, match, oend_w - op);
690
- match += oend_w - op;
691
- op = oend_w;
692
- }
693
- while (op < oMatchEnd) *op++ = *match++;
694
- } else {
695
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
765
+ /* Match within prefix of 1 or more bytes */
766
+ assert(op <= oMatchEnd);
767
+ assert(oMatchEnd <= oend_w);
768
+ assert(match >= prefixStart);
769
+ assert(sequence.matchLength >= 1);
770
+
771
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
772
+ * without overlap checking.
773
+ */
774
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
775
+ /* We bet on a full wildcopy for matches, since we expect matches to be
776
+ * longer than literals (in general). In silesia, ~10% of matches are longer
777
+ * than 16 bytes.
778
+ */
779
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
780
+ return sequenceLength;
696
781
  }
697
- return sequenceLength;
698
- }
699
-
700
-
701
- HINT_INLINE
702
- size_t ZSTD_execSequenceLong(BYTE* op,
703
- BYTE* const oend, seq_t sequence,
704
- const BYTE** litPtr, const BYTE* const litLimit,
705
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
706
- {
707
- BYTE* const oLitEnd = op + sequence.litLength;
708
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
709
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
710
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
711
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
712
- const BYTE* match = sequence.match;
782
+ assert(sequence.offset < WILDCOPY_VECLEN);
713
783
 
714
- /* check */
715
- if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
716
- if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
717
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
718
-
719
- /* copy Literals */
720
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
721
- if (sequence.litLength > 8)
722
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
723
- op = oLitEnd;
724
- *litPtr = iLitEnd; /* update for next sequence */
784
+ /* Copy 8 bytes and spread the offset to be >= 8. */
785
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
725
786
 
726
- /* copy Match */
727
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
728
- /* offset beyond prefix */
729
- if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
730
- if (match + sequence.matchLength <= dictEnd) {
731
- memmove(oLitEnd, match, sequence.matchLength);
732
- return sequenceLength;
733
- }
734
- /* span extDict & currentPrefixSegment */
735
- { size_t const length1 = dictEnd - match;
736
- memmove(oLitEnd, match, length1);
737
- op = oLitEnd + length1;
738
- sequence.matchLength -= length1;
739
- match = prefixStart;
740
- if (op > oend_w || sequence.matchLength < MINMATCH) {
741
- U32 i;
742
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
743
- return sequenceLength;
744
- }
745
- } }
746
- assert(op <= oend_w);
747
- assert(sequence.matchLength >= MINMATCH);
748
-
749
- /* match within prefix */
750
- if (sequence.offset < 8) {
751
- /* close range match, overlap */
752
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
753
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
754
- int const sub2 = dec64table[sequence.offset];
755
- op[0] = match[0];
756
- op[1] = match[1];
757
- op[2] = match[2];
758
- op[3] = match[3];
759
- match += dec32table[sequence.offset];
760
- ZSTD_copy4(op+4, match);
761
- match -= sub2;
762
- } else {
763
- ZSTD_copy8(op, match);
764
- }
765
- op += 8; match += 8;
766
-
767
- if (oMatchEnd > oend-(16-MINMATCH)) {
768
- if (op < oend_w) {
769
- ZSTD_wildcopy(op, match, oend_w - op);
770
- match += oend_w - op;
771
- op = oend_w;
772
- }
773
- while (op < oMatchEnd) *op++ = *match++;
774
- } else {
775
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
787
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
788
+ if (sequence.matchLength > 8) {
789
+ assert(op < oMatchEnd);
790
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
776
791
  }
777
792
  return sequenceLength;
778
793
  }
@@ -798,10 +813,18 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
798
813
  DStatePtr->state = DInfo.nextState + lowBits;
799
814
  }
800
815
 
816
+ FORCE_INLINE_TEMPLATE void
817
+ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
818
+ {
819
+ U32 const nbBits = DInfo.nbBits;
820
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
821
+ DStatePtr->state = DInfo.nextState + lowBits;
822
+ }
823
+
801
824
  /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
802
825
  * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
803
826
  * bits before reloading. This value is the maximum number of bytes we read
804
- * after reloading when we are decoding long offets.
827
+ * after reloading when we are decoding long offsets.
805
828
  */
806
829
  #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
807
830
  (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
@@ -809,25 +832,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
809
832
  : 0)
810
833
 
811
834
  typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
835
+ typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
812
836
 
813
- #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
814
837
  FORCE_INLINE_TEMPLATE seq_t
815
- ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
838
+ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
816
839
  {
817
840
  seq_t seq;
818
- U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
819
- U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
820
- U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
821
- U32 const totalBits = llBits+mlBits+ofBits;
822
- U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
823
- U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
824
- U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
841
+ ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
842
+ ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
843
+ ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
844
+ U32 const llBase = llDInfo.baseValue;
845
+ U32 const mlBase = mlDInfo.baseValue;
846
+ U32 const ofBase = ofDInfo.baseValue;
847
+ BYTE const llBits = llDInfo.nbAdditionalBits;
848
+ BYTE const mlBits = mlDInfo.nbAdditionalBits;
849
+ BYTE const ofBits = ofDInfo.nbAdditionalBits;
850
+ BYTE const totalBits = llBits+mlBits+ofBits;
825
851
 
826
852
  /* sequence */
827
853
  { size_t offset;
828
- if (!ofBits)
829
- offset = 0;
830
- else {
854
+ if (ofBits > 1) {
831
855
  ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
832
856
  ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
833
857
  assert(ofBits <= MaxOff);
@@ -841,58 +865,138 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
841
865
  offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
842
866
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
843
867
  }
844
- }
845
-
846
- if (ofBits <= 1) {
847
- offset += (llBase==0);
848
- if (offset) {
849
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
850
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
851
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
852
- seqState->prevOffset[1] = seqState->prevOffset[0];
853
- seqState->prevOffset[0] = offset = temp;
854
- } else { /* offset == 0 */
855
- offset = seqState->prevOffset[0];
856
- }
857
- } else {
858
868
  seqState->prevOffset[2] = seqState->prevOffset[1];
859
869
  seqState->prevOffset[1] = seqState->prevOffset[0];
860
870
  seqState->prevOffset[0] = offset;
861
- }
871
+ } else {
872
+ U32 const ll0 = (llBase == 0);
873
+ if (LIKELY((ofBits == 0))) {
874
+ if (LIKELY(!ll0))
875
+ offset = seqState->prevOffset[0];
876
+ else {
877
+ offset = seqState->prevOffset[1];
878
+ seqState->prevOffset[1] = seqState->prevOffset[0];
879
+ seqState->prevOffset[0] = offset;
880
+ }
881
+ } else {
882
+ offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
883
+ { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
884
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
885
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
886
+ seqState->prevOffset[1] = seqState->prevOffset[0];
887
+ seqState->prevOffset[0] = offset = temp;
888
+ } } }
862
889
  seq.offset = offset;
863
890
  }
864
891
 
865
- seq.matchLength = mlBase
866
- + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
892
+ seq.matchLength = mlBase;
893
+ if (mlBits > 0)
894
+ seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
895
+
867
896
  if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
868
897
  BIT_reloadDStream(&seqState->DStream);
869
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
898
+ if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
870
899
  BIT_reloadDStream(&seqState->DStream);
871
900
  /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
872
901
  ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
873
902
 
874
- seq.litLength = llBase
875
- + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
903
+ seq.litLength = llBase;
904
+ if (llBits > 0)
905
+ seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
906
+
876
907
  if (MEM_32bits())
877
908
  BIT_reloadDStream(&seqState->DStream);
878
909
 
879
910
  DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
880
911
  (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
881
912
 
882
- /* ANS state update */
883
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
884
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
885
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
886
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
913
+ if (prefetch == ZSTD_p_prefetch) {
914
+ size_t const pos = seqState->pos + seq.litLength;
915
+ const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
916
+ seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
917
+ * No consequence though : no memory access will occur, offset is only used for prefetching */
918
+ seqState->pos = pos + seq.matchLength;
919
+ }
920
+
921
+ /* ANS state update
922
+ * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
923
+ * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
924
+ * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
925
+ * better option, so it is the default for other compilers. But, if you
926
+ * measure that it is worse, please put up a pull request.
927
+ */
928
+ {
929
+ #if defined(__GNUC__) && !defined(__clang__)
930
+ const int kUseUpdateFseState = 1;
931
+ #else
932
+ const int kUseUpdateFseState = 0;
933
+ #endif
934
+ if (kUseUpdateFseState) {
935
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
936
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
937
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
938
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
939
+ } else {
940
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
941
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
942
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
943
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
944
+ }
945
+ }
887
946
 
888
947
  return seq;
889
948
  }
890
949
 
950
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
951
+ static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
952
+ {
953
+ size_t const windowSize = dctx->fParams.windowSize;
954
+ /* No dictionary used. */
955
+ if (dctx->dictContentEndForFuzzing == NULL) return 0;
956
+ /* Dictionary is our prefix. */
957
+ if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
958
+ /* Dictionary is not our ext-dict. */
959
+ if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
960
+ /* Dictionary is not within our window size. */
961
+ if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
962
+ /* Dictionary is active. */
963
+ return 1;
964
+ }
965
+
966
+ MEM_STATIC void ZSTD_assertValidSequence(
967
+ ZSTD_DCtx const* dctx,
968
+ BYTE const* op, BYTE const* oend,
969
+ seq_t const seq,
970
+ BYTE const* prefixStart, BYTE const* virtualStart)
971
+ {
972
+ size_t const windowSize = dctx->fParams.windowSize;
973
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
974
+ BYTE const* const oLitEnd = op + seq.litLength;
975
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
976
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
977
+ assert(op <= oend);
978
+ assert((size_t)(oend - op) >= sequenceSize);
979
+ assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
980
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
981
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
982
+ /* Offset must be within the dictionary. */
983
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
984
+ assert(seq.offset <= windowSize + dictSize);
985
+ } else {
986
+ /* Offset must be within our window. */
987
+ assert(seq.offset <= windowSize);
988
+ }
989
+ }
990
+ #endif
991
+
992
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
891
993
  FORCE_INLINE_TEMPLATE size_t
994
+ DONT_VECTORIZE
892
995
  ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
893
996
  void* dst, size_t maxDstSize,
894
997
  const void* seqStart, size_t seqSize, int nbSeq,
895
- const ZSTD_longOffset_e isLongOffset)
998
+ const ZSTD_longOffset_e isLongOffset,
999
+ const int frame)
896
1000
  {
897
1001
  const BYTE* ip = (const BYTE*)seqStart;
898
1002
  const BYTE* const iend = ip + seqSize;
@@ -905,38 +1009,104 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
905
1009
  const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
906
1010
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
907
1011
  DEBUGLOG(5, "ZSTD_decompressSequences_body");
1012
+ (void)frame;
908
1013
 
909
1014
  /* Regen sequences */
910
1015
  if (nbSeq) {
911
1016
  seqState_t seqState;
1017
+ size_t error = 0;
912
1018
  dctx->fseEntropy = 1;
913
1019
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
914
- CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
1020
+ RETURN_ERROR_IF(
1021
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1022
+ corruption_detected, "");
915
1023
  ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
916
1024
  ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
917
1025
  ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
918
-
919
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
920
- nbSeq--;
921
- { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
922
- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
923
- DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
924
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
925
- op += oneSeqSize;
926
- } }
1026
+ assert(dst != NULL);
1027
+
1028
+ ZSTD_STATIC_ASSERT(
1029
+ BIT_DStream_unfinished < BIT_DStream_completed &&
1030
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1031
+ BIT_DStream_completed < BIT_DStream_overflow);
1032
+
1033
+ #if defined(__GNUC__) && defined(__x86_64__)
1034
+ /* Align the decompression loop to 32 + 16 bytes.
1035
+ *
1036
+ * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1037
+ * speed swings based on the alignment of the decompression loop. This
1038
+ * performance swing is caused by parts of the decompression loop falling
1039
+ * out of the DSB. The entire decompression loop should fit in the DSB,
1040
+ * when it can't we get much worse performance. You can measure if you've
1041
+ * hit the good case or the bad case with this perf command for some
1042
+ * compressed file test.zst:
1043
+ *
1044
+ * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1045
+ * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1046
+ *
1047
+ * If you see most cycles served out of the MITE you've hit the bad case.
1048
+ * If you see most cycles served out of the DSB you've hit the good case.
1049
+ * If it is pretty even then you may be in an okay case.
1050
+ *
1051
+ * I've been able to reproduce this issue on the following CPUs:
1052
+ * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1053
+ * Use Instruments->Counters to get DSB/MITE cycles.
1054
+ * I never got performance swings, but I was able to
1055
+ * go from the good case of mostly DSB to half of the
1056
+ * cycles served from MITE.
1057
+ * - Coffeelake: Intel i9-9900k
1058
+ *
1059
+ * I haven't been able to reproduce the instability or DSB misses on any
1060
+ * of the following CPUS:
1061
+ * - Haswell
1062
+ * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
1063
+ * - Skylake
1064
+ *
1065
+ * If you are seeing performance stability this script can help test.
1066
+ * It tests on 4 commits in zstd where I saw performance change.
1067
+ *
1068
+ * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1069
+ */
1070
+ __asm__(".p2align 5");
1071
+ __asm__("nop");
1072
+ __asm__(".p2align 4");
1073
+ #endif
1074
+ for ( ; ; ) {
1075
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
1076
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1077
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1078
+ assert(!ZSTD_isError(oneSeqSize));
1079
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1080
+ #endif
1081
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1082
+ BIT_reloadDStream(&(seqState.DStream));
1083
+ /* gcc and clang both don't like early returns in this loop.
1084
+ * gcc doesn't like early breaks either.
1085
+ * Instead save an error and report it at the end.
1086
+ * When there is an error, don't increment op, so we don't
1087
+ * overwrite.
1088
+ */
1089
+ if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
1090
+ else op += oneSeqSize;
1091
+ if (UNLIKELY(!--nbSeq)) break;
1092
+ }
927
1093
 
928
1094
  /* check if reached exact end */
929
1095
  DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
930
- if (nbSeq) return ERROR(corruption_detected);
1096
+ if (ZSTD_isError(error)) return error;
1097
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1098
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
931
1099
  /* save reps for next block */
932
1100
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
933
1101
  }
934
1102
 
935
1103
  /* last literal segment */
936
1104
  { size_t const lastLLSize = litEnd - litPtr;
937
- if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
938
- memcpy(op, litPtr, lastLLSize);
939
- op += lastLLSize;
1105
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1106
+ if (op != NULL) {
1107
+ memcpy(op, litPtr, lastLLSize);
1108
+ op += lastLLSize;
1109
+ }
940
1110
  }
941
1111
 
942
1112
  return op-ostart;
@@ -946,99 +1116,21 @@ static size_t
946
1116
  ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
947
1117
  void* dst, size_t maxDstSize,
948
1118
  const void* seqStart, size_t seqSize, int nbSeq,
949
- const ZSTD_longOffset_e isLongOffset)
1119
+ const ZSTD_longOffset_e isLongOffset,
1120
+ const int frame)
950
1121
  {
951
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1122
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
952
1123
  }
953
1124
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
954
1125
 
955
-
956
-
957
1126
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
958
- FORCE_INLINE_TEMPLATE seq_t
959
- ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
960
- {
961
- seq_t seq;
962
- U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
963
- U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
964
- U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
965
- U32 const totalBits = llBits+mlBits+ofBits;
966
- U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
967
- U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
968
- U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
969
-
970
- /* sequence */
971
- { size_t offset;
972
- if (!ofBits)
973
- offset = 0;
974
- else {
975
- ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
976
- ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
977
- assert(ofBits <= MaxOff);
978
- if (MEM_32bits() && longOffsets) {
979
- U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
980
- offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
981
- if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
982
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
983
- } else {
984
- offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
985
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
986
- }
987
- }
988
-
989
- if (ofBits <= 1) {
990
- offset += (llBase==0);
991
- if (offset) {
992
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
993
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
994
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
995
- seqState->prevOffset[1] = seqState->prevOffset[0];
996
- seqState->prevOffset[0] = offset = temp;
997
- } else {
998
- offset = seqState->prevOffset[0];
999
- }
1000
- } else {
1001
- seqState->prevOffset[2] = seqState->prevOffset[1];
1002
- seqState->prevOffset[1] = seqState->prevOffset[0];
1003
- seqState->prevOffset[0] = offset;
1004
- }
1005
- seq.offset = offset;
1006
- }
1007
-
1008
- seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
1009
- if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1010
- BIT_reloadDStream(&seqState->DStream);
1011
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1012
- BIT_reloadDStream(&seqState->DStream);
1013
- /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
1014
- ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1015
-
1016
- seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
1017
- if (MEM_32bits())
1018
- BIT_reloadDStream(&seqState->DStream);
1019
-
1020
- { size_t const pos = seqState->pos + seq.litLength;
1021
- const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
1022
- seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1023
- * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
1024
- seqState->pos = pos + seq.matchLength;
1025
- }
1026
-
1027
- /* ANS state update */
1028
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1029
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1030
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1031
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1032
-
1033
- return seq;
1034
- }
1035
-
1036
1127
  FORCE_INLINE_TEMPLATE size_t
1037
1128
  ZSTD_decompressSequencesLong_body(
1038
1129
  ZSTD_DCtx* dctx,
1039
1130
  void* dst, size_t maxDstSize,
1040
1131
  const void* seqStart, size_t seqSize, int nbSeq,
1041
- const ZSTD_longOffset_e isLongOffset)
1132
+ const ZSTD_longOffset_e isLongOffset,
1133
+ const int frame)
1042
1134
  {
1043
1135
  const BYTE* ip = (const BYTE*)seqStart;
1044
1136
  const BYTE* const iend = ip + seqSize;
@@ -1050,6 +1142,7 @@ ZSTD_decompressSequencesLong_body(
1050
1142
  const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1051
1143
  const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1052
1144
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1145
+ (void)frame;
1053
1146
 
1054
1147
  /* Regen sequences */
1055
1148
  if (nbSeq) {
@@ -1065,34 +1158,45 @@ ZSTD_decompressSequencesLong_body(
1065
1158
  seqState.prefixStart = prefixStart;
1066
1159
  seqState.pos = (size_t)(op-prefixStart);
1067
1160
  seqState.dictEnd = dictEnd;
1161
+ assert(dst != NULL);
1068
1162
  assert(iend >= ip);
1069
- CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
1163
+ RETURN_ERROR_IF(
1164
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1165
+ corruption_detected, "");
1070
1166
  ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1071
1167
  ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1072
1168
  ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1073
1169
 
1074
1170
  /* prepare in advance */
1075
1171
  for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1076
- sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1172
+ sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
1077
1173
  PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1078
1174
  }
1079
- if (seqNb<seqAdvance) return ERROR(corruption_detected);
1175
+ RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1080
1176
 
1081
1177
  /* decode and decompress */
1082
1178
  for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1083
- seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1084
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1179
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
1180
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1181
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1182
+ assert(!ZSTD_isError(oneSeqSize));
1183
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1184
+ #endif
1085
1185
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1086
1186
  PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1087
1187
  sequences[seqNb & STORED_SEQS_MASK] = sequence;
1088
1188
  op += oneSeqSize;
1089
1189
  }
1090
- if (seqNb<nbSeq) return ERROR(corruption_detected);
1190
+ RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1091
1191
 
1092
1192
  /* finish queue */
1093
1193
  seqNb -= seqAdvance;
1094
1194
  for ( ; seqNb<nbSeq ; seqNb++) {
1095
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1195
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1196
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1197
+ assert(!ZSTD_isError(oneSeqSize));
1198
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1199
+ #endif
1096
1200
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1097
1201
  op += oneSeqSize;
1098
1202
  }
@@ -1103,9 +1207,11 @@ ZSTD_decompressSequencesLong_body(
1103
1207
 
1104
1208
  /* last literal segment */
1105
1209
  { size_t const lastLLSize = litEnd - litPtr;
1106
- if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
1107
- memcpy(op, litPtr, lastLLSize);
1108
- op += lastLLSize;
1210
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1211
+ if (op != NULL) {
1212
+ memcpy(op, litPtr, lastLLSize);
1213
+ op += lastLLSize;
1214
+ }
1109
1215
  }
1110
1216
 
1111
1217
  return op-ostart;
@@ -1115,9 +1221,10 @@ static size_t
1115
1221
  ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1116
1222
  void* dst, size_t maxDstSize,
1117
1223
  const void* seqStart, size_t seqSize, int nbSeq,
1118
- const ZSTD_longOffset_e isLongOffset)
1224
+ const ZSTD_longOffset_e isLongOffset,
1225
+ const int frame)
1119
1226
  {
1120
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1227
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1121
1228
  }
1122
1229
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1123
1230
 
@@ -1127,12 +1234,14 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1127
1234
 
1128
1235
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1129
1236
  static TARGET_ATTRIBUTE("bmi2") size_t
1237
+ DONT_VECTORIZE
1130
1238
  ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1131
1239
  void* dst, size_t maxDstSize,
1132
1240
  const void* seqStart, size_t seqSize, int nbSeq,
1133
- const ZSTD_longOffset_e isLongOffset)
1241
+ const ZSTD_longOffset_e isLongOffset,
1242
+ const int frame)
1134
1243
  {
1135
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1244
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1136
1245
  }
1137
1246
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1138
1247
 
@@ -1141,9 +1250,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t
1141
1250
  ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1142
1251
  void* dst, size_t maxDstSize,
1143
1252
  const void* seqStart, size_t seqSize, int nbSeq,
1144
- const ZSTD_longOffset_e isLongOffset)
1253
+ const ZSTD_longOffset_e isLongOffset,
1254
+ const int frame)
1145
1255
  {
1146
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1256
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1147
1257
  }
1148
1258
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1149
1259
 
@@ -1153,21 +1263,23 @@ typedef size_t (*ZSTD_decompressSequences_t)(
1153
1263
  ZSTD_DCtx* dctx,
1154
1264
  void* dst, size_t maxDstSize,
1155
1265
  const void* seqStart, size_t seqSize, int nbSeq,
1156
- const ZSTD_longOffset_e isLongOffset);
1266
+ const ZSTD_longOffset_e isLongOffset,
1267
+ const int frame);
1157
1268
 
1158
1269
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1159
1270
  static size_t
1160
1271
  ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1161
1272
  const void* seqStart, size_t seqSize, int nbSeq,
1162
- const ZSTD_longOffset_e isLongOffset)
1273
+ const ZSTD_longOffset_e isLongOffset,
1274
+ const int frame)
1163
1275
  {
1164
1276
  DEBUGLOG(5, "ZSTD_decompressSequences");
1165
1277
  #if DYNAMIC_BMI2
1166
1278
  if (dctx->bmi2) {
1167
- return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1279
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1168
1280
  }
1169
1281
  #endif
1170
- return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1282
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1171
1283
  }
1172
1284
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1173
1285
 
@@ -1176,21 +1288,22 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1176
1288
  /* ZSTD_decompressSequencesLong() :
1177
1289
  * decompression function triggered when a minimum share of offsets is considered "long",
1178
1290
  * aka out of cache.
1179
- * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes mearning "farther than memory cache distance".
1291
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1180
1292
  * This function will try to mitigate main memory latency through the use of prefetching */
1181
1293
  static size_t
1182
1294
  ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1183
1295
  void* dst, size_t maxDstSize,
1184
1296
  const void* seqStart, size_t seqSize, int nbSeq,
1185
- const ZSTD_longOffset_e isLongOffset)
1297
+ const ZSTD_longOffset_e isLongOffset,
1298
+ const int frame)
1186
1299
  {
1187
1300
  DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1188
1301
  #if DYNAMIC_BMI2
1189
1302
  if (dctx->bmi2) {
1190
- return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1303
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1191
1304
  }
1192
1305
  #endif
1193
- return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1306
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1194
1307
  }
1195
1308
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1196
1309
 
@@ -1224,7 +1337,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1224
1337
  }
1225
1338
  #endif
1226
1339
 
1227
-
1228
1340
  size_t
1229
1341
  ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1230
1342
  void* dst, size_t dstCapacity,
@@ -1240,7 +1352,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1240
1352
  ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1241
1353
  DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1242
1354
 
1243
- if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
1355
+ RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
1244
1356
 
1245
1357
  /* Decode literals section */
1246
1358
  { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
@@ -1266,6 +1378,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1266
1378
  ip += seqHSize;
1267
1379
  srcSize -= seqHSize;
1268
1380
 
1381
+ RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
1382
+
1269
1383
  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1270
1384
  !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1271
1385
  if ( !usePrefetchDecoder
@@ -1284,17 +1398,28 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1284
1398
  if (usePrefetchDecoder)
1285
1399
  #endif
1286
1400
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1287
- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1401
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
1288
1402
  #endif
1289
1403
 
1290
1404
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1291
1405
  /* else */
1292
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1406
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
1293
1407
  #endif
1294
1408
  }
1295
1409
  }
1296
1410
 
1297
1411
 
1412
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
1413
+ {
1414
+ if (dst != dctx->previousDstEnd) { /* not contiguous */
1415
+ dctx->dictEnd = dctx->previousDstEnd;
1416
+ dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
1417
+ dctx->prefixStart = dst;
1418
+ dctx->previousDstEnd = dst;
1419
+ }
1420
+ }
1421
+
1422
+
1298
1423
  size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
1299
1424
  void* dst, size_t dstCapacity,
1300
1425
  const void* src, size_t srcSize)