zstdlib 0.4.0-x64-mingw32 → 0.5.0-x64-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +6 -1
  3. data/README.md +1 -1
  4. data/ext/zstdlib/extconf.rb +2 -2
  5. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/bitstream.h +3 -2
  6. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/compiler.h +14 -2
  7. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/cpu.h +0 -0
  8. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/debug.c +0 -0
  9. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/debug.h +0 -0
  10. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/entropy_common.c +0 -0
  11. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/error_private.c +0 -0
  12. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/error_private.h +0 -0
  13. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/fse.h +1 -1
  14. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/fse_decompress.c +2 -0
  15. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/huf.h +0 -0
  16. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/mem.h +73 -0
  17. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/pool.c +7 -3
  18. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/pool.h +0 -0
  19. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/threading.c +46 -1
  20. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/threading.h +32 -1
  21. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/xxhash.c +0 -0
  22. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/xxhash.h +0 -0
  23. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/zstd_common.c +0 -0
  24. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/zstd_errors.h +0 -0
  25. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/zstd_internal.h +32 -55
  26. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/fse_compress.c +0 -0
  27. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/hist.c +0 -0
  28. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/hist.h +0 -0
  29. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/huf_compress.c +0 -0
  30. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress.c +633 -436
  31. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress_internal.h +54 -12
  32. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress_literals.c +10 -5
  33. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress_literals.h +1 -1
  34. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress_sequences.c +3 -3
  35. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress_sequences.h +1 -1
  36. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_cwksp.h +535 -0
  37. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_double_fast.c +9 -9
  38. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_double_fast.h +0 -0
  39. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_fast.c +30 -39
  40. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_fast.h +0 -0
  41. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_lazy.c +5 -5
  42. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_lazy.h +0 -0
  43. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_ldm.c +4 -4
  44. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_ldm.h +0 -0
  45. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_opt.c +1 -1
  46. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_opt.h +0 -0
  47. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstdmt_compress.c +32 -26
  48. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstdmt_compress.h +0 -0
  49. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/huf_decompress.c +2 -0
  50. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_ddict.c +0 -0
  51. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_ddict.h +0 -0
  52. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_decompress.c +14 -16
  53. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_decompress_block.c +144 -146
  54. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_decompress_block.h +0 -0
  55. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_decompress_internal.h +0 -0
  56. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/zstd.h +161 -59
  57. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzclose.c +1 -1
  58. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzcompatibility.h +0 -0
  59. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzguts.h +0 -0
  60. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzlib.c +9 -9
  61. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzread.c +16 -8
  62. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzwrite.c +8 -8
  63. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/zstd_zlibwrapper.c +15 -11
  64. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/zstd_zlibwrapper.h +0 -0
  65. data/lib/2.2/zstdlib.so +0 -0
  66. data/lib/2.3/zstdlib.so +0 -0
  67. data/lib/2.4/zstdlib.so +0 -0
  68. data/lib/2.5/zstdlib.so +0 -0
  69. data/lib/2.6/zstdlib.so +0 -0
  70. metadata +62 -61
@@ -61,7 +61,9 @@
61
61
  * Error Management
62
62
  ****************************************************************/
63
63
  #define HUF_isError ERR_isError
64
+ #ifndef CHECK_F
64
65
  #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
66
+ #endif
65
67
 
66
68
 
67
69
  /* **************************************************************
@@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
88
88
 
89
89
  static size_t ZSTD_startingInputLength(ZSTD_format_e format)
90
90
  {
91
- size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
92
- ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
93
- ZSTD_FRAMEHEADERSIZE_PREFIX;
94
- ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
91
+ size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
95
92
  /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
96
93
  assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
97
94
  return startingInputLength;
@@ -376,7 +373,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
376
373
  {
377
374
  unsigned long long totalDstSize = 0;
378
375
 
379
- while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
376
+ while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
380
377
  U32 const magicNumber = MEM_readLE32(src);
381
378
 
382
379
  if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -629,11 +626,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
629
626
 
630
627
  /* check */
631
628
  RETURN_ERROR_IF(
632
- remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
629
+ remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
633
630
  srcSize_wrong);
634
631
 
635
632
  /* Frame Header */
636
- { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
633
+ { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
634
+ ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
637
635
  if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
638
636
  RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
639
637
  srcSize_wrong);
@@ -714,7 +712,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
714
712
  dictSize = ZSTD_DDict_dictSize(ddict);
715
713
  }
716
714
 
717
- while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
715
+ while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
718
716
 
719
717
  #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
720
718
  if (ZSTD_isLegacy(src, srcSize)) {
@@ -1098,7 +1096,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
1098
1096
  size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
1099
1097
  for (i=0; i<3; i++) {
1100
1098
  U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
1101
- RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
1099
+ RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
1102
1100
  dictionary_corrupted);
1103
1101
  entropy->rep[i] = rep;
1104
1102
  } }
@@ -1267,7 +1265,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
1267
1265
  {
1268
1266
  RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
1269
1267
  ZSTD_clearDict(dctx);
1270
- if (dict && dictSize >= 8) {
1268
+ if (dict && dictSize != 0) {
1271
1269
  dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
1272
1270
  RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
1273
1271
  dctx->ddict = dctx->ddictLocal;
@@ -1300,14 +1298,14 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
1300
1298
 
1301
1299
 
1302
1300
  /* ZSTD_initDStream_usingDict() :
1303
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
1301
+ * return : expected size, aka ZSTD_startingInputLength().
1304
1302
  * this function cannot fail */
1305
1303
  size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
1306
1304
  {
1307
1305
  DEBUGLOG(4, "ZSTD_initDStream_usingDict");
1308
1306
  FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
1309
1307
  FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
1310
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
1308
+ return ZSTD_startingInputLength(zds->format);
1311
1309
  }
1312
1310
 
1313
1311
  /* note : this variant can't fail */
@@ -1324,16 +1322,16 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
1324
1322
  {
1325
1323
  FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
1326
1324
  FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
1327
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
1325
+ return ZSTD_startingInputLength(dctx->format);
1328
1326
  }
1329
1327
 
1330
1328
  /* ZSTD_resetDStream() :
1331
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
1329
+ * return : expected size, aka ZSTD_startingInputLength().
1332
1330
  * this function cannot fail */
1333
1331
  size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
1334
1332
  {
1335
1333
  FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
1336
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
1334
+ return ZSTD_startingInputLength(dctx->format);
1337
1335
  }
1338
1336
 
1339
1337
 
@@ -1564,7 +1562,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
1564
1562
  zds->lhSize += remainingInput;
1565
1563
  }
1566
1564
  input->pos = input->size;
1567
- return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
1565
+ return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
1568
1566
  }
1569
1567
  assert(ip != NULL);
1570
1568
  memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
@@ -573,38 +573,118 @@ typedef struct {
573
573
  size_t pos;
574
574
  } seqState_t;
575
575
 
576
+ /*! ZSTD_overlapCopy8() :
577
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
578
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
579
+ *
580
+ * Precondition: *ip <= *op
581
+ * Postcondition: *op - *ip >= 8
582
+ */
583
+ static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
584
+ assert(*ip <= *op);
585
+ if (offset < 8) {
586
+ /* close range match, overlap */
587
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
588
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
589
+ int const sub2 = dec64table[offset];
590
+ (*op)[0] = (*ip)[0];
591
+ (*op)[1] = (*ip)[1];
592
+ (*op)[2] = (*ip)[2];
593
+ (*op)[3] = (*ip)[3];
594
+ *ip += dec32table[offset];
595
+ ZSTD_copy4(*op+4, *ip);
596
+ *ip -= sub2;
597
+ } else {
598
+ ZSTD_copy8(*op, *ip);
599
+ }
600
+ *ip += 8;
601
+ *op += 8;
602
+ assert(*op - *ip >= 8);
603
+ }
604
+
605
+ /*! ZSTD_safecopy() :
606
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
607
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
608
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
609
+ * should be fast for a single long sequence, but can be slow for several short sequences.
610
+ *
611
+ * @param ovtype controls the overlap detection
612
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
613
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
614
+ * The src buffer must be before the dst buffer.
615
+ */
616
+ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
617
+ ptrdiff_t const diff = op - ip;
618
+ BYTE* const oend = op + length;
576
619
 
577
- /* ZSTD_execSequenceLast7():
578
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
579
- * requires more careful checks, to ensure there is no overflow.
580
- * performance does not matter though.
581
- * note : this case is supposed to be never generated "naturally" by reference encoder,
582
- * since in most cases it needs at least 8 bytes to look for a match.
583
- * but it's allowed by the specification. */
620
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
621
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
622
+
623
+ if (length < 8) {
624
+ /* Handle short lengths. */
625
+ while (op < oend) *op++ = *ip++;
626
+ return;
627
+ }
628
+ if (ovtype == ZSTD_overlap_src_before_dst) {
629
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
630
+ assert(length >= 8);
631
+ ZSTD_overlapCopy8(&op, &ip, diff);
632
+ assert(op - ip >= 8);
633
+ assert(op <= oend);
634
+ }
635
+
636
+ if (oend <= oend_w) {
637
+ /* No risk of overwrite. */
638
+ ZSTD_wildcopy(op, ip, length, ovtype);
639
+ return;
640
+ }
641
+ if (op <= oend_w) {
642
+ /* Wildcopy until we get close to the end. */
643
+ assert(oend > oend_w);
644
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
645
+ ip += oend_w - op;
646
+ op = oend_w;
647
+ }
648
+ /* Handle the leftovers. */
649
+ while (op < oend) *op++ = *ip++;
650
+ }
651
+
652
+ /* ZSTD_execSequenceEnd():
653
+ * This version handles cases that are near the end of the output buffer. It requires
654
+ * more careful checks to make sure there is no overflow. By separating out these hard
655
+ * and unlikely cases, we can speed up the common cases.
656
+ *
657
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
658
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
659
+ */
584
660
  FORCE_NOINLINE
585
- size_t ZSTD_execSequenceLast7(BYTE* op,
586
- BYTE* const oend, seq_t sequence,
587
- const BYTE** litPtr, const BYTE* const litLimit,
588
- const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
661
+ size_t ZSTD_execSequenceEnd(BYTE* op,
662
+ BYTE* const oend, seq_t sequence,
663
+ const BYTE** litPtr, const BYTE* const litLimit,
664
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
589
665
  {
590
666
  BYTE* const oLitEnd = op + sequence.litLength;
591
667
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
592
668
  BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
593
669
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
594
670
  const BYTE* match = oLitEnd - sequence.offset;
671
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
595
672
 
596
- /* check */
597
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
673
+ /* bounds checks */
674
+ assert(oLitEnd < oMatchEnd);
675
+ RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
598
676
  RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
599
677
 
600
678
  /* copy literals */
601
- while (op < oLitEnd) *op++ = *(*litPtr)++;
679
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
680
+ op = oLitEnd;
681
+ *litPtr = iLitEnd;
602
682
 
603
683
  /* copy Match */
604
- if (sequence.offset > (size_t)(oLitEnd - base)) {
684
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
605
685
  /* offset beyond prefix */
606
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
607
- match = dictEnd - (base-match);
686
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
687
+ match = dictEnd - (prefixStart-match);
608
688
  if (match + sequence.matchLength <= dictEnd) {
609
689
  memmove(oLitEnd, match, sequence.matchLength);
610
690
  return sequenceLength;
@@ -614,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
614
694
  memmove(oLitEnd, match, length1);
615
695
  op = oLitEnd + length1;
616
696
  sequence.matchLength -= length1;
617
- match = base;
697
+ match = prefixStart;
618
698
  } }
619
- while (op < oMatchEnd) *op++ = *match++;
699
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
620
700
  return sequenceLength;
621
701
  }
622
702
 
623
-
624
703
  HINT_INLINE
625
704
  size_t ZSTD_execSequence(BYTE* op,
626
705
  BYTE* const oend, seq_t sequence,
@@ -634,20 +713,29 @@ size_t ZSTD_execSequence(BYTE* op,
634
713
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
635
714
  const BYTE* match = oLitEnd - sequence.offset;
636
715
 
637
- /* check */
638
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
639
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
640
- if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
641
-
642
- /* copy Literals */
643
- if (sequence.litLength > 8)
644
- ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
645
- else
646
- ZSTD_copy8(op, *litPtr);
716
+ /* Errors and uncommon cases handled here. */
717
+ assert(oLitEnd < oMatchEnd);
718
+ if (iLitEnd > litLimit || oMatchEnd > oend_w)
719
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
720
+
721
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
722
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
723
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
724
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
725
+
726
+ /* Copy Literals:
727
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
728
+ * We likely don't need the full 32-byte wildcopy.
729
+ */
730
+ assert(WILDCOPY_OVERLENGTH >= 16);
731
+ ZSTD_copy16(op, (*litPtr));
732
+ if (sequence.litLength > 16) {
733
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
734
+ }
647
735
  op = oLitEnd;
648
736
  *litPtr = iLitEnd; /* update for next sequence */
649
737
 
650
- /* copy Match */
738
+ /* Copy Match */
651
739
  if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
652
740
  /* offset beyond prefix -> go into extDict */
653
741
  RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
@@ -662,123 +750,33 @@ size_t ZSTD_execSequence(BYTE* op,
662
750
  op = oLitEnd + length1;
663
751
  sequence.matchLength -= length1;
664
752
  match = prefixStart;
665
- if (op > oend_w || sequence.matchLength < MINMATCH) {
666
- U32 i;
667
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
668
- return sequenceLength;
669
- }
670
753
  } }
671
- /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
672
-
673
- /* match within prefix */
674
- if (sequence.offset < 8) {
675
- /* close range match, overlap */
676
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
677
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
678
- int const sub2 = dec64table[sequence.offset];
679
- op[0] = match[0];
680
- op[1] = match[1];
681
- op[2] = match[2];
682
- op[3] = match[3];
683
- match += dec32table[sequence.offset];
684
- ZSTD_copy4(op+4, match);
685
- match -= sub2;
686
- } else {
687
- ZSTD_copy8(op, match);
688
- }
689
- op += 8; match += 8;
690
-
691
- if (oMatchEnd > oend-(16-MINMATCH)) {
692
- if (op < oend_w) {
693
- ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
694
- match += oend_w - op;
695
- op = oend_w;
696
- }
697
- while (op < oMatchEnd) *op++ = *match++;
698
- } else {
699
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
754
+ /* Match within prefix of 1 or more bytes */
755
+ assert(op <= oMatchEnd);
756
+ assert(oMatchEnd <= oend_w);
757
+ assert(match >= prefixStart);
758
+ assert(sequence.matchLength >= 1);
759
+
760
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
761
+ * without overlap checking.
762
+ */
763
+ if (sequence.offset >= WILDCOPY_VECLEN) {
764
+ /* We bet on a full wildcopy for matches, since we expect matches to be
765
+ * longer than literals (in general). In silesia, ~10% of matches are longer
766
+ * than 16 bytes.
767
+ */
768
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
769
+ return sequenceLength;
700
770
  }
701
- return sequenceLength;
702
- }
703
-
704
-
705
- HINT_INLINE
706
- size_t ZSTD_execSequenceLong(BYTE* op,
707
- BYTE* const oend, seq_t sequence,
708
- const BYTE** litPtr, const BYTE* const litLimit,
709
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
710
- {
711
- BYTE* const oLitEnd = op + sequence.litLength;
712
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
713
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
714
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
715
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
716
- const BYTE* match = sequence.match;
771
+ assert(sequence.offset < WILDCOPY_VECLEN);
717
772
 
718
- /* check */
719
- RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
720
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
721
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
722
-
723
- /* copy Literals */
724
- if (sequence.litLength > 8)
725
- ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
726
- else
727
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
773
+ /* Copy 8 bytes and spread the offset to be >= 8. */
774
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
728
775
 
729
- op = oLitEnd;
730
- *litPtr = iLitEnd; /* update for next sequence */
731
-
732
- /* copy Match */
733
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
734
- /* offset beyond prefix */
735
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
736
- if (match + sequence.matchLength <= dictEnd) {
737
- memmove(oLitEnd, match, sequence.matchLength);
738
- return sequenceLength;
739
- }
740
- /* span extDict & currentPrefixSegment */
741
- { size_t const length1 = dictEnd - match;
742
- memmove(oLitEnd, match, length1);
743
- op = oLitEnd + length1;
744
- sequence.matchLength -= length1;
745
- match = prefixStart;
746
- if (op > oend_w || sequence.matchLength < MINMATCH) {
747
- U32 i;
748
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
749
- return sequenceLength;
750
- }
751
- } }
752
- assert(op <= oend_w);
753
- assert(sequence.matchLength >= MINMATCH);
754
-
755
- /* match within prefix */
756
- if (sequence.offset < 8) {
757
- /* close range match, overlap */
758
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
759
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
760
- int const sub2 = dec64table[sequence.offset];
761
- op[0] = match[0];
762
- op[1] = match[1];
763
- op[2] = match[2];
764
- op[3] = match[3];
765
- match += dec32table[sequence.offset];
766
- ZSTD_copy4(op+4, match);
767
- match -= sub2;
768
- } else {
769
- ZSTD_copy8(op, match);
770
- }
771
- op += 8; match += 8;
772
-
773
- if (oMatchEnd > oend-(16-MINMATCH)) {
774
- if (op < oend_w) {
775
- ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
776
- match += oend_w - op;
777
- op = oend_w;
778
- }
779
- while (op < oMatchEnd) *op++ = *match++;
780
- } else {
781
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
776
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
777
+ if (sequence.matchLength > 8) {
778
+ assert(op < oMatchEnd);
779
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
782
780
  }
783
781
  return sequenceLength;
784
782
  }
@@ -1098,7 +1096,7 @@ ZSTD_decompressSequencesLong_body(
1098
1096
  /* decode and decompress */
1099
1097
  for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1100
1098
  seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1101
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1099
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1102
1100
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1103
1101
  PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1104
1102
  sequences[seqNb & STORED_SEQS_MASK] = sequence;
@@ -1109,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
1109
1107
  /* finish queue */
1110
1108
  seqNb -= seqAdvance;
1111
1109
  for ( ; seqNb<nbSeq ; seqNb++) {
1112
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1110
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1113
1111
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1114
1112
  op += oneSeqSize;
1115
1113
  }