zstdlib 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +6 -1
  3. data/README.md +1 -1
  4. data/ext/zstdlib/extconf.rb +2 -2
  5. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/bitstream.h +3 -2
  6. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/compiler.h +14 -2
  7. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/cpu.h +0 -0
  8. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/debug.c +0 -0
  9. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/debug.h +0 -0
  10. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/entropy_common.c +0 -0
  11. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/error_private.c +0 -0
  12. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/error_private.h +0 -0
  13. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/fse.h +1 -1
  14. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/fse_decompress.c +2 -0
  15. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/huf.h +0 -0
  16. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/mem.h +73 -0
  17. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/pool.c +7 -3
  18. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/pool.h +0 -0
  19. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/threading.c +46 -1
  20. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/threading.h +32 -1
  21. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/xxhash.c +0 -0
  22. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/xxhash.h +0 -0
  23. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/zstd_common.c +0 -0
  24. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/zstd_errors.h +0 -0
  25. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/common/zstd_internal.h +32 -55
  26. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/fse_compress.c +0 -0
  27. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/hist.c +0 -0
  28. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/hist.h +0 -0
  29. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/huf_compress.c +0 -0
  30. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress.c +633 -436
  31. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress_internal.h +54 -12
  32. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress_literals.c +10 -5
  33. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress_literals.h +1 -1
  34. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress_sequences.c +3 -3
  35. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_compress_sequences.h +1 -1
  36. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_cwksp.h +535 -0
  37. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_double_fast.c +9 -9
  38. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_double_fast.h +0 -0
  39. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_fast.c +30 -39
  40. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_fast.h +0 -0
  41. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_lazy.c +5 -5
  42. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_lazy.h +0 -0
  43. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_ldm.c +4 -4
  44. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_ldm.h +0 -0
  45. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_opt.c +1 -1
  46. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstd_opt.h +0 -0
  47. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstdmt_compress.c +32 -26
  48. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/compress/zstdmt_compress.h +0 -0
  49. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/huf_decompress.c +2 -0
  50. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_ddict.c +0 -0
  51. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_ddict.h +0 -0
  52. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_decompress.c +14 -16
  53. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_decompress_block.c +144 -146
  54. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_decompress_block.h +0 -0
  55. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/decompress/zstd_decompress_internal.h +0 -0
  56. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/lib/zstd.h +161 -59
  57. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzclose.c +1 -1
  58. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzcompatibility.h +0 -0
  59. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzguts.h +0 -0
  60. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzlib.c +9 -9
  61. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzread.c +16 -8
  62. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/gzwrite.c +8 -8
  63. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/zstd_zlibwrapper.c +15 -11
  64. data/ext/zstdlib/{zstd-1.4.3 → zstd-1.4.4}/zlibWrapper/zstd_zlibwrapper.h +0 -0
  65. metadata +62 -61
@@ -61,7 +61,9 @@
61
61
  * Error Management
62
62
  ****************************************************************/
63
63
  #define HUF_isError ERR_isError
64
+ #ifndef CHECK_F
64
65
  #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
66
+ #endif
65
67
 
66
68
 
67
69
  /* **************************************************************
@@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
88
88
 
89
89
  static size_t ZSTD_startingInputLength(ZSTD_format_e format)
90
90
  {
91
- size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
92
- ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
93
- ZSTD_FRAMEHEADERSIZE_PREFIX;
94
- ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
91
+ size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
95
92
  /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
96
93
  assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
97
94
  return startingInputLength;
@@ -376,7 +373,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
376
373
  {
377
374
  unsigned long long totalDstSize = 0;
378
375
 
379
- while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
376
+ while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
380
377
  U32 const magicNumber = MEM_readLE32(src);
381
378
 
382
379
  if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -629,11 +626,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
629
626
 
630
627
  /* check */
631
628
  RETURN_ERROR_IF(
632
- remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
629
+ remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
633
630
  srcSize_wrong);
634
631
 
635
632
  /* Frame Header */
636
- { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
633
+ { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
634
+ ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
637
635
  if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
638
636
  RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
639
637
  srcSize_wrong);
@@ -714,7 +712,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
714
712
  dictSize = ZSTD_DDict_dictSize(ddict);
715
713
  }
716
714
 
717
- while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
715
+ while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
718
716
 
719
717
  #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
720
718
  if (ZSTD_isLegacy(src, srcSize)) {
@@ -1098,7 +1096,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
1098
1096
  size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
1099
1097
  for (i=0; i<3; i++) {
1100
1098
  U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
1101
- RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
1099
+ RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
1102
1100
  dictionary_corrupted);
1103
1101
  entropy->rep[i] = rep;
1104
1102
  } }
@@ -1267,7 +1265,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
1267
1265
  {
1268
1266
  RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
1269
1267
  ZSTD_clearDict(dctx);
1270
- if (dict && dictSize >= 8) {
1268
+ if (dict && dictSize != 0) {
1271
1269
  dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
1272
1270
  RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
1273
1271
  dctx->ddict = dctx->ddictLocal;
@@ -1300,14 +1298,14 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
1300
1298
 
1301
1299
 
1302
1300
  /* ZSTD_initDStream_usingDict() :
1303
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
1301
+ * return : expected size, aka ZSTD_startingInputLength().
1304
1302
  * this function cannot fail */
1305
1303
  size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
1306
1304
  {
1307
1305
  DEBUGLOG(4, "ZSTD_initDStream_usingDict");
1308
1306
  FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
1309
1307
  FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
1310
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
1308
+ return ZSTD_startingInputLength(zds->format);
1311
1309
  }
1312
1310
 
1313
1311
  /* note : this variant can't fail */
@@ -1324,16 +1322,16 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
1324
1322
  {
1325
1323
  FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
1326
1324
  FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
1327
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
1325
+ return ZSTD_startingInputLength(dctx->format);
1328
1326
  }
1329
1327
 
1330
1328
  /* ZSTD_resetDStream() :
1331
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
1329
+ * return : expected size, aka ZSTD_startingInputLength().
1332
1330
  * this function cannot fail */
1333
1331
  size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
1334
1332
  {
1335
1333
  FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
1336
- return ZSTD_FRAMEHEADERSIZE_PREFIX;
1334
+ return ZSTD_startingInputLength(dctx->format);
1337
1335
  }
1338
1336
 
1339
1337
 
@@ -1564,7 +1562,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
1564
1562
  zds->lhSize += remainingInput;
1565
1563
  }
1566
1564
  input->pos = input->size;
1567
- return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
1565
+ return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
1568
1566
  }
1569
1567
  assert(ip != NULL);
1570
1568
  memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
@@ -573,38 +573,118 @@ typedef struct {
573
573
  size_t pos;
574
574
  } seqState_t;
575
575
 
576
+ /*! ZSTD_overlapCopy8() :
577
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
578
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
579
+ *
580
+ * Precondition: *ip <= *op
581
+ * Postcondition: *op - *ip >= 8
582
+ */
583
+ static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
584
+ assert(*ip <= *op);
585
+ if (offset < 8) {
586
+ /* close range match, overlap */
587
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
588
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
589
+ int const sub2 = dec64table[offset];
590
+ (*op)[0] = (*ip)[0];
591
+ (*op)[1] = (*ip)[1];
592
+ (*op)[2] = (*ip)[2];
593
+ (*op)[3] = (*ip)[3];
594
+ *ip += dec32table[offset];
595
+ ZSTD_copy4(*op+4, *ip);
596
+ *ip -= sub2;
597
+ } else {
598
+ ZSTD_copy8(*op, *ip);
599
+ }
600
+ *ip += 8;
601
+ *op += 8;
602
+ assert(*op - *ip >= 8);
603
+ }
604
+
605
+ /*! ZSTD_safecopy() :
606
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
607
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
608
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
609
+ * should be fast for a single long sequence, but can be slow for several short sequences.
610
+ *
611
+ * @param ovtype controls the overlap detection
612
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
613
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
614
+ * The src buffer must be before the dst buffer.
615
+ */
616
+ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
617
+ ptrdiff_t const diff = op - ip;
618
+ BYTE* const oend = op + length;
576
619
 
577
- /* ZSTD_execSequenceLast7():
578
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
579
- * requires more careful checks, to ensure there is no overflow.
580
- * performance does not matter though.
581
- * note : this case is supposed to be never generated "naturally" by reference encoder,
582
- * since in most cases it needs at least 8 bytes to look for a match.
583
- * but it's allowed by the specification. */
620
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
621
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
622
+
623
+ if (length < 8) {
624
+ /* Handle short lengths. */
625
+ while (op < oend) *op++ = *ip++;
626
+ return;
627
+ }
628
+ if (ovtype == ZSTD_overlap_src_before_dst) {
629
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
630
+ assert(length >= 8);
631
+ ZSTD_overlapCopy8(&op, &ip, diff);
632
+ assert(op - ip >= 8);
633
+ assert(op <= oend);
634
+ }
635
+
636
+ if (oend <= oend_w) {
637
+ /* No risk of overwrite. */
638
+ ZSTD_wildcopy(op, ip, length, ovtype);
639
+ return;
640
+ }
641
+ if (op <= oend_w) {
642
+ /* Wildcopy until we get close to the end. */
643
+ assert(oend > oend_w);
644
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
645
+ ip += oend_w - op;
646
+ op = oend_w;
647
+ }
648
+ /* Handle the leftovers. */
649
+ while (op < oend) *op++ = *ip++;
650
+ }
651
+
652
+ /* ZSTD_execSequenceEnd():
653
+ * This version handles cases that are near the end of the output buffer. It requires
654
+ * more careful checks to make sure there is no overflow. By separating out these hard
655
+ * and unlikely cases, we can speed up the common cases.
656
+ *
657
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
658
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
659
+ */
584
660
  FORCE_NOINLINE
585
- size_t ZSTD_execSequenceLast7(BYTE* op,
586
- BYTE* const oend, seq_t sequence,
587
- const BYTE** litPtr, const BYTE* const litLimit,
588
- const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
661
+ size_t ZSTD_execSequenceEnd(BYTE* op,
662
+ BYTE* const oend, seq_t sequence,
663
+ const BYTE** litPtr, const BYTE* const litLimit,
664
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
589
665
  {
590
666
  BYTE* const oLitEnd = op + sequence.litLength;
591
667
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
592
668
  BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
593
669
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
594
670
  const BYTE* match = oLitEnd - sequence.offset;
671
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
595
672
 
596
- /* check */
597
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
673
+ /* bounds checks */
674
+ assert(oLitEnd < oMatchEnd);
675
+ RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
598
676
  RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
599
677
 
600
678
  /* copy literals */
601
- while (op < oLitEnd) *op++ = *(*litPtr)++;
679
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
680
+ op = oLitEnd;
681
+ *litPtr = iLitEnd;
602
682
 
603
683
  /* copy Match */
604
- if (sequence.offset > (size_t)(oLitEnd - base)) {
684
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
605
685
  /* offset beyond prefix */
606
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
607
- match = dictEnd - (base-match);
686
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
687
+ match = dictEnd - (prefixStart-match);
608
688
  if (match + sequence.matchLength <= dictEnd) {
609
689
  memmove(oLitEnd, match, sequence.matchLength);
610
690
  return sequenceLength;
@@ -614,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
614
694
  memmove(oLitEnd, match, length1);
615
695
  op = oLitEnd + length1;
616
696
  sequence.matchLength -= length1;
617
- match = base;
697
+ match = prefixStart;
618
698
  } }
619
- while (op < oMatchEnd) *op++ = *match++;
699
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
620
700
  return sequenceLength;
621
701
  }
622
702
 
623
-
624
703
  HINT_INLINE
625
704
  size_t ZSTD_execSequence(BYTE* op,
626
705
  BYTE* const oend, seq_t sequence,
@@ -634,20 +713,29 @@ size_t ZSTD_execSequence(BYTE* op,
634
713
  const BYTE* const iLitEnd = *litPtr + sequence.litLength;
635
714
  const BYTE* match = oLitEnd - sequence.offset;
636
715
 
637
- /* check */
638
- RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
639
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
640
- if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
641
-
642
- /* copy Literals */
643
- if (sequence.litLength > 8)
644
- ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
645
- else
646
- ZSTD_copy8(op, *litPtr);
716
+ /* Errors and uncommon cases handled here. */
717
+ assert(oLitEnd < oMatchEnd);
718
+ if (iLitEnd > litLimit || oMatchEnd > oend_w)
719
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
720
+
721
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
722
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
723
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
724
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
725
+
726
+ /* Copy Literals:
727
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
728
+ * We likely don't need the full 32-byte wildcopy.
729
+ */
730
+ assert(WILDCOPY_OVERLENGTH >= 16);
731
+ ZSTD_copy16(op, (*litPtr));
732
+ if (sequence.litLength > 16) {
733
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
734
+ }
647
735
  op = oLitEnd;
648
736
  *litPtr = iLitEnd; /* update for next sequence */
649
737
 
650
- /* copy Match */
738
+ /* Copy Match */
651
739
  if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
652
740
  /* offset beyond prefix -> go into extDict */
653
741
  RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
@@ -662,123 +750,33 @@ size_t ZSTD_execSequence(BYTE* op,
662
750
  op = oLitEnd + length1;
663
751
  sequence.matchLength -= length1;
664
752
  match = prefixStart;
665
- if (op > oend_w || sequence.matchLength < MINMATCH) {
666
- U32 i;
667
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
668
- return sequenceLength;
669
- }
670
753
  } }
671
- /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
672
-
673
- /* match within prefix */
674
- if (sequence.offset < 8) {
675
- /* close range match, overlap */
676
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
677
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
678
- int const sub2 = dec64table[sequence.offset];
679
- op[0] = match[0];
680
- op[1] = match[1];
681
- op[2] = match[2];
682
- op[3] = match[3];
683
- match += dec32table[sequence.offset];
684
- ZSTD_copy4(op+4, match);
685
- match -= sub2;
686
- } else {
687
- ZSTD_copy8(op, match);
688
- }
689
- op += 8; match += 8;
690
-
691
- if (oMatchEnd > oend-(16-MINMATCH)) {
692
- if (op < oend_w) {
693
- ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
694
- match += oend_w - op;
695
- op = oend_w;
696
- }
697
- while (op < oMatchEnd) *op++ = *match++;
698
- } else {
699
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
754
+ /* Match within prefix of 1 or more bytes */
755
+ assert(op <= oMatchEnd);
756
+ assert(oMatchEnd <= oend_w);
757
+ assert(match >= prefixStart);
758
+ assert(sequence.matchLength >= 1);
759
+
760
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
761
+ * without overlap checking.
762
+ */
763
+ if (sequence.offset >= WILDCOPY_VECLEN) {
764
+ /* We bet on a full wildcopy for matches, since we expect matches to be
765
+ * longer than literals (in general). In silesia, ~10% of matches are longer
766
+ * than 16 bytes.
767
+ */
768
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
769
+ return sequenceLength;
700
770
  }
701
- return sequenceLength;
702
- }
703
-
704
-
705
- HINT_INLINE
706
- size_t ZSTD_execSequenceLong(BYTE* op,
707
- BYTE* const oend, seq_t sequence,
708
- const BYTE** litPtr, const BYTE* const litLimit,
709
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
710
- {
711
- BYTE* const oLitEnd = op + sequence.litLength;
712
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
713
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
714
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
715
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
716
- const BYTE* match = sequence.match;
771
+ assert(sequence.offset < WILDCOPY_VECLEN);
717
772
 
718
- /* check */
719
- RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
720
- RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
721
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
722
-
723
- /* copy Literals */
724
- if (sequence.litLength > 8)
725
- ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
726
- else
727
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
773
+ /* Copy 8 bytes and spread the offset to be >= 8. */
774
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
728
775
 
729
- op = oLitEnd;
730
- *litPtr = iLitEnd; /* update for next sequence */
731
-
732
- /* copy Match */
733
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
734
- /* offset beyond prefix */
735
- RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
736
- if (match + sequence.matchLength <= dictEnd) {
737
- memmove(oLitEnd, match, sequence.matchLength);
738
- return sequenceLength;
739
- }
740
- /* span extDict & currentPrefixSegment */
741
- { size_t const length1 = dictEnd - match;
742
- memmove(oLitEnd, match, length1);
743
- op = oLitEnd + length1;
744
- sequence.matchLength -= length1;
745
- match = prefixStart;
746
- if (op > oend_w || sequence.matchLength < MINMATCH) {
747
- U32 i;
748
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
749
- return sequenceLength;
750
- }
751
- } }
752
- assert(op <= oend_w);
753
- assert(sequence.matchLength >= MINMATCH);
754
-
755
- /* match within prefix */
756
- if (sequence.offset < 8) {
757
- /* close range match, overlap */
758
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
759
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
760
- int const sub2 = dec64table[sequence.offset];
761
- op[0] = match[0];
762
- op[1] = match[1];
763
- op[2] = match[2];
764
- op[3] = match[3];
765
- match += dec32table[sequence.offset];
766
- ZSTD_copy4(op+4, match);
767
- match -= sub2;
768
- } else {
769
- ZSTD_copy8(op, match);
770
- }
771
- op += 8; match += 8;
772
-
773
- if (oMatchEnd > oend-(16-MINMATCH)) {
774
- if (op < oend_w) {
775
- ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
776
- match += oend_w - op;
777
- op = oend_w;
778
- }
779
- while (op < oMatchEnd) *op++ = *match++;
780
- } else {
781
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
776
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
777
+ if (sequence.matchLength > 8) {
778
+ assert(op < oMatchEnd);
779
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
782
780
  }
783
781
  return sequenceLength;
784
782
  }
@@ -1098,7 +1096,7 @@ ZSTD_decompressSequencesLong_body(
1098
1096
  /* decode and decompress */
1099
1097
  for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1100
1098
  seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1101
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1099
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1102
1100
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1103
1101
  PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1104
1102
  sequences[seqNb & STORED_SEQS_MASK] = sequence;
@@ -1109,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
1109
1107
  /* finish queue */
1110
1108
  seqNb -= seqAdvance;
1111
1109
  for ( ; seqNb<nbSeq ; seqNb++) {
1112
- size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1110
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1113
1111
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1114
1112
  op += oneSeqSize;
1115
1113
  }