zstd-ruby 1.5.0.0 → 1.5.1.0

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/README.md +1 -1
  4. data/ext/zstdruby/extconf.rb +1 -0
  5. data/ext/zstdruby/libzstd/Makefile +50 -175
  6. data/ext/zstdruby/libzstd/README.md +7 -1
  7. data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
  8. data/ext/zstdruby/libzstd/common/compiler.h +89 -43
  9. data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
  10. data/ext/zstdruby/libzstd/common/error_private.h +79 -0
  11. data/ext/zstdruby/libzstd/common/fse.h +2 -1
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +24 -22
  14. data/ext/zstdruby/libzstd/common/mem.h +18 -0
  15. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  16. data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
  17. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  18. data/ext/zstdruby/libzstd/common/zstd_internal.h +92 -88
  19. data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
  20. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  21. data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
  22. data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
  23. data/ext/zstdruby/libzstd/compress/zstd_compress.c +194 -278
  24. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +102 -44
  25. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
  26. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
  27. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +5 -4
  28. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +3 -2
  29. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -3
  30. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +289 -114
  31. data/ext/zstdruby/libzstd/compress/zstd_fast.c +302 -123
  32. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +418 -502
  33. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
  34. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
  35. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
  36. data/ext/zstdruby/libzstd/compress/zstd_opt.c +186 -108
  37. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +59 -29
  38. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
  39. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  40. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
  41. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
  42. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
  43. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
  44. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
  45. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  46. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
  47. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +99 -28
  48. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
  49. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
  50. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
  51. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
  52. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
  53. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
  54. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
  55. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  56. data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
  57. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  58. data/ext/zstdruby/libzstd/zdict.h +4 -4
  59. data/ext/zstdruby/libzstd/zstd.h +179 -136
  60. data/ext/zstdruby/zstdruby.c +2 -2
  61. data/lib/zstd-ruby/version.rb +1 -1
  62. metadata +8 -3

The hunks below are from file 37, data/ext/zstdruby/libzstd/compress/zstdmt_compress.c, which tracks the upstream zstd v1.5.1 update.
@@ -467,7 +467,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
                          ZSTD_dictContentType_e dictContentType)
 {
     /* Adjust parameters */
-    if (params.ldmParams.enableLdm) {
+    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
         DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
         ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
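
Every enableLdm check in this file changes from a truthy test to an explicit comparison because upstream zstd 1.5.1 retypes the field from an int flag to the ZSTD_paramSwitch_e enum, declared in zstd.h roughly as sketched below. Note that with ZSTD_ps_disable == 2, a plain truthy test would treat "disabled" as enabled, which is why the comparison must be explicit.

    typedef enum {
        ZSTD_ps_auto = 0,    /* let the library decide whether to enable the feature */
        ZSTD_ps_enable = 1,  /* force-enable the feature */
        ZSTD_ps_disable = 2  /* do not allow the feature */
    } ZSTD_paramSwitch_e;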
@@ -478,7 +478,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
     serialState->nextJobID = 0;
     if (params.fParams.checksumFlag)
         XXH64_reset(&serialState->xxhState, 0);
-    if (params.ldmParams.enableLdm) {
+    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
         ZSTD_customMem cMem = params.customMem;
         unsigned const hashLog = params.ldmParams.hashLog;
         size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
@@ -564,7 +564,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
     /* A future job may error and skip our job */
     if (serialState->nextJobID == jobID) {
         /* It is now our turn, do any processing necessary */
-        if (serialState->params.ldmParams.enableLdm) {
+        if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
             size_t error;
             assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                    seqStore.size == 0 && seqStore.capacity > 0);
@@ -594,7 +594,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
         if (seqStore.size > 0) {
             size_t const err = ZSTD_referenceExternalSequences(
                 jobCCtx, seqStore.seq, seqStore.size);
-            assert(serialState->params.ldmParams.enableLdm);
+            assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
             assert(!ZSTD_isError(err));
             (void)err;
         }
@@ -672,7 +672,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
         if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
         job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
     }
-    if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
+    if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
         JOB_ERROR(ERROR(memory_allocation));

     /* Don't compute the checksum for chunks, since we compute it externally,
@@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
      */
     if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
     /* Don't run LDM for the chunks, since we handle it externally */
-    jobParams.ldmParams.enableLdm = 0;
+    jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
     /* Correct nbWorkers to 0. */
    jobParams.nbWorkers = 0;

@@ -807,6 +807,15 @@ typedef struct {
 static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};

 #define RSYNC_LENGTH 32
+/* Don't create chunks smaller than the zstd block size.
+ * This stops us from regressing compression ratio too much,
+ * and ensures our output fits in ZSTD_compressBound().
+ *
+ * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
+ * ZSTD_COMPRESSBOUND() will need to be updated.
+ */
+#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
+#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)

 typedef struct {
   U64 hash;
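
The new comment's claim that output still "fits in ZSTD_compressBound()" is an arithmetic property: the public ZSTD_COMPRESSBOUND() macro includes a per-call safety margin that shrinks from 64 bytes at size 0 down to zero at the 128 KiB block size, so chunks must never be cut smaller than one full block. A minimal sketch of the sizes involved, assuming only the public zstd.h constants (ZSTD_BLOCKSIZELOG_MAX == 17) and ZSTD_compressBound():

    #include <stdio.h>
    #include <zstd.h>   /* ZSTD_BLOCKSIZELOG_MAX, ZSTD_compressBound() */

    int main(void) {
        /* RSYNC_MIN_BLOCK_SIZE == 1 << ZSTD_BLOCKSIZELOG_MAX == 128 KiB */
        size_t const minBlock = (size_t)1 << ZSTD_BLOCKSIZELOG_MAX;
        printf("minimum rsyncable chunk: %zu bytes\n", minBlock);
        /* 131072 + (131072 >> 8) = 131584; the <128 KiB margin term is 0 here */
        printf("worst-case compressed size: %zu bytes\n", ZSTD_compressBound(minBlock));
        return 0;
    }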
@@ -1135,7 +1144,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
 static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
 {
     unsigned jobLog;
-    if (params->ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on cycleLog instead. */
@@ -1179,7 +1188,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
     int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
     int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
     assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params->ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead.
@@ -1252,6 +1261,9 @@ size_t ZSTDMT_initCStream_internal(
         /* Aim for the targetsectionSize as the average job size. */
         U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
         U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
+        /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
+         * expected job size is at least 4x larger. */
+        assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
         DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
         mtctx->rsync.hash = 0;
         mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
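
The new assert is a size relationship in disguise: rsyncBits = ZSTD_highbit32(jobSizeKB) + 10, so rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2 is equivalent to requiring the expected job size to be at least 4x RSYNC_MIN_BLOCK_SIZE. A worked sketch, assuming RSYNC_MIN_BLOCK_LOG == 17 and a hypothetical highbit32 stand-in for ZSTD_highbit32:

    #include <assert.h>

    /* position of the highest set bit, as ZSTD_highbit32 computes */
    static unsigned highbit32(unsigned v) {
        unsigned r = 0;
        while (v >>= 1) r++;
        return r;
    }

    int main(void) {
        unsigned const RSYNC_MIN_BLOCK_LOG = 17;       /* == ZSTD_BLOCKSIZELOG_MAX */
        unsigned const jobSizeKB = 512;                /* 512 KiB == 4 * 128 KiB */
        unsigned const rsyncBits = highbit32(jobSizeKB) + 10;  /* 9 + 10 == 19 */
        assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);  /* 19 >= 19: holds exactly at 4x */
        return 0;
    }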
@@ -1263,7 +1275,7 @@ size_t ZSTDMT_initCStream_internal(
     ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
     {
         /* If ldm is enabled we need windowSize space. */
-        size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+        size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
         /* Two buffers of slack, plus extra space for the overlap
          * This is the minimum slack that LDM works with. One extra because
          * flush might waste up to targetSectionSize-1 bytes. Another extra
@@ -1538,17 +1550,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
 static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
 {
     BYTE const* const bufferStart = (BYTE const*)buffer.start;
-    BYTE const* const bufferEnd = bufferStart + buffer.capacity;
     BYTE const* const rangeStart = (BYTE const*)range.start;
-    BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;

     if (rangeStart == NULL || bufferStart == NULL)
         return 0;
-    /* Empty ranges cannot overlap */
-    if (bufferStart == bufferEnd || rangeStart == rangeEnd)
-        return 0;

-    return bufferStart < rangeEnd && rangeStart < bufferEnd;
+    {
+        BYTE const* const bufferEnd = bufferStart + buffer.capacity;
+        BYTE const* const rangeEnd = rangeStart + range.size;
+
+        /* Empty ranges cannot overlap */
+        if (bufferStart == bufferEnd || rangeStart == rangeEnd)
+            return 0;
+
+        return bufferStart < rangeEnd && rangeStart < bufferEnd;
+    }
 }

 static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
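
This restructuring is not cosmetic: the old code computed bufferStart + buffer.capacity (and rangeStart + range.size) before the NULL checks, and NULL + n is undefined behavior in C even if the result is never dereferenced. The rewrite derives the end pointers only after both starts are known to be non-NULL, which also lets rangeEnd drop its special case for empty ranges. A minimal sketch of the same pattern, using hypothetical (ptr, len) spans:

    #include <stddef.h>

    static int spans_overlap(const char* a, size_t aLen, const char* b, size_t bLen) {
        if (a == NULL || b == NULL)
            return 0;                       /* check before any pointer arithmetic */
        {
            const char* const aEnd = a + aLen;
            const char* const bEnd = b + bLen;
            if (a == aEnd || b == bEnd)
                return 0;                   /* empty spans cannot overlap */
            return a < bEnd && b < aEnd;    /* half-open interval intersection */
        }
    }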
@@ -1575,7 +1591,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)

 static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
 {
-    if (mtctx->params.ldmParams.enableLdm) {
+    if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
         ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
         DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
         DEBUGLOG(5, "source  [0x%zx, 0x%zx)",
@@ -1678,6 +1694,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
     if (!mtctx->params.rsyncable)
         /* Rsync is disabled. */
         return syncPoint;
+    if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
+        /* We don't emit synchronization points if it would produce too small blocks.
+         * We don't have enough input to find a synchronization point, so don't look.
+         */
+        return syncPoint;
     if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
         /* Not enough to compute the hash.
          * We will miss any synchronization points in this RSYNC_LENGTH byte
@@ -1688,10 +1709,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
          */
         return syncPoint;
     /* Initialize the loop variables. */
-    if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
-        /* We have enough bytes buffered to initialize the hash.
+    if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
+        /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
+         * because they can't possibly be a sync point. So we can start
+         * part way through the input buffer.
+         */
+        pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
+        if (pos >= RSYNC_LENGTH) {
+            prev = istart + pos - RSYNC_LENGTH;
+            hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
+        } else {
+            assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
+            prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
+            hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
+            hash = ZSTD_rollingHash_append(hash, istart, pos);
+        }
+    } else {
+        /* We have enough bytes buffered to initialize the hash,
+         * and have processed enough bytes to find a sync point.
         * Start scanning at the beginning of the input.
         */
+        assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
+        assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
        pos = 0;
        prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
        hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
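
Worked through with concrete values, the new initialization behaves as follows (a sketch with assumed numbers, not code from the diff):

    /* Assume RSYNC_MIN_BLOCK_SIZE == 128 KiB, RSYNC_LENGTH == 32, and
     * mtctx->inBuff.filled == 100 KiB (< RSYNC_MIN_BLOCK_SIZE).
     *
     *   pos = 128 KiB - 100 KiB = 28 KiB
     *
     * Any cut before istart + pos would create a chunk smaller than
     * RSYNC_MIN_BLOCK_SIZE, so scanning starts there. Since pos (28 KiB)
     * >= RSYNC_LENGTH (32), the initial hash window lies entirely in the
     * new input:
     *
     *   prev = istart + pos - RSYNC_LENGTH;
     *   hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
     *
     * The else branch (pos < RSYNC_LENGTH) handles the case where the
     * window straddles the buffered bytes and the new input, stitching
     * the two pieces together with ZSTD_rollingHash_append. */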
@@ -1705,16 +1744,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
             syncPoint.flush = 1;
             return syncPoint;
         }
-    } else {
-        /* We don't have enough bytes buffered to initialize the hash, but
-         * we know we have at least RSYNC_LENGTH bytes total.
-         * Start scanning after the first RSYNC_LENGTH bytes less the bytes
-         * already buffered.
-         */
-        pos = RSYNC_LENGTH - mtctx->inBuff.filled;
-        prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
-        hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
-        hash = ZSTD_rollingHash_append(hash, istart, pos);
     }
     /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
      * through the input. If we hit a synchronization point, then cut the
@@ -1726,8 +1755,9 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
      */
     for (; pos < syncPoint.toLoad; ++pos) {
         BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
-        /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
+        assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
         hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
+        assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
         if ((hash & hitMask) == hitMask) {
             syncPoint.toLoad = pos + 1;
             syncPoint.flush = 1;
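
For context, the loop above is a content-defined chunking scan: a rolling hash over the last RSYNC_LENGTH bytes is tested against hitMask at every position, so chunk boundaries depend only on nearby bytes and re-align after insertions or deletions, which is what makes the output rsync-friendly. A toy sketch of the technique; the hash recurrence and constants are illustrative, not zstd's actual ZSTD_rollingHash_* implementation:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    #define WIN   32                        /* window size, like RSYNC_LENGTH */
    #define PRIME 0x9E3779B185EBCA87ULL     /* arbitrary odd 64-bit multiplier */

    static uint64_t prime_pow(unsigned n) { /* PRIME^n mod 2^64 */
        uint64_t p = 1;
        while (n--) p *= PRIME;
        return p;
    }

    int main(void) {
        unsigned char data[1 << 16];
        uint64_t const hitMask = (1ULL << 10) - 1;  /* ~1 KiB average chunk */
        uint64_t const pw = prime_pow(WIN - 1);     /* weight of the oldest byte */
        uint64_t hash = 0;
        size_t i;
        for (i = 0; i < sizeof(data); ++i)          /* synthetic input */
            data[i] = (unsigned char)((i * 2654435761u) >> 13);
        for (i = 0; i < WIN; ++i)                   /* hash the first window */
            hash = hash * PRIME + data[i];
        for (i = WIN; i < sizeof(data); ++i) {
            hash -= data[i - WIN] * pw;             /* drop the oldest byte */
            hash = hash * PRIME + data[i];          /* fold in the newest byte */
            if ((hash & hitMask) == hitMask)        /* boundary chosen by content */
                printf("cut at offset %zu\n", i + 1);
        }
        return 0;
    }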