zstd-ruby 1.5.0.0 → 1.5.1.0

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/README.md +1 -1
  4. data/ext/zstdruby/extconf.rb +1 -0
  5. data/ext/zstdruby/libzstd/Makefile +50 -175
  6. data/ext/zstdruby/libzstd/README.md +7 -1
  7. data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
  8. data/ext/zstdruby/libzstd/common/compiler.h +89 -43
  9. data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
  10. data/ext/zstdruby/libzstd/common/error_private.h +79 -0
  11. data/ext/zstdruby/libzstd/common/fse.h +2 -1
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +24 -22
  14. data/ext/zstdruby/libzstd/common/mem.h +18 -0
  15. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  16. data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
  17. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  18. data/ext/zstdruby/libzstd/common/zstd_internal.h +92 -88
  19. data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
  20. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  21. data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
  22. data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
  23. data/ext/zstdruby/libzstd/compress/zstd_compress.c +194 -278
  24. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +102 -44
  25. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
  26. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
  27. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +5 -4
  28. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +3 -2
  29. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -3
  30. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +289 -114
  31. data/ext/zstdruby/libzstd/compress/zstd_fast.c +302 -123
  32. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +418 -502
  33. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
  34. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
  35. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
  36. data/ext/zstdruby/libzstd/compress/zstd_opt.c +186 -108
  37. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +59 -29
  38. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
  39. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  40. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
  41. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
  42. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
  43. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
  44. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
  45. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  46. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
  47. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +99 -28
  48. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
  49. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
  50. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
  51. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
  52. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
  53. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
  54. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
  55. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  56. data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
  57. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  58. data/ext/zstdruby/libzstd/zdict.h +4 -4
  59. data/ext/zstdruby/libzstd/zstd.h +179 -136
  60. data/ext/zstdruby/zstdruby.c +2 -2
  61. data/lib/zstd-ruby/version.rb +1 -1
  62. metadata +8 -3

The hunks below are from file 37, data/ext/zstdruby/libzstd/compress/zstdmt_compress.c, which tracks the upstream zstd v1.5.1 update.
@@ -467,7 +467,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
                          ZSTD_dictContentType_e dictContentType)
 {
     /* Adjust parameters */
-    if (params.ldmParams.enableLdm) {
+    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
         DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
         ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
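
Every enableLdm check in this file changes from a truthy test to an explicit comparison because upstream zstd 1.5.1 retypes the field from an int flag to the ZSTD_paramSwitch_e enum, declared in zstd.h roughly as sketched below. Note that with ZSTD_ps_disable == 2, a plain truthy test would treat "disabled" as enabled, which is why the comparison must be explicit.

    typedef enum {
        ZSTD_ps_auto = 0,    /* let the library decide whether to enable the feature */
        ZSTD_ps_enable = 1,  /* force-enable the feature */
        ZSTD_ps_disable = 2  /* do not allow the feature */
    } ZSTD_paramSwitch_e;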
@@ -478,7 +478,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
     serialState->nextJobID = 0;
     if (params.fParams.checksumFlag)
         XXH64_reset(&serialState->xxhState, 0);
-    if (params.ldmParams.enableLdm) {
+    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
         ZSTD_customMem cMem = params.customMem;
         unsigned const hashLog = params.ldmParams.hashLog;
         size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
@@ -564,7 +564,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
     /* A future job may error and skip our job */
     if (serialState->nextJobID == jobID) {
         /* It is now our turn, do any processing necessary */
-        if (serialState->params.ldmParams.enableLdm) {
+        if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
             size_t error;
             assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                    seqStore.size == 0 && seqStore.capacity > 0);
@@ -594,7 +594,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
         if (seqStore.size > 0) {
             size_t const err = ZSTD_referenceExternalSequences(
                 jobCCtx, seqStore.seq, seqStore.size);
-            assert(serialState->params.ldmParams.enableLdm);
+            assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
             assert(!ZSTD_isError(err));
             (void)err;
         }
@@ -672,7 +672,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
         if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
         job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
     }
-    if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
+    if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
         JOB_ERROR(ERROR(memory_allocation));

     /* Don't compute the checksum for chunks, since we compute it externally,
@@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
      */
     if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
     /* Don't run LDM for the chunks, since we handle it externally */
-    jobParams.ldmParams.enableLdm = 0;
+    jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
     /* Correct nbWorkers to 0. */
    jobParams.nbWorkers = 0;

@@ -807,6 +807,15 @@ typedef struct {
 static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};

 #define RSYNC_LENGTH 32
+/* Don't create chunks smaller than the zstd block size.
+ * This stops us from regressing compression ratio too much,
+ * and ensures our output fits in ZSTD_compressBound().
+ *
+ * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
+ * ZSTD_COMPRESSBOUND() will need to be updated.
+ */
+#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
+#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)

 typedef struct {
   U64 hash;
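
The new comment's claim that output still "fits in ZSTD_compressBound()" is an arithmetic property: the public ZSTD_COMPRESSBOUND() macro includes a per-call safety margin that shrinks from 64 bytes at size 0 down to zero at the 128 KiB block size, so chunks must never be cut smaller than one full block. A minimal sketch of the sizes involved, assuming only the public zstd.h constants (ZSTD_BLOCKSIZELOG_MAX == 17) and ZSTD_compressBound():

    #include <stdio.h>
    #include <zstd.h>   /* ZSTD_BLOCKSIZELOG_MAX, ZSTD_compressBound() */

    int main(void) {
        /* RSYNC_MIN_BLOCK_SIZE == 1 << ZSTD_BLOCKSIZELOG_MAX == 128 KiB */
        size_t const minBlock = (size_t)1 << ZSTD_BLOCKSIZELOG_MAX;
        printf("minimum rsyncable chunk: %zu bytes\n", minBlock);
        /* 131072 + (131072 >> 8) = 131584; the <128 KiB margin term is 0 here */
        printf("worst-case compressed size: %zu bytes\n", ZSTD_compressBound(minBlock));
        return 0;
    }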
@@ -1135,7 +1144,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
 static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
 {
     unsigned jobLog;
-    if (params->ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on cycleLog instead. */
@@ -1179,7 +1188,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
     int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
     int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
     assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params->ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead.
@@ -1252,6 +1261,9 @@ size_t ZSTDMT_initCStream_internal(
         /* Aim for the targetsectionSize as the average job size. */
         U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
         U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
+        /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
+         * expected job size is at least 4x larger. */
+        assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
         DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
         mtctx->rsync.hash = 0;
         mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
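
The new assert is a size relationship in disguise: rsyncBits = ZSTD_highbit32(jobSizeKB) + 10, so rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2 is equivalent to requiring the expected job size to be at least 4x RSYNC_MIN_BLOCK_SIZE. A worked sketch, assuming RSYNC_MIN_BLOCK_LOG == 17 and a hypothetical highbit32 stand-in for ZSTD_highbit32:

    #include <assert.h>

    /* position of the highest set bit, as ZSTD_highbit32 computes */
    static unsigned highbit32(unsigned v) {
        unsigned r = 0;
        while (v >>= 1) r++;
        return r;
    }

    int main(void) {
        unsigned const RSYNC_MIN_BLOCK_LOG = 17;       /* == ZSTD_BLOCKSIZELOG_MAX */
        unsigned const jobSizeKB = 512;                /* 512 KiB == 4 * 128 KiB */
        unsigned const rsyncBits = highbit32(jobSizeKB) + 10;  /* 9 + 10 == 19 */
        assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);  /* 19 >= 19: holds exactly at 4x */
        return 0;
    }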
@@ -1263,7 +1275,7 @@ size_t ZSTDMT_initCStream_internal(
     ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
     {
         /* If ldm is enabled we need windowSize space. */
-        size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+        size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
         /* Two buffers of slack, plus extra space for the overlap
          * This is the minimum slack that LDM works with. One extra because
          * flush might waste up to targetSectionSize-1 bytes. Another extra
@@ -1538,17 +1550,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
 static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
 {
     BYTE const* const bufferStart = (BYTE const*)buffer.start;
-    BYTE const* const bufferEnd = bufferStart + buffer.capacity;
     BYTE const* const rangeStart = (BYTE const*)range.start;
-    BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;

     if (rangeStart == NULL || bufferStart == NULL)
         return 0;
-    /* Empty ranges cannot overlap */
-    if (bufferStart == bufferEnd || rangeStart == rangeEnd)
-        return 0;

-    return bufferStart < rangeEnd && rangeStart < bufferEnd;
+    {
+        BYTE const* const bufferEnd = bufferStart + buffer.capacity;
+        BYTE const* const rangeEnd = rangeStart + range.size;
+
+        /* Empty ranges cannot overlap */
+        if (bufferStart == bufferEnd || rangeStart == rangeEnd)
+            return 0;
+
+        return bufferStart < rangeEnd && rangeStart < bufferEnd;
+    }
 }

 static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
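
This restructuring is not cosmetic: the old code computed bufferStart + buffer.capacity (and rangeStart + range.size) before the NULL checks, and NULL + n is undefined behavior in C even if the result is never dereferenced. The rewrite derives the end pointers only after both starts are known to be non-NULL, which also lets rangeEnd drop its special case for empty ranges. A minimal sketch of the same pattern, using hypothetical (ptr, len) spans:

    #include <stddef.h>

    static int spans_overlap(const char* a, size_t aLen, const char* b, size_t bLen) {
        if (a == NULL || b == NULL)
            return 0;                       /* check before any pointer arithmetic */
        {
            const char* const aEnd = a + aLen;
            const char* const bEnd = b + bLen;
            if (a == aEnd || b == bEnd)
                return 0;                   /* empty spans cannot overlap */
            return a < bEnd && b < aEnd;    /* half-open interval intersection */
        }
    }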
@@ -1575,7 +1591,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)

 static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
 {
-    if (mtctx->params.ldmParams.enableLdm) {
+    if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
         ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
         DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
         DEBUGLOG(5, "source  [0x%zx, 0x%zx)",
@@ -1678,6 +1694,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
     if (!mtctx->params.rsyncable)
         /* Rsync is disabled. */
         return syncPoint;
+    if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
+        /* We don't emit synchronization points if it would produce too small blocks.
+         * We don't have enough input to find a synchronization point, so don't look.
+         */
+        return syncPoint;
     if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
         /* Not enough to compute the hash.
          * We will miss any synchronization points in this RSYNC_LENGTH byte
@@ -1688,10 +1709,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
          */
         return syncPoint;
     /* Initialize the loop variables. */
-    if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
-        /* We have enough bytes buffered to initialize the hash.
+    if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
+        /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
+         * because they can't possibly be a sync point. So we can start
+         * part way through the input buffer.
+         */
+        pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
+        if (pos >= RSYNC_LENGTH) {
+            prev = istart + pos - RSYNC_LENGTH;
+            hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
+        } else {
+            assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
+            prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
+            hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
+            hash = ZSTD_rollingHash_append(hash, istart, pos);
+        }
+    } else {
+        /* We have enough bytes buffered to initialize the hash,
+         * and have processed enough bytes to find a sync point.
         * Start scanning at the beginning of the input.
         */
+        assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
+        assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
        pos = 0;
        prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
        hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
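
Worked through with concrete values, the new initialization behaves as follows (a sketch with assumed numbers, not code from the diff):

    /* Assume RSYNC_MIN_BLOCK_SIZE == 128 KiB, RSYNC_LENGTH == 32, and
     * mtctx->inBuff.filled == 100 KiB (< RSYNC_MIN_BLOCK_SIZE).
     *
     *   pos = 128 KiB - 100 KiB = 28 KiB
     *
     * Any cut before istart + pos would create a chunk smaller than
     * RSYNC_MIN_BLOCK_SIZE, so scanning starts there. Since pos (28 KiB)
     * >= RSYNC_LENGTH (32), the initial hash window lies entirely in the
     * new input:
     *
     *   prev = istart + pos - RSYNC_LENGTH;
     *   hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
     *
     * The else branch (pos < RSYNC_LENGTH) handles the case where the
     * window straddles the buffered bytes and the new input, stitching
     * the two pieces together with ZSTD_rollingHash_append. */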
@@ -1705,16 +1744,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
             syncPoint.flush = 1;
             return syncPoint;
         }
-    } else {
-        /* We don't have enough bytes buffered to initialize the hash, but
-         * we know we have at least RSYNC_LENGTH bytes total.
-         * Start scanning after the first RSYNC_LENGTH bytes less the bytes
-         * already buffered.
-         */
-        pos = RSYNC_LENGTH - mtctx->inBuff.filled;
-        prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
-        hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
-        hash = ZSTD_rollingHash_append(hash, istart, pos);
     }
     /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
      * through the input. If we hit a synchronization point, then cut the
@@ -1726,8 +1755,9 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
      */
     for (; pos < syncPoint.toLoad; ++pos) {
         BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
-        /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
+        assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
         hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
+        assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
         if ((hash & hitMask) == hitMask) {
             syncPoint.toLoad = pos + 1;
             syncPoint.flush = 1;
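
For context, the loop above is a content-defined chunking scan: a rolling hash over the last RSYNC_LENGTH bytes is tested against hitMask at every position, so chunk boundaries depend only on nearby bytes and re-align after insertions or deletions, which is what makes the output rsync-friendly. A toy sketch of the technique; the hash recurrence and constants are illustrative, not zstd's actual ZSTD_rollingHash_* implementation:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    #define WIN   32                        /* window size, like RSYNC_LENGTH */
    #define PRIME 0x9E3779B185EBCA87ULL     /* arbitrary odd 64-bit multiplier */

    static uint64_t prime_pow(unsigned n) { /* PRIME^n mod 2^64 */
        uint64_t p = 1;
        while (n--) p *= PRIME;
        return p;
    }

    int main(void) {
        unsigned char data[1 << 16];
        uint64_t const hitMask = (1ULL << 10) - 1;  /* ~1 KiB average chunk */
        uint64_t const pw = prime_pow(WIN - 1);     /* weight of the oldest byte */
        uint64_t hash = 0;
        size_t i;
        for (i = 0; i < sizeof(data); ++i)          /* synthetic input */
            data[i] = (unsigned char)((i * 2654435761u) >> 13);
        for (i = 0; i < WIN; ++i)                   /* hash the first window */
            hash = hash * PRIME + data[i];
        for (i = WIN; i < sizeof(data); ++i) {
            hash -= data[i - WIN] * pw;             /* drop the oldest byte */
            hash = hash * PRIME + data[i];          /* fold in the newest byte */
            if ((hash & hitMask) == hitMask)        /* boundary chosen by content */
                printf("cut at offset %zu\n", i + 1);
        }
        return 0;
    }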