extzstd 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/contrib/zstd/CHANGELOG +188 -1
- data/contrib/zstd/CONTRIBUTING.md +157 -74
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +81 -58
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +59 -35
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/appveyor.yml +49 -136
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +87 -181
- data/contrib/zstd/lib/README.md +23 -6
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +33 -59
- data/contrib/zstd/lib/common/compiler.h +115 -45
- data/contrib/zstd/lib/common/cpu.h +1 -1
- data/contrib/zstd/lib/common/debug.c +1 -1
- data/contrib/zstd/lib/common/debug.h +1 -1
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +82 -3
- data/contrib/zstd/lib/common/fse.h +9 -85
- data/contrib/zstd/lib/common/fse_decompress.c +29 -111
- data/contrib/zstd/lib/common/huf.h +84 -172
- data/contrib/zstd/lib/common/mem.h +58 -49
- data/contrib/zstd/lib/common/pool.c +37 -16
- data/contrib/zstd/lib/common/pool.h +9 -3
- data/contrib/zstd/lib/common/portability_macros.h +156 -0
- data/contrib/zstd/lib/common/threading.c +68 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +7 -809
- data/contrib/zstd/lib/common/xxhash.h +5568 -167
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +64 -150
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +69 -150
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +773 -251
- data/contrib/zstd/lib/compress/zstd_compress.c +2650 -826
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +509 -180
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +33 -305
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +266 -85
- data/contrib/zstd/lib/compress/zstd_double_fast.c +369 -132
- data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +722 -258
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1105 -360
- data/contrib/zstd/lib/compress/zstd_lazy.h +41 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +272 -208
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +324 -197
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +109 -53
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1071 -539
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +507 -82
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +962 -310
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +54 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +44 -32
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -5
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +24 -16
- data/contrib/zstd/lib/dictBuilder/zdict.c +88 -95
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +16 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +24 -69
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +25 -72
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +23 -69
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +35 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +42 -87
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +35 -82
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +214 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +922 -293
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +7 -6
- data/ext/extzstd.c +13 -10
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +16 -5
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
/* ====== Dependencies ====== */
|
|
23
|
+
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
|
|
23
24
|
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
|
|
24
25
|
#include "../common/mem.h" /* MEM_STATIC */
|
|
25
26
|
#include "../common/pool.h" /* threadpool */
|
|
@@ -102,9 +103,8 @@ typedef struct ZSTDMT_bufferPool_s {
|
|
|
102
103
|
buffer_t bTable[1]; /* variable size */
|
|
103
104
|
} ZSTDMT_bufferPool;
|
|
104
105
|
|
|
105
|
-
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned
|
|
106
|
+
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
|
|
106
107
|
{
|
|
107
|
-
unsigned const maxNbBuffers = 2*nbWorkers + 3;
|
|
108
108
|
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
|
|
109
109
|
sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
|
|
110
110
|
if (bufPool==NULL) return NULL;
|
|
@@ -160,9 +160,8 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
|
|
|
160
160
|
}
|
|
161
161
|
|
|
162
162
|
|
|
163
|
-
static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
|
|
163
|
+
static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
|
|
164
164
|
{
|
|
165
|
-
unsigned const maxNbBuffers = 2*nbWorkers + 3;
|
|
166
165
|
if (srcBufPool==NULL) return NULL;
|
|
167
166
|
if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
|
|
168
167
|
return srcBufPool;
|
|
@@ -171,7 +170,7 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
|
|
|
171
170
|
size_t const bSize = srcBufPool->bufferSize; /* forward parameters */
|
|
172
171
|
ZSTDMT_bufferPool* newBufPool;
|
|
173
172
|
ZSTDMT_freeBufferPool(srcBufPool);
|
|
174
|
-
newBufPool = ZSTDMT_createBufferPool(
|
|
173
|
+
newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
|
|
175
174
|
if (newBufPool==NULL) return newBufPool;
|
|
176
175
|
ZSTDMT_setBufferSize(newBufPool, bSize);
|
|
177
176
|
return newBufPool;
|
|
@@ -263,6 +262,16 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
|
|
|
263
262
|
ZSTD_customFree(buf.start, bufPool->cMem);
|
|
264
263
|
}
|
|
265
264
|
|
|
265
|
+
/* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.
|
|
266
|
+
* The 3 additional buffers are as follows:
|
|
267
|
+
* 1 buffer for input loading
|
|
268
|
+
* 1 buffer for "next input" when submitting current one
|
|
269
|
+
* 1 buffer stuck in queue */
|
|
270
|
+
#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
|
|
271
|
+
|
|
272
|
+
/* After a worker releases its rawSeqStore, it is immediately ready for reuse.
|
|
273
|
+
* So we only need one seq buffer per worker. */
|
|
274
|
+
#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
|
|
266
275
|
|
|
267
276
|
/* ===== Seq Pool Wrapper ====== */
|
|
268
277
|
|
|
@@ -316,7 +325,7 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
|
|
|
316
325
|
|
|
317
326
|
static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
|
|
318
327
|
{
|
|
319
|
-
ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
|
|
328
|
+
ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
|
|
320
329
|
if (seqPool == NULL) return NULL;
|
|
321
330
|
ZSTDMT_setNbSeq(seqPool, 0);
|
|
322
331
|
return seqPool;
|
|
@@ -329,7 +338,7 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
|
|
|
329
338
|
|
|
330
339
|
static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
|
|
331
340
|
{
|
|
332
|
-
return ZSTDMT_expandBufferPool(pool, nbWorkers);
|
|
341
|
+
return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
|
|
333
342
|
}
|
|
334
343
|
|
|
335
344
|
|
|
@@ -467,29 +476,27 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
|
|
|
467
476
|
ZSTD_dictContentType_e dictContentType)
|
|
468
477
|
{
|
|
469
478
|
/* Adjust parameters */
|
|
470
|
-
if (params.ldmParams.enableLdm) {
|
|
479
|
+
if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
471
480
|
DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
|
|
472
481
|
ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
|
|
473
482
|
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
|
|
474
483
|
assert(params.ldmParams.hashRateLog < 32);
|
|
475
|
-
serialState->ldmState.hashPower =
|
|
476
|
-
ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
|
|
477
484
|
} else {
|
|
478
485
|
ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
|
|
479
486
|
}
|
|
480
487
|
serialState->nextJobID = 0;
|
|
481
488
|
if (params.fParams.checksumFlag)
|
|
482
489
|
XXH64_reset(&serialState->xxhState, 0);
|
|
483
|
-
if (params.ldmParams.enableLdm) {
|
|
490
|
+
if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
484
491
|
ZSTD_customMem cMem = params.customMem;
|
|
485
492
|
unsigned const hashLog = params.ldmParams.hashLog;
|
|
486
493
|
size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
|
|
487
494
|
unsigned const bucketLog =
|
|
488
495
|
params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
|
|
489
|
-
size_t const bucketSize = (size_t)1 << bucketLog;
|
|
490
496
|
unsigned const prevBucketLog =
|
|
491
497
|
serialState->params.ldmParams.hashLog -
|
|
492
498
|
serialState->params.ldmParams.bucketSizeLog;
|
|
499
|
+
size_t const numBuckets = (size_t)1 << bucketLog;
|
|
493
500
|
/* Size the seq pool tables */
|
|
494
501
|
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
|
|
495
502
|
/* Reset the window */
|
|
@@ -501,20 +508,20 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
|
|
|
501
508
|
}
|
|
502
509
|
if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
|
|
503
510
|
ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
|
|
504
|
-
serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(
|
|
511
|
+
serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
|
|
505
512
|
}
|
|
506
513
|
if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
|
|
507
514
|
return 1;
|
|
508
515
|
/* Zero the tables */
|
|
509
516
|
ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
|
|
510
|
-
ZSTD_memset(serialState->ldmState.bucketOffsets, 0,
|
|
517
|
+
ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
|
|
511
518
|
|
|
512
519
|
/* Update window state and fill hash table with dict */
|
|
513
520
|
serialState->ldmState.loadedDictEnd = 0;
|
|
514
521
|
if (dictSize > 0) {
|
|
515
522
|
if (dictContentType == ZSTD_dct_rawContent) {
|
|
516
523
|
BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
|
|
517
|
-
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
|
|
524
|
+
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
|
|
518
525
|
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
|
|
519
526
|
serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
|
|
520
527
|
} else {
|
|
@@ -566,12 +573,12 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
|
|
|
566
573
|
/* A future job may error and skip our job */
|
|
567
574
|
if (serialState->nextJobID == jobID) {
|
|
568
575
|
/* It is now our turn, do any processing necessary */
|
|
569
|
-
if (serialState->params.ldmParams.enableLdm) {
|
|
576
|
+
if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
570
577
|
size_t error;
|
|
571
578
|
assert(seqStore.seq != NULL && seqStore.pos == 0 &&
|
|
572
579
|
seqStore.size == 0 && seqStore.capacity > 0);
|
|
573
580
|
assert(src.size <= serialState->params.jobSize);
|
|
574
|
-
ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
|
|
581
|
+
ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
|
|
575
582
|
error = ZSTD_ldm_generateSequences(
|
|
576
583
|
&serialState->ldmState, &seqStore,
|
|
577
584
|
&serialState->params.ldmParams, src.start, src.size);
|
|
@@ -596,7 +603,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
|
|
|
596
603
|
if (seqStore.size > 0) {
|
|
597
604
|
size_t const err = ZSTD_referenceExternalSequences(
|
|
598
605
|
jobCCtx, seqStore.seq, seqStore.size);
|
|
599
|
-
assert(serialState->params.ldmParams.enableLdm);
|
|
606
|
+
assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
|
|
600
607
|
assert(!ZSTD_isError(err));
|
|
601
608
|
(void)err;
|
|
602
609
|
}
|
|
@@ -674,7 +681,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
|
|
674
681
|
if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
|
|
675
682
|
job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
|
|
676
683
|
}
|
|
677
|
-
if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
|
|
684
|
+
if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
|
|
678
685
|
JOB_ERROR(ERROR(memory_allocation));
|
|
679
686
|
|
|
680
687
|
/* Don't compute the checksum for chunks, since we compute it externally,
|
|
@@ -682,7 +689,9 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
|
|
682
689
|
*/
|
|
683
690
|
if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
|
|
684
691
|
/* Don't run LDM for the chunks, since we handle it externally */
|
|
685
|
-
jobParams.ldmParams.enableLdm =
|
|
692
|
+
jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
|
|
693
|
+
/* Correct nbWorkers to 0. */
|
|
694
|
+
jobParams.nbWorkers = 0;
|
|
686
695
|
|
|
687
696
|
|
|
688
697
|
/* init */
|
|
@@ -695,6 +704,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
|
|
695
704
|
{ size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
|
|
696
705
|
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
|
|
697
706
|
}
|
|
707
|
+
if (!job->firstJob) {
|
|
708
|
+
size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
|
|
709
|
+
if (ZSTD_isError(err)) JOB_ERROR(err);
|
|
710
|
+
}
|
|
698
711
|
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
|
|
699
712
|
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
|
|
700
713
|
ZSTD_dtlm_fast,
|
|
@@ -707,7 +720,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
|
|
707
720
|
ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
|
|
708
721
|
|
|
709
722
|
if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
|
|
710
|
-
size_t const hSize =
|
|
723
|
+
size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
|
|
711
724
|
if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
|
|
712
725
|
DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
|
|
713
726
|
ZSTD_invalidateRepCodes(cctx);
|
|
@@ -725,7 +738,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
|
|
725
738
|
DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
|
|
726
739
|
assert(job->cSize == 0);
|
|
727
740
|
for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
|
|
728
|
-
size_t const cSize =
|
|
741
|
+
size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
|
|
729
742
|
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
|
|
730
743
|
ip += chunkSize;
|
|
731
744
|
op += cSize; assert(op < oend);
|
|
@@ -745,11 +758,18 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
|
|
745
758
|
size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
|
|
746
759
|
size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
|
|
747
760
|
size_t const cSize = (job->lastJob) ?
|
|
748
|
-
|
|
749
|
-
|
|
761
|
+
ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
|
|
762
|
+
ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
|
|
750
763
|
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
|
|
751
764
|
lastCBlockSize = cSize;
|
|
752
765
|
} }
|
|
766
|
+
if (!job->firstJob) {
|
|
767
|
+
/* Double check that we don't have an ext-dict, because then our
|
|
768
|
+
* repcode invalidation doesn't work.
|
|
769
|
+
*/
|
|
770
|
+
assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
|
|
771
|
+
}
|
|
772
|
+
ZSTD_CCtx_trace(cctx, 0);
|
|
753
773
|
|
|
754
774
|
_endJob:
|
|
755
775
|
ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
|
|
@@ -796,6 +816,15 @@ typedef struct {
|
|
|
796
816
|
static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
|
|
797
817
|
|
|
798
818
|
#define RSYNC_LENGTH 32
|
|
819
|
+
/* Don't create chunks smaller than the zstd block size.
|
|
820
|
+
* This stops us from regressing compression ratio too much,
|
|
821
|
+
* and ensures our output fits in ZSTD_compressBound().
|
|
822
|
+
*
|
|
823
|
+
* If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
|
|
824
|
+
* ZSTD_COMPRESSBOUND() will need to be updated.
|
|
825
|
+
*/
|
|
826
|
+
#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
|
|
827
|
+
#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)
|
|
799
828
|
|
|
800
829
|
typedef struct {
|
|
801
830
|
U64 hash;
|
|
@@ -916,7 +945,7 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
|
|
|
916
945
|
mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
|
|
917
946
|
assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
|
|
918
947
|
mtctx->jobIDMask = nbJobs - 1;
|
|
919
|
-
mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
|
|
948
|
+
mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
|
|
920
949
|
mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
|
|
921
950
|
mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
|
|
922
951
|
initError = ZSTDMT_serialState_init(&mtctx->serial);
|
|
@@ -1019,7 +1048,7 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
|
|
|
1019
1048
|
{
|
|
1020
1049
|
if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
|
|
1021
1050
|
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
|
|
1022
|
-
mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
|
|
1051
|
+
mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));
|
|
1023
1052
|
if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
|
|
1024
1053
|
mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
|
|
1025
1054
|
if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
|
|
@@ -1124,7 +1153,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
|
|
|
1124
1153
|
static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
|
|
1125
1154
|
{
|
|
1126
1155
|
unsigned jobLog;
|
|
1127
|
-
if (params->ldmParams.enableLdm) {
|
|
1156
|
+
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
1128
1157
|
/* In Long Range Mode, the windowLog is typically oversized.
|
|
1129
1158
|
* In which case, it's preferable to determine the jobSize
|
|
1130
1159
|
* based on cycleLog instead. */
|
|
@@ -1168,7 +1197,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
|
|
|
1168
1197
|
int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
|
|
1169
1198
|
int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
|
|
1170
1199
|
assert(0 <= overlapRLog && overlapRLog <= 8);
|
|
1171
|
-
if (params->ldmParams.enableLdm) {
|
|
1200
|
+
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
1172
1201
|
/* In Long Range Mode, the windowLog is typically oversized.
|
|
1173
1202
|
* In which case, it's preferable to determine the jobSize
|
|
1174
1203
|
* based on chainLog instead.
|
|
@@ -1239,9 +1268,11 @@ size_t ZSTDMT_initCStream_internal(
|
|
|
1239
1268
|
|
|
1240
1269
|
if (params.rsyncable) {
|
|
1241
1270
|
/* Aim for the targetsectionSize as the average job size. */
|
|
1242
|
-
U32 const
|
|
1243
|
-
U32 const rsyncBits = ZSTD_highbit32(
|
|
1244
|
-
|
|
1271
|
+
U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
|
|
1272
|
+
U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
|
|
1273
|
+
/* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
|
|
1274
|
+
* expected job size is at least 4x larger. */
|
|
1275
|
+
assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
|
|
1245
1276
|
DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
|
|
1246
1277
|
mtctx->rsync.hash = 0;
|
|
1247
1278
|
mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
|
|
@@ -1253,7 +1284,7 @@ size_t ZSTDMT_initCStream_internal(
|
|
|
1253
1284
|
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
|
|
1254
1285
|
{
|
|
1255
1286
|
/* If ldm is enabled we need windowSize space. */
|
|
1256
|
-
size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
|
|
1287
|
+
size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
|
|
1257
1288
|
/* Two buffers of slack, plus extra space for the overlap
|
|
1258
1289
|
* This is the minimum slack that LDM works with. One extra because
|
|
1259
1290
|
* flush might waste up to targetSectionSize-1 bytes. Another extra
|
|
@@ -1528,17 +1559,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
|
|
|
1528
1559
|
static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
|
|
1529
1560
|
{
|
|
1530
1561
|
BYTE const* const bufferStart = (BYTE const*)buffer.start;
|
|
1531
|
-
BYTE const* const bufferEnd = bufferStart + buffer.capacity;
|
|
1532
1562
|
BYTE const* const rangeStart = (BYTE const*)range.start;
|
|
1533
|
-
BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
|
|
1534
1563
|
|
|
1535
1564
|
if (rangeStart == NULL || bufferStart == NULL)
|
|
1536
1565
|
return 0;
|
|
1537
|
-
/* Empty ranges cannot overlap */
|
|
1538
|
-
if (bufferStart == bufferEnd || rangeStart == rangeEnd)
|
|
1539
|
-
return 0;
|
|
1540
1566
|
|
|
1541
|
-
|
|
1567
|
+
{
|
|
1568
|
+
BYTE const* const bufferEnd = bufferStart + buffer.capacity;
|
|
1569
|
+
BYTE const* const rangeEnd = rangeStart + range.size;
|
|
1570
|
+
|
|
1571
|
+
/* Empty ranges cannot overlap */
|
|
1572
|
+
if (bufferStart == bufferEnd || rangeStart == rangeEnd)
|
|
1573
|
+
return 0;
|
|
1574
|
+
|
|
1575
|
+
return bufferStart < rangeEnd && rangeStart < bufferEnd;
|
|
1576
|
+
}
|
|
1542
1577
|
}
|
|
1543
1578
|
|
|
1544
1579
|
static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
|
|
@@ -1565,7 +1600,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
|
|
|
1565
1600
|
|
|
1566
1601
|
static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
|
|
1567
1602
|
{
|
|
1568
|
-
if (mtctx->params.ldmParams.enableLdm) {
|
|
1603
|
+
if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
1569
1604
|
ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
|
|
1570
1605
|
DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
|
|
1571
1606
|
DEBUGLOG(5, "source [0x%zx, 0x%zx)",
|
|
@@ -1668,6 +1703,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
|
|
1668
1703
|
if (!mtctx->params.rsyncable)
|
|
1669
1704
|
/* Rsync is disabled. */
|
|
1670
1705
|
return syncPoint;
|
|
1706
|
+
if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
|
|
1707
|
+
/* We don't emit synchronization points if it would produce too small blocks.
|
|
1708
|
+
* We don't have enough input to find a synchronization point, so don't look.
|
|
1709
|
+
*/
|
|
1710
|
+
return syncPoint;
|
|
1671
1711
|
if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
|
|
1672
1712
|
/* Not enough to compute the hash.
|
|
1673
1713
|
* We will miss any synchronization points in this RSYNC_LENGTH byte
|
|
@@ -1678,10 +1718,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
|
|
1678
1718
|
*/
|
|
1679
1719
|
return syncPoint;
|
|
1680
1720
|
/* Initialize the loop variables. */
|
|
1681
|
-
if (mtctx->inBuff.filled
|
|
1682
|
-
/* We
|
|
1721
|
+
if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
|
|
1722
|
+
/* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
|
|
1723
|
+
* because they can't possibly be a sync point. So we can start
|
|
1724
|
+
* part way through the input buffer.
|
|
1725
|
+
*/
|
|
1726
|
+
pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
|
|
1727
|
+
if (pos >= RSYNC_LENGTH) {
|
|
1728
|
+
prev = istart + pos - RSYNC_LENGTH;
|
|
1729
|
+
hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
|
|
1730
|
+
} else {
|
|
1731
|
+
assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
|
|
1732
|
+
prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
|
|
1733
|
+
hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
|
|
1734
|
+
hash = ZSTD_rollingHash_append(hash, istart, pos);
|
|
1735
|
+
}
|
|
1736
|
+
} else {
|
|
1737
|
+
/* We have enough bytes buffered to initialize the hash,
|
|
1738
|
+
* and have processed enough bytes to find a sync point.
|
|
1683
1739
|
* Start scanning at the beginning of the input.
|
|
1684
1740
|
*/
|
|
1741
|
+
assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
|
|
1742
|
+
assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
|
|
1685
1743
|
pos = 0;
|
|
1686
1744
|
prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
|
|
1687
1745
|
hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
|
|
@@ -1695,16 +1753,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
|
|
1695
1753
|
syncPoint.flush = 1;
|
|
1696
1754
|
return syncPoint;
|
|
1697
1755
|
}
|
|
1698
|
-
} else {
|
|
1699
|
-
/* We don't have enough bytes buffered to initialize the hash, but
|
|
1700
|
-
* we know we have at least RSYNC_LENGTH bytes total.
|
|
1701
|
-
* Start scanning after the first RSYNC_LENGTH bytes less the bytes
|
|
1702
|
-
* already buffered.
|
|
1703
|
-
*/
|
|
1704
|
-
pos = RSYNC_LENGTH - mtctx->inBuff.filled;
|
|
1705
|
-
prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
|
|
1706
|
-
hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
|
|
1707
|
-
hash = ZSTD_rollingHash_append(hash, istart, pos);
|
|
1708
1756
|
}
|
|
1709
1757
|
/* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
|
|
1710
1758
|
* through the input. If we hit a synchronization point, then cut the
|
|
@@ -1714,16 +1762,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
|
|
1714
1762
|
* then a block will be emitted anyways, but this is okay, since if we
|
|
1715
1763
|
* are already synchronized we will remain synchronized.
|
|
1716
1764
|
*/
|
|
1765
|
+
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
|
|
1717
1766
|
for (; pos < syncPoint.toLoad; ++pos) {
|
|
1718
1767
|
BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
|
|
1719
|
-
/*
|
|
1768
|
+
/* This assert is very expensive, and Debian compiles with asserts enabled.
|
|
1769
|
+
* So disable it for now. We can get similar coverage by checking it at the
|
|
1770
|
+
* beginning & end of the loop.
|
|
1771
|
+
* assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
|
|
1772
|
+
*/
|
|
1720
1773
|
hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
|
|
1774
|
+
assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
|
|
1721
1775
|
if ((hash & hitMask) == hitMask) {
|
|
1722
1776
|
syncPoint.toLoad = pos + 1;
|
|
1723
1777
|
syncPoint.flush = 1;
|
|
1778
|
+
++pos; /* for assert */
|
|
1724
1779
|
break;
|
|
1725
1780
|
}
|
|
1726
1781
|
}
|
|
1782
|
+
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
|
|
1727
1783
|
return syncPoint;
|
|
1728
1784
|
}
|
|
1729
1785
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -32,11 +32,11 @@
|
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
/* === Constants === */
|
|
35
|
-
#ifndef ZSTDMT_NBWORKERS_MAX
|
|
36
|
-
# define ZSTDMT_NBWORKERS_MAX
|
|
35
|
+
#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
|
|
36
|
+
# define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
|
|
37
37
|
#endif
|
|
38
|
-
#ifndef ZSTDMT_JOBSIZE_MIN
|
|
39
|
-
# define ZSTDMT_JOBSIZE_MIN (
|
|
38
|
+
#ifndef ZSTDMT_JOBSIZE_MIN /* a different value can be selected at compile time */
|
|
39
|
+
# define ZSTDMT_JOBSIZE_MIN (512 KB)
|
|
40
40
|
#endif
|
|
41
41
|
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
|
|
42
42
|
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
|
|
@@ -65,8 +65,11 @@ size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
|
|
65
65
|
* Private use only. Init streaming operation.
|
|
66
66
|
* expects params to be valid.
|
|
67
67
|
* must receive dict, or cdict, or none, but not both.
|
|
68
|
+
* mtctx can be freshly constructed or reused from a prior compression.
|
|
69
|
+
* If mtctx is reused, memory allocations from the prior compression may not be freed,
|
|
70
|
+
* even if they are not needed for the current compression.
|
|
68
71
|
* @return : 0, or an error code */
|
|
69
|
-
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx*
|
|
72
|
+
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* mtctx,
|
|
70
73
|
const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
|
|
71
74
|
const ZSTD_CDict* cdict,
|
|
72
75
|
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
|