zstd-ruby 1.3.7.0 → 1.3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +15 -2
- data/ext/zstdruby/libzstd/Makefile +37 -2
- data/ext/zstdruby/libzstd/README.md +67 -41
- data/ext/zstdruby/libzstd/common/bitstream.h +2 -2
- data/ext/zstdruby/libzstd/common/compiler.h +19 -12
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +22 -11
- data/ext/zstdruby/libzstd/common/error_private.c +6 -0
- data/ext/zstdruby/libzstd/common/fse.h +2 -2
- data/ext/zstdruby/libzstd/common/huf.h +25 -1
- data/ext/zstdruby/libzstd/common/pool.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_common.c +3 -1
- data/ext/zstdruby/libzstd/common/zstd_errors.h +1 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +11 -2
- data/ext/zstdruby/libzstd/compress/fse_compress.c +3 -3
- data/ext/zstdruby/libzstd/compress/hist.c +19 -11
- data/ext/zstdruby/libzstd/compress/hist.h +11 -8
- data/ext/zstdruby/libzstd/compress/huf_compress.c +33 -31
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +621 -371
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +90 -28
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +4 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +15 -15
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +25 -18
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +18 -67
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +2 -6
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +133 -48
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +8 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +229 -73
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +18 -10
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +178 -42
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +240 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +44 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +244 -1680
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1307 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +59 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +168 -0
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +13 -11
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +15 -15
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +28 -28
- data/ext/zstdruby/libzstd/dll/libzstd.def +0 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +15 -15
- data/ext/zstdruby/libzstd/zstd.h +1208 -968
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -2
@@ -26,6 +26,10 @@ size_t ZSTD_compressBlock_btopt(
|
|
26
26
|
size_t ZSTD_compressBlock_btultra(
|
27
27
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
28
28
|
void const* src, size_t srcSize);
|
29
|
+
size_t ZSTD_compressBlock_btultra2(
|
30
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
31
|
+
void const* src, size_t srcSize);
|
32
|
+
|
29
33
|
|
30
34
|
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
31
35
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
@@ -41,6 +45,10 @@ size_t ZSTD_compressBlock_btultra_extDict(
|
|
41
45
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
42
46
|
void const* src, size_t srcSize);
|
43
47
|
|
48
|
+
/* note : no btultra2 variant for extDict nor dictMatchState,
|
49
|
+
* because btultra2 is not meant to work with dictionaries
|
50
|
+
* and is only specific for the first block (no prefix) */
|
51
|
+
|
44
52
|
#if defined (__cplusplus)
|
45
53
|
}
|
46
54
|
#endif
|
@@ -9,21 +9,19 @@
|
|
9
9
|
*/
|
10
10
|
|
11
11
|
|
12
|
-
/* ====== Tuning parameters ====== */
|
13
|
-
#define ZSTDMT_NBWORKERS_MAX 200
|
14
|
-
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */
|
15
|
-
#define ZSTDMT_OVERLAPLOG_DEFAULT 6
|
16
|
-
|
17
|
-
|
18
12
|
/* ====== Compiler specifics ====== */
|
19
13
|
#if defined(_MSC_VER)
|
20
14
|
# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
|
21
15
|
#endif
|
22
16
|
|
23
17
|
|
18
|
+
/* ====== Constants ====== */
|
19
|
+
#define ZSTDMT_OVERLAPLOG_DEFAULT 0
|
20
|
+
|
21
|
+
|
24
22
|
/* ====== Dependencies ====== */
|
25
23
|
#include <string.h> /* memcpy, memset */
|
26
|
-
#include <limits.h> /* INT_MAX */
|
24
|
+
#include <limits.h> /* INT_MAX, UINT_MAX */
|
27
25
|
#include "pool.h" /* threadpool */
|
28
26
|
#include "threading.h" /* mutex */
|
29
27
|
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
@@ -57,9 +55,9 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
|
|
57
55
|
static clock_t _ticksPerSecond = 0;
|
58
56
|
if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
|
59
57
|
|
60
|
-
{
|
61
|
-
|
62
|
-
}
|
58
|
+
{ struct tms junk; clock_t newTicks = (clock_t) times(&junk);
|
59
|
+
return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
|
60
|
+
} }
|
63
61
|
|
64
62
|
#define MUTEX_WAIT_TIME_DLEVEL 6
|
65
63
|
#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
|
@@ -342,8 +340,8 @@ static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
|
|
342
340
|
|
343
341
|
typedef struct {
|
344
342
|
ZSTD_pthread_mutex_t poolMutex;
|
345
|
-
|
346
|
-
|
343
|
+
int totalCCtx;
|
344
|
+
int availCCtx;
|
347
345
|
ZSTD_customMem cMem;
|
348
346
|
ZSTD_CCtx* cctx[1]; /* variable size */
|
349
347
|
} ZSTDMT_CCtxPool;
|
@@ -351,16 +349,16 @@ typedef struct {
|
|
351
349
|
/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
|
352
350
|
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
|
353
351
|
{
|
354
|
-
|
355
|
-
for (
|
356
|
-
ZSTD_freeCCtx(pool->cctx[
|
352
|
+
int cid;
|
353
|
+
for (cid=0; cid<pool->totalCCtx; cid++)
|
354
|
+
ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
|
357
355
|
ZSTD_pthread_mutex_destroy(&pool->poolMutex);
|
358
356
|
ZSTD_free(pool, pool->cMem);
|
359
357
|
}
|
360
358
|
|
361
359
|
/* ZSTDMT_createCCtxPool() :
|
362
360
|
* implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
|
363
|
-
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(
|
361
|
+
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
|
364
362
|
ZSTD_customMem cMem)
|
365
363
|
{
|
366
364
|
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
|
@@ -381,7 +379,7 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
|
|
381
379
|
}
|
382
380
|
|
383
381
|
static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
|
384
|
-
|
382
|
+
int nbWorkers)
|
385
383
|
{
|
386
384
|
if (srcPool==NULL) return NULL;
|
387
385
|
if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
|
@@ -469,9 +467,9 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
|
|
469
467
|
DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
|
470
468
|
ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
|
471
469
|
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
|
472
|
-
assert(params.ldmParams.
|
470
|
+
assert(params.ldmParams.hashRateLog < 32);
|
473
471
|
serialState->ldmState.hashPower =
|
474
|
-
|
472
|
+
ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
|
475
473
|
} else {
|
476
474
|
memset(&params.ldmParams, 0, sizeof(params.ldmParams));
|
477
475
|
}
|
@@ -674,7 +672,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
|
674
672
|
if (ZSTD_isError(initError)) JOB_ERROR(initError);
|
675
673
|
} else { /* srcStart points at reloaded section */
|
676
674
|
U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
|
677
|
-
{ size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams,
|
675
|
+
{ size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
|
678
676
|
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
|
679
677
|
}
|
680
678
|
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
|
@@ -777,6 +775,14 @@ typedef struct {
|
|
777
775
|
|
778
776
|
static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
|
779
777
|
|
778
|
+
#define RSYNC_LENGTH 32
|
779
|
+
|
780
|
+
typedef struct {
|
781
|
+
U64 hash;
|
782
|
+
U64 hitMask;
|
783
|
+
U64 primePower;
|
784
|
+
} rsyncState_t;
|
785
|
+
|
780
786
|
struct ZSTDMT_CCtx_s {
|
781
787
|
POOL_ctx* factory;
|
782
788
|
ZSTDMT_jobDescription* jobs;
|
@@ -790,6 +796,7 @@ struct ZSTDMT_CCtx_s {
|
|
790
796
|
inBuff_t inBuff;
|
791
797
|
roundBuff_t roundBuff;
|
792
798
|
serialState_t serial;
|
799
|
+
rsyncState_t rsync;
|
793
800
|
unsigned singleBlockingThread;
|
794
801
|
unsigned jobIDMask;
|
795
802
|
unsigned doneJobID;
|
@@ -859,7 +866,7 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
|
|
859
866
|
{
|
860
867
|
if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
|
861
868
|
params->nbWorkers = nbWorkers;
|
862
|
-
params->
|
869
|
+
params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
|
863
870
|
params->jobSize = 0;
|
864
871
|
return nbWorkers;
|
865
872
|
}
|
@@ -969,52 +976,59 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
|
|
969
976
|
}
|
970
977
|
|
971
978
|
/* Internal only */
|
972
|
-
size_t
|
973
|
-
|
979
|
+
size_t
|
980
|
+
ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
|
981
|
+
ZSTDMT_parameter parameter,
|
982
|
+
int value)
|
983
|
+
{
|
974
984
|
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
|
975
985
|
switch(parameter)
|
976
986
|
{
|
977
987
|
case ZSTDMT_p_jobSize :
|
978
|
-
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %
|
979
|
-
if (
|
980
|
-
|
988
|
+
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
|
989
|
+
if ( value != 0 /* default */
|
990
|
+
&& value < ZSTDMT_JOBSIZE_MIN)
|
981
991
|
value = ZSTDMT_JOBSIZE_MIN;
|
982
|
-
|
983
|
-
|
992
|
+
assert(value >= 0);
|
993
|
+
if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX;
|
984
994
|
params->jobSize = value;
|
985
995
|
return value;
|
986
|
-
|
987
|
-
|
988
|
-
DEBUGLOG(4, "
|
989
|
-
|
996
|
+
|
997
|
+
case ZSTDMT_p_overlapLog :
|
998
|
+
DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
|
999
|
+
if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN;
|
1000
|
+
if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX;
|
1001
|
+
params->overlapLog = value;
|
1002
|
+
return value;
|
1003
|
+
|
1004
|
+
case ZSTDMT_p_rsyncable :
|
1005
|
+
value = (value != 0);
|
1006
|
+
params->rsyncable = value;
|
990
1007
|
return value;
|
1008
|
+
|
991
1009
|
default :
|
992
1010
|
return ERROR(parameter_unsupported);
|
993
1011
|
}
|
994
1012
|
}
|
995
1013
|
|
996
|
-
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
|
1014
|
+
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
|
997
1015
|
{
|
998
1016
|
DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
|
999
|
-
|
1000
|
-
{
|
1001
|
-
case ZSTDMT_p_jobSize :
|
1002
|
-
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
|
1003
|
-
case ZSTDMT_p_overlapSectionLog :
|
1004
|
-
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
|
1005
|
-
default :
|
1006
|
-
return ERROR(parameter_unsupported);
|
1007
|
-
}
|
1017
|
+
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
|
1008
1018
|
}
|
1009
1019
|
|
1010
|
-
size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
|
1020
|
+
size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
|
1011
1021
|
{
|
1012
1022
|
switch (parameter) {
|
1013
1023
|
case ZSTDMT_p_jobSize:
|
1014
|
-
|
1024
|
+
assert(mtctx->params.jobSize <= INT_MAX);
|
1025
|
+
*value = (int)(mtctx->params.jobSize);
|
1015
1026
|
break;
|
1016
|
-
case
|
1017
|
-
*value = mtctx->params.
|
1027
|
+
case ZSTDMT_p_overlapLog:
|
1028
|
+
*value = mtctx->params.overlapLog;
|
1029
|
+
break;
|
1030
|
+
case ZSTDMT_p_rsyncable:
|
1031
|
+
*value = mtctx->params.rsyncable;
|
1018
1032
|
break;
|
1019
1033
|
default:
|
1020
1034
|
return ERROR(parameter_unsupported);
|
@@ -1140,22 +1154,66 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
|
|
1140
1154
|
/* ===== Multi-threaded compression ===== */
|
1141
1155
|
/* ------------------------------------------ */
|
1142
1156
|
|
1143
|
-
static
|
1157
|
+
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
|
1144
1158
|
{
|
1145
1159
|
if (params.ldmParams.enableLdm)
|
1160
|
+
/* In Long Range Mode, the windowLog is typically oversized.
|
1161
|
+
* In which case, it's preferable to determine the jobSize
|
1162
|
+
* based on chainLog instead. */
|
1146
1163
|
return MAX(21, params.cParams.chainLog + 4);
|
1147
1164
|
return MAX(20, params.cParams.windowLog + 2);
|
1148
1165
|
}
|
1149
1166
|
|
1150
|
-
static
|
1167
|
+
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
|
1151
1168
|
{
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1169
|
+
switch(strat)
|
1170
|
+
{
|
1171
|
+
case ZSTD_btultra2:
|
1172
|
+
return 9;
|
1173
|
+
case ZSTD_btultra:
|
1174
|
+
case ZSTD_btopt:
|
1175
|
+
return 8;
|
1176
|
+
case ZSTD_btlazy2:
|
1177
|
+
case ZSTD_lazy2:
|
1178
|
+
return 7;
|
1179
|
+
case ZSTD_lazy:
|
1180
|
+
case ZSTD_greedy:
|
1181
|
+
case ZSTD_dfast:
|
1182
|
+
case ZSTD_fast:
|
1183
|
+
default:;
|
1184
|
+
}
|
1185
|
+
return 6;
|
1156
1186
|
}
|
1157
1187
|
|
1158
|
-
static
|
1188
|
+
static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
|
1189
|
+
{
|
1190
|
+
assert(0 <= ovlog && ovlog <= 9);
|
1191
|
+
if (ovlog == 0) return ZSTDMT_overlapLog_default(strat);
|
1192
|
+
return ovlog;
|
1193
|
+
}
|
1194
|
+
|
1195
|
+
static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
|
1196
|
+
{
|
1197
|
+
int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
|
1198
|
+
int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
|
1199
|
+
assert(0 <= overlapRLog && overlapRLog <= 8);
|
1200
|
+
if (params.ldmParams.enableLdm) {
|
1201
|
+
/* In Long Range Mode, the windowLog is typically oversized.
|
1202
|
+
* In which case, it's preferable to determine the jobSize
|
1203
|
+
* based on chainLog instead.
|
1204
|
+
* Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
|
1205
|
+
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
|
1206
|
+
- overlapRLog;
|
1207
|
+
}
|
1208
|
+
assert(0 <= ovLog && ovLog <= 30);
|
1209
|
+
DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
|
1210
|
+
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
|
1211
|
+
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
|
1212
|
+
}
|
1213
|
+
|
1214
|
+
static unsigned
|
1215
|
+
ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
|
1216
|
+
{
|
1159
1217
|
assert(nbWorkers>0);
|
1160
1218
|
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
|
1161
1219
|
size_t const jobMaxSize = jobSizeTarget << 2;
|
@@ -1178,7 +1236,7 @@ static size_t ZSTDMT_compress_advanced_internal(
|
|
1178
1236
|
ZSTD_CCtx_params params)
|
1179
1237
|
{
|
1180
1238
|
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
|
1181
|
-
size_t const overlapSize = (
|
1239
|
+
size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
|
1182
1240
|
unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
|
1183
1241
|
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
|
1184
1242
|
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
|
@@ -1289,16 +1347,17 @@ static size_t ZSTDMT_compress_advanced_internal(
|
|
1289
1347
|
}
|
1290
1348
|
|
1291
1349
|
size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1295
|
-
|
1296
|
-
|
1350
|
+
void* dst, size_t dstCapacity,
|
1351
|
+
const void* src, size_t srcSize,
|
1352
|
+
const ZSTD_CDict* cdict,
|
1353
|
+
ZSTD_parameters params,
|
1354
|
+
int overlapLog)
|
1297
1355
|
{
|
1298
1356
|
ZSTD_CCtx_params cctxParams = mtctx->params;
|
1299
1357
|
cctxParams.cParams = params.cParams;
|
1300
1358
|
cctxParams.fParams = params.fParams;
|
1301
|
-
|
1359
|
+
assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
|
1360
|
+
cctxParams.overlapLog = overlapLog;
|
1302
1361
|
return ZSTDMT_compress_advanced_internal(mtctx,
|
1303
1362
|
dst, dstCapacity,
|
1304
1363
|
src, srcSize,
|
@@ -1311,8 +1370,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
1311
1370
|
const void* src, size_t srcSize,
|
1312
1371
|
int compressionLevel)
|
1313
1372
|
{
|
1314
|
-
U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
|
1315
1373
|
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
|
1374
|
+
int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
|
1316
1375
|
params.fParams.contentSizeFlag = 1;
|
1317
1376
|
return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
|
1318
1377
|
}
|
@@ -1339,8 +1398,8 @@ size_t ZSTDMT_initCStream_internal(
|
|
1339
1398
|
if (params.nbWorkers != mtctx->params.nbWorkers)
|
1340
1399
|
CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
1341
1400
|
|
1342
|
-
if (params.jobSize
|
1343
|
-
if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
1401
|
+
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
1402
|
+
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
1344
1403
|
|
1345
1404
|
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
|
1346
1405
|
if (mtctx->singleBlockingThread) {
|
@@ -1375,14 +1434,24 @@ size_t ZSTDMT_initCStream_internal(
|
|
1375
1434
|
mtctx->cdict = cdict;
|
1376
1435
|
}
|
1377
1436
|
|
1378
|
-
mtctx->targetPrefixSize = (
|
1379
|
-
DEBUGLOG(4, "overlapLog=%
|
1437
|
+
mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
|
1438
|
+
DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
|
1380
1439
|
mtctx->targetSectionSize = params.jobSize;
|
1381
1440
|
if (mtctx->targetSectionSize == 0) {
|
1382
1441
|
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
|
1383
1442
|
}
|
1443
|
+
if (params.rsyncable) {
|
1444
|
+
/* Aim for the targetsectionSize as the average job size. */
|
1445
|
+
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
|
1446
|
+
U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
|
1447
|
+
assert(jobSizeMB >= 1);
|
1448
|
+
DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
|
1449
|
+
mtctx->rsync.hash = 0;
|
1450
|
+
mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
|
1451
|
+
mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
|
1452
|
+
}
|
1384
1453
|
if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
|
1385
|
-
DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
|
1454
|
+
DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
|
1386
1455
|
DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
|
1387
1456
|
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
|
1388
1457
|
{
|
@@ -1818,6 +1887,89 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
|
|
1818
1887
|
return 1;
|
1819
1888
|
}
|
1820
1889
|
|
1890
|
+
typedef struct {
|
1891
|
+
size_t toLoad; /* The number of bytes to load from the input. */
|
1892
|
+
int flush; /* Boolean declaring if we must flush because we found a synchronization point. */
|
1893
|
+
} syncPoint_t;
|
1894
|
+
|
1895
|
+
/**
|
1896
|
+
* Searches through the input for a synchronization point. If one is found, we
|
1897
|
+
* will instruct the caller to flush, and return the number of bytes to load.
|
1898
|
+
* Otherwise, we will load as many bytes as possible and instruct the caller
|
1899
|
+
* to continue as normal.
|
1900
|
+
*/
|
1901
|
+
static syncPoint_t
|
1902
|
+
findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
1903
|
+
{
|
1904
|
+
BYTE const* const istart = (BYTE const*)input.src + input.pos;
|
1905
|
+
U64 const primePower = mtctx->rsync.primePower;
|
1906
|
+
U64 const hitMask = mtctx->rsync.hitMask;
|
1907
|
+
|
1908
|
+
syncPoint_t syncPoint;
|
1909
|
+
U64 hash;
|
1910
|
+
BYTE const* prev;
|
1911
|
+
size_t pos;
|
1912
|
+
|
1913
|
+
syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
|
1914
|
+
syncPoint.flush = 0;
|
1915
|
+
if (!mtctx->params.rsyncable)
|
1916
|
+
/* Rsync is disabled. */
|
1917
|
+
return syncPoint;
|
1918
|
+
if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
|
1919
|
+
/* Not enough to compute the hash.
|
1920
|
+
* We will miss any synchronization points in this RSYNC_LENGTH byte
|
1921
|
+
* window. However, since it depends only in the internal buffers, if the
|
1922
|
+
* state is already synchronized, we will remain synchronized.
|
1923
|
+
* Additionally, the probability that we miss a synchronization point is
|
1924
|
+
* low: RSYNC_LENGTH / targetSectionSize.
|
1925
|
+
*/
|
1926
|
+
return syncPoint;
|
1927
|
+
/* Initialize the loop variables. */
|
1928
|
+
if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
|
1929
|
+
/* We have enough bytes buffered to initialize the hash.
|
1930
|
+
* Start scanning at the beginning of the input.
|
1931
|
+
*/
|
1932
|
+
pos = 0;
|
1933
|
+
prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
|
1934
|
+
hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
|
1935
|
+
} else {
|
1936
|
+
/* We don't have enough bytes buffered to initialize the hash, but
|
1937
|
+
* we know we have at least RSYNC_LENGTH bytes total.
|
1938
|
+
* Start scanning after the first RSYNC_LENGTH bytes less the bytes
|
1939
|
+
* already buffered.
|
1940
|
+
*/
|
1941
|
+
pos = RSYNC_LENGTH - mtctx->inBuff.filled;
|
1942
|
+
prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
|
1943
|
+
hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
|
1944
|
+
hash = ZSTD_rollingHash_append(hash, istart, pos);
|
1945
|
+
}
|
1946
|
+
/* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
|
1947
|
+
* through the input. If we hit a synchronization point, then cut the
|
1948
|
+
* job off, and tell the compressor to flush the job. Otherwise, load
|
1949
|
+
* all the bytes and continue as normal.
|
1950
|
+
* If we go too long without a synchronization point (targetSectionSize)
|
1951
|
+
* then a block will be emitted anyways, but this is okay, since if we
|
1952
|
+
* are already synchronized we will remain synchronized.
|
1953
|
+
*/
|
1954
|
+
for (; pos < syncPoint.toLoad; ++pos) {
|
1955
|
+
BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
|
1956
|
+
/* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
|
1957
|
+
hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
|
1958
|
+
if ((hash & hitMask) == hitMask) {
|
1959
|
+
syncPoint.toLoad = pos + 1;
|
1960
|
+
syncPoint.flush = 1;
|
1961
|
+
break;
|
1962
|
+
}
|
1963
|
+
}
|
1964
|
+
return syncPoint;
|
1965
|
+
}
|
1966
|
+
|
1967
|
+
size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx)
|
1968
|
+
{
|
1969
|
+
size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled;
|
1970
|
+
if (hintInSize==0) hintInSize = mtctx->targetSectionSize;
|
1971
|
+
return hintInSize;
|
1972
|
+
}
|
1821
1973
|
|
1822
1974
|
/** ZSTDMT_compressStream_generic() :
|
1823
1975
|
* internal use only - exposed to be invoked from zstd_compress.c
|
@@ -1844,7 +1996,8 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
|
1844
1996
|
}
|
1845
1997
|
|
1846
1998
|
/* single-pass shortcut (note : synchronous-mode) */
|
1847
|
-
if ( (mtctx->
|
1999
|
+
if ( (!mtctx->params.rsyncable) /* rsyncable mode is disabled */
|
2000
|
+
&& (mtctx->nextJobID == 0) /* just started */
|
1848
2001
|
&& (mtctx->inBuff.filled == 0) /* nothing buffered */
|
1849
2002
|
&& (!mtctx->jobReady) /* no job already created */
|
1850
2003
|
&& (endOp == ZSTD_e_end) /* end order */
|
@@ -1876,14 +2029,17 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
|
1876
2029
|
DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
|
1877
2030
|
}
|
1878
2031
|
if (mtctx->inBuff.buffer.start != NULL) {
|
1879
|
-
|
2032
|
+
syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input);
|
2033
|
+
if (syncPoint.flush && endOp == ZSTD_e_continue) {
|
2034
|
+
endOp = ZSTD_e_flush;
|
2035
|
+
}
|
1880
2036
|
assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
|
1881
2037
|
DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
|
1882
|
-
(U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
|
1883
|
-
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
|
1884
|
-
input->pos += toLoad;
|
1885
|
-
mtctx->inBuff.filled += toLoad;
|
1886
|
-
forwardInputProgress = toLoad>0;
|
2038
|
+
(U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
|
2039
|
+
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
|
2040
|
+
input->pos += syncPoint.toLoad;
|
2041
|
+
mtctx->inBuff.filled += syncPoint.toLoad;
|
2042
|
+
forwardInputProgress = syncPoint.toLoad>0;
|
1887
2043
|
}
|
1888
2044
|
if ((input->pos < input->size) && (endOp == ZSTD_e_end))
|
1889
2045
|
endOp = ZSTD_e_flush; /* can't end now : not all input consumed */
|