zstd-ruby 1.3.7.0 → 1.3.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +15 -2
- data/ext/zstdruby/libzstd/Makefile +37 -2
- data/ext/zstdruby/libzstd/README.md +67 -41
- data/ext/zstdruby/libzstd/common/bitstream.h +2 -2
- data/ext/zstdruby/libzstd/common/compiler.h +19 -12
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +22 -11
- data/ext/zstdruby/libzstd/common/error_private.c +6 -0
- data/ext/zstdruby/libzstd/common/fse.h +2 -2
- data/ext/zstdruby/libzstd/common/huf.h +25 -1
- data/ext/zstdruby/libzstd/common/pool.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_common.c +3 -1
- data/ext/zstdruby/libzstd/common/zstd_errors.h +1 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +11 -2
- data/ext/zstdruby/libzstd/compress/fse_compress.c +3 -3
- data/ext/zstdruby/libzstd/compress/hist.c +19 -11
- data/ext/zstdruby/libzstd/compress/hist.h +11 -8
- data/ext/zstdruby/libzstd/compress/huf_compress.c +33 -31
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +621 -371
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +90 -28
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +4 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +15 -15
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +25 -18
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +18 -67
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +2 -6
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +133 -48
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +8 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +229 -73
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +18 -10
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +178 -42
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +240 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +44 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +244 -1680
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1307 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +59 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +168 -0
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +13 -11
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +15 -15
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +28 -28
- data/ext/zstdruby/libzstd/dll/libzstd.def +0 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +15 -15
- data/ext/zstdruby/libzstd/zstd.h +1208 -968
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -2
@@ -26,6 +26,10 @@ size_t ZSTD_compressBlock_btopt(
|
|
26
26
|
size_t ZSTD_compressBlock_btultra(
|
27
27
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
28
28
|
void const* src, size_t srcSize);
|
29
|
+
size_t ZSTD_compressBlock_btultra2(
|
30
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
31
|
+
void const* src, size_t srcSize);
|
32
|
+
|
29
33
|
|
30
34
|
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
31
35
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
@@ -41,6 +45,10 @@ size_t ZSTD_compressBlock_btultra_extDict(
|
|
41
45
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
42
46
|
void const* src, size_t srcSize);
|
43
47
|
|
48
|
+
/* note : no btultra2 variant for extDict nor dictMatchState,
|
49
|
+
* because btultra2 is not meant to work with dictionaries
|
50
|
+
* and is only specific for the first block (no prefix) */
|
51
|
+
|
44
52
|
#if defined (__cplusplus)
|
45
53
|
}
|
46
54
|
#endif
|
@@ -9,21 +9,19 @@
|
|
9
9
|
*/
|
10
10
|
|
11
11
|
|
12
|
-
/* ====== Tuning parameters ====== */
|
13
|
-
#define ZSTDMT_NBWORKERS_MAX 200
|
14
|
-
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */
|
15
|
-
#define ZSTDMT_OVERLAPLOG_DEFAULT 6
|
16
|
-
|
17
|
-
|
18
12
|
/* ====== Compiler specifics ====== */
|
19
13
|
#if defined(_MSC_VER)
|
20
14
|
# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
|
21
15
|
#endif
|
22
16
|
|
23
17
|
|
18
|
+
/* ====== Constants ====== */
|
19
|
+
#define ZSTDMT_OVERLAPLOG_DEFAULT 0
|
20
|
+
|
21
|
+
|
24
22
|
/* ====== Dependencies ====== */
|
25
23
|
#include <string.h> /* memcpy, memset */
|
26
|
-
#include <limits.h> /* INT_MAX */
|
24
|
+
#include <limits.h> /* INT_MAX, UINT_MAX */
|
27
25
|
#include "pool.h" /* threadpool */
|
28
26
|
#include "threading.h" /* mutex */
|
29
27
|
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
@@ -57,9 +55,9 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
|
|
57
55
|
static clock_t _ticksPerSecond = 0;
|
58
56
|
if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
|
59
57
|
|
60
|
-
{
|
61
|
-
|
62
|
-
}
|
58
|
+
{ struct tms junk; clock_t newTicks = (clock_t) times(&junk);
|
59
|
+
return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
|
60
|
+
} }
|
63
61
|
|
64
62
|
#define MUTEX_WAIT_TIME_DLEVEL 6
|
65
63
|
#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
|
@@ -342,8 +340,8 @@ static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
|
|
342
340
|
|
343
341
|
typedef struct {
|
344
342
|
ZSTD_pthread_mutex_t poolMutex;
|
345
|
-
|
346
|
-
|
343
|
+
int totalCCtx;
|
344
|
+
int availCCtx;
|
347
345
|
ZSTD_customMem cMem;
|
348
346
|
ZSTD_CCtx* cctx[1]; /* variable size */
|
349
347
|
} ZSTDMT_CCtxPool;
|
@@ -351,16 +349,16 @@ typedef struct {
|
|
351
349
|
/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
|
352
350
|
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
|
353
351
|
{
|
354
|
-
|
355
|
-
for (
|
356
|
-
ZSTD_freeCCtx(pool->cctx[
|
352
|
+
int cid;
|
353
|
+
for (cid=0; cid<pool->totalCCtx; cid++)
|
354
|
+
ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
|
357
355
|
ZSTD_pthread_mutex_destroy(&pool->poolMutex);
|
358
356
|
ZSTD_free(pool, pool->cMem);
|
359
357
|
}
|
360
358
|
|
361
359
|
/* ZSTDMT_createCCtxPool() :
|
362
360
|
* implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
|
363
|
-
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(
|
361
|
+
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
|
364
362
|
ZSTD_customMem cMem)
|
365
363
|
{
|
366
364
|
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
|
@@ -381,7 +379,7 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
|
|
381
379
|
}
|
382
380
|
|
383
381
|
static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
|
384
|
-
|
382
|
+
int nbWorkers)
|
385
383
|
{
|
386
384
|
if (srcPool==NULL) return NULL;
|
387
385
|
if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
|
@@ -469,9 +467,9 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
|
|
469
467
|
DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
|
470
468
|
ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
|
471
469
|
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
|
472
|
-
assert(params.ldmParams.
|
470
|
+
assert(params.ldmParams.hashRateLog < 32);
|
473
471
|
serialState->ldmState.hashPower =
|
474
|
-
|
472
|
+
ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
|
475
473
|
} else {
|
476
474
|
memset(&params.ldmParams, 0, sizeof(params.ldmParams));
|
477
475
|
}
|
@@ -674,7 +672,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
|
674
672
|
if (ZSTD_isError(initError)) JOB_ERROR(initError);
|
675
673
|
} else { /* srcStart points at reloaded section */
|
676
674
|
U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
|
677
|
-
{ size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams,
|
675
|
+
{ size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
|
678
676
|
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
|
679
677
|
}
|
680
678
|
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
|
@@ -777,6 +775,14 @@ typedef struct {
|
|
777
775
|
|
778
776
|
static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
|
779
777
|
|
778
|
+
#define RSYNC_LENGTH 32
|
779
|
+
|
780
|
+
typedef struct {
|
781
|
+
U64 hash;
|
782
|
+
U64 hitMask;
|
783
|
+
U64 primePower;
|
784
|
+
} rsyncState_t;
|
785
|
+
|
780
786
|
struct ZSTDMT_CCtx_s {
|
781
787
|
POOL_ctx* factory;
|
782
788
|
ZSTDMT_jobDescription* jobs;
|
@@ -790,6 +796,7 @@ struct ZSTDMT_CCtx_s {
|
|
790
796
|
inBuff_t inBuff;
|
791
797
|
roundBuff_t roundBuff;
|
792
798
|
serialState_t serial;
|
799
|
+
rsyncState_t rsync;
|
793
800
|
unsigned singleBlockingThread;
|
794
801
|
unsigned jobIDMask;
|
795
802
|
unsigned doneJobID;
|
@@ -859,7 +866,7 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
|
|
859
866
|
{
|
860
867
|
if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
|
861
868
|
params->nbWorkers = nbWorkers;
|
862
|
-
params->
|
869
|
+
params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
|
863
870
|
params->jobSize = 0;
|
864
871
|
return nbWorkers;
|
865
872
|
}
|
@@ -969,52 +976,59 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
|
|
969
976
|
}
|
970
977
|
|
971
978
|
/* Internal only */
|
972
|
-
size_t
|
973
|
-
|
979
|
+
size_t
|
980
|
+
ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
|
981
|
+
ZSTDMT_parameter parameter,
|
982
|
+
int value)
|
983
|
+
{
|
974
984
|
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
|
975
985
|
switch(parameter)
|
976
986
|
{
|
977
987
|
case ZSTDMT_p_jobSize :
|
978
|
-
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %
|
979
|
-
if (
|
980
|
-
|
988
|
+
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
|
989
|
+
if ( value != 0 /* default */
|
990
|
+
&& value < ZSTDMT_JOBSIZE_MIN)
|
981
991
|
value = ZSTDMT_JOBSIZE_MIN;
|
982
|
-
|
983
|
-
|
992
|
+
assert(value >= 0);
|
993
|
+
if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX;
|
984
994
|
params->jobSize = value;
|
985
995
|
return value;
|
986
|
-
|
987
|
-
|
988
|
-
DEBUGLOG(4, "
|
989
|
-
|
996
|
+
|
997
|
+
case ZSTDMT_p_overlapLog :
|
998
|
+
DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
|
999
|
+
if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN;
|
1000
|
+
if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX;
|
1001
|
+
params->overlapLog = value;
|
1002
|
+
return value;
|
1003
|
+
|
1004
|
+
case ZSTDMT_p_rsyncable :
|
1005
|
+
value = (value != 0);
|
1006
|
+
params->rsyncable = value;
|
990
1007
|
return value;
|
1008
|
+
|
991
1009
|
default :
|
992
1010
|
return ERROR(parameter_unsupported);
|
993
1011
|
}
|
994
1012
|
}
|
995
1013
|
|
996
|
-
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
|
1014
|
+
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
|
997
1015
|
{
|
998
1016
|
DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
|
999
|
-
|
1000
|
-
{
|
1001
|
-
case ZSTDMT_p_jobSize :
|
1002
|
-
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
|
1003
|
-
case ZSTDMT_p_overlapSectionLog :
|
1004
|
-
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
|
1005
|
-
default :
|
1006
|
-
return ERROR(parameter_unsupported);
|
1007
|
-
}
|
1017
|
+
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
|
1008
1018
|
}
|
1009
1019
|
|
1010
|
-
size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
|
1020
|
+
size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
|
1011
1021
|
{
|
1012
1022
|
switch (parameter) {
|
1013
1023
|
case ZSTDMT_p_jobSize:
|
1014
|
-
|
1024
|
+
assert(mtctx->params.jobSize <= INT_MAX);
|
1025
|
+
*value = (int)(mtctx->params.jobSize);
|
1015
1026
|
break;
|
1016
|
-
case
|
1017
|
-
*value = mtctx->params.
|
1027
|
+
case ZSTDMT_p_overlapLog:
|
1028
|
+
*value = mtctx->params.overlapLog;
|
1029
|
+
break;
|
1030
|
+
case ZSTDMT_p_rsyncable:
|
1031
|
+
*value = mtctx->params.rsyncable;
|
1018
1032
|
break;
|
1019
1033
|
default:
|
1020
1034
|
return ERROR(parameter_unsupported);
|
@@ -1140,22 +1154,66 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
|
|
1140
1154
|
/* ===== Multi-threaded compression ===== */
|
1141
1155
|
/* ------------------------------------------ */
|
1142
1156
|
|
1143
|
-
static
|
1157
|
+
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
|
1144
1158
|
{
|
1145
1159
|
if (params.ldmParams.enableLdm)
|
1160
|
+
/* In Long Range Mode, the windowLog is typically oversized.
|
1161
|
+
* In which case, it's preferable to determine the jobSize
|
1162
|
+
* based on chainLog instead. */
|
1146
1163
|
return MAX(21, params.cParams.chainLog + 4);
|
1147
1164
|
return MAX(20, params.cParams.windowLog + 2);
|
1148
1165
|
}
|
1149
1166
|
|
1150
|
-
static
|
1167
|
+
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
|
1151
1168
|
{
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1169
|
+
switch(strat)
|
1170
|
+
{
|
1171
|
+
case ZSTD_btultra2:
|
1172
|
+
return 9;
|
1173
|
+
case ZSTD_btultra:
|
1174
|
+
case ZSTD_btopt:
|
1175
|
+
return 8;
|
1176
|
+
case ZSTD_btlazy2:
|
1177
|
+
case ZSTD_lazy2:
|
1178
|
+
return 7;
|
1179
|
+
case ZSTD_lazy:
|
1180
|
+
case ZSTD_greedy:
|
1181
|
+
case ZSTD_dfast:
|
1182
|
+
case ZSTD_fast:
|
1183
|
+
default:;
|
1184
|
+
}
|
1185
|
+
return 6;
|
1156
1186
|
}
|
1157
1187
|
|
1158
|
-
static
|
1188
|
+
static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
|
1189
|
+
{
|
1190
|
+
assert(0 <= ovlog && ovlog <= 9);
|
1191
|
+
if (ovlog == 0) return ZSTDMT_overlapLog_default(strat);
|
1192
|
+
return ovlog;
|
1193
|
+
}
|
1194
|
+
|
1195
|
+
static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
|
1196
|
+
{
|
1197
|
+
int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
|
1198
|
+
int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
|
1199
|
+
assert(0 <= overlapRLog && overlapRLog <= 8);
|
1200
|
+
if (params.ldmParams.enableLdm) {
|
1201
|
+
/* In Long Range Mode, the windowLog is typically oversized.
|
1202
|
+
* In which case, it's preferable to determine the jobSize
|
1203
|
+
* based on chainLog instead.
|
1204
|
+
* Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
|
1205
|
+
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
|
1206
|
+
- overlapRLog;
|
1207
|
+
}
|
1208
|
+
assert(0 <= ovLog && ovLog <= 30);
|
1209
|
+
DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
|
1210
|
+
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
|
1211
|
+
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
|
1212
|
+
}
|
1213
|
+
|
1214
|
+
static unsigned
|
1215
|
+
ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
|
1216
|
+
{
|
1159
1217
|
assert(nbWorkers>0);
|
1160
1218
|
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
|
1161
1219
|
size_t const jobMaxSize = jobSizeTarget << 2;
|
@@ -1178,7 +1236,7 @@ static size_t ZSTDMT_compress_advanced_internal(
|
|
1178
1236
|
ZSTD_CCtx_params params)
|
1179
1237
|
{
|
1180
1238
|
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
|
1181
|
-
size_t const overlapSize = (
|
1239
|
+
size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
|
1182
1240
|
unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
|
1183
1241
|
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
|
1184
1242
|
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
|
@@ -1289,16 +1347,17 @@ static size_t ZSTDMT_compress_advanced_internal(
|
|
1289
1347
|
}
|
1290
1348
|
|
1291
1349
|
size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1295
|
-
|
1296
|
-
|
1350
|
+
void* dst, size_t dstCapacity,
|
1351
|
+
const void* src, size_t srcSize,
|
1352
|
+
const ZSTD_CDict* cdict,
|
1353
|
+
ZSTD_parameters params,
|
1354
|
+
int overlapLog)
|
1297
1355
|
{
|
1298
1356
|
ZSTD_CCtx_params cctxParams = mtctx->params;
|
1299
1357
|
cctxParams.cParams = params.cParams;
|
1300
1358
|
cctxParams.fParams = params.fParams;
|
1301
|
-
|
1359
|
+
assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
|
1360
|
+
cctxParams.overlapLog = overlapLog;
|
1302
1361
|
return ZSTDMT_compress_advanced_internal(mtctx,
|
1303
1362
|
dst, dstCapacity,
|
1304
1363
|
src, srcSize,
|
@@ -1311,8 +1370,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
1311
1370
|
const void* src, size_t srcSize,
|
1312
1371
|
int compressionLevel)
|
1313
1372
|
{
|
1314
|
-
U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
|
1315
1373
|
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
|
1374
|
+
int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
|
1316
1375
|
params.fParams.contentSizeFlag = 1;
|
1317
1376
|
return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
|
1318
1377
|
}
|
@@ -1339,8 +1398,8 @@ size_t ZSTDMT_initCStream_internal(
|
|
1339
1398
|
if (params.nbWorkers != mtctx->params.nbWorkers)
|
1340
1399
|
CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
1341
1400
|
|
1342
|
-
if (params.jobSize
|
1343
|
-
if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
1401
|
+
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
1402
|
+
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
1344
1403
|
|
1345
1404
|
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
|
1346
1405
|
if (mtctx->singleBlockingThread) {
|
@@ -1375,14 +1434,24 @@ size_t ZSTDMT_initCStream_internal(
|
|
1375
1434
|
mtctx->cdict = cdict;
|
1376
1435
|
}
|
1377
1436
|
|
1378
|
-
mtctx->targetPrefixSize = (
|
1379
|
-
DEBUGLOG(4, "overlapLog=%
|
1437
|
+
mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
|
1438
|
+
DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
|
1380
1439
|
mtctx->targetSectionSize = params.jobSize;
|
1381
1440
|
if (mtctx->targetSectionSize == 0) {
|
1382
1441
|
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
|
1383
1442
|
}
|
1443
|
+
if (params.rsyncable) {
|
1444
|
+
/* Aim for the targetSectionSize as the average job size. */
|
1445
|
+
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
|
1446
|
+
U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
|
1447
|
+
assert(jobSizeMB >= 1);
|
1448
|
+
DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
|
1449
|
+
mtctx->rsync.hash = 0;
|
1450
|
+
mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
|
1451
|
+
mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
|
1452
|
+
}
|
1384
1453
|
if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
|
1385
|
-
DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
|
1454
|
+
DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
|
1386
1455
|
DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
|
1387
1456
|
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
|
1388
1457
|
{
|
@@ -1818,6 +1887,89 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
|
|
1818
1887
|
return 1;
|
1819
1888
|
}
|
1820
1889
|
|
1890
|
+
typedef struct {
|
1891
|
+
size_t toLoad; /* The number of bytes to load from the input. */
|
1892
|
+
int flush; /* Boolean declaring if we must flush because we found a synchronization point. */
|
1893
|
+
} syncPoint_t;
|
1894
|
+
|
1895
|
+
/**
|
1896
|
+
* Searches through the input for a synchronization point. If one is found, we
|
1897
|
+
* will instruct the caller to flush, and return the number of bytes to load.
|
1898
|
+
* Otherwise, we will load as many bytes as possible and instruct the caller
|
1899
|
+
* to continue as normal.
|
1900
|
+
*/
|
1901
|
+
static syncPoint_t
|
1902
|
+
findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
1903
|
+
{
|
1904
|
+
BYTE const* const istart = (BYTE const*)input.src + input.pos;
|
1905
|
+
U64 const primePower = mtctx->rsync.primePower;
|
1906
|
+
U64 const hitMask = mtctx->rsync.hitMask;
|
1907
|
+
|
1908
|
+
syncPoint_t syncPoint;
|
1909
|
+
U64 hash;
|
1910
|
+
BYTE const* prev;
|
1911
|
+
size_t pos;
|
1912
|
+
|
1913
|
+
syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
|
1914
|
+
syncPoint.flush = 0;
|
1915
|
+
if (!mtctx->params.rsyncable)
|
1916
|
+
/* Rsync is disabled. */
|
1917
|
+
return syncPoint;
|
1918
|
+
if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
|
1919
|
+
/* Not enough to compute the hash.
|
1920
|
+
* We will miss any synchronization points in this RSYNC_LENGTH byte
|
1921
|
+
* window. However, since it depends only on the internal buffers, if the
|
1922
|
+
* state is already synchronized, we will remain synchronized.
|
1923
|
+
* Additionally, the probability that we miss a synchronization point is
|
1924
|
+
* low: RSYNC_LENGTH / targetSectionSize.
|
1925
|
+
*/
|
1926
|
+
return syncPoint;
|
1927
|
+
/* Initialize the loop variables. */
|
1928
|
+
if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
|
1929
|
+
/* We have enough bytes buffered to initialize the hash.
|
1930
|
+
* Start scanning at the beginning of the input.
|
1931
|
+
*/
|
1932
|
+
pos = 0;
|
1933
|
+
prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
|
1934
|
+
hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
|
1935
|
+
} else {
|
1936
|
+
/* We don't have enough bytes buffered to initialize the hash, but
|
1937
|
+
* we know we have at least RSYNC_LENGTH bytes total.
|
1938
|
+
* Start scanning after the first RSYNC_LENGTH bytes less the bytes
|
1939
|
+
* already buffered.
|
1940
|
+
*/
|
1941
|
+
pos = RSYNC_LENGTH - mtctx->inBuff.filled;
|
1942
|
+
prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
|
1943
|
+
hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
|
1944
|
+
hash = ZSTD_rollingHash_append(hash, istart, pos);
|
1945
|
+
}
|
1946
|
+
/* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
|
1947
|
+
* through the input. If we hit a synchronization point, then cut the
|
1948
|
+
* job off, and tell the compressor to flush the job. Otherwise, load
|
1949
|
+
* all the bytes and continue as normal.
|
1950
|
+
* If we go too long without a synchronization point (targetSectionSize)
|
1951
|
+
* then a block will be emitted anyways, but this is okay, since if we
|
1952
|
+
* are already synchronized we will remain synchronized.
|
1953
|
+
*/
|
1954
|
+
for (; pos < syncPoint.toLoad; ++pos) {
|
1955
|
+
BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
|
1956
|
+
/* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
|
1957
|
+
hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
|
1958
|
+
if ((hash & hitMask) == hitMask) {
|
1959
|
+
syncPoint.toLoad = pos + 1;
|
1960
|
+
syncPoint.flush = 1;
|
1961
|
+
break;
|
1962
|
+
}
|
1963
|
+
}
|
1964
|
+
return syncPoint;
|
1965
|
+
}
|
1966
|
+
|
1967
|
+
size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx)
|
1968
|
+
{
|
1969
|
+
size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled;
|
1970
|
+
if (hintInSize==0) hintInSize = mtctx->targetSectionSize;
|
1971
|
+
return hintInSize;
|
1972
|
+
}
|
1821
1973
|
|
1822
1974
|
/** ZSTDMT_compressStream_generic() :
|
1823
1975
|
* internal use only - exposed to be invoked from zstd_compress.c
|
@@ -1844,7 +1996,8 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
|
1844
1996
|
}
|
1845
1997
|
|
1846
1998
|
/* single-pass shortcut (note : synchronous-mode) */
|
1847
|
-
if ( (mtctx->
|
1999
|
+
if ( (!mtctx->params.rsyncable) /* rsyncable mode is disabled */
|
2000
|
+
&& (mtctx->nextJobID == 0) /* just started */
|
1848
2001
|
&& (mtctx->inBuff.filled == 0) /* nothing buffered */
|
1849
2002
|
&& (!mtctx->jobReady) /* no job already created */
|
1850
2003
|
&& (endOp == ZSTD_e_end) /* end order */
|
@@ -1876,14 +2029,17 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
|
1876
2029
|
DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
|
1877
2030
|
}
|
1878
2031
|
if (mtctx->inBuff.buffer.start != NULL) {
|
1879
|
-
|
2032
|
+
syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input);
|
2033
|
+
if (syncPoint.flush && endOp == ZSTD_e_continue) {
|
2034
|
+
endOp = ZSTD_e_flush;
|
2035
|
+
}
|
1880
2036
|
assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
|
1881
2037
|
DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
|
1882
|
-
(U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
|
1883
|
-
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
|
1884
|
-
input->pos += toLoad;
|
1885
|
-
mtctx->inBuff.filled += toLoad;
|
1886
|
-
forwardInputProgress = toLoad>0;
|
2038
|
+
(U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
|
2039
|
+
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
|
2040
|
+
input->pos += syncPoint.toLoad;
|
2041
|
+
mtctx->inBuff.filled += syncPoint.toLoad;
|
2042
|
+
forwardInputProgress = syncPoint.toLoad>0;
|
1887
2043
|
}
|
1888
2044
|
if ((input->pos < input->size) && (endOp == ZSTD_e_end))
|
1889
2045
|
endOp = ZSTD_e_flush; /* can't end now : not all input consumed */
|