zstd-ruby 1.3.7.0 → 1.3.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/BUCK +15 -2
  4. data/ext/zstdruby/libzstd/Makefile +37 -2
  5. data/ext/zstdruby/libzstd/README.md +67 -41
  6. data/ext/zstdruby/libzstd/common/bitstream.h +2 -2
  7. data/ext/zstdruby/libzstd/common/compiler.h +19 -12
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  9. data/ext/zstdruby/libzstd/common/debug.h +22 -11
  10. data/ext/zstdruby/libzstd/common/error_private.c +6 -0
  11. data/ext/zstdruby/libzstd/common/fse.h +2 -2
  12. data/ext/zstdruby/libzstd/common/huf.h +25 -1
  13. data/ext/zstdruby/libzstd/common/pool.c +1 -1
  14. data/ext/zstdruby/libzstd/common/zstd_common.c +3 -1
  15. data/ext/zstdruby/libzstd/common/zstd_errors.h +1 -0
  16. data/ext/zstdruby/libzstd/common/zstd_internal.h +11 -2
  17. data/ext/zstdruby/libzstd/compress/fse_compress.c +3 -3
  18. data/ext/zstdruby/libzstd/compress/hist.c +19 -11
  19. data/ext/zstdruby/libzstd/compress/hist.h +11 -8
  20. data/ext/zstdruby/libzstd/compress/huf_compress.c +33 -31
  21. data/ext/zstdruby/libzstd/compress/zstd_compress.c +621 -371
  22. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +90 -28
  23. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +4 -4
  24. data/ext/zstdruby/libzstd/compress/zstd_fast.c +15 -15
  25. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +25 -18
  26. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +18 -67
  27. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +2 -6
  28. data/ext/zstdruby/libzstd/compress/zstd_opt.c +133 -48
  29. data/ext/zstdruby/libzstd/compress/zstd_opt.h +8 -0
  30. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +229 -73
  31. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +18 -10
  32. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +178 -42
  33. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +240 -0
  34. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +44 -0
  35. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +244 -1680
  36. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1307 -0
  37. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +59 -0
  38. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +168 -0
  39. data/ext/zstdruby/libzstd/dictBuilder/cover.c +13 -11
  40. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +15 -15
  41. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +28 -28
  42. data/ext/zstdruby/libzstd/dll/libzstd.def +0 -1
  43. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -10
  44. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +15 -15
  45. data/ext/zstdruby/libzstd/zstd.h +1208 -968
  46. data/lib/zstd-ruby/version.rb +1 -1
  47. metadata +7 -2
@@ -26,6 +26,10 @@ size_t ZSTD_compressBlock_btopt(
26
26
  size_t ZSTD_compressBlock_btultra(
27
27
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28
28
  void const* src, size_t srcSize);
29
+ size_t ZSTD_compressBlock_btultra2(
30
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
31
+ void const* src, size_t srcSize);
32
+
29
33
 
30
34
  size_t ZSTD_compressBlock_btopt_dictMatchState(
31
35
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -41,6 +45,10 @@ size_t ZSTD_compressBlock_btultra_extDict(
41
45
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
42
46
  void const* src, size_t srcSize);
43
47
 
48
+ /* note : no btultra2 variant for extDict nor dictMatchState,
49
+ * because btultra2 is not meant to work with dictionaries
50
+ * and is only specific for the first block (no prefix) */
51
+
44
52
  #if defined (__cplusplus)
45
53
  }
46
54
  #endif
@@ -9,21 +9,19 @@
9
9
  */
10
10
 
11
11
 
12
- /* ====== Tuning parameters ====== */
13
- #define ZSTDMT_NBWORKERS_MAX 200
14
- #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */
15
- #define ZSTDMT_OVERLAPLOG_DEFAULT 6
16
-
17
-
18
12
  /* ====== Compiler specifics ====== */
19
13
  #if defined(_MSC_VER)
20
14
  # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
21
15
  #endif
22
16
 
23
17
 
18
+ /* ====== Constants ====== */
19
+ #define ZSTDMT_OVERLAPLOG_DEFAULT 0
20
+
21
+
24
22
  /* ====== Dependencies ====== */
25
23
  #include <string.h> /* memcpy, memset */
26
- #include <limits.h> /* INT_MAX */
24
+ #include <limits.h> /* INT_MAX, UINT_MAX */
27
25
  #include "pool.h" /* threadpool */
28
26
  #include "threading.h" /* mutex */
29
27
  #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
@@ -57,9 +55,9 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
57
55
  static clock_t _ticksPerSecond = 0;
58
56
  if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
59
57
 
60
- { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
61
- return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
62
- }
58
+ { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
59
+ return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
60
+ } }
63
61
 
64
62
  #define MUTEX_WAIT_TIME_DLEVEL 6
65
63
  #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
@@ -342,8 +340,8 @@ static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
342
340
 
343
341
  typedef struct {
344
342
  ZSTD_pthread_mutex_t poolMutex;
345
- unsigned totalCCtx;
346
- unsigned availCCtx;
343
+ int totalCCtx;
344
+ int availCCtx;
347
345
  ZSTD_customMem cMem;
348
346
  ZSTD_CCtx* cctx[1]; /* variable size */
349
347
  } ZSTDMT_CCtxPool;
@@ -351,16 +349,16 @@ typedef struct {
351
349
  /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
352
350
  static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
353
351
  {
354
- unsigned u;
355
- for (u=0; u<pool->totalCCtx; u++)
356
- ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
352
+ int cid;
353
+ for (cid=0; cid<pool->totalCCtx; cid++)
354
+ ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
357
355
  ZSTD_pthread_mutex_destroy(&pool->poolMutex);
358
356
  ZSTD_free(pool, pool->cMem);
359
357
  }
360
358
 
361
359
  /* ZSTDMT_createCCtxPool() :
362
360
  * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
363
- static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
361
+ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
364
362
  ZSTD_customMem cMem)
365
363
  {
366
364
  ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
@@ -381,7 +379,7 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
381
379
  }
382
380
 
383
381
  static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
384
- unsigned nbWorkers)
382
+ int nbWorkers)
385
383
  {
386
384
  if (srcPool==NULL) return NULL;
387
385
  if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
@@ -469,9 +467,9 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
469
467
  DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
470
468
  ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
471
469
  assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
472
- assert(params.ldmParams.hashEveryLog < 32);
470
+ assert(params.ldmParams.hashRateLog < 32);
473
471
  serialState->ldmState.hashPower =
474
- ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
472
+ ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
475
473
  } else {
476
474
  memset(&params.ldmParams, 0, sizeof(params.ldmParams));
477
475
  }
@@ -674,7 +672,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
674
672
  if (ZSTD_isError(initError)) JOB_ERROR(initError);
675
673
  } else { /* srcStart points at reloaded section */
676
674
  U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
677
- { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
675
+ { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
678
676
  if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
679
677
  }
680
678
  { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
@@ -777,6 +775,14 @@ typedef struct {
777
775
 
778
776
  static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
779
777
 
778
+ #define RSYNC_LENGTH 32
779
+
780
+ typedef struct {
781
+ U64 hash;
782
+ U64 hitMask;
783
+ U64 primePower;
784
+ } rsyncState_t;
785
+
780
786
  struct ZSTDMT_CCtx_s {
781
787
  POOL_ctx* factory;
782
788
  ZSTDMT_jobDescription* jobs;
@@ -790,6 +796,7 @@ struct ZSTDMT_CCtx_s {
790
796
  inBuff_t inBuff;
791
797
  roundBuff_t roundBuff;
792
798
  serialState_t serial;
799
+ rsyncState_t rsync;
793
800
  unsigned singleBlockingThread;
794
801
  unsigned jobIDMask;
795
802
  unsigned doneJobID;
@@ -859,7 +866,7 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
859
866
  {
860
867
  if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
861
868
  params->nbWorkers = nbWorkers;
862
- params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT;
869
+ params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
863
870
  params->jobSize = 0;
864
871
  return nbWorkers;
865
872
  }
@@ -969,52 +976,59 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
969
976
  }
970
977
 
971
978
  /* Internal only */
972
- size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
973
- ZSTDMT_parameter parameter, unsigned value) {
979
+ size_t
980
+ ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
981
+ ZSTDMT_parameter parameter,
982
+ int value)
983
+ {
974
984
  DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
975
985
  switch(parameter)
976
986
  {
977
987
  case ZSTDMT_p_jobSize :
978
- DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value);
979
- if ( (value > 0) /* value==0 => automatic job size */
980
- & (value < ZSTDMT_JOBSIZE_MIN) )
988
+ DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
989
+ if ( value != 0 /* default */
990
+ && value < ZSTDMT_JOBSIZE_MIN)
981
991
  value = ZSTDMT_JOBSIZE_MIN;
982
- if (value > ZSTDMT_JOBSIZE_MAX)
983
- value = ZSTDMT_JOBSIZE_MAX;
992
+ assert(value >= 0);
993
+ if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX;
984
994
  params->jobSize = value;
985
995
  return value;
986
- case ZSTDMT_p_overlapSectionLog :
987
- if (value > 9) value = 9;
988
- DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
989
- params->overlapSizeLog = (value >= 9) ? 9 : value;
996
+
997
+ case ZSTDMT_p_overlapLog :
998
+ DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
999
+ if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN;
1000
+ if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX;
1001
+ params->overlapLog = value;
1002
+ return value;
1003
+
1004
+ case ZSTDMT_p_rsyncable :
1005
+ value = (value != 0);
1006
+ params->rsyncable = value;
990
1007
  return value;
1008
+
991
1009
  default :
992
1010
  return ERROR(parameter_unsupported);
993
1011
  }
994
1012
  }
995
1013
 
996
- size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value)
1014
+ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
997
1015
  {
998
1016
  DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
999
- switch(parameter)
1000
- {
1001
- case ZSTDMT_p_jobSize :
1002
- return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
1003
- case ZSTDMT_p_overlapSectionLog :
1004
- return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
1005
- default :
1006
- return ERROR(parameter_unsupported);
1007
- }
1017
+ return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
1008
1018
  }
1009
1019
 
1010
- size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
1020
+ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
1011
1021
  {
1012
1022
  switch (parameter) {
1013
1023
  case ZSTDMT_p_jobSize:
1014
- *value = mtctx->params.jobSize;
1024
+ assert(mtctx->params.jobSize <= INT_MAX);
1025
+ *value = (int)(mtctx->params.jobSize);
1015
1026
  break;
1016
- case ZSTDMT_p_overlapSectionLog:
1017
- *value = mtctx->params.overlapSizeLog;
1027
+ case ZSTDMT_p_overlapLog:
1028
+ *value = mtctx->params.overlapLog;
1029
+ break;
1030
+ case ZSTDMT_p_rsyncable:
1031
+ *value = mtctx->params.rsyncable;
1018
1032
  break;
1019
1033
  default:
1020
1034
  return ERROR(parameter_unsupported);
@@ -1140,22 +1154,66 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
1140
1154
  /* ===== Multi-threaded compression ===== */
1141
1155
  /* ------------------------------------------ */
1142
1156
 
1143
- static size_t ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
1157
+ static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
1144
1158
  {
1145
1159
  if (params.ldmParams.enableLdm)
1160
+ /* In Long Range Mode, the windowLog is typically oversized.
1161
+ * In which case, it's preferable to determine the jobSize
1162
+ * based on chainLog instead. */
1146
1163
  return MAX(21, params.cParams.chainLog + 4);
1147
1164
  return MAX(20, params.cParams.windowLog + 2);
1148
1165
  }
1149
1166
 
1150
- static size_t ZSTDMT_computeOverlapLog(ZSTD_CCtx_params const params)
1167
+ static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
1151
1168
  {
1152
- unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
1153
- if (params.ldmParams.enableLdm)
1154
- return (MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog);
1155
- return overlapRLog >= 9 ? 0 : (params.cParams.windowLog - overlapRLog);
1169
+ switch(strat)
1170
+ {
1171
+ case ZSTD_btultra2:
1172
+ return 9;
1173
+ case ZSTD_btultra:
1174
+ case ZSTD_btopt:
1175
+ return 8;
1176
+ case ZSTD_btlazy2:
1177
+ case ZSTD_lazy2:
1178
+ return 7;
1179
+ case ZSTD_lazy:
1180
+ case ZSTD_greedy:
1181
+ case ZSTD_dfast:
1182
+ case ZSTD_fast:
1183
+ default:;
1184
+ }
1185
+ return 6;
1156
1186
  }
1157
1187
 
1158
- static unsigned ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) {
1188
+ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
1189
+ {
1190
+ assert(0 <= ovlog && ovlog <= 9);
1191
+ if (ovlog == 0) return ZSTDMT_overlapLog_default(strat);
1192
+ return ovlog;
1193
+ }
1194
+
1195
+ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
1196
+ {
1197
+ int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
1198
+ int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
1199
+ assert(0 <= overlapRLog && overlapRLog <= 8);
1200
+ if (params.ldmParams.enableLdm) {
1201
+ /* In Long Range Mode, the windowLog is typically oversized.
1202
+ * In which case, it's preferable to determine the jobSize
1203
+ * based on chainLog instead.
1204
+ * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
1205
+ ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
1206
+ - overlapRLog;
1207
+ }
1208
+ assert(0 <= ovLog && ovLog <= 30);
1209
+ DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
1210
+ DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
1211
+ return (ovLog==0) ? 0 : (size_t)1 << ovLog;
1212
+ }
1213
+
1214
+ static unsigned
1215
+ ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
1216
+ {
1159
1217
  assert(nbWorkers>0);
1160
1218
  { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
1161
1219
  size_t const jobMaxSize = jobSizeTarget << 2;
@@ -1178,7 +1236,7 @@ static size_t ZSTDMT_compress_advanced_internal(
1178
1236
  ZSTD_CCtx_params params)
1179
1237
  {
1180
1238
  ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
1181
- size_t const overlapSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
1239
+ size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
1182
1240
  unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
1183
1241
  size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
1184
1242
  size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
@@ -1289,16 +1347,17 @@ static size_t ZSTDMT_compress_advanced_internal(
1289
1347
  }
1290
1348
 
1291
1349
  size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
1292
- void* dst, size_t dstCapacity,
1293
- const void* src, size_t srcSize,
1294
- const ZSTD_CDict* cdict,
1295
- ZSTD_parameters params,
1296
- unsigned overlapLog)
1350
+ void* dst, size_t dstCapacity,
1351
+ const void* src, size_t srcSize,
1352
+ const ZSTD_CDict* cdict,
1353
+ ZSTD_parameters params,
1354
+ int overlapLog)
1297
1355
  {
1298
1356
  ZSTD_CCtx_params cctxParams = mtctx->params;
1299
1357
  cctxParams.cParams = params.cParams;
1300
1358
  cctxParams.fParams = params.fParams;
1301
- cctxParams.overlapSizeLog = overlapLog;
1359
+ assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
1360
+ cctxParams.overlapLog = overlapLog;
1302
1361
  return ZSTDMT_compress_advanced_internal(mtctx,
1303
1362
  dst, dstCapacity,
1304
1363
  src, srcSize,
@@ -1311,8 +1370,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
1311
1370
  const void* src, size_t srcSize,
1312
1371
  int compressionLevel)
1313
1372
  {
1314
- U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
1315
1373
  ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
1374
+ int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
1316
1375
  params.fParams.contentSizeFlag = 1;
1317
1376
  return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
1318
1377
  }
@@ -1339,8 +1398,8 @@ size_t ZSTDMT_initCStream_internal(
1339
1398
  if (params.nbWorkers != mtctx->params.nbWorkers)
1340
1399
  CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
1341
1400
 
1342
- if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1343
- if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1401
+ if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1402
+ if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1344
1403
 
1345
1404
  mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
1346
1405
  if (mtctx->singleBlockingThread) {
@@ -1375,14 +1434,24 @@ size_t ZSTDMT_initCStream_internal(
1375
1434
  mtctx->cdict = cdict;
1376
1435
  }
1377
1436
 
1378
- mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
1379
- DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
1437
+ mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
1438
+ DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
1380
1439
  mtctx->targetSectionSize = params.jobSize;
1381
1440
  if (mtctx->targetSectionSize == 0) {
1382
1441
  mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
1383
1442
  }
1443
+ if (params.rsyncable) {
1444
+ /* Aim for the targetsectionSize as the average job size. */
1445
+ U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
1446
+ U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
1447
+ assert(jobSizeMB >= 1);
1448
+ DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
1449
+ mtctx->rsync.hash = 0;
1450
+ mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
1451
+ mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
1452
+ }
1384
1453
  if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
1385
- DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
1454
+ DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
1386
1455
  DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
1387
1456
  ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
1388
1457
  {
@@ -1818,6 +1887,89 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
1818
1887
  return 1;
1819
1888
  }
1820
1889
 
1890
+ typedef struct {
1891
+ size_t toLoad; /* The number of bytes to load from the input. */
1892
+ int flush; /* Boolean declaring if we must flush because we found a synchronization point. */
1893
+ } syncPoint_t;
1894
+
1895
+ /**
1896
+ * Searches through the input for a synchronization point. If one is found, we
1897
+ * will instruct the caller to flush, and return the number of bytes to load.
1898
+ * Otherwise, we will load as many bytes as possible and instruct the caller
1899
+ * to continue as normal.
1900
+ */
1901
+ static syncPoint_t
1902
+ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1903
+ {
1904
+ BYTE const* const istart = (BYTE const*)input.src + input.pos;
1905
+ U64 const primePower = mtctx->rsync.primePower;
1906
+ U64 const hitMask = mtctx->rsync.hitMask;
1907
+
1908
+ syncPoint_t syncPoint;
1909
+ U64 hash;
1910
+ BYTE const* prev;
1911
+ size_t pos;
1912
+
1913
+ syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
1914
+ syncPoint.flush = 0;
1915
+ if (!mtctx->params.rsyncable)
1916
+ /* Rsync is disabled. */
1917
+ return syncPoint;
1918
+ if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
1919
+ /* Not enough to compute the hash.
1920
+ * We will miss any synchronization points in this RSYNC_LENGTH byte
1921
+ * window. However, since it depends only in the internal buffers, if the
1922
+ * state is already synchronized, we will remain synchronized.
1923
+ * Additionally, the probability that we miss a synchronization point is
1924
+ * low: RSYNC_LENGTH / targetSectionSize.
1925
+ */
1926
+ return syncPoint;
1927
+ /* Initialize the loop variables. */
1928
+ if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
1929
+ /* We have enough bytes buffered to initialize the hash.
1930
+ * Start scanning at the beginning of the input.
1931
+ */
1932
+ pos = 0;
1933
+ prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
1934
+ hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
1935
+ } else {
1936
+ /* We don't have enough bytes buffered to initialize the hash, but
1937
+ * we know we have at least RSYNC_LENGTH bytes total.
1938
+ * Start scanning after the first RSYNC_LENGTH bytes less the bytes
1939
+ * already buffered.
1940
+ */
1941
+ pos = RSYNC_LENGTH - mtctx->inBuff.filled;
1942
+ prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
1943
+ hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
1944
+ hash = ZSTD_rollingHash_append(hash, istart, pos);
1945
+ }
1946
+ /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
1947
+ * through the input. If we hit a synchronization point, then cut the
1948
+ * job off, and tell the compressor to flush the job. Otherwise, load
1949
+ * all the bytes and continue as normal.
1950
+ * If we go too long without a synchronization point (targetSectionSize)
1951
+ * then a block will be emitted anyways, but this is okay, since if we
1952
+ * are already synchronized we will remain synchronized.
1953
+ */
1954
+ for (; pos < syncPoint.toLoad; ++pos) {
1955
+ BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
1956
+ /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
1957
+ hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
1958
+ if ((hash & hitMask) == hitMask) {
1959
+ syncPoint.toLoad = pos + 1;
1960
+ syncPoint.flush = 1;
1961
+ break;
1962
+ }
1963
+ }
1964
+ return syncPoint;
1965
+ }
1966
+
1967
+ size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx)
1968
+ {
1969
+ size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled;
1970
+ if (hintInSize==0) hintInSize = mtctx->targetSectionSize;
1971
+ return hintInSize;
1972
+ }
1821
1973
 
1822
1974
  /** ZSTDMT_compressStream_generic() :
1823
1975
  * internal use only - exposed to be invoked from zstd_compress.c
@@ -1844,7 +1996,8 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
1844
1996
  }
1845
1997
 
1846
1998
  /* single-pass shortcut (note : synchronous-mode) */
1847
- if ( (mtctx->nextJobID == 0) /* just started */
1999
+ if ( (!mtctx->params.rsyncable) /* rsyncable mode is disabled */
2000
+ && (mtctx->nextJobID == 0) /* just started */
1848
2001
  && (mtctx->inBuff.filled == 0) /* nothing buffered */
1849
2002
  && (!mtctx->jobReady) /* no job already created */
1850
2003
  && (endOp == ZSTD_e_end) /* end order */
@@ -1876,14 +2029,17 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
1876
2029
  DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
1877
2030
  }
1878
2031
  if (mtctx->inBuff.buffer.start != NULL) {
1879
- size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
2032
+ syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input);
2033
+ if (syncPoint.flush && endOp == ZSTD_e_continue) {
2034
+ endOp = ZSTD_e_flush;
2035
+ }
1880
2036
  assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
1881
2037
  DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
1882
- (U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
1883
- memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
1884
- input->pos += toLoad;
1885
- mtctx->inBuff.filled += toLoad;
1886
- forwardInputProgress = toLoad>0;
2038
+ (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
2039
+ memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
2040
+ input->pos += syncPoint.toLoad;
2041
+ mtctx->inBuff.filled += syncPoint.toLoad;
2042
+ forwardInputProgress = syncPoint.toLoad>0;
1887
2043
  }
1888
2044
  if ((input->pos < input->size) && (endOp == ZSTD_e_end))
1889
2045
  endOp = ZSTD_e_flush; /* can't end now : not all input consumed */