zstd-ruby 1.3.7.0 → 1.3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/BUCK +15 -2
  4. data/ext/zstdruby/libzstd/Makefile +37 -2
  5. data/ext/zstdruby/libzstd/README.md +67 -41
  6. data/ext/zstdruby/libzstd/common/bitstream.h +2 -2
  7. data/ext/zstdruby/libzstd/common/compiler.h +19 -12
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  9. data/ext/zstdruby/libzstd/common/debug.h +22 -11
  10. data/ext/zstdruby/libzstd/common/error_private.c +6 -0
  11. data/ext/zstdruby/libzstd/common/fse.h +2 -2
  12. data/ext/zstdruby/libzstd/common/huf.h +25 -1
  13. data/ext/zstdruby/libzstd/common/pool.c +1 -1
  14. data/ext/zstdruby/libzstd/common/zstd_common.c +3 -1
  15. data/ext/zstdruby/libzstd/common/zstd_errors.h +1 -0
  16. data/ext/zstdruby/libzstd/common/zstd_internal.h +11 -2
  17. data/ext/zstdruby/libzstd/compress/fse_compress.c +3 -3
  18. data/ext/zstdruby/libzstd/compress/hist.c +19 -11
  19. data/ext/zstdruby/libzstd/compress/hist.h +11 -8
  20. data/ext/zstdruby/libzstd/compress/huf_compress.c +33 -31
  21. data/ext/zstdruby/libzstd/compress/zstd_compress.c +621 -371
  22. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +90 -28
  23. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +4 -4
  24. data/ext/zstdruby/libzstd/compress/zstd_fast.c +15 -15
  25. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +25 -18
  26. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +18 -67
  27. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +2 -6
  28. data/ext/zstdruby/libzstd/compress/zstd_opt.c +133 -48
  29. data/ext/zstdruby/libzstd/compress/zstd_opt.h +8 -0
  30. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +229 -73
  31. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +18 -10
  32. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +178 -42
  33. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +240 -0
  34. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +44 -0
  35. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +244 -1680
  36. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1307 -0
  37. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +59 -0
  38. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +168 -0
  39. data/ext/zstdruby/libzstd/dictBuilder/cover.c +13 -11
  40. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +15 -15
  41. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +28 -28
  42. data/ext/zstdruby/libzstd/dll/libzstd.def +0 -1
  43. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -10
  44. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +15 -15
  45. data/ext/zstdruby/libzstd/zstd.h +1208 -968
  46. data/lib/zstd-ruby/version.rb +1 -1
  47. metadata +7 -2
@@ -26,6 +26,10 @@ size_t ZSTD_compressBlock_btopt(
26
26
  size_t ZSTD_compressBlock_btultra(
27
27
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28
28
  void const* src, size_t srcSize);
29
+ size_t ZSTD_compressBlock_btultra2(
30
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
31
+ void const* src, size_t srcSize);
32
+
29
33
 
30
34
  size_t ZSTD_compressBlock_btopt_dictMatchState(
31
35
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -41,6 +45,10 @@ size_t ZSTD_compressBlock_btultra_extDict(
41
45
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
42
46
  void const* src, size_t srcSize);
43
47
 
48
+ /* note : no btultra2 variant for extDict nor dictMatchState,
49
+ * because btultra2 is not meant to work with dictionaries
50
+ * and is only specific for the first block (no prefix) */
51
+
44
52
  #if defined (__cplusplus)
45
53
  }
46
54
  #endif
@@ -9,21 +9,19 @@
9
9
  */
10
10
 
11
11
 
12
- /* ====== Tuning parameters ====== */
13
- #define ZSTDMT_NBWORKERS_MAX 200
14
- #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */
15
- #define ZSTDMT_OVERLAPLOG_DEFAULT 6
16
-
17
-
18
12
  /* ====== Compiler specifics ====== */
19
13
  #if defined(_MSC_VER)
20
14
  # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
21
15
  #endif
22
16
 
23
17
 
18
+ /* ====== Constants ====== */
19
+ #define ZSTDMT_OVERLAPLOG_DEFAULT 0
20
+
21
+
24
22
  /* ====== Dependencies ====== */
25
23
  #include <string.h> /* memcpy, memset */
26
- #include <limits.h> /* INT_MAX */
24
+ #include <limits.h> /* INT_MAX, UINT_MAX */
27
25
  #include "pool.h" /* threadpool */
28
26
  #include "threading.h" /* mutex */
29
27
  #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
@@ -57,9 +55,9 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
57
55
  static clock_t _ticksPerSecond = 0;
58
56
  if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
59
57
 
60
- { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
61
- return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
62
- }
58
+ { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
59
+ return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
60
+ } }
63
61
 
64
62
  #define MUTEX_WAIT_TIME_DLEVEL 6
65
63
  #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
@@ -342,8 +340,8 @@ static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
342
340
 
343
341
  typedef struct {
344
342
  ZSTD_pthread_mutex_t poolMutex;
345
- unsigned totalCCtx;
346
- unsigned availCCtx;
343
+ int totalCCtx;
344
+ int availCCtx;
347
345
  ZSTD_customMem cMem;
348
346
  ZSTD_CCtx* cctx[1]; /* variable size */
349
347
  } ZSTDMT_CCtxPool;
@@ -351,16 +349,16 @@ typedef struct {
351
349
  /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
352
350
  static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
353
351
  {
354
- unsigned u;
355
- for (u=0; u<pool->totalCCtx; u++)
356
- ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
352
+ int cid;
353
+ for (cid=0; cid<pool->totalCCtx; cid++)
354
+ ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
357
355
  ZSTD_pthread_mutex_destroy(&pool->poolMutex);
358
356
  ZSTD_free(pool, pool->cMem);
359
357
  }
360
358
 
361
359
  /* ZSTDMT_createCCtxPool() :
362
360
  * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
363
- static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
361
+ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
364
362
  ZSTD_customMem cMem)
365
363
  {
366
364
  ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
@@ -381,7 +379,7 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
381
379
  }
382
380
 
383
381
  static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
384
- unsigned nbWorkers)
382
+ int nbWorkers)
385
383
  {
386
384
  if (srcPool==NULL) return NULL;
387
385
  if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
@@ -469,9 +467,9 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
469
467
  DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
470
468
  ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
471
469
  assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
472
- assert(params.ldmParams.hashEveryLog < 32);
470
+ assert(params.ldmParams.hashRateLog < 32);
473
471
  serialState->ldmState.hashPower =
474
- ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
472
+ ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
475
473
  } else {
476
474
  memset(&params.ldmParams, 0, sizeof(params.ldmParams));
477
475
  }
@@ -674,7 +672,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
674
672
  if (ZSTD_isError(initError)) JOB_ERROR(initError);
675
673
  } else { /* srcStart points at reloaded section */
676
674
  U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
677
- { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
675
+ { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
678
676
  if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
679
677
  }
680
678
  { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
@@ -777,6 +775,14 @@ typedef struct {
777
775
 
778
776
  static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
779
777
 
778
+ #define RSYNC_LENGTH 32
779
+
780
+ typedef struct {
781
+ U64 hash;
782
+ U64 hitMask;
783
+ U64 primePower;
784
+ } rsyncState_t;
785
+
780
786
  struct ZSTDMT_CCtx_s {
781
787
  POOL_ctx* factory;
782
788
  ZSTDMT_jobDescription* jobs;
@@ -790,6 +796,7 @@ struct ZSTDMT_CCtx_s {
790
796
  inBuff_t inBuff;
791
797
  roundBuff_t roundBuff;
792
798
  serialState_t serial;
799
+ rsyncState_t rsync;
793
800
  unsigned singleBlockingThread;
794
801
  unsigned jobIDMask;
795
802
  unsigned doneJobID;
@@ -859,7 +866,7 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
859
866
  {
860
867
  if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
861
868
  params->nbWorkers = nbWorkers;
862
- params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT;
869
+ params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
863
870
  params->jobSize = 0;
864
871
  return nbWorkers;
865
872
  }
@@ -969,52 +976,59 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
969
976
  }
970
977
 
971
978
  /* Internal only */
972
- size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
973
- ZSTDMT_parameter parameter, unsigned value) {
979
+ size_t
980
+ ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
981
+ ZSTDMT_parameter parameter,
982
+ int value)
983
+ {
974
984
  DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
975
985
  switch(parameter)
976
986
  {
977
987
  case ZSTDMT_p_jobSize :
978
- DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value);
979
- if ( (value > 0) /* value==0 => automatic job size */
980
- & (value < ZSTDMT_JOBSIZE_MIN) )
988
+ DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
989
+ if ( value != 0 /* default */
990
+ && value < ZSTDMT_JOBSIZE_MIN)
981
991
  value = ZSTDMT_JOBSIZE_MIN;
982
- if (value > ZSTDMT_JOBSIZE_MAX)
983
- value = ZSTDMT_JOBSIZE_MAX;
992
+ assert(value >= 0);
993
+ if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX;
984
994
  params->jobSize = value;
985
995
  return value;
986
- case ZSTDMT_p_overlapSectionLog :
987
- if (value > 9) value = 9;
988
- DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
989
- params->overlapSizeLog = (value >= 9) ? 9 : value;
996
+
997
+ case ZSTDMT_p_overlapLog :
998
+ DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
999
+ if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN;
1000
+ if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX;
1001
+ params->overlapLog = value;
1002
+ return value;
1003
+
1004
+ case ZSTDMT_p_rsyncable :
1005
+ value = (value != 0);
1006
+ params->rsyncable = value;
990
1007
  return value;
1008
+
991
1009
  default :
992
1010
  return ERROR(parameter_unsupported);
993
1011
  }
994
1012
  }
995
1013
 
996
- size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value)
1014
+ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
997
1015
  {
998
1016
  DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
999
- switch(parameter)
1000
- {
1001
- case ZSTDMT_p_jobSize :
1002
- return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
1003
- case ZSTDMT_p_overlapSectionLog :
1004
- return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
1005
- default :
1006
- return ERROR(parameter_unsupported);
1007
- }
1017
+ return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
1008
1018
  }
1009
1019
 
1010
- size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
1020
+ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
1011
1021
  {
1012
1022
  switch (parameter) {
1013
1023
  case ZSTDMT_p_jobSize:
1014
- *value = mtctx->params.jobSize;
1024
+ assert(mtctx->params.jobSize <= INT_MAX);
1025
+ *value = (int)(mtctx->params.jobSize);
1015
1026
  break;
1016
- case ZSTDMT_p_overlapSectionLog:
1017
- *value = mtctx->params.overlapSizeLog;
1027
+ case ZSTDMT_p_overlapLog:
1028
+ *value = mtctx->params.overlapLog;
1029
+ break;
1030
+ case ZSTDMT_p_rsyncable:
1031
+ *value = mtctx->params.rsyncable;
1018
1032
  break;
1019
1033
  default:
1020
1034
  return ERROR(parameter_unsupported);
@@ -1140,22 +1154,66 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
1140
1154
  /* ===== Multi-threaded compression ===== */
1141
1155
  /* ------------------------------------------ */
1142
1156
 
1143
- static size_t ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
1157
+ static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
1144
1158
  {
1145
1159
  if (params.ldmParams.enableLdm)
1160
+ /* In Long Range Mode, the windowLog is typically oversized.
1161
+ * In that case, it's preferable to determine the jobSize
1162
+ * based on chainLog instead. */
1146
1163
  return MAX(21, params.cParams.chainLog + 4);
1147
1164
  return MAX(20, params.cParams.windowLog + 2);
1148
1165
  }
1149
1166
 
1150
- static size_t ZSTDMT_computeOverlapLog(ZSTD_CCtx_params const params)
1167
+ static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
1151
1168
  {
1152
- unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
1153
- if (params.ldmParams.enableLdm)
1154
- return (MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog);
1155
- return overlapRLog >= 9 ? 0 : (params.cParams.windowLog - overlapRLog);
1169
+ switch(strat)
1170
+ {
1171
+ case ZSTD_btultra2:
1172
+ return 9;
1173
+ case ZSTD_btultra:
1174
+ case ZSTD_btopt:
1175
+ return 8;
1176
+ case ZSTD_btlazy2:
1177
+ case ZSTD_lazy2:
1178
+ return 7;
1179
+ case ZSTD_lazy:
1180
+ case ZSTD_greedy:
1181
+ case ZSTD_dfast:
1182
+ case ZSTD_fast:
1183
+ default:;
1184
+ }
1185
+ return 6;
1156
1186
  }
1157
1187
 
1158
- static unsigned ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) {
1188
+ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
1189
+ {
1190
+ assert(0 <= ovlog && ovlog <= 9);
1191
+ if (ovlog == 0) return ZSTDMT_overlapLog_default(strat);
1192
+ return ovlog;
1193
+ }
1194
+
1195
+ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
1196
+ {
1197
+ int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
1198
+ int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
1199
+ assert(0 <= overlapRLog && overlapRLog <= 8);
1200
+ if (params.ldmParams.enableLdm) {
1201
+ /* In Long Range Mode, the windowLog is typically oversized.
1202
+ * In that case, it's preferable to determine the jobSize
1203
+ * based on chainLog instead.
1204
+ * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
1205
+ ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
1206
+ - overlapRLog;
1207
+ }
1208
+ assert(0 <= ovLog && ovLog <= 30);
1209
+ DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
1210
+ DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
1211
+ return (ovLog==0) ? 0 : (size_t)1 << ovLog;
1212
+ }
1213
+
1214
+ static unsigned
1215
+ ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
1216
+ {
1159
1217
  assert(nbWorkers>0);
1160
1218
  { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
1161
1219
  size_t const jobMaxSize = jobSizeTarget << 2;
@@ -1178,7 +1236,7 @@ static size_t ZSTDMT_compress_advanced_internal(
1178
1236
  ZSTD_CCtx_params params)
1179
1237
  {
1180
1238
  ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
1181
- size_t const overlapSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
1239
+ size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
1182
1240
  unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
1183
1241
  size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
1184
1242
  size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
@@ -1289,16 +1347,17 @@ static size_t ZSTDMT_compress_advanced_internal(
1289
1347
  }
1290
1348
 
1291
1349
  size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
1292
- void* dst, size_t dstCapacity,
1293
- const void* src, size_t srcSize,
1294
- const ZSTD_CDict* cdict,
1295
- ZSTD_parameters params,
1296
- unsigned overlapLog)
1350
+ void* dst, size_t dstCapacity,
1351
+ const void* src, size_t srcSize,
1352
+ const ZSTD_CDict* cdict,
1353
+ ZSTD_parameters params,
1354
+ int overlapLog)
1297
1355
  {
1298
1356
  ZSTD_CCtx_params cctxParams = mtctx->params;
1299
1357
  cctxParams.cParams = params.cParams;
1300
1358
  cctxParams.fParams = params.fParams;
1301
- cctxParams.overlapSizeLog = overlapLog;
1359
+ assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
1360
+ cctxParams.overlapLog = overlapLog;
1302
1361
  return ZSTDMT_compress_advanced_internal(mtctx,
1303
1362
  dst, dstCapacity,
1304
1363
  src, srcSize,
@@ -1311,8 +1370,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
1311
1370
  const void* src, size_t srcSize,
1312
1371
  int compressionLevel)
1313
1372
  {
1314
- U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
1315
1373
  ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
1374
+ int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
1316
1375
  params.fParams.contentSizeFlag = 1;
1317
1376
  return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
1318
1377
  }
@@ -1339,8 +1398,8 @@ size_t ZSTDMT_initCStream_internal(
1339
1398
  if (params.nbWorkers != mtctx->params.nbWorkers)
1340
1399
  CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
1341
1400
 
1342
- if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1343
- if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1401
+ if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1402
+ if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1344
1403
 
1345
1404
  mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
1346
1405
  if (mtctx->singleBlockingThread) {
@@ -1375,14 +1434,24 @@ size_t ZSTDMT_initCStream_internal(
1375
1434
  mtctx->cdict = cdict;
1376
1435
  }
1377
1436
 
1378
- mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
1379
- DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
1437
+ mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
1438
+ DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
1380
1439
  mtctx->targetSectionSize = params.jobSize;
1381
1440
  if (mtctx->targetSectionSize == 0) {
1382
1441
  mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
1383
1442
  }
1443
+ if (params.rsyncable) {
1444
+ /* Aim for the targetSectionSize as the average job size. */
1445
+ U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
1446
+ U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
1447
+ assert(jobSizeMB >= 1);
1448
+ DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
1449
+ mtctx->rsync.hash = 0;
1450
+ mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
1451
+ mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
1452
+ }
1384
1453
  if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
1385
- DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
1454
+ DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
1386
1455
  DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
1387
1456
  ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
1388
1457
  {
@@ -1818,6 +1887,89 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
1818
1887
  return 1;
1819
1888
  }
1820
1889
 
1890
+ typedef struct {
1891
+ size_t toLoad; /* The number of bytes to load from the input. */
1892
+ int flush; /* Boolean declaring if we must flush because we found a synchronization point. */
1893
+ } syncPoint_t;
1894
+
1895
+ /**
1896
+ * Searches through the input for a synchronization point. If one is found, we
1897
+ * will instruct the caller to flush, and return the number of bytes to load.
1898
+ * Otherwise, we will load as many bytes as possible and instruct the caller
1899
+ * to continue as normal.
1900
+ */
1901
+ static syncPoint_t
1902
+ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1903
+ {
1904
+ BYTE const* const istart = (BYTE const*)input.src + input.pos;
1905
+ U64 const primePower = mtctx->rsync.primePower;
1906
+ U64 const hitMask = mtctx->rsync.hitMask;
1907
+
1908
+ syncPoint_t syncPoint;
1909
+ U64 hash;
1910
+ BYTE const* prev;
1911
+ size_t pos;
1912
+
1913
+ syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
1914
+ syncPoint.flush = 0;
1915
+ if (!mtctx->params.rsyncable)
1916
+ /* Rsync is disabled. */
1917
+ return syncPoint;
1918
+ if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
1919
+ /* Not enough to compute the hash.
1920
+ * We will miss any synchronization points in this RSYNC_LENGTH byte
1921
+ * window. However, since it depends only in the internal buffers, if the
1922
+ * state is already synchronized, we will remain synchronized.
1923
+ * Additionally, the probability that we miss a synchronization point is
1924
+ * low: RSYNC_LENGTH / targetSectionSize.
1925
+ */
1926
+ return syncPoint;
1927
+ /* Initialize the loop variables. */
1928
+ if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
1929
+ /* We have enough bytes buffered to initialize the hash.
1930
+ * Start scanning at the beginning of the input.
1931
+ */
1932
+ pos = 0;
1933
+ prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
1934
+ hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
1935
+ } else {
1936
+ /* We don't have enough bytes buffered to initialize the hash, but
1937
+ * we know we have at least RSYNC_LENGTH bytes total.
1938
+ * Start scanning after the first RSYNC_LENGTH bytes less the bytes
1939
+ * already buffered.
1940
+ */
1941
+ pos = RSYNC_LENGTH - mtctx->inBuff.filled;
1942
+ prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
1943
+ hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
1944
+ hash = ZSTD_rollingHash_append(hash, istart, pos);
1945
+ }
1946
+ /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
1947
+ * through the input. If we hit a synchronization point, then cut the
1948
+ * job off, and tell the compressor to flush the job. Otherwise, load
1949
+ * all the bytes and continue as normal.
1950
+ * If we go too long without a synchronization point (targetSectionSize)
1951
+ * then a block will be emitted anyways, but this is okay, since if we
1952
+ * are already synchronized we will remain synchronized.
1953
+ */
1954
+ for (; pos < syncPoint.toLoad; ++pos) {
1955
+ BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
1956
+ /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
1957
+ hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
1958
+ if ((hash & hitMask) == hitMask) {
1959
+ syncPoint.toLoad = pos + 1;
1960
+ syncPoint.flush = 1;
1961
+ break;
1962
+ }
1963
+ }
1964
+ return syncPoint;
1965
+ }
1966
+
1967
+ size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx)
1968
+ {
1969
+ size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled;
1970
+ if (hintInSize==0) hintInSize = mtctx->targetSectionSize;
1971
+ return hintInSize;
1972
+ }
1821
1973
 
1822
1974
  /** ZSTDMT_compressStream_generic() :
1823
1975
  * internal use only - exposed to be invoked from zstd_compress.c
@@ -1844,7 +1996,8 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
1844
1996
  }
1845
1997
 
1846
1998
  /* single-pass shortcut (note : synchronous-mode) */
1847
- if ( (mtctx->nextJobID == 0) /* just started */
1999
+ if ( (!mtctx->params.rsyncable) /* rsyncable mode is disabled */
2000
+ && (mtctx->nextJobID == 0) /* just started */
1848
2001
  && (mtctx->inBuff.filled == 0) /* nothing buffered */
1849
2002
  && (!mtctx->jobReady) /* no job already created */
1850
2003
  && (endOp == ZSTD_e_end) /* end order */
@@ -1876,14 +2029,17 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
1876
2029
  DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
1877
2030
  }
1878
2031
  if (mtctx->inBuff.buffer.start != NULL) {
1879
- size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
2032
+ syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input);
2033
+ if (syncPoint.flush && endOp == ZSTD_e_continue) {
2034
+ endOp = ZSTD_e_flush;
2035
+ }
1880
2036
  assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
1881
2037
  DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
1882
- (U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
1883
- memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
1884
- input->pos += toLoad;
1885
- mtctx->inBuff.filled += toLoad;
1886
- forwardInputProgress = toLoad>0;
2038
+ (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
2039
+ memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
2040
+ input->pos += syncPoint.toLoad;
2041
+ mtctx->inBuff.filled += syncPoint.toLoad;
2042
+ forwardInputProgress = syncPoint.toLoad>0;
1887
2043
  }
1888
2044
  if ((input->pos < input->size) && (endOp == ZSTD_e_end))
1889
2045
  endOp = ZSTD_e_flush; /* can't end now : not all input consumed */