zstd-ruby 1.4.5.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
  10. data/ext/zstdruby/libzstd/common/compiler.h +205 -22
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  13. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
  15. data/ext/zstdruby/libzstd/common/error_private.c +10 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +82 -3
  17. data/ext/zstdruby/libzstd/common/fse.h +37 -86
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
  19. data/ext/zstdruby/libzstd/common/huf.h +99 -166
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +10 -4
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +74 -19
  25. data/ext/zstdruby/libzstd/common/threading.h +5 -10
  26. data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
  34. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  35. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
  73. data/ext/zstdruby/libzstd/zstd.h +1217 -287
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +19 -36
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -354
  89. data/ext/zstdruby/libzstd/README.md +0 -179
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -20,8 +20,8 @@
20
20
 
21
21
 
22
22
  /* ====== Dependencies ====== */
23
- #include <string.h> /* memcpy, memset */
24
- #include <limits.h> /* INT_MAX, UINT_MAX */
23
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
24
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
25
25
  #include "../common/mem.h" /* MEM_STATIC */
26
26
  #include "../common/pool.h" /* threadpool */
27
27
  #include "../common/threading.h" /* mutex */
@@ -103,14 +103,13 @@ typedef struct ZSTDMT_bufferPool_s {
103
103
  buffer_t bTable[1]; /* variable size */
104
104
  } ZSTDMT_bufferPool;
105
105
 
106
- static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
106
+ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
107
107
  {
108
- unsigned const maxNbBuffers = 2*nbWorkers + 3;
109
- ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
108
+ ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
110
109
  sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
111
110
  if (bufPool==NULL) return NULL;
112
111
  if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
113
- ZSTD_free(bufPool, cMem);
112
+ ZSTD_customFree(bufPool, cMem);
114
113
  return NULL;
115
114
  }
116
115
  bufPool->bufferSize = 64 KB;
@@ -127,10 +126,10 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
127
126
  if (!bufPool) return; /* compatibility with free on NULL */
128
127
  for (u=0; u<bufPool->totalBuffers; u++) {
129
128
  DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
130
- ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
129
+ ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
131
130
  }
132
131
  ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
133
- ZSTD_free(bufPool, bufPool->cMem);
132
+ ZSTD_customFree(bufPool, bufPool->cMem);
134
133
  }
135
134
 
136
135
  /* only works at initialization, not during compression */
@@ -161,9 +160,8 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
161
160
  }
162
161
 
163
162
 
164
- static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
163
+ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
165
164
  {
166
- unsigned const maxNbBuffers = 2*nbWorkers + 3;
167
165
  if (srcBufPool==NULL) return NULL;
168
166
  if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
169
167
  return srcBufPool;
@@ -172,7 +170,7 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
172
170
  size_t const bSize = srcBufPool->bufferSize; /* forward parameters */
173
171
  ZSTDMT_bufferPool* newBufPool;
174
172
  ZSTDMT_freeBufferPool(srcBufPool);
175
- newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
173
+ newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
176
174
  if (newBufPool==NULL) return newBufPool;
177
175
  ZSTDMT_setBufferSize(newBufPool, bSize);
178
176
  return newBufPool;
@@ -201,13 +199,13 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
201
199
  }
202
200
  /* size conditions not respected : scratch this buffer, create new one */
203
201
  DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing");
204
- ZSTD_free(buf.start, bufPool->cMem);
202
+ ZSTD_customFree(buf.start, bufPool->cMem);
205
203
  }
206
204
  ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
207
205
  /* create new buffer */
208
206
  DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer");
209
207
  { buffer_t buffer;
210
- void* const start = ZSTD_malloc(bSize, bufPool->cMem);
208
+ void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
211
209
  buffer.start = start; /* note : start can be NULL if malloc fails ! */
212
210
  buffer.capacity = (start==NULL) ? 0 : bSize;
213
211
  if (start==NULL) {
@@ -229,13 +227,13 @@ static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
229
227
  {
230
228
  size_t const bSize = bufPool->bufferSize;
231
229
  if (buffer.capacity < bSize) {
232
- void* const start = ZSTD_malloc(bSize, bufPool->cMem);
230
+ void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
233
231
  buffer_t newBuffer;
234
232
  newBuffer.start = start;
235
233
  newBuffer.capacity = start == NULL ? 0 : bSize;
236
234
  if (start != NULL) {
237
235
  assert(newBuffer.capacity >= buffer.capacity);
238
- memcpy(newBuffer.start, buffer.start, buffer.capacity);
236
+ ZSTD_memcpy(newBuffer.start, buffer.start, buffer.capacity);
239
237
  DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
240
238
  return newBuffer;
241
239
  }
@@ -261,13 +259,21 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
261
259
  ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
262
260
  /* Reached bufferPool capacity (should not happen) */
263
261
  DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
264
- ZSTD_free(buf.start, bufPool->cMem);
262
+ ZSTD_customFree(buf.start, bufPool->cMem);
265
263
  }
266
264
 
265
+ /* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.
266
+ * The 3 additional buffers are as follows:
267
+ * 1 buffer for input loading
268
+ * 1 buffer for "next input" when submitting current one
269
+ * 1 buffer stuck in queue */
270
+ #define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
267
271
 
268
- /* ===== Seq Pool Wrapper ====== */
272
+ /* After a worker releases its rawSeqStore, it is immediately ready for reuse.
273
+ * So we only need one seq buffer per worker. */
274
+ #define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
269
275
 
270
- static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0};
276
+ /* ===== Seq Pool Wrapper ====== */
271
277
 
272
278
  typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
273
279
 
@@ -278,7 +284,7 @@ static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
278
284
 
279
285
  static rawSeqStore_t bufferToSeq(buffer_t buffer)
280
286
  {
281
- rawSeqStore_t seq = {NULL, 0, 0, 0};
287
+ rawSeqStore_t seq = kNullRawSeqStore;
282
288
  seq.seq = (rawSeq*)buffer.start;
283
289
  seq.capacity = buffer.capacity / sizeof(rawSeq);
284
290
  return seq;
@@ -319,7 +325,7 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
319
325
 
320
326
  static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
321
327
  {
322
- ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
328
+ ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
323
329
  if (seqPool == NULL) return NULL;
324
330
  ZSTDMT_setNbSeq(seqPool, 0);
325
331
  return seqPool;
@@ -332,7 +338,7 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
332
338
 
333
339
  static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
334
340
  {
335
- return ZSTDMT_expandBufferPool(pool, nbWorkers);
341
+ return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
336
342
  }
337
343
 
338
344
 
@@ -354,7 +360,7 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
354
360
  for (cid=0; cid<pool->totalCCtx; cid++)
355
361
  ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
356
362
  ZSTD_pthread_mutex_destroy(&pool->poolMutex);
357
- ZSTD_free(pool, pool->cMem);
363
+ ZSTD_customFree(pool, pool->cMem);
358
364
  }
359
365
 
360
366
  /* ZSTDMT_createCCtxPool() :
@@ -362,12 +368,12 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
362
368
  static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
363
369
  ZSTD_customMem cMem)
364
370
  {
365
- ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
371
+ ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
366
372
  sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
367
373
  assert(nbWorkers > 0);
368
374
  if (!cctxPool) return NULL;
369
375
  if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
370
- ZSTD_free(cctxPool, cMem);
376
+ ZSTD_customFree(cctxPool, cMem);
371
377
  return NULL;
372
378
  }
373
379
  cctxPool->cMem = cMem;
@@ -470,54 +476,52 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
470
476
  ZSTD_dictContentType_e dictContentType)
471
477
  {
472
478
  /* Adjust parameters */
473
- if (params.ldmParams.enableLdm) {
479
+ if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
474
480
  DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
475
481
  ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
476
482
  assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
477
483
  assert(params.ldmParams.hashRateLog < 32);
478
- serialState->ldmState.hashPower =
479
- ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
480
484
  } else {
481
- memset(&params.ldmParams, 0, sizeof(params.ldmParams));
485
+ ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
482
486
  }
483
487
  serialState->nextJobID = 0;
484
488
  if (params.fParams.checksumFlag)
485
489
  XXH64_reset(&serialState->xxhState, 0);
486
- if (params.ldmParams.enableLdm) {
490
+ if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
487
491
  ZSTD_customMem cMem = params.customMem;
488
492
  unsigned const hashLog = params.ldmParams.hashLog;
489
493
  size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
490
494
  unsigned const bucketLog =
491
495
  params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
492
- size_t const bucketSize = (size_t)1 << bucketLog;
493
496
  unsigned const prevBucketLog =
494
497
  serialState->params.ldmParams.hashLog -
495
498
  serialState->params.ldmParams.bucketSizeLog;
499
+ size_t const numBuckets = (size_t)1 << bucketLog;
496
500
  /* Size the seq pool tables */
497
501
  ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
498
502
  /* Reset the window */
499
503
  ZSTD_window_init(&serialState->ldmState.window);
500
504
  /* Resize tables and output space if necessary. */
501
505
  if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
502
- ZSTD_free(serialState->ldmState.hashTable, cMem);
503
- serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem);
506
+ ZSTD_customFree(serialState->ldmState.hashTable, cMem);
507
+ serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_customMalloc(hashSize, cMem);
504
508
  }
505
509
  if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
506
- ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
507
- serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem);
510
+ ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
511
+ serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
508
512
  }
509
513
  if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
510
514
  return 1;
511
515
  /* Zero the tables */
512
- memset(serialState->ldmState.hashTable, 0, hashSize);
513
- memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
516
+ ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
517
+ ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
514
518
 
515
519
  /* Update window state and fill hash table with dict */
516
520
  serialState->ldmState.loadedDictEnd = 0;
517
521
  if (dictSize > 0) {
518
522
  if (dictContentType == ZSTD_dct_rawContent) {
519
523
  BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
520
- ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
524
+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
521
525
  ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
522
526
  serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
523
527
  } else {
@@ -537,7 +541,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
537
541
  static int ZSTDMT_serialState_init(serialState_t* serialState)
538
542
  {
539
543
  int initError = 0;
540
- memset(serialState, 0, sizeof(*serialState));
544
+ ZSTD_memset(serialState, 0, sizeof(*serialState));
541
545
  initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL);
542
546
  initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL);
543
547
  initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL);
@@ -552,8 +556,8 @@ static void ZSTDMT_serialState_free(serialState_t* serialState)
552
556
  ZSTD_pthread_cond_destroy(&serialState->cond);
553
557
  ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex);
554
558
  ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond);
555
- ZSTD_free(serialState->ldmState.hashTable, cMem);
556
- ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
559
+ ZSTD_customFree(serialState->ldmState.hashTable, cMem);
560
+ ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
557
561
  }
558
562
 
559
563
  static void ZSTDMT_serialState_update(serialState_t* serialState,
@@ -569,12 +573,12 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
569
573
  /* A future job may error and skip our job */
570
574
  if (serialState->nextJobID == jobID) {
571
575
  /* It is now our turn, do any processing necessary */
572
- if (serialState->params.ldmParams.enableLdm) {
576
+ if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
573
577
  size_t error;
574
578
  assert(seqStore.seq != NULL && seqStore.pos == 0 &&
575
579
  seqStore.size == 0 && seqStore.capacity > 0);
576
580
  assert(src.size <= serialState->params.jobSize);
577
- ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
581
+ ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
578
582
  error = ZSTD_ldm_generateSequences(
579
583
  &serialState->ldmState, &seqStore,
580
584
  &serialState->params.ldmParams, src.start, src.size);
@@ -599,7 +603,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
599
603
  if (seqStore.size > 0) {
600
604
  size_t const err = ZSTD_referenceExternalSequences(
601
605
  jobCCtx, seqStore.seq, seqStore.size);
602
- assert(serialState->params.ldmParams.enableLdm);
606
+ assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
603
607
  assert(!ZSTD_isError(err));
604
608
  (void)err;
605
609
  }
@@ -677,7 +681,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
677
681
  if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
678
682
  job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
679
683
  }
680
- if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
684
+ if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
681
685
  JOB_ERROR(ERROR(memory_allocation));
682
686
 
683
687
  /* Don't compute the checksum for chunks, since we compute it externally,
@@ -685,7 +689,9 @@ static void ZSTDMT_compressionJob(void* jobDescription)
685
689
  */
686
690
  if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
687
691
  /* Don't run LDM for the chunks, since we handle it externally */
688
- jobParams.ldmParams.enableLdm = 0;
692
+ jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
693
+ /* Correct nbWorkers to 0. */
694
+ jobParams.nbWorkers = 0;
689
695
 
690
696
 
691
697
  /* init */
@@ -698,6 +704,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
698
704
  { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
699
705
  if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
700
706
  }
707
+ if (!job->firstJob) {
708
+ size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
709
+ if (ZSTD_isError(err)) JOB_ERROR(err);
710
+ }
701
711
  { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
702
712
  job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
703
713
  ZSTD_dtlm_fast,
@@ -710,7 +720,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
710
720
  ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
711
721
 
712
722
  if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
713
- size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
723
+ size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
714
724
  if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
715
725
  DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
716
726
  ZSTD_invalidateRepCodes(cctx);
@@ -728,7 +738,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
728
738
  DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
729
739
  assert(job->cSize == 0);
730
740
  for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
731
- size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
741
+ size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
732
742
  if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
733
743
  ip += chunkSize;
734
744
  op += cSize; assert(op < oend);
@@ -748,11 +758,18 @@ static void ZSTDMT_compressionJob(void* jobDescription)
748
758
  size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
749
759
  size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
750
760
  size_t const cSize = (job->lastJob) ?
751
- ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
752
- ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
761
+ ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
762
+ ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
753
763
  if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
754
764
  lastCBlockSize = cSize;
755
765
  } }
766
+ if (!job->firstJob) {
767
+ /* Double check that we don't have an ext-dict, because then our
768
+ * repcode invalidation doesn't work.
769
+ */
770
+ assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
771
+ }
772
+ ZSTD_CCtx_trace(cctx, 0);
756
773
 
757
774
  _endJob:
758
775
  ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
@@ -799,6 +816,15 @@ typedef struct {
799
816
  static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
800
817
 
801
818
  #define RSYNC_LENGTH 32
819
+ /* Don't create chunks smaller than the zstd block size.
820
+ * This stops us from regressing compression ratio too much,
821
+ * and ensures our output fits in ZSTD_compressBound().
822
+ *
823
+ * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
824
+ * ZSTD_COMPRESSBOUND() will need to be updated.
825
+ */
826
+ #define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
827
+ #define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)
802
828
 
803
829
  typedef struct {
804
830
  U64 hash;
@@ -820,7 +846,6 @@ struct ZSTDMT_CCtx_s {
820
846
  roundBuff_t roundBuff;
821
847
  serialState_t serial;
822
848
  rsyncState_t rsync;
823
- unsigned singleBlockingThread;
824
849
  unsigned jobIDMask;
825
850
  unsigned doneJobID;
826
851
  unsigned nextJobID;
@@ -832,6 +857,7 @@ struct ZSTDMT_CCtx_s {
832
857
  ZSTD_customMem cMem;
833
858
  ZSTD_CDict* cdictLocal;
834
859
  const ZSTD_CDict* cdict;
860
+ unsigned providedFactory: 1;
835
861
  };
836
862
 
837
863
  static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
@@ -842,7 +868,7 @@ static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZS
842
868
  ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
843
869
  ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
844
870
  }
845
- ZSTD_free(jobTable, cMem);
871
+ ZSTD_customFree(jobTable, cMem);
846
872
  }
847
873
 
848
874
  /* ZSTDMT_allocJobsTable()
@@ -854,7 +880,7 @@ static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_custom
854
880
  U32 const nbJobs = 1 << nbJobsLog2;
855
881
  U32 jobNb;
856
882
  ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
857
- ZSTD_calloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
883
+ ZSTD_customCalloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
858
884
  int initError = 0;
859
885
  if (jobTable==NULL) return NULL;
860
886
  *nbJobsPtr = nbJobs;
@@ -885,12 +911,12 @@ static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
885
911
 
886
912
  /* ZSTDMT_CCtxParam_setNbWorkers():
887
913
  * Internal use only */
888
- size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
914
+ static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
889
915
  {
890
916
  return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
891
917
  }
892
918
 
893
- MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem)
919
+ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
894
920
  {
895
921
  ZSTDMT_CCtx* mtctx;
896
922
  U32 nbJobs = nbWorkers + 2;
@@ -903,16 +929,23 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
903
929
  /* invalid custom allocator */
904
930
  return NULL;
905
931
 
906
- mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
932
+ mtctx = (ZSTDMT_CCtx*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtx), cMem);
907
933
  if (!mtctx) return NULL;
908
934
  ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
909
935
  mtctx->cMem = cMem;
910
936
  mtctx->allJobsCompleted = 1;
911
- mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
937
+ if (pool != NULL) {
938
+ mtctx->factory = pool;
939
+ mtctx->providedFactory = 1;
940
+ }
941
+ else {
942
+ mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
943
+ mtctx->providedFactory = 0;
944
+ }
912
945
  mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
913
946
  assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
914
947
  mtctx->jobIDMask = nbJobs - 1;
915
- mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
948
+ mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
916
949
  mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
917
950
  mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
918
951
  initError = ZSTDMT_serialState_init(&mtctx->serial);
@@ -925,22 +958,18 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
925
958
  return mtctx;
926
959
  }
927
960
 
928
- ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
961
+ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
929
962
  {
930
963
  #ifdef ZSTD_MULTITHREAD
931
- return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem);
964
+ return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool);
932
965
  #else
933
966
  (void)nbWorkers;
934
967
  (void)cMem;
968
+ (void)pool;
935
969
  return NULL;
936
970
  #endif
937
971
  }
938
972
 
939
- ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
940
- {
941
- return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
942
- }
943
-
944
973
 
945
974
  /* ZSTDMT_releaseAllJobResources() :
946
975
  * note : ensure all workers are killed first ! */
@@ -957,7 +986,7 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
957
986
  ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
958
987
 
959
988
  /* Clear the job description, but keep the mutex/cond */
960
- memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
989
+ ZSTD_memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
961
990
  mtctx->jobs[jobID].job_mutex = mutex;
962
991
  mtctx->jobs[jobID].job_cond = cond;
963
992
  }
@@ -984,7 +1013,8 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
984
1013
  size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
985
1014
  {
986
1015
  if (mtctx==NULL) return 0; /* compatible with free on NULL */
987
- POOL_free(mtctx->factory); /* stop and free worker threads */
1016
+ if (!mtctx->providedFactory)
1017
+ POOL_free(mtctx->factory); /* stop and free worker threads */
988
1018
  ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */
989
1019
  ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
990
1020
  ZSTDMT_freeBufferPool(mtctx->bufPool);
@@ -993,8 +1023,8 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
993
1023
  ZSTDMT_serialState_free(&mtctx->serial);
994
1024
  ZSTD_freeCDict(mtctx->cdictLocal);
995
1025
  if (mtctx->roundBuff.buffer)
996
- ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
997
- ZSTD_free(mtctx, mtctx->cMem);
1026
+ ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem);
1027
+ ZSTD_customFree(mtctx, mtctx->cMem);
998
1028
  return 0;
999
1029
  }
1000
1030
 
@@ -1011,65 +1041,6 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
1011
1041
  + mtctx->roundBuff.capacity;
1012
1042
  }
1013
1043
 
1014
- /* Internal only */
1015
- size_t
1016
- ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
1017
- ZSTDMT_parameter parameter,
1018
- int value)
1019
- {
1020
- DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
1021
- switch(parameter)
1022
- {
1023
- case ZSTDMT_p_jobSize :
1024
- DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
1025
- return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value);
1026
- case ZSTDMT_p_overlapLog :
1027
- DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
1028
- return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value);
1029
- case ZSTDMT_p_rsyncable :
1030
- DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value);
1031
- return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value);
1032
- default :
1033
- return ERROR(parameter_unsupported);
1034
- }
1035
- }
1036
-
1037
- size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
1038
- {
1039
- DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
1040
- return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
1041
- }
1042
-
1043
- size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
1044
- {
1045
- switch (parameter) {
1046
- case ZSTDMT_p_jobSize:
1047
- return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value);
1048
- case ZSTDMT_p_overlapLog:
1049
- return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value);
1050
- case ZSTDMT_p_rsyncable:
1051
- return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value);
1052
- default:
1053
- return ERROR(parameter_unsupported);
1054
- }
1055
- }
1056
-
1057
- /* Sets parameters relevant to the compression job,
1058
- * initializing others to default values. */
1059
- static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
1060
- {
1061
- ZSTD_CCtx_params jobParams = *params;
1062
- /* Clear parameters related to multithreading */
1063
- jobParams.forceWindow = 0;
1064
- jobParams.nbWorkers = 0;
1065
- jobParams.jobSize = 0;
1066
- jobParams.overlapLog = 0;
1067
- jobParams.rsyncable = 0;
1068
- memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t));
1069
- memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem));
1070
- return jobParams;
1071
- }
1072
-
1073
1044
 
1074
1045
  /* ZSTDMT_resize() :
1075
1046
  * @return : error code if fails, 0 on success */
@@ -1077,7 +1048,7 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
1077
1048
  {
1078
1049
  if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
1079
1050
  FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
1080
- mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
1051
+ mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));
1081
1052
  if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
1082
1053
  mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
1083
1054
  if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
@@ -1098,7 +1069,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
1098
1069
  DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
1099
1070
  compressionLevel);
1100
1071
  mtctx->params.compressionLevel = compressionLevel;
1101
- { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
1072
+ { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
1102
1073
  cParams.windowLog = saved_wlog;
1103
1074
  mtctx->params.cParams = cParams;
1104
1075
  }
@@ -1182,11 +1153,11 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
1182
1153
  static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
1183
1154
  {
1184
1155
  unsigned jobLog;
1185
- if (params->ldmParams.enableLdm) {
1156
+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
1186
1157
  /* In Long Range Mode, the windowLog is typically oversized.
1187
1158
  * In which case, it's preferable to determine the jobSize
1188
- * based on chainLog instead. */
1189
- jobLog = MAX(21, params->cParams.chainLog + 4);
1159
+ * based on cycleLog instead. */
1160
+ jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3);
1190
1161
  } else {
1191
1162
  jobLog = MAX(20, params->cParams.windowLog + 2);
1192
1163
  }
@@ -1226,7 +1197,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
1226
1197
  int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
1227
1198
  int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
1228
1199
  assert(0 <= overlapRLog && overlapRLog <= 8);
1229
- if (params->ldmParams.enableLdm) {
1200
+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
1230
1201
  /* In Long Range Mode, the windowLog is typically oversized.
1231
1202
  * In which case, it's preferable to determine the jobSize
1232
1203
  * based on chainLog instead.
@@ -1240,174 +1211,6 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
1240
1211
  return (ovLog==0) ? 0 : (size_t)1 << ovLog;
1241
1212
  }
1242
1213
 
1243
- static unsigned
1244
- ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
1245
- {
1246
- assert(nbWorkers>0);
1247
- { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
1248
- size_t const jobMaxSize = jobSizeTarget << 2;
1249
- size_t const passSizeMax = jobMaxSize * nbWorkers;
1250
- unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
1251
- unsigned const nbJobsLarge = multiplier * nbWorkers;
1252
- unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1;
1253
- unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers);
1254
- return (multiplier>1) ? nbJobsLarge : nbJobsSmall;
1255
- } }
1256
-
1257
- /* ZSTDMT_compress_advanced_internal() :
1258
- * This is a blocking function : it will only give back control to caller after finishing its compression job.
1259
- */
1260
- static size_t
1261
- ZSTDMT_compress_advanced_internal(
1262
- ZSTDMT_CCtx* mtctx,
1263
- void* dst, size_t dstCapacity,
1264
- const void* src, size_t srcSize,
1265
- const ZSTD_CDict* cdict,
1266
- ZSTD_CCtx_params params)
1267
- {
1268
- ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
1269
- size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
1270
- unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
1271
- size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
1272
- size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
1273
- const char* const srcStart = (const char*)src;
1274
- size_t remainingSrcSize = srcSize;
1275
- unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize)); /* presumes avgJobSize >= 256 KB, which should be the case */
1276
- size_t frameStartPos = 0, dstBufferPos = 0;
1277
- assert(jobParams.nbWorkers == 0);
1278
- assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
1279
-
1280
- params.jobSize = (U32)avgJobSize;
1281
- DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ",
1282
- nbJobs, (U32)proposedJobSize, (U32)avgJobSize);
1283
-
1284
- if ((nbJobs==1) | (params.nbWorkers<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */
1285
- ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
1286
- DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
1287
- if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
1288
- return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
1289
- }
1290
-
1291
- assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
1292
- ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
1293
- /* LDM doesn't even try to load the dictionary in single-ingestion mode */
1294
- if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
1295
- return ERROR(memory_allocation);
1296
-
1297
- FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , ""); /* only expands if necessary */
1298
-
1299
- { unsigned u;
1300
- for (u=0; u<nbJobs; u++) {
1301
- size_t const jobSize = MIN(remainingSrcSize, avgJobSize);
1302
- size_t const dstBufferCapacity = ZSTD_compressBound(jobSize);
1303
- buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
1304
- buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
1305
- size_t dictSize = u ? overlapSize : 0;
1306
-
1307
- mtctx->jobs[u].prefix.start = srcStart + frameStartPos - dictSize;
1308
- mtctx->jobs[u].prefix.size = dictSize;
1309
- mtctx->jobs[u].src.start = srcStart + frameStartPos;
1310
- mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0); /* avoid job.src.size == 0 */
1311
- mtctx->jobs[u].consumed = 0;
1312
- mtctx->jobs[u].cSize = 0;
1313
- mtctx->jobs[u].cdict = (u==0) ? cdict : NULL;
1314
- mtctx->jobs[u].fullFrameSize = srcSize;
1315
- mtctx->jobs[u].params = jobParams;
1316
- /* do not calculate checksum within sections, but write it in header for first section */
1317
- mtctx->jobs[u].dstBuff = dstBuffer;
1318
- mtctx->jobs[u].cctxPool = mtctx->cctxPool;
1319
- mtctx->jobs[u].bufPool = mtctx->bufPool;
1320
- mtctx->jobs[u].seqPool = mtctx->seqPool;
1321
- mtctx->jobs[u].serial = &mtctx->serial;
1322
- mtctx->jobs[u].jobID = u;
1323
- mtctx->jobs[u].firstJob = (u==0);
1324
- mtctx->jobs[u].lastJob = (u==nbJobs-1);
1325
-
1326
- DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)jobSize);
1327
- DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12);
1328
- POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]);
1329
-
1330
- frameStartPos += jobSize;
1331
- dstBufferPos += dstBufferCapacity;
1332
- remainingSrcSize -= jobSize;
1333
- } }
1334
-
1335
- /* collect result */
1336
- { size_t error = 0, dstPos = 0;
1337
- unsigned jobID;
1338
- for (jobID=0; jobID<nbJobs; jobID++) {
1339
- DEBUGLOG(5, "waiting for job %u ", jobID);
1340
- ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
1341
- while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
1342
- DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID);
1343
- ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
1344
- }
1345
- ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
1346
- DEBUGLOG(5, "ready to write job %u ", jobID);
1347
-
1348
- { size_t const cSize = mtctx->jobs[jobID].cSize;
1349
- if (ZSTD_isError(cSize)) error = cSize;
1350
- if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
1351
- if (jobID) { /* note : job 0 is written directly at dst, which is correct position */
1352
- if (!error)
1353
- memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize); /* may overlap when job compressed within dst */
1354
- if (jobID >= compressWithinDst) { /* job compressed into its own buffer, which must be released */
1355
- DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst);
1356
- ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
1357
- } }
1358
- mtctx->jobs[jobID].dstBuff = g_nullBuffer;
1359
- mtctx->jobs[jobID].cSize = 0;
1360
- dstPos += cSize ;
1361
- }
1362
- } /* for (jobID=0; jobID<nbJobs; jobID++) */
1363
-
1364
- DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
1365
- if (params.fParams.checksumFlag) {
1366
- U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
1367
- if (dstPos + 4 > dstCapacity) {
1368
- error = ERROR(dstSize_tooSmall);
1369
- } else {
1370
- DEBUGLOG(4, "writing checksum : %08X \n", checksum);
1371
- MEM_writeLE32((char*)dst + dstPos, checksum);
1372
- dstPos += 4;
1373
- } }
1374
-
1375
- if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos);
1376
- return error ? error : dstPos;
1377
- }
1378
- }
1379
-
1380
- size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
1381
- void* dst, size_t dstCapacity,
1382
- const void* src, size_t srcSize,
1383
- const ZSTD_CDict* cdict,
1384
- ZSTD_parameters params,
1385
- int overlapLog)
1386
- {
1387
- ZSTD_CCtx_params cctxParams = mtctx->params;
1388
- cctxParams.cParams = params.cParams;
1389
- cctxParams.fParams = params.fParams;
1390
- assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
1391
- cctxParams.overlapLog = overlapLog;
1392
- return ZSTDMT_compress_advanced_internal(mtctx,
1393
- dst, dstCapacity,
1394
- src, srcSize,
1395
- cdict, cctxParams);
1396
- }
1397
-
1398
-
1399
- size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
1400
- void* dst, size_t dstCapacity,
1401
- const void* src, size_t srcSize,
1402
- int compressionLevel)
1403
- {
1404
- ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
1405
- int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
1406
- params.fParams.contentSizeFlag = 1;
1407
- return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
1408
- }
1409
-
1410
-
1411
1214
  /* ====================================== */
1412
1215
  /* ======= Streaming API ======= */
1413
1216
  /* ====================================== */
@@ -1432,16 +1235,6 @@ size_t ZSTDMT_initCStream_internal(
1432
1235
  if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1433
1236
  if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
1434
1237
 
1435
- mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
1436
- if (mtctx->singleBlockingThread) {
1437
- ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
1438
- DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
1439
- assert(singleThreadParams.nbWorkers == 0);
1440
- return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
1441
- dict, dictSize, cdict,
1442
- &singleThreadParams, pledgedSrcSize);
1443
- }
1444
-
1445
1238
  DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
1446
1239
 
1447
1240
  if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */
@@ -1475,9 +1268,11 @@ size_t ZSTDMT_initCStream_internal(
1475
1268
 
1476
1269
  if (params.rsyncable) {
1477
1270
  /* Aim for the targetsectionSize as the average job size. */
1478
- U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
1479
- U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
1480
- assert(jobSizeMB >= 1);
1271
+ U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
1272
+ U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
1273
+ /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
1274
+ * expected job size is at least 4x larger. */
1275
+ assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
1481
1276
  DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
1482
1277
  mtctx->rsync.hash = 0;
1483
1278
  mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
@@ -1489,7 +1284,7 @@ size_t ZSTDMT_initCStream_internal(
1489
1284
  ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
1490
1285
  {
1491
1286
  /* If ldm is enabled we need windowSize space. */
1492
- size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
1287
+ size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
1493
1288
  /* Two buffers of slack, plus extra space for the overlap
1494
1289
  * This is the minimum slack that LDM works with. One extra because
1495
1290
  * flush might waste up to targetSectionSize-1 bytes. Another extra
@@ -1504,8 +1299,8 @@ size_t ZSTDMT_initCStream_internal(
1504
1299
  size_t const capacity = MAX(windowSize, sectionsSize) + slackSize;
1505
1300
  if (mtctx->roundBuff.capacity < capacity) {
1506
1301
  if (mtctx->roundBuff.buffer)
1507
- ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
1508
- mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem);
1302
+ ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem);
1303
+ mtctx->roundBuff.buffer = (BYTE*)ZSTD_customMalloc(capacity, mtctx->cMem);
1509
1304
  if (mtctx->roundBuff.buffer == NULL) {
1510
1305
  mtctx->roundBuff.capacity = 0;
1511
1306
  return ERROR(memory_allocation);
@@ -1530,53 +1325,6 @@ size_t ZSTDMT_initCStream_internal(
1530
1325
  return 0;
1531
1326
  }
1532
1327
 
1533
- size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
1534
- const void* dict, size_t dictSize,
1535
- ZSTD_parameters params,
1536
- unsigned long long pledgedSrcSize)
1537
- {
1538
- ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */
1539
- DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
1540
- cctxParams.cParams = params.cParams;
1541
- cctxParams.fParams = params.fParams;
1542
- return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL,
1543
- cctxParams, pledgedSrcSize);
1544
- }
1545
-
1546
- size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
1547
- const ZSTD_CDict* cdict,
1548
- ZSTD_frameParameters fParams,
1549
- unsigned long long pledgedSrcSize)
1550
- {
1551
- ZSTD_CCtx_params cctxParams = mtctx->params;
1552
- if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
1553
- cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict);
1554
- cctxParams.fParams = fParams;
1555
- return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict,
1556
- cctxParams, pledgedSrcSize);
1557
- }
1558
-
1559
-
1560
- /* ZSTDMT_resetCStream() :
1561
- * pledgedSrcSize can be zero == unknown (for the time being)
1562
- * prefer using ZSTD_CONTENTSIZE_UNKNOWN,
1563
- * as `0` might mean "empty" in the future */
1564
- size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize)
1565
- {
1566
- if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
1567
- return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params,
1568
- pledgedSrcSize);
1569
- }
1570
-
1571
- size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
1572
- ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
1573
- ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */
1574
- DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
1575
- cctxParams.cParams = params.cParams;
1576
- cctxParams.fParams = params.fParams;
1577
- return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
1578
- }
1579
-
1580
1328
 
1581
1329
  /* ZSTDMT_writeLastEmptyBlock()
1582
1330
  * Write a single empty block with an end-of-frame to finish a frame.
@@ -1740,7 +1488,7 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
1740
1488
  assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
1741
1489
  assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
1742
1490
  if (toFlush > 0) {
1743
- memcpy((char*)output->dst + output->pos,
1491
+ ZSTD_memcpy((char*)output->dst + output->pos,
1744
1492
  (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
1745
1493
  toFlush);
1746
1494
  }
@@ -1811,17 +1559,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
1811
1559
  static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
1812
1560
  {
1813
1561
  BYTE const* const bufferStart = (BYTE const*)buffer.start;
1814
- BYTE const* const bufferEnd = bufferStart + buffer.capacity;
1815
1562
  BYTE const* const rangeStart = (BYTE const*)range.start;
1816
- BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
1817
1563
 
1818
1564
  if (rangeStart == NULL || bufferStart == NULL)
1819
1565
  return 0;
1820
- /* Empty ranges cannot overlap */
1821
- if (bufferStart == bufferEnd || rangeStart == rangeEnd)
1822
- return 0;
1823
1566
 
1824
- return bufferStart < rangeEnd && rangeStart < bufferEnd;
1567
+ {
1568
+ BYTE const* const bufferEnd = bufferStart + buffer.capacity;
1569
+ BYTE const* const rangeEnd = rangeStart + range.size;
1570
+
1571
+ /* Empty ranges cannot overlap */
1572
+ if (bufferStart == bufferEnd || rangeStart == rangeEnd)
1573
+ return 0;
1574
+
1575
+ return bufferStart < rangeEnd && rangeStart < bufferEnd;
1576
+ }
1825
1577
  }
1826
1578
 
1827
1579
  static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
@@ -1848,7 +1600,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
1848
1600
 
1849
1601
  static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
1850
1602
  {
1851
- if (mtctx->params.ldmParams.enableLdm) {
1603
+ if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
1852
1604
  ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
1853
1605
  DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
1854
1606
  DEBUGLOG(5, "source [0x%zx, 0x%zx)",
@@ -1894,7 +1646,7 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
1894
1646
  return 0;
1895
1647
  }
1896
1648
  ZSTDMT_waitForLdmComplete(mtctx, buffer);
1897
- memmove(start, mtctx->inBuff.prefix.start, prefixSize);
1649
+ ZSTD_memmove(start, mtctx->inBuff.prefix.start, prefixSize);
1898
1650
  mtctx->inBuff.prefix.start = start;
1899
1651
  mtctx->roundBuff.pos = prefixSize;
1900
1652
  }
@@ -1951,6 +1703,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1951
1703
  if (!mtctx->params.rsyncable)
1952
1704
  /* Rsync is disabled. */
1953
1705
  return syncPoint;
1706
+ if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
1707
+ /* We don't emit synchronization points if it would produce too small blocks.
1708
+ * We don't have enough input to find a synchronization point, so don't look.
1709
+ */
1710
+ return syncPoint;
1954
1711
  if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
1955
1712
  /* Not enough to compute the hash.
1956
1713
  * We will miss any synchronization points in this RSYNC_LENGTH byte
@@ -1961,23 +1718,41 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1961
1718
  */
1962
1719
  return syncPoint;
1963
1720
  /* Initialize the loop variables. */
1964
- if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
1965
- /* We have enough bytes buffered to initialize the hash.
1721
+ if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
1722
+ /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
1723
+ * because they can't possibly be a sync point. So we can start
1724
+ * part way through the input buffer.
1725
+ */
1726
+ pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
1727
+ if (pos >= RSYNC_LENGTH) {
1728
+ prev = istart + pos - RSYNC_LENGTH;
1729
+ hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
1730
+ } else {
1731
+ assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
1732
+ prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
1733
+ hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
1734
+ hash = ZSTD_rollingHash_append(hash, istart, pos);
1735
+ }
1736
+ } else {
1737
+ /* We have enough bytes buffered to initialize the hash,
1738
+ * and have processed enough bytes to find a sync point.
1966
1739
  * Start scanning at the beginning of the input.
1967
1740
  */
1741
+ assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
1742
+ assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
1968
1743
  pos = 0;
1969
1744
  prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
1970
1745
  hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
1971
- } else {
1972
- /* We don't have enough bytes buffered to initialize the hash, but
1973
- * we know we have at least RSYNC_LENGTH bytes total.
1974
- * Start scanning after the first RSYNC_LENGTH bytes less the bytes
1975
- * already buffered.
1976
- */
1977
- pos = RSYNC_LENGTH - mtctx->inBuff.filled;
1978
- prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
1979
- hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
1980
- hash = ZSTD_rollingHash_append(hash, istart, pos);
1746
+ if ((hash & hitMask) == hitMask) {
1747
+ /* We're already at a sync point so don't load any more until
1748
+ * we're able to flush this sync point.
1749
+ * This likely happened because the job table was full so we
1750
+ * couldn't add our job.
1751
+ */
1752
+ syncPoint.toLoad = 0;
1753
+ syncPoint.flush = 1;
1754
+ return syncPoint;
1755
+ }
1981
1756
  }
1982
1757
  /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
1983
1758
  * through the input. If we hit a synchronization point, then cut the
@@ -1987,16 +1762,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1987
1762
  * then a block will be emitted anyways, but this is okay, since if we
1988
1763
  * are already synchronized we will remain synchronized.
1989
1764
  */
1765
+ assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
1990
1766
  for (; pos < syncPoint.toLoad; ++pos) {
1991
1767
  BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
1992
- /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
1768
+ /* This assert is very expensive, and Debian compiles with asserts enabled.
1769
+ * So disable it for now. We can get similar coverage by checking it at the
1770
+ * beginning & end of the loop.
1771
+ * assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
1772
+ */
1993
1773
  hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
1774
+ assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
1994
1775
  if ((hash & hitMask) == hitMask) {
1995
1776
  syncPoint.toLoad = pos + 1;
1996
1777
  syncPoint.flush = 1;
1778
+ ++pos; /* for assert */
1997
1779
  break;
1998
1780
  }
1999
1781
  }
1782
+ assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
2000
1783
  return syncPoint;
2001
1784
  }
2002
1785
 
@@ -2022,34 +1805,11 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
2022
1805
  assert(output->pos <= output->size);
2023
1806
  assert(input->pos <= input->size);
2024
1807
 
2025
- if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */
2026
- return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp);
2027
- }
2028
-
2029
1808
  if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
2030
1809
  /* current frame being ended. Only flush/end are allowed */
2031
1810
  return ERROR(stage_wrong);
2032
1811
  }
2033
1812
 
2034
- /* single-pass shortcut (note : synchronous-mode) */
2035
- if ( (!mtctx->params.rsyncable) /* rsyncable mode is disabled */
2036
- && (mtctx->nextJobID == 0) /* just started */
2037
- && (mtctx->inBuff.filled == 0) /* nothing buffered */
2038
- && (!mtctx->jobReady) /* no job already created */
2039
- && (endOp == ZSTD_e_end) /* end order */
2040
- && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */
2041
- size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
2042
- (char*)output->dst + output->pos, output->size - output->pos,
2043
- (const char*)input->src + input->pos, input->size - input->pos,
2044
- mtctx->cdict, mtctx->params);
2045
- if (ZSTD_isError(cSize)) return cSize;
2046
- input->pos = input->size;
2047
- output->pos += cSize;
2048
- mtctx->allJobsCompleted = 1;
2049
- mtctx->frameEnded = 1;
2050
- return 0;
2051
- }
2052
-
2053
1813
  /* fill input buffer */
2054
1814
  if ( (!mtctx->jobReady)
2055
1815
  && (input->size > input->pos) ) { /* support NULL input */
@@ -2072,13 +1832,21 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
2072
1832
  assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
2073
1833
  DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
2074
1834
  (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
2075
- memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
1835
+ ZSTD_memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
2076
1836
  input->pos += syncPoint.toLoad;
2077
1837
  mtctx->inBuff.filled += syncPoint.toLoad;
2078
1838
  forwardInputProgress = syncPoint.toLoad>0;
2079
1839
  }
2080
- if ((input->pos < input->size) && (endOp == ZSTD_e_end))
2081
- endOp = ZSTD_e_flush; /* can't end now : not all input consumed */
1840
+ }
1841
+ if ((input->pos < input->size) && (endOp == ZSTD_e_end)) {
1842
+ /* Can't end yet because the input is not fully consumed.
1843
+ * We are in one of these cases:
1844
+ * - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job.
1845
+ * - We filled the input buffer: flush this job but don't end the frame.
1846
+ * - We hit a synchronization point: flush this job but don't end the frame.
1847
+ */
1848
+ assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable);
1849
+ endOp = ZSTD_e_flush;
2082
1850
  }
2083
1851
 
2084
1852
  if ( (mtctx->jobReady)
@@ -2097,47 +1865,3 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
2097
1865
  return remainingToFlush;
2098
1866
  }
2099
1867
  }
2100
-
2101
-
2102
- size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
2103
- {
2104
- FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");
2105
-
2106
- /* recommended next input size : fill current input buffer */
2107
- return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
2108
- }
2109
-
2110
-
2111
- static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame)
2112
- {
2113
- size_t const srcSize = mtctx->inBuff.filled;
2114
- DEBUGLOG(5, "ZSTDMT_flushStream_internal");
2115
-
2116
- if ( mtctx->jobReady /* one job ready for a worker to pick up */
2117
- || (srcSize > 0) /* still some data within input buffer */
2118
- || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
2119
- DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
2120
- (U32)srcSize, (U32)endFrame);
2121
- FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
2122
- }
2123
-
2124
- /* check if there is any data available to flush */
2125
- return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame);
2126
- }
2127
-
2128
-
2129
- size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
2130
- {
2131
- DEBUGLOG(5, "ZSTDMT_flushStream");
2132
- if (mtctx->singleBlockingThread)
2133
- return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
2134
- return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush);
2135
- }
2136
-
2137
- size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
2138
- {
2139
- DEBUGLOG(4, "ZSTDMT_endStream");
2140
- if (mtctx->singleBlockingThread)
2141
- return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
2142
- return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end);
2143
- }