extzstd 0.2 → 0.3

Files changed (88)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
data/contrib/zstd/lib/compress/zstd_opt.h
@@ -15,13 +15,39 @@
 extern "C" {
 #endif
 
-#include "zstd.h"   /* ZSTD_CCtx, size_t */
+#include "zstd_compress_internal.h"
 
-size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+/* used in ZSTD_loadDictionaryContent() */
+void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
 
-size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+/* note : no btultra2 variant for extDict nor dictMatchState,
+ * because btultra2 is not meant to work with dictionaries
+ * and is only specific for the first block (no prefix) */
 
 #if defined (__cplusplus)
 }
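The hunk above replaces the old ZSTD_CCtx*-based prototypes with entry points that receive the match state, sequence store, and repcode array explicitly. A minimal sketch of a call under the new shape, assuming the internal types from zstd_compress_internal.h; the wrapper name is hypothetical and only illustrates the calling convention:

/* Hypothetical illustration, not part of the diff. */
#include "zstd_compress_internal.h"   /* ZSTD_matchState_t, seqStore_t, U32, ZSTD_REP_NUM */
#include "zstd_opt.h"

static size_t compressOneBlock_btopt(ZSTD_matchState_t* ms,
                                     seqStore_t* seqStore,
                                     U32 rep[ZSTD_REP_NUM],
                                     const void* src, size_t srcSize)
{
    /* Fills seqStore with the sequences found in src; the caller then
     * entropy-codes them. Presumably returns the size of the trailing
     * literals run, like the other ZSTD_compressBlock_* variants. */
    return ZSTD_compressBlock_btopt(ms, seqStore, rep, src, srcSize);
}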
data/contrib/zstd/lib/compress/zstdmt_compress.c
@@ -9,38 +9,46 @@
 */
 
 
-/* ======   Tuning parameters   ====== */
-#define ZSTDMT_NBTHREADS_MAX 200
-#define ZSTDMT_OVERLAPLOG_DEFAULT 6
-
-
 /* ======   Compiler specifics   ====== */
 #if defined(_MSC_VER)
 #  pragma warning(disable : 4204)   /* disable: C4204: non-constant aggregate initializer */
 #endif
 
 
+/* ======   Constants   ====== */
+#define ZSTDMT_OVERLAPLOG_DEFAULT 0
+
+
 /* ======   Dependencies   ====== */
 #include <string.h>      /* memcpy, memset */
+#include <limits.h>      /* INT_MAX, UINT_MAX */
+#include "mem.h"         /* MEM_STATIC */
 #include "pool.h"        /* threadpool */
 #include "threading.h"   /* mutex */
 #include "zstd_compress_internal.h"   /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstd_ldm.h"
 #include "zstdmt_compress.h"
 
+/* Guards code to support resizing the SeqPool.
+ * We will want to resize the SeqPool to save memory in the future.
+ * Until then, comment the code out since it is unused.
+ */
+#define ZSTD_RESIZE_SEQPOOL 0
 
 /* ======   Debug   ====== */
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \
+    && !defined(_MSC_VER) \
+    && !defined(__MINGW32__)
 
 #  include <stdio.h>
 #  include <unistd.h>
 #  include <sys/times.h>
-#  define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
 
 #  define DEBUG_PRINTHEX(l,p,n) {                                 \
       unsigned debug_u;                                           \
      for (debug_u=0; debug_u<(n); debug_u++)                     \
-          DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
-      DEBUGLOGRAW(l, " \n");                                      \
+          RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+      RAWLOG(l, " \n");                                           \
  }
 
 static unsigned long long GetCurrentClockTimeMicroseconds(void)
@@ -48,13 +56,13 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
    static clock_t _ticksPerSecond = 0;
    if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
 
-   { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
-     return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
-}
+   {   struct tms junk;  clock_t newTicks = (clock_t) times(&junk);
+       return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
+}  }
 
 #define MUTEX_WAIT_TIME_DLEVEL 6
 #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) {          \
-    if (ZSTD_DEBUG >= MUTEX_WAIT_TIME_DLEVEL) {   \
+    if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {   \
        unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
        ZSTD_pthread_mutex_lock(mutex);           \
        {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
@@ -81,7 +89,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
 
 typedef struct buffer_s {
     void* start;
-    size_t size;
+    size_t capacity;
 } buffer_t;
 
 static const buffer_t g_nullBuffer = { NULL, 0 };
@@ -95,9 +103,9 @@ typedef struct ZSTDMT_bufferPool_s {
     buffer_t bTable[1];   /* variable size */
 } ZSTDMT_bufferPool;
 
-static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
 {
-    unsigned const maxNbBuffers = 2*nbThreads + 3;
+    unsigned const maxNbBuffers = 2*nbWorkers + 3;
     ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
         sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
     if (bufPool==NULL) return NULL;
@@ -129,17 +137,21 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
 static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
 {
     size_t const poolSize = sizeof(*bufPool)
-                          + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+                            + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
     unsigned u;
     size_t totalBufferSize = 0;
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     for (u=0; u<bufPool->totalBuffers; u++)
-        totalBufferSize += bufPool->bTable[u].size;
+        totalBufferSize += bufPool->bTable[u].capacity;
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
 
     return poolSize + totalBufferSize;
 }
 
+/* ZSTDMT_setBufferSize() :
+ * all future buffers provided by this buffer pool will have _at least_ this size
+ * note : it's better for all buffers to have same size,
+ * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */
 static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize)
 {
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
@@ -148,8 +160,29 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
 }
 
+
+static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
+{
+    unsigned const maxNbBuffers = 2*nbWorkers + 3;
+    if (srcBufPool==NULL) return NULL;
+    if (srcBufPool->totalBuffers >= maxNbBuffers)   /* good enough */
+        return srcBufPool;
+    /* need a larger buffer pool */
+    {   ZSTD_customMem const cMem = srcBufPool->cMem;
+        size_t const bSize = srcBufPool->bufferSize;   /* forward parameters */
+        ZSTDMT_bufferPool* newBufPool;
+        ZSTDMT_freeBufferPool(srcBufPool);
+        newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+        if (newBufPool==NULL) return newBufPool;
+        ZSTDMT_setBufferSize(newBufPool, bSize);
+        return newBufPool;
+    }
+}
+
 /** ZSTDMT_getBuffer() :
- *  assumption : bufPool must be valid */
+ *  assumption : bufPool must be valid
+ * @return : a buffer, with start pointer and size
+ *  note: allocation may fail, in this case, start==NULL and size==0 */
 static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
 {
     size_t const bSize = bufPool->bufferSize;
@@ -157,12 +190,12 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers) {   /* try to use an existing buffer */
         buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
-        size_t const availBufferSize = buf.size;
+        size_t const availBufferSize = buf.capacity;
         bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
         if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
             /* large enough, but not too much */
             DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
-                        bufPool->nbBuffers, (U32)buf.size);
+                        bufPool->nbBuffers, (U32)buf.capacity);
             ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
             return buf;
         }
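For reference, the acceptance test above keeps a cached buffer only when its capacity lies between the requested size and roughly eight times it. An illustrative walk-through of the condition (not part of the diff), with bSize = 64 KB:

/* Illustrative only, not part of the diff.
 * Condition: (availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)
 * With bSize = 64 KB (65536):
 *   availBufferSize =  40 KB -> fails availBufferSize >= bSize       : rejected
 *   availBufferSize = 256 KB -> 256K >= 64K, and 256K>>3 = 32K <= 64K : reused
 *   availBufferSize =   1 MB -> 1M>>3 = 128K > 64K                    : rejected
 * Rejected buffers are not returned; as the next hunk shows, a fresh buffer
 * of exactly bSize is allocated instead, so the pool converges on buffers
 * close to the currently requested size. */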
@@ -176,22 +209,52 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
     {   buffer_t buffer;
         void* const start = ZSTD_malloc(bSize, bufPool->cMem);
         buffer.start = start;   /* note : start can be NULL if malloc fails ! */
-        buffer.size = (start==NULL) ? 0 : bSize;
-        DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
+        buffer.capacity = (start==NULL) ? 0 : bSize;
+        if (start==NULL) {
+            DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!");
+        } else {
+            DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
+        }
         return buffer;
     }
 }
 
+#if ZSTD_RESIZE_SEQPOOL
+/** ZSTDMT_resizeBuffer() :
+ * assumption : bufPool must be valid
+ * @return : a buffer that is at least the buffer pool buffer size.
+ *           If a reallocation happens, the data in the input buffer is copied.
+ */
+static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
+{
+    size_t const bSize = bufPool->bufferSize;
+    if (buffer.capacity < bSize) {
+        void* const start = ZSTD_malloc(bSize, bufPool->cMem);
+        buffer_t newBuffer;
+        newBuffer.start = start;
+        newBuffer.capacity = start == NULL ? 0 : bSize;
+        if (start != NULL) {
+            assert(newBuffer.capacity >= buffer.capacity);
+            memcpy(newBuffer.start, buffer.start, buffer.capacity);
+            DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
+            return newBuffer;
+        }
+        DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!");
+    }
+    return buffer;
+}
+#endif
+
 /* store buffer for later re-use, up to pool capacity */
 static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
 {
-    if (buf.start == NULL) return;   /* compatible with release on NULL */
     DEBUGLOG(5, "ZSTDMT_releaseBuffer");
+    if (buf.start == NULL) return;   /* compatible with release on NULL */
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers < bufPool->totalBuffers) {
         bufPool->bTable[bufPool->nbBuffers++] = buf;   /* stored for later use */
         DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
-                    (U32)buf.size, (U32)(bufPool->nbBuffers-1));
+                    (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
         ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
         return;
     }
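Taken together, ZSTDMT_getBuffer() and ZSTDMT_releaseBuffer() form a simple borrow/return protocol around a common size set by ZSTDMT_setBufferSize(). A minimal sketch of that usage, assuming the static helpers from this file; the wrapper function is hypothetical:

/* Hypothetical illustration, not part of the diff. */
static void bufferPool_roundtrip_example(ZSTD_customMem cMem)
{
    ZSTDMT_bufferPool* const pool = ZSTDMT_createBufferPool(2 /*nbWorkers*/, cMem);
    if (pool == NULL) return;
    ZSTDMT_setBufferSize(pool, (size_t)1 << 20);   /* future buffers >= 1 MB */
    {   buffer_t buf = ZSTDMT_getBuffer(pool);     /* buf.start==NULL on allocation failure */
        if (buf.start != NULL) {
            /* ... use up to buf.capacity bytes at buf.start ... */
        }
        ZSTDMT_releaseBuffer(pool, buf);           /* cached for reuse, or freed if the pool is full */
    }
    ZSTDMT_freeBufferPool(pool);
}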
@@ -201,28 +264,85 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
     ZSTD_free(buf.start, bufPool->cMem);
 }
 
-/* Sets parameters relevant to the compression job, initializing others to
- * default values. Notably, nbThreads should probably be zero. */
-static ZSTD_CCtx_params ZSTDMT_makeJobCCtxParams(ZSTD_CCtx_params const params)
+
+/* =====   Seq Pool Wrapper   ====== */
+
+static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0};
+
+typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
+
+static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
+{
+    return ZSTDMT_sizeof_bufferPool(seqPool);
+}
+
+static rawSeqStore_t bufferToSeq(buffer_t buffer)
 {
-    ZSTD_CCtx_params jobParams;
-    memset(&jobParams, 0, sizeof(jobParams));
+    rawSeqStore_t seq = {NULL, 0, 0, 0};
+    seq.seq = (rawSeq*)buffer.start;
+    seq.capacity = buffer.capacity / sizeof(rawSeq);
+    return seq;
+}
 
-    jobParams.cParams = params.cParams;
-    jobParams.fParams = params.fParams;
-    jobParams.compressionLevel = params.compressionLevel;
+static buffer_t seqToBuffer(rawSeqStore_t seq)
+{
+    buffer_t buffer;
+    buffer.start = seq.seq;
+    buffer.capacity = seq.capacity * sizeof(rawSeq);
+    return buffer;
+}
 
-    jobParams.ldmParams = params.ldmParams;
-    return jobParams;
+static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
+{
+    if (seqPool->bufferSize == 0) {
+        return kNullRawSeqStore;
+    }
+    return bufferToSeq(ZSTDMT_getBuffer(seqPool));
+}
+
+#if ZSTD_RESIZE_SEQPOOL
+static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+    return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq)));
+}
+#endif
+
+static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+    ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
+}
+
+static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
+{
+    ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq));
+}
+
+static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+    ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+    if (seqPool == NULL) return NULL;
+    ZSTDMT_setNbSeq(seqPool, 0);
+    return seqPool;
+}
+
+static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
+{
+    ZSTDMT_freeBufferPool(seqPool);
 }
 
+static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
+{
+    return ZSTDMT_expandBufferPool(pool, nbWorkers);
+}
+
+
 /* =====   CCtx Pool   ===== */
 /* a single CCtx Pool can be invoked from multiple threads in parallel */
 
 typedef struct {
     ZSTD_pthread_mutex_t poolMutex;
-    unsigned totalCCtx;
-    unsigned availCCtx;
+    int totalCCtx;
+    int availCCtx;
     ZSTD_customMem cMem;
     ZSTD_CCtx* cctx[1];   /* variable size */
 } ZSTDMT_CCtxPool;
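The seq pool introduced above is a thin wrapper: ZSTDMT_seqPool is a typedef for ZSTDMT_bufferPool, and bufferToSeq()/seqToBuffer() are inverse views over the same allocation, which lets the LDM sequence storage reuse all of the buffer-pool machinery. A small sketch of that invariant; the helper function is hypothetical:

/* Hypothetical illustration, not part of the diff. */
static void seqPool_roundtrip_example(ZSTDMT_seqPool* seqPool)
{
    /* ZSTDMT_getSeq() returns a rawSeqStore_t whose capacity is
     * bufferSize / sizeof(rawSeq), with pos==0 and size==0; it returns
     * kNullRawSeqStore while the pool's buffer size is still 0. */
    rawSeqStore_t seq = ZSTDMT_getSeq(seqPool);
    /* ... a worker fills seq.seq[0..seq.size) with raw sequences ... */
    ZSTDMT_releaseSeq(seqPool, seq);   /* seqToBuffer() recovers the underlying buffer_t */
}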
@@ -230,47 +350,61 @@ typedef struct {
 /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 {
-    unsigned u;
-    for (u=0; u<pool->totalCCtx; u++)
-        ZSTD_freeCCtx(pool->cctx[u]);   /* note : compatible with free on NULL */
+    int cid;
+    for (cid=0; cid<pool->totalCCtx; cid++)
+        ZSTD_freeCCtx(pool->cctx[cid]);   /* note : compatible with free on NULL */
     ZSTD_pthread_mutex_destroy(&pool->poolMutex);
     ZSTD_free(pool, pool->cMem);
 }
 
 /* ZSTDMT_createCCtxPool() :
- * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
-static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
+ * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
                                               ZSTD_customMem cMem)
 {
     ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
-        sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
+        sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+    assert(nbWorkers > 0);
     if (!cctxPool) return NULL;
     if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
         ZSTD_free(cctxPool, cMem);
         return NULL;
     }
     cctxPool->cMem = cMem;
-    cctxPool->totalCCtx = nbThreads;
+    cctxPool->totalCCtx = nbWorkers;
     cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
     cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
     if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
-    DEBUGLOG(3, "cctxPool created, with %u threads", nbThreads);
+    DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
     return cctxPool;
 }
 
+static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
+                                              int nbWorkers)
+{
+    if (srcPool==NULL) return NULL;
+    if (nbWorkers <= srcPool->totalCCtx) return srcPool;   /* good enough */
+    /* need a larger cctx pool */
+    {   ZSTD_customMem const cMem = srcPool->cMem;
+        ZSTDMT_freeCCtxPool(srcPool);
+        return ZSTDMT_createCCtxPool(nbWorkers, cMem);
+    }
+}
+
 /* only works during initialization phase, not during compression */
 static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
 {
     ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
-    {   unsigned const nbThreads = cctxPool->totalCCtx;
+    {   unsigned const nbWorkers = cctxPool->totalCCtx;
         size_t const poolSize = sizeof(*cctxPool)
-                                + (nbThreads-1)*sizeof(ZSTD_CCtx*);
+                                + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
         unsigned u;
         size_t totalCCtxSize = 0;
-        for (u=0; u<nbThreads; u++) {
+        for (u=0; u<nbWorkers; u++) {
             totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
         }
         ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+        assert(nbWorkers > 0);
         return poolSize + totalCCtxSize;
    }
 }
@@ -297,111 +431,322 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
     if (pool->availCCtx < pool->totalCCtx)
         pool->cctx[pool->availCCtx++] = cctx;
     else {
-        /* pool overflow : should not happen, since totalCCtx==nbThreads */
-        DEBUGLOG(5, "CCtx pool overflow : free cctx");
+        /* pool overflow : should not happen, since totalCCtx==nbWorkers */
+        DEBUGLOG(4, "CCtx pool overflow : free cctx");
         ZSTD_freeCCtx(cctx);
     }
     ZSTD_pthread_mutex_unlock(&pool->poolMutex);
 }
 
+/* ====   Serial State   ==== */
 
-/* =====   Thread worker   ===== */
+typedef struct {
+    void const* start;
+    size_t size;
+} range_t;
 
 typedef struct {
-    buffer_t src;
-    const void* srcStart;
-    size_t   prefixSize;
-    size_t   srcSize;
-    buffer_t dstBuff;
-    size_t   cSize;
-    size_t   dstFlushed;
-    unsigned firstChunk;
-    unsigned lastChunk;
-    unsigned jobCompleted;
-    unsigned jobScanned;
-    ZSTD_pthread_mutex_t* jobCompleted_mutex;
-    ZSTD_pthread_cond_t* jobCompleted_cond;
+    /* All variables in the struct are protected by mutex. */
+    ZSTD_pthread_mutex_t mutex;
+    ZSTD_pthread_cond_t cond;
     ZSTD_CCtx_params params;
-    const ZSTD_CDict* cdict;
-    ZSTDMT_CCtxPool* cctxPool;
-    ZSTDMT_bufferPool* bufPool;
-    unsigned long long fullFrameSize;
+    ldmState_t ldmState;
+    XXH64_state_t xxhState;
+    unsigned nextJobID;
+    /* Protects ldmWindow.
+     * Must be acquired after the main mutex when acquiring both.
+     */
+    ZSTD_pthread_mutex_t ldmWindowMutex;
+    ZSTD_pthread_cond_t ldmWindowCond;   /* Signaled when ldmWindow is updated */
+    ZSTD_window_t ldmWindow;             /* A thread-safe copy of ldmState.window */
+} serialState_t;
+
+static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
+{
+    /* Adjust parameters */
+    if (params.ldmParams.enableLdm) {
+        DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
+        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+        assert(params.ldmParams.hashRateLog < 32);
+        serialState->ldmState.hashPower =
+                ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
+    } else {
+        memset(&params.ldmParams, 0, sizeof(params.ldmParams));
+    }
+    serialState->nextJobID = 0;
+    if (params.fParams.checksumFlag)
+        XXH64_reset(&serialState->xxhState, 0);
+    if (params.ldmParams.enableLdm) {
+        ZSTD_customMem cMem = params.customMem;
+        unsigned const hashLog = params.ldmParams.hashLog;
+        size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
+        unsigned const bucketLog =
+            params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
+        size_t const bucketSize = (size_t)1 << bucketLog;
+        unsigned const prevBucketLog =
+            serialState->params.ldmParams.hashLog -
+            serialState->params.ldmParams.bucketSizeLog;
+        /* Size the seq pool tables */
+        ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
+        /* Reset the window */
+        ZSTD_window_clear(&serialState->ldmState.window);
+        serialState->ldmWindow = serialState->ldmState.window;
+        /* Resize tables and output space if necessary. */
+        if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
+            ZSTD_free(serialState->ldmState.hashTable, cMem);
+            serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem);
+        }
+        if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
+            ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
+            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem);
+        }
+        if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
+            return 1;
+        /* Zero the tables */
+        memset(serialState->ldmState.hashTable, 0, hashSize);
+        memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+    }
+    serialState->params = params;
+    serialState->params.jobSize = (U32)jobSize;
+    return 0;
+}
+
+static int ZSTDMT_serialState_init(serialState_t* serialState)
+{
+    int initError = 0;
+    memset(serialState, 0, sizeof(*serialState));
+    initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL);
+    initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL);
+    initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL);
+    initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL);
+    return initError;
+}
+
+static void ZSTDMT_serialState_free(serialState_t* serialState)
+{
+    ZSTD_customMem cMem = serialState->params.customMem;
+    ZSTD_pthread_mutex_destroy(&serialState->mutex);
+    ZSTD_pthread_cond_destroy(&serialState->cond);
+    ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex);
+    ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond);
+    ZSTD_free(serialState->ldmState.hashTable, cMem);
+    ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
+}
+
+static void ZSTDMT_serialState_update(serialState_t* serialState,
+                                      ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore,
+                                      range_t src, unsigned jobID)
+{
+    /* Wait for our turn */
+    ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
+    while (serialState->nextJobID < jobID) {
+        DEBUGLOG(5, "wait for serialState->cond");
+        ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex);
+    }
+    /* A future job may error and skip our job */
+    if (serialState->nextJobID == jobID) {
+        /* It is now our turn, do any processing necessary */
+        if (serialState->params.ldmParams.enableLdm) {
+            size_t error;
+            assert(seqStore.seq != NULL && seqStore.pos == 0 &&
+                   seqStore.size == 0 && seqStore.capacity > 0);
+            assert(src.size <= serialState->params.jobSize);
+            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+            error = ZSTD_ldm_generateSequences(
+                &serialState->ldmState, &seqStore,
+                &serialState->params.ldmParams, src.start, src.size);
+            /* We provide a large enough buffer to never fail. */
+            assert(!ZSTD_isError(error)); (void)error;
+            /* Update ldmWindow to match the ldmState.window and signal the main
+             * thread if it is waiting for a buffer.
+             */
+            ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
+            serialState->ldmWindow = serialState->ldmState.window;
+            ZSTD_pthread_cond_signal(&serialState->ldmWindowCond);
+            ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
+        }
+        if (serialState->params.fParams.checksumFlag && src.size > 0)
+            XXH64_update(&serialState->xxhState, src.start, src.size);
+    }
+    /* Now it is the next jobs turn */
+    serialState->nextJobID++;
+    ZSTD_pthread_cond_broadcast(&serialState->cond);
+    ZSTD_pthread_mutex_unlock(&serialState->mutex);
+
+    if (seqStore.size > 0) {
+        size_t const err = ZSTD_referenceExternalSequences(
+            jobCCtx, seqStore.seq, seqStore.size);
+        assert(serialState->params.ldmParams.enableLdm);
+        assert(!ZSTD_isError(err));
+        (void)err;
+    }
+}
+
+static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState,
+                                              unsigned jobID, size_t cSize)
+{
+    ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
+    if (serialState->nextJobID <= jobID) {
+        assert(ZSTD_isError(cSize)); (void)cSize;
+        DEBUGLOG(5, "Skipping past job %u because of error", jobID);
+        serialState->nextJobID = jobID + 1;
+        ZSTD_pthread_cond_broadcast(&serialState->cond);
+
+        ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
+        ZSTD_window_clear(&serialState->ldmWindow);
+        ZSTD_pthread_cond_signal(&serialState->ldmWindowCond);
+        ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
+    }
+    ZSTD_pthread_mutex_unlock(&serialState->mutex);
+
+}
+
+
+/* ------------------------------------------ */
+/* =====          Worker thread         ===== */
+/* ------------------------------------------ */
+
+static const range_t kNullRange = { NULL, 0 };
+
+typedef struct {
+    size_t   consumed;                 /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
+    size_t   cSize;                    /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */
+    ZSTD_pthread_mutex_t job_mutex;    /* Thread-safe - used by mtctx and worker */
+    ZSTD_pthread_cond_t job_cond;      /* Thread-safe - used by mtctx and worker */
+    ZSTDMT_CCtxPool* cctxPool;         /* Thread-safe - used by mtctx and (all) workers */
+    ZSTDMT_bufferPool* bufPool;        /* Thread-safe - used by mtctx and (all) workers */
+    ZSTDMT_seqPool* seqPool;           /* Thread-safe - used by mtctx and (all) workers */
+    serialState_t* serial;             /* Thread-safe - used by mtctx and (all) workers */
+    buffer_t dstBuff;                  /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */
+    range_t prefix;                    /* set by mtctx, then read by worker & mtctx => no barrier */
+    range_t src;                       /* set by mtctx, then read by worker & mtctx => no barrier */
+    unsigned jobID;                    /* set by mtctx, then read by worker => no barrier */
+    unsigned firstJob;                 /* set by mtctx, then read by worker => no barrier */
+    unsigned lastJob;                  /* set by mtctx, then read by worker => no barrier */
+    ZSTD_CCtx_params params;           /* set by mtctx, then read by worker => no barrier */
+    const ZSTD_CDict* cdict;           /* set by mtctx, then read by worker => no barrier */
+    unsigned long long fullFrameSize;  /* set by mtctx, then read by worker => no barrier */
+    size_t   dstFlushed;               /* used only by mtctx */
+    unsigned frameChecksumNeeded;      /* used only by mtctx */
 } ZSTDMT_jobDescription;
 
-/* ZSTDMT_compressChunk() : POOL_function type */
-void ZSTDMT_compressChunk(void* jobDescription)
+#define JOB_ERROR(e) {                          \
+    ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   \
+    job->cSize = e;                             \
+    ZSTD_pthread_mutex_unlock(&job->job_mutex); \
+    goto _endJob;                               \
+}
+
+/* ZSTDMT_compressionJob() is a POOL_function type */
+static void ZSTDMT_compressionJob(void* jobDescription)
 {
     ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
+    ZSTD_CCtx_params jobParams = job->params;   /* do not modify job->params ! copy it, modify the copy */
     ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
-    const void* const src = (const char*)job->srcStart + job->prefixSize;
+    rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
     buffer_t dstBuff = job->dstBuff;
-    DEBUGLOG(5, "ZSTDMT_compressChunk: job (first:%u) (last:%u) : prefixSize %u, srcSize %u ",
-                 job->firstChunk, job->lastChunk, (U32)job->prefixSize, (U32)job->srcSize);
+    size_t lastCBlockSize = 0;
 
-    if (cctx==NULL) {
-        job->cSize = ERROR(memory_allocation);
-        goto _endJob;
-    }
-
-    if (dstBuff.start == NULL) {
+    /* resources */
+    if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation));
+    if (dstBuff.start == NULL) {   /* streaming job : doesn't provide a dstBuffer */
         dstBuff = ZSTDMT_getBuffer(job->bufPool);
-        if (dstBuff.start==NULL) {
-            job->cSize = ERROR(memory_allocation);
-            goto _endJob;
-        }
-        job->dstBuff = dstBuff;
-        DEBUGLOG(5, "ZSTDMT_compressChunk: received dstBuff of size %u", (U32)dstBuff.size);
+        if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
+        job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
     }
+    if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
+        JOB_ERROR(ERROR(memory_allocation));
 
+    /* Don't compute the checksum for chunks, since we compute it externally,
+     * but write it in the header.
+     */
+    if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
+    /* Don't run LDM for the chunks, since we handle it externally */
+    jobParams.ldmParams.enableLdm = 0;
+
+
+    /* init */
     if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dm_auto, job->cdict, job->params, job->fullFrameSize);
-        DEBUGLOG(4, "ZSTDMT_compressChunk: init using CDict (windowLog=%u)", job->params.cParams.windowLog);
-        assert(job->firstChunk);   /* only allowed for first job */
-        if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+        assert(job->firstJob);   /* only allowed for first job */
+        if (ZSTD_isError(initError)) JOB_ERROR(initError);
     } else {   /* srcStart points at reloaded section */
-        U64 const pledgedSrcSize = job->firstChunk ? job->fullFrameSize : ZSTD_CONTENTSIZE_UNKNOWN;
-        ZSTD_CCtx_params jobParams = job->params;   /* do not modify job->params ! copy it, modify the copy */
-        size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstChunk);
-        if (ZSTD_isError(forceWindowError)) {
-            DEBUGLOG(5, "ZSTD_CCtxParam_setParameter error : %s ", ZSTD_getErrorName(forceWindowError));
-            job->cSize = forceWindowError;
-            goto _endJob;
+        U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
+        {   size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
+            if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
         }
-        DEBUGLOG(5, "ZSTDMT_compressChunk: invoking ZSTD_compressBegin_advanced_internal with windowLog = %u ", jobParams.cParams.windowLog);
         {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
-                        job->srcStart, job->prefixSize, ZSTD_dm_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
-                        NULL,
+                        job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
+                        ZSTD_dtlm_fast,
+                        NULL, /*cdict*/
                         jobParams, pledgedSrcSize);
-            if (ZSTD_isError(initError)) {
-                DEBUGLOG(5, "ZSTD_compressBegin_advanced_internal error : %s ", ZSTD_getErrorName(initError));
-                job->cSize = initError;
-                goto _endJob;
-            }
-    }   }
-    if (!job->firstChunk) {   /* flush and overwrite frame header when it's not first job */
-        size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
-        if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
+            if (ZSTD_isError(initError)) JOB_ERROR(initError);
+    }   }
+
+    /* Perform serial step as early as possible, but after CCtx initialization */
+    ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
+
+    if (!job->firstJob) {   /* flush and overwrite frame header when it's not first job */
+        size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
+        if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
+        DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
         ZSTD_invalidateRepCodes(cctx);
     }
 
-    DEBUGLOG(5, "Compressing into dstBuff of size %u", (U32)dstBuff.size);
-    DEBUG_PRINTHEX(6, job->srcStart, 12);
-    job->cSize = (job->lastChunk) ?
-                 ZSTD_compressEnd     (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
-                 ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
-    DEBUGLOG(5, "compressed %u bytes into %u bytes   (first:%u) (last:%u) ",
-                (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
-    DEBUGLOG(5, "dstBuff.size : %u ; => %s ", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
+    /* compress */
+    {   size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX;
+        int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize);
+        const BYTE* ip = (const BYTE*) job->src.start;
+        BYTE* const ostart = (BYTE*)dstBuff.start;
+        BYTE* op = ostart;
+        BYTE* oend = op + dstBuff.capacity;
+        int chunkNb;
+        if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize);   /* check overflow */
+        DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
+        assert(job->cSize == 0);
+        for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
+            size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
+            if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
+            ip += chunkSize;
+            op += cSize; assert(op < oend);
+            /* stats */
+            ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
+            job->cSize += cSize;
+            job->consumed = chunkSize * chunkNb;
+            DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)",
+                        (U32)cSize, (U32)job->cSize);
+            ZSTD_pthread_cond_signal(&job->job_cond);   /* warns some more data is ready to be flushed */
+            ZSTD_pthread_mutex_unlock(&job->job_mutex);
+        }
+        /* last block */
+        assert(chunkSize > 0);
+        assert((chunkSize & (chunkSize - 1)) == 0);   /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */
+        if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) {
+            size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
+            size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
+            size_t const cSize = (job->lastJob) ?
+                 ZSTD_compressEnd     (cctx, op, oend-op, ip, lastBlockSize) :
+                 ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
+            if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
+            lastCBlockSize = cSize;
+    }   }
 
 _endJob:
+    ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
+    if (job->prefix.size > 0)
+        DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start);
+    DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start);
+    /* release resources */
+    ZSTDMT_releaseSeq(job->seqPool, rawSeqStore);
     ZSTDMT_releaseCCtx(job->cctxPool, cctx);
-    ZSTDMT_releaseBuffer(job->bufPool, job->src);
-    job->src = g_nullBuffer; job->srcStart = NULL;
-    ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
-    job->jobCompleted = 1;
-    job->jobScanned = 0;
-    ZSTD_pthread_cond_signal(job->jobCompleted_cond);
-    ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex);
+    /* report */
+    ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
+    if (ZSTD_isError(job->cSize)) assert(lastCBlockSize == 0);
+    job->cSize += lastCBlockSize;
+    job->consumed = job->src.size;   /* when job->consumed == job->src.size , compression job is presumed completed */
+    ZSTD_pthread_cond_signal(&job->job_cond);
+    ZSTD_pthread_mutex_unlock(&job->job_mutex);
 }
 
 
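The compress loop above cuts each job into chunks of 4*ZSTD_BLOCKSIZE_MAX (512 KB with the standard 128 KB block size) so partial results can be flushed while the job is still running, and it relies on chunkSize being a power of 2 for the last-block mask. A worked example of the arithmetic (illustrative, not part of the diff):

/* Illustrative only, not part of the diff.
 * chunkSize = 4*ZSTD_BLOCKSIZE_MAX = 4*131072 = 524288 (a power of 2)
 * With job->src.size = 1310720 (1.25 MB):
 *   nbChunks       = (1310720 + 524287) / 524288 = 3
 *   loop body      = chunkNb 1..2 -> two full 512 KB ZSTD_compressContinue() calls
 *   lastBlockSize1 = 1310720 & (524288-1) = 262144
 *   lastBlockSize  = 262144 (nonzero, so the masked remainder is used directly)
 * If src.size were an exact nonzero multiple of chunkSize, lastBlockSize1
 * would be 0 and a full chunkSize would be substituted; an empty job that is
 * the last job still makes one final call so the "last block" flag is emitted. */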
@@ -410,109 +755,171 @@ _endJob:
 /* ------------------------------------------ */
 
 typedef struct {
+    range_t prefix;         /* read-only non-owned prefix buffer */
     buffer_t buffer;
     size_t filled;
 } inBuff_t;
 
+typedef struct {
+  BYTE* buffer;     /* The round input buffer. All jobs get references
+                     * to pieces of the buffer. ZSTDMT_tryGetInputRange()
+                     * handles handing out job input buffers, and makes
+                     * sure it doesn't overlap with any pieces still in use.
+                     */
+  size_t capacity;  /* The capacity of buffer. */
+  size_t pos;       /* The position of the current inBuff in the round
+                     * buffer. Updated past the end if the inBuff once
+                     * the inBuff is sent to the worker thread.
+                     * pos <= capacity.
+                     */
+} roundBuff_t;
+
+static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
+
+#define RSYNC_LENGTH 32
+
+typedef struct {
+  U64 hash;
+  U64 hitMask;
+  U64 primePower;
+} rsyncState_t;
+
 struct ZSTDMT_CCtx_s {
     POOL_ctx* factory;
     ZSTDMT_jobDescription* jobs;
     ZSTDMT_bufferPool* bufPool;
     ZSTDMT_CCtxPool* cctxPool;
-    ZSTD_pthread_mutex_t jobCompleted_mutex;
-    ZSTD_pthread_cond_t jobCompleted_cond;
+    ZSTDMT_seqPool* seqPool;
     ZSTD_CCtx_params params;
     size_t targetSectionSize;
-    size_t inBuffSize;
-    size_t dictSize;
-    size_t targetDictSize;
+    size_t targetPrefixSize;
+    int jobReady;        /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
     inBuff_t inBuff;
-    XXH64_state_t xxhState;
-    unsigned singleThreaded;
+    roundBuff_t roundBuff;
+    serialState_t serial;
+    rsyncState_t rsync;
+    unsigned singleBlockingThread;
     unsigned jobIDMask;
     unsigned doneJobID;
     unsigned nextJobID;
     unsigned frameEnded;
     unsigned allJobsCompleted;
     unsigned long long frameContentSize;
+    unsigned long long consumed;
+    unsigned long long produced;
     ZSTD_customMem cMem;
     ZSTD_CDict* cdictLocal;
     const ZSTD_CDict* cdict;
 };
 
-static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
+static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
+{
+    U32 jobNb;
+    if (jobTable == NULL) return;
+    for (jobNb=0; jobNb<nbJobs; jobNb++) {
+        ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
+        ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
+    }
+    ZSTD_free(jobTable, cMem);
+}
+
+/* ZSTDMT_allocJobsTable()
+ * allocate and init a job table.
+ * update *nbJobsPtr to next power of 2 value, as size of table */
+static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
 {
     U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
     U32 const nbJobs = 1 << nbJobsLog2;
+    U32 jobNb;
+    ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
+                ZSTD_calloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+    int initError = 0;
+    if (jobTable==NULL) return NULL;
     *nbJobsPtr = nbJobs;
-    return (ZSTDMT_jobDescription*) ZSTD_calloc(
-                            nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+    for (jobNb=0; jobNb<nbJobs; jobNb++) {
+        initError |= ZSTD_pthread_mutex_init(&jobTable[jobNb].job_mutex, NULL);
+        initError |= ZSTD_pthread_cond_init(&jobTable[jobNb].job_cond, NULL);
+    }
+    if (initError != 0) {
+        ZSTDMT_freeJobsTable(jobTable, nbJobs, cMem);
+        return NULL;
+    }
+    return jobTable;
 }
 
-/* ZSTDMT_CCtxParam_setNbThreads():
- * Internal use only */
-size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads)
-{
-    if (nbThreads > ZSTDMT_NBTHREADS_MAX) nbThreads = ZSTDMT_NBTHREADS_MAX;
-    if (nbThreads < 1) nbThreads = 1;
-    params->nbThreads = nbThreads;
-    params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT;
-    params->jobSize = 0;
-    return nbThreads;
+static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
+    U32 nbJobs = nbWorkers + 2;
+    if (nbJobs > mtctx->jobIDMask+1) {   /* need more job capacity */
+        ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
+        mtctx->jobIDMask = 0;
+        mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
+        if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+        assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0));   /* ensure nbJobs is a power of 2 */
+        mtctx->jobIDMask = nbJobs - 1;
+    }
+    return 0;
 }
 
-/* ZSTDMT_getNbThreads():
- * @return nb threads currently active in mtctx.
- * mtctx must be valid */
-size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx)
+
+/* ZSTDMT_CCtxParam_setNbWorkers():
+ * Internal use only */
+size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
 {
-    assert(mtctx != NULL);
-    return mtctx->params.nbThreads;
+    return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
 }
 
-ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
+MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem)
 {
     ZSTDMT_CCtx* mtctx;
-    U32 nbJobs = nbThreads + 2;
-    DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbThreads = %u)", nbThreads);
+    U32 nbJobs = nbWorkers + 2;
+    int initError;
+    DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers);
 
-    if (nbThreads < 1) return NULL;
-    nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX);
+    if (nbWorkers < 1) return NULL;
+    nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX);
     if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
         /* invalid custom allocator */
         return NULL;
 
     mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
     if (!mtctx) return NULL;
-    ZSTDMT_CCtxParam_setNbThreads(&mtctx->params, nbThreads);
+    ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
     mtctx->cMem = cMem;
     mtctx->allJobsCompleted = 1;
-    mtctx->factory = POOL_create_advanced(nbThreads, 0, cMem);
-    mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
+    mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
+    mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
+    assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0);   /* ensure nbJobs is a power of 2 */
     mtctx->jobIDMask = nbJobs - 1;
-    mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem);
-    mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
-    if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool) {
+    mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+    mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
+    mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
+    initError = ZSTDMT_serialState_init(&mtctx->serial);
+    mtctx->roundBuff = kNullRoundBuff;
+    if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) {
        ZSTDMT_freeCCtx(mtctx);
        return NULL;
    }
-    if (ZSTD_pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL)) {
-        ZSTDMT_freeCCtx(mtctx);
-        return NULL;
-    }
-    if (ZSTD_pthread_cond_init(&mtctx->jobCompleted_cond, NULL)) {
-        ZSTDMT_freeCCtx(mtctx);
-        return NULL;
-    }
-    DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
+    DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers);
     return mtctx;
 }
 
-ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads)
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
 {
-    return ZSTDMT_createCCtx_advanced(nbThreads, ZSTD_defaultCMem);
+#ifdef ZSTD_MULTITHREAD
+    return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem);
+#else
+    (void)nbWorkers;
+    (void)cMem;
+    return NULL;
+#endif
 }
 
+ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
+{
+    return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
+}
+
+
 /* ZSTDMT_releaseAllJobResources() :
 * note : ensure all workers are killed first ! */
 static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
@@ -523,29 +930,26 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
         DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
         mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-        DEBUGLOG(4, "job%02u: release src address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].src.start);
-        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].src);
-        mtctx->jobs[jobID].src = g_nullBuffer;
+        mtctx->jobs[jobID].cSize = 0;
     }
     memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
-    DEBUGLOG(4, "input: release address %08X", (U32)(size_t)mtctx->inBuff.buffer.start);
-    ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);
     mtctx->inBuff.buffer = g_nullBuffer;
+    mtctx->inBuff.filled = 0;
     mtctx->allJobsCompleted = 1;
 }
 
-static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
+static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
 {
     DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
-    while (zcs->doneJobID < zcs->nextJobID) {
-        unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
-        ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
-        while (zcs->jobs[jobID].jobCompleted==0) {
-            DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID);   /* we want to block when waiting for data to flush */
-            ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
+    while (mtctx->doneJobID < mtctx->nextJobID) {
+        unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
+        ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
+        while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
+            DEBUGLOG(4, "waiting for jobCompleted signal from job %u", mtctx->doneJobID);   /* we want to block when waiting for data to flush */
+            ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
         }
-        ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
-        zcs->doneJobID++;
+        ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
+        mtctx->doneJobID++;
    }
 }
@@ -554,12 +958,14 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
     if (mtctx==NULL) return 0;   /* compatible with free on NULL */
     POOL_free(mtctx->factory);   /* stop and free worker threads */
     ZSTDMT_releaseAllJobResources(mtctx);   /* release job resources into pools first */
-    ZSTD_free(mtctx->jobs, mtctx->cMem);
+    ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
     ZSTDMT_freeBufferPool(mtctx->bufPool);
     ZSTDMT_freeCCtxPool(mtctx->cctxPool);
+    ZSTDMT_freeSeqPool(mtctx->seqPool);
+    ZSTDMT_serialState_free(&mtctx->serial);
     ZSTD_freeCDict(mtctx->cdictLocal);
-    ZSTD_pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
-    ZSTD_pthread_cond_destroy(&mtctx->jobCompleted_cond);
+    if (mtctx->roundBuff.buffer)
+        ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
     ZSTD_free(mtctx, mtctx->cMem);
     return 0;
 }
@@ -572,173 +978,353 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
  + ZSTDMT_sizeof_bufferPool(mtctx->bufPool)
  + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
  + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
- + ZSTD_sizeof_CDict(mtctx->cdictLocal);
+ + ZSTDMT_sizeof_seqPool(mtctx->seqPool)
+ + ZSTD_sizeof_CDict(mtctx->cdictLocal)
+ + mtctx->roundBuff.capacity;
  }

  /* Internal only */
- size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
- ZSTDMT_parameter parameter, unsigned value) {
+ size_t
+ ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
+ ZSTDMT_parameter parameter,
+ int value)
+ {
  DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
  switch(parameter)
  {
  case ZSTDMT_p_jobSize :
- DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value);
- if ( (value > 0) /* value==0 => automatic job size */
- & (value < ZSTDMT_JOBSIZE_MIN) )
- value = ZSTDMT_JOBSIZE_MIN;
- params->jobSize = value;
- return value;
- case ZSTDMT_p_overlapSectionLog :
- if (value > 9) value = 9;
- DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
- params->overlapSizeLog = (value >= 9) ? 9 : value;
- return value;
+ DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
+ return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value);
+ case ZSTDMT_p_overlapLog :
+ DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
+ return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value);
+ case ZSTDMT_p_rsyncable :
+ DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value);
+ return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value);
  default :
  return ERROR(parameter_unsupported);
  }
  }

- size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value)
+ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
  {
  DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
- switch(parameter)
- {
- case ZSTDMT_p_jobSize :
- return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
- case ZSTDMT_p_overlapSectionLog :
- return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
- default :
+ return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
+ }
+
+ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
+ {
+ switch (parameter) {
+ case ZSTDMT_p_jobSize:
+ return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value);
+ case ZSTDMT_p_overlapLog:
+ return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value);
+ case ZSTDMT_p_rsyncable:
+ return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value);
+ default:
  return ERROR(parameter_unsupported);
  }
  }

+ /* Sets parameters relevant to the compression job,
+ * initializing others to default values. */
+ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+ {
+ ZSTD_CCtx_params jobParams = params;
+ /* Clear parameters related to multithreading */
+ jobParams.forceWindow = 0;
+ jobParams.nbWorkers = 0;
+ jobParams.jobSize = 0;
+ jobParams.overlapLog = 0;
+ jobParams.rsyncable = 0;
+ memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t));
+ memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem));
+ return jobParams;
+ }
+
+
+ /* ZSTDMT_resize() :
+ * @return : error code if fails, 0 on success */
+ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
+ {
+ if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
+ FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
+ mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
+ if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
+ mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
+ if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
+ mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers);
+ if (mtctx->seqPool == NULL) return ERROR(memory_allocation);
+ ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
+ return 0;
+ }
+
+
+ /*! ZSTDMT_updateCParams_whileCompressing() :
+ * Updates a selected set of compression parameters, remaining compatible with currently active frame.
+ * New parameters will be applied to next compression job. */
+ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams)
+ {
+ U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */
+ int const compressionLevel = cctxParams->compressionLevel;
+ DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
+ compressionLevel);
+ mtctx->params.compressionLevel = compressionLevel;
+ { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0);
+ cParams.windowLog = saved_wlog;
+ mtctx->params.cParams = cParams;
+ }
+ }
+
+ /* ZSTDMT_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads.
+ * Note : mutex will be acquired during statistics collection inside workers. */
+ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
+ {
+ ZSTD_frameProgression fps;
+ DEBUGLOG(5, "ZSTDMT_getFrameProgression");
+ fps.ingested = mtctx->consumed + mtctx->inBuff.filled;
+ fps.consumed = mtctx->consumed;
+ fps.produced = fps.flushed = mtctx->produced;
+ fps.currentJobID = mtctx->nextJobID;
+ fps.nbActiveWorkers = 0;
+ { unsigned jobNb;
+ unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
+ DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
+ mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+ for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
+ unsigned const wJobID = jobNb & mtctx->jobIDMask;
+ ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
+ ZSTD_pthread_mutex_lock(&jobPtr->job_mutex);
+ { size_t const cResult = jobPtr->cSize;
+ size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
+ size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
+ assert(flushed <= produced);
+ fps.ingested += jobPtr->src.size;
+ fps.consumed += jobPtr->consumed;
+ fps.produced += produced;
+ fps.flushed += flushed;
+ fps.nbActiveWorkers += (jobPtr->consumed < jobPtr->src.size);
+ }
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
+ }
+ }
+ return fps;
+ }
+
+
+ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
+ {
+ size_t toFlush;
+ unsigned const jobID = mtctx->doneJobID;
+ assert(jobID <= mtctx->nextJobID);
+ if (jobID == mtctx->nextJobID) return 0; /* no active job => nothing to flush */
+
+ /* look into oldest non-fully-flushed job */
+ { unsigned const wJobID = jobID & mtctx->jobIDMask;
+ ZSTDMT_jobDescription* const jobPtr = &mtctx->jobs[wJobID];
+ ZSTD_pthread_mutex_lock(&jobPtr->job_mutex);
+ { size_t const cResult = jobPtr->cSize;
+ size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
+ size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
+ assert(flushed <= produced);
+ toFlush = produced - flushed;
+ if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
+ /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
+ assert(jobPtr->consumed < jobPtr->src.size);
+ }
+ }
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
+ }
+
+ return toFlush;
+ }
+
+
  /* ------------------------------------------ */
  /* ===== Multi-threaded compression ===== */
  /* ------------------------------------------ */

- static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
- size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
- size_t const chunkMaxSize = chunkSizeTarget << 2;
- size_t const passSizeMax = chunkMaxSize * nbThreads;
- unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
- unsigned const nbChunksLarge = multiplier * nbThreads;
- unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
- unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
- return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
+ static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
+ {
+ if (params.ldmParams.enableLdm)
+ /* In Long Range Mode, the windowLog is typically oversized.
+ * In which case, it's preferable to determine the jobSize
+ * based on chainLog instead. */
+ return MAX(21, params.cParams.chainLog + 4);
+ return MAX(20, params.cParams.windowLog + 2);
+ }
+
+ static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
+ {
+ switch(strat)
+ {
+ case ZSTD_btultra2:
+ return 9;
+ case ZSTD_btultra:
+ case ZSTD_btopt:
+ return 8;
+ case ZSTD_btlazy2:
+ case ZSTD_lazy2:
+ return 7;
+ case ZSTD_lazy:
+ case ZSTD_greedy:
+ case ZSTD_dfast:
+ case ZSTD_fast:
+ default:;
+ }
+ return 6;
+ }
+
+ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
+ {
+ assert(0 <= ovlog && ovlog <= 9);
+ if (ovlog == 0) return ZSTDMT_overlapLog_default(strat);
+ return ovlog;
+ }
+
+ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
+ {
+ int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
+ int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
+ assert(0 <= overlapRLog && overlapRLog <= 8);
+ if (params.ldmParams.enableLdm) {
+ /* In Long Range Mode, the windowLog is typically oversized.
+ * In which case, it's preferable to determine the jobSize
+ * based on chainLog instead.
+ * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
+ ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
+ - overlapRLog;
+ }
+ assert(0 <= ovLog && ovLog <= 30);
+ DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
+ DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
+ return (ovLog==0) ? 0 : (size_t)1 << ovLog;
  }

+ static unsigned
+ ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
+ {
+ assert(nbWorkers>0);
+ { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
+ size_t const jobMaxSize = jobSizeTarget << 2;
+ size_t const passSizeMax = jobMaxSize * nbWorkers;
+ unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
+ unsigned const nbJobsLarge = multiplier * nbWorkers;
+ unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1;
+ unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers);
+ return (multiplier>1) ? nbJobsLarge : nbJobsSmall;
+ } }
+
+ /* ZSTDMT_compress_advanced_internal() :
+ * This is a blocking function : it will only give back control to caller after finishing its compression job.
+ */
  static size_t ZSTDMT_compress_advanced_internal(
  ZSTDMT_CCtx* mtctx,
  void* dst, size_t dstCapacity,
  const void* src, size_t srcSize,
  const ZSTD_CDict* cdict,
- ZSTD_CCtx_params const params)
- {
- ZSTD_CCtx_params const jobParams = ZSTDMT_makeJobCCtxParams(params);
- unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
- size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
- unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, params.nbThreads);
- size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
- size_t const avgChunkSize = (((proposedChunkSize-1) & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
+ ZSTD_CCtx_params params)
+ {
+ ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
+ size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
+ unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+ size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
+ size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
  const char* const srcStart = (const char*)src;
  size_t remainingSrcSize = srcSize;
- unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
+ unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize)); /* presumes avgJobSize >= 256 KB, which should be the case */
  size_t frameStartPos = 0, dstBufferPos = 0;
- XXH64_state_t xxh64;
- assert(jobParams.nbThreads == 0);
- assert(mtctx->cctxPool->totalCCtx == params.nbThreads);
+ assert(jobParams.nbWorkers == 0);
+ assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);

- DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbChunks=%2u (rawSize=%u bytes; fixedSize=%u) ",
- nbChunks, (U32)proposedChunkSize, (U32)avgChunkSize);
- if (nbChunks==1) { /* fallback to single-thread mode */
+ params.jobSize = (U32)avgJobSize;
+ DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ",
+ nbJobs, (U32)proposedJobSize, (U32)avgJobSize);
+
+ if ((nbJobs==1) | (params.nbWorkers<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */
  ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
+ DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
  if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
  return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
  }
- assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */
- ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
- XXH64_reset(&xxh64, 0);

- if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */
- U32 nbJobs = nbChunks;
- ZSTD_free(mtctx->jobs, mtctx->cMem);
- mtctx->jobIDMask = 0;
- mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
- if (mtctx->jobs==NULL) return ERROR(memory_allocation);
- mtctx->jobIDMask = nbJobs - 1;
- }
+ assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
+ return ERROR(memory_allocation);
+
+ FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */

  { unsigned u;
- for (u=0; u<nbChunks; u++) {
- size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
- size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
+ for (u=0; u<nbJobs; u++) {
+ size_t const jobSize = MIN(remainingSrcSize, avgJobSize);
+ size_t const dstBufferCapacity = ZSTD_compressBound(jobSize);
  buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
  buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
  size_t dictSize = u ? overlapSize : 0;

- mtctx->jobs[u].src = g_nullBuffer;
- mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
- mtctx->jobs[u].prefixSize = dictSize;
- mtctx->jobs[u].srcSize = chunkSize;
+ mtctx->jobs[u].prefix.start = srcStart + frameStartPos - dictSize;
+ mtctx->jobs[u].prefix.size = dictSize;
+ mtctx->jobs[u].src.start = srcStart + frameStartPos;
+ mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0); /* avoid job.src.size == 0 */
+ mtctx->jobs[u].consumed = 0;
+ mtctx->jobs[u].cSize = 0;
  mtctx->jobs[u].cdict = (u==0) ? cdict : NULL;
  mtctx->jobs[u].fullFrameSize = srcSize;
  mtctx->jobs[u].params = jobParams;
  /* do not calculate checksum within sections, but write it in header for first section */
- if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
  mtctx->jobs[u].dstBuff = dstBuffer;
  mtctx->jobs[u].cctxPool = mtctx->cctxPool;
  mtctx->jobs[u].bufPool = mtctx->bufPool;
- mtctx->jobs[u].firstChunk = (u==0);
- mtctx->jobs[u].lastChunk = (u==nbChunks-1);
- mtctx->jobs[u].jobCompleted = 0;
- mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
- mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
-
- if (params.fParams.checksumFlag) {
- XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
- }
+ mtctx->jobs[u].seqPool = mtctx->seqPool;
+ mtctx->jobs[u].serial = &mtctx->serial;
+ mtctx->jobs[u].jobID = u;
+ mtctx->jobs[u].firstJob = (u==0);
+ mtctx->jobs[u].lastJob = (u==nbJobs-1);

- DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)chunkSize);
- DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
- POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
+ DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)jobSize);
+ DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12);
+ POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]);

- frameStartPos += chunkSize;
+ frameStartPos += jobSize;
  dstBufferPos += dstBufferCapacity;
- remainingSrcSize -= chunkSize;
+ remainingSrcSize -= jobSize;
  } }

  /* collect result */
  { size_t error = 0, dstPos = 0;
- unsigned chunkID;
- for (chunkID=0; chunkID<nbChunks; chunkID++) {
- DEBUGLOG(5, "waiting for chunk %u ", chunkID);
- ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
- while (mtctx->jobs[chunkID].jobCompleted==0) {
- DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
- ZSTD_pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
+ unsigned jobID;
+ for (jobID=0; jobID<nbJobs; jobID++) {
+ DEBUGLOG(5, "waiting for job %u ", jobID);
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
+ while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
+ DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID);
+ ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
  }
- ZSTD_pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
- DEBUGLOG(5, "ready to write chunk %u ", chunkID);
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
+ DEBUGLOG(5, "ready to write job %u ", jobID);

- mtctx->jobs[chunkID].srcStart = NULL;
- { size_t const cSize = mtctx->jobs[chunkID].cSize;
+ { size_t const cSize = mtctx->jobs[jobID].cSize;
  if (ZSTD_isError(cSize)) error = cSize;
  if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
- if (chunkID) { /* note : chunk 0 is written directly at dst, which is correct position */
+ if (jobID) { /* note : job 0 is written directly at dst, which is correct position */
  if (!error)
- memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */
- if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */
- DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
- ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff);
+ memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize); /* may overlap when job compressed within dst */
+ if (jobID >= compressWithinDst) { /* job compressed into its own buffer, which must be released */
+ DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst);
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
  } }
- mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
+ mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+ mtctx->jobs[jobID].cSize = 0;
  dstPos += cSize ;
  }
- } /* for (chunkID=0; chunkID<nbChunks; chunkID++) */
+ } /* for (jobID=0; jobID<nbJobs; jobID++) */

  DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
  if (params.fParams.checksumFlag) {
- U32 const checksum = (U32)XXH64_digest(&xxh64);
+ U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
  if (dstPos + 4 > dstCapacity) {
  error = ERROR(dstSize_tooSmall);
  } else {
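
The job-sizing and overlap helpers introduced in this hunk are easiest to follow with concrete numbers. The sketch below re-derives them outside the library (a standalone approximation with assumed inputs — windowLog 23, default overlapLog 6, 4 workers, a 256 MB source, LDM off — not a call into zstd itself):

    #include <stdio.h>
    #include <stddef.h>

    #define MAX(a,b) ((a)>(b)?(a):(b))
    #define MIN(a,b) ((a)<(b)?(a):(b))

    int main(void)
    {
        unsigned const windowLog = 23, overlapLog = 6, nbWorkers = 4;
        size_t const srcSize = (size_t)256 << 20;                          /* 256 MB */

        unsigned const targetJobLog = MAX(20, windowLog + 2);              /* 25 => 32 MB target job size */
        size_t const jobSizeTarget = (size_t)1 << targetJobLog;
        size_t const passSizeMax = (jobSizeTarget << 2) * nbWorkers;       /* 512 MB per pass */
        unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1; /* 1 => "small" branch */
        unsigned const nbJobs = (multiplier > 1)
            ? multiplier * nbWorkers
            : MIN((unsigned)(srcSize / jobSizeTarget) + 1, nbWorkers);     /* MIN(9, 4) = 4 jobs */

        size_t const overlap = (size_t)1 << (windowLog - (9 - overlapLog)); /* 1 MB carried into each job */
        printf("nbJobs=%u overlapSize=%zu\n", nbJobs, overlap);
        return 0;
    }
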
@@ -753,16 +1339,17 @@ static size_t ZSTDMT_compress_advanced_internal(
  }

  size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const ZSTD_CDict* cdict,
- ZSTD_parameters const params,
- unsigned overlapLog)
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const ZSTD_CDict* cdict,
+ ZSTD_parameters params,
+ int overlapLog)
  {
  ZSTD_CCtx_params cctxParams = mtctx->params;
  cctxParams.cParams = params.cParams;
  cctxParams.fParams = params.fParams;
- cctxParams.overlapSizeLog = overlapLog;
+ assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
+ cctxParams.overlapLog = overlapLog;
  return ZSTDMT_compress_advanced_internal(mtctx,
  dst, dstCapacity,
  src, srcSize,
@@ -775,8 +1362,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
  const void* src, size_t srcSize,
  int compressionLevel)
  {
- U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
  ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
+ int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
  params.fParams.contentSizeFlag = 1;
  return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
  }
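
As the hunk above shows, the one-shot entry point now derives its default overlapLog from the strategy selected by the level, rather than from the level itself. A minimal caller sketch, assuming the internal zstdmt_compress.h API (error handling reduced to a single check):

    #include "zstd.h"
    #include "zstdmt_compress.h"

    size_t mt_compress(void* dst, size_t dstCap, const void* src, size_t srcSize, int level)
    {
        size_t cSize = 0;   /* 0 kept as a "failed to allocate" sentinel for this sketch */
        ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(4 /* worker threads */);
        if (mtctx == NULL) return cSize;
        cSize = ZSTDMT_compressCCtx(mtctx, dst, dstCap, src, srcSize, level);
        ZSTDMT_freeCCtx(mtctx);
        return cSize;   /* compressed size, or an error code -- test with ZSTD_isError() */
    }
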
@@ -787,66 +1374,117 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
  /* ====================================== */

  size_t ZSTDMT_initCStream_internal(
- ZSTDMT_CCtx* zcs,
- const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
+ ZSTDMT_CCtx* mtctx,
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
  const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
  unsigned long long pledgedSrcSize)
  {
- DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
- /* params are supposed to be fully validated at this point */
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
+ (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
+
+ /* params supposed partially fully validated at this point */
  assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
  assert(!((dict) && (cdict))); /* either dict or cdict, not both */
- assert(zcs->cctxPool->totalCCtx == params.nbThreads);
- zcs->singleThreaded = (params.nbThreads==1) | (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
-
- if (zcs->singleThreaded) {
- ZSTD_CCtx_params const singleThreadParams = ZSTDMT_makeJobCCtxParams(params);
- DEBUGLOG(4, "single thread mode");
- assert(singleThreadParams.nbThreads == 0);
- return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
+
+ /* init */
+ if (params.nbWorkers != mtctx->params.nbWorkers)
+ FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
+
+ if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
+ if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
+
+ mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
+ if (mtctx->singleBlockingThread) {
+ ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+ DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
+ assert(singleThreadParams.nbWorkers == 0);
+ return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
  dict, dictSize, cdict,
  singleThreadParams, pledgedSrcSize);
  }
- DEBUGLOG(4, "multi-threading mode (%u threads)", params.nbThreads);

- if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */
- ZSTDMT_waitForAllJobsCompleted(zcs);
- ZSTDMT_releaseAllJobResources(zcs);
- zcs->allJobsCompleted = 1;
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
+
+ if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */
+ ZSTDMT_waitForAllJobsCompleted(mtctx);
+ ZSTDMT_releaseAllJobResources(mtctx);
+ mtctx->allJobsCompleted = 1;
  }

- zcs->params = params;
- zcs->frameContentSize = pledgedSrcSize;
+ mtctx->params = params;
+ mtctx->frameContentSize = pledgedSrcSize;
  if (dict) {
- ZSTD_freeCDict(zcs->cdictLocal);
- zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byCopy, dictMode, /* note : a loadPrefix becomes an internal CDict */
- params.cParams, zcs->cMem);
- zcs->cdict = zcs->cdictLocal;
- if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+ ZSTD_freeCDict(mtctx->cdictLocal);
+ mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+ ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */
+ params.cParams, mtctx->cMem);
+ mtctx->cdict = mtctx->cdictLocal;
+ if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
  } else {
- ZSTD_freeCDict(zcs->cdictLocal);
- zcs->cdictLocal = NULL;
- zcs->cdict = cdict;
+ ZSTD_freeCDict(mtctx->cdictLocal);
+ mtctx->cdictLocal = NULL;
+ mtctx->cdict = cdict;
  }

- assert(params.overlapSizeLog <= 9);
- zcs->targetDictSize = (params.overlapSizeLog==0) ? 0 : (size_t)1 << (params.cParams.windowLog - (9 - params.overlapSizeLog));
- DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(zcs->targetDictSize>>10));
- zcs->targetSectionSize = params.jobSize ? params.jobSize : (size_t)1 << (params.cParams.windowLog + 2);
- if (zcs->targetSectionSize < ZSTDMT_JOBSIZE_MIN) zcs->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
- if (zcs->targetSectionSize < zcs->targetDictSize) zcs->targetSectionSize = zcs->targetDictSize; /* job size must be >= overlap size */
- DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(zcs->targetSectionSize>>10), params.jobSize);
- zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize;
- DEBUGLOG(4, "inBuff Size : %u KB", (U32)(zcs->inBuffSize>>10));
- ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) );
- zcs->inBuff.buffer = g_nullBuffer;
- zcs->dictSize = 0;
- zcs->doneJobID = 0;
- zcs->nextJobID = 0;
- zcs->frameEnded = 0;
- zcs->allJobsCompleted = 0;
- if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
+ mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+ DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
+ mtctx->targetSectionSize = params.jobSize;
+ if (mtctx->targetSectionSize == 0) {
+ mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+ }
+ if (params.rsyncable) {
+ /* Aim for the targetsectionSize as the average job size. */
+ U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
+ U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
+ assert(jobSizeMB >= 1);
+ DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
+ mtctx->rsync.hash = 0;
+ mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
+ mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
+ }
+ if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
+ DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
+ DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
+ {
+ /* If ldm is enabled we need windowSize space. */
+ size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+ /* Two buffers of slack, plus extra space for the overlap
+ * This is the minimum slack that LDM works with. One extra because
+ * flush might waste up to targetSectionSize-1 bytes. Another extra
+ * for the overlap (if > 0), then one to fill which doesn't overlap
+ * with the LDM window.
+ */
+ size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0);
+ size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers;
+ /* Compute the total size, and always have enough slack */
+ size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1);
+ size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers;
+ size_t const capacity = MAX(windowSize, sectionsSize) + slackSize;
+ if (mtctx->roundBuff.capacity < capacity) {
+ if (mtctx->roundBuff.buffer)
+ ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
+ mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem);
+ if (mtctx->roundBuff.buffer == NULL) {
+ mtctx->roundBuff.capacity = 0;
+ return ERROR(memory_allocation);
+ }
+ mtctx->roundBuff.capacity = capacity;
+ }
+ }
+ DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10));
+ mtctx->roundBuff.pos = 0;
+ mtctx->inBuff.buffer = g_nullBuffer;
+ mtctx->inBuff.filled = 0;
+ mtctx->inBuff.prefix = kNullRange;
+ mtctx->doneJobID = 0;
+ mtctx->nextJobID = 0;
+ mtctx->frameEnded = 0;
+ mtctx->allJobsCompleted = 0;
+ mtctx->consumed = 0;
+ mtctx->produced = 0;
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
+ return ERROR(memory_allocation);
  return 0;
  }

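The rsyncable block in ZSTDMT_initCStream_internal() sizes the hit mask so that, on average, one synchronization point lands per job. Worked numbers, assuming an 8 MB targetSectionSize:

    unsigned const jobSizeMB = 8;                                /* targetSectionSize >> 20 */
    unsigned const rsyncBits = 3 /* ZSTD_highbit32(8) */ + 20;   /* = 23 */
    unsigned long long const hitMask = (1ULL << rsyncBits) - 1;
    /* Each input position satisfies (hash & hitMask) == hitMask with
     * probability 2^-23, so synchronization points are ~2^23 bytes = 8 MB
     * apart on average -- matching the average job size, as intended. */
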
@@ -855,11 +1493,11 @@ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
  ZSTD_parameters params,
  unsigned long long pledgedSrcSize)
  {
- ZSTD_CCtx_params cctxParams = mtctx->params;
- DEBUGLOG(5, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
+ ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */
+ DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
  cctxParams.cParams = params.cParams;
  cctxParams.fParams = params.fParams;
- return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dm_auto, NULL,
+ return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL,
  cctxParams, pledgedSrcSize);
  }

@@ -869,10 +1507,10 @@ size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
  unsigned long long pledgedSrcSize)
  {
  ZSTD_CCtx_params cctxParams = mtctx->params;
+ if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
  cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict);
  cctxParams.fParams = fParams;
- if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
- return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dm_auto, cdict,
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict,
  cctxParams, pledgedSrcSize);
  }

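Note that ZSTDMT_initCStream_usingCDict() now performs its NULL check before ZSTD_getCParamsFromCDict() touches the dictionary; previously the check came after the dereference. A minimal caller sketch (dictBuffer/dictSize are assumed to exist; error paths elided):

    ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 3 /* level */);
    ZSTD_frameParameters const fParams = { 1 /* contentSizeFlag */, 1 /* checksumFlag */, 0 /* noDictIDFlag */ };
    size_t const err = ZSTDMT_initCStream_usingCDict(mtctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
    if (ZSTD_isError(err)) { /* handle init failure */ }
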
@@ -881,149 +1519,449 @@ size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
881
1519
  * pledgedSrcSize can be zero == unknown (for the time being)
882
1520
  * prefer using ZSTD_CONTENTSIZE_UNKNOWN,
883
1521
  * as `0` might mean "empty" in the future */
884
- size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
1522
+ size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize)
885
1523
  {
886
1524
  if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
887
- if (zcs->params.nbThreads==1)
888
- return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
889
- return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, 0, zcs->params,
1525
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params,
890
1526
  pledgedSrcSize);
891
1527
  }
892
1528
 
893
- size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
894
- ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
895
- ZSTD_CCtx_params cctxParams = zcs->params;
1529
+ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
1530
+ ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
1531
+ ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */
1532
+ DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
896
1533
  cctxParams.cParams = params.cParams;
897
1534
  cctxParams.fParams = params.fParams;
898
- return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
899
- }
900
-
901
-
902
- static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
903
- {
904
- unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
905
-
906
- DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
907
- zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
908
- zcs->jobs[jobID].src = zcs->inBuff.buffer;
909
- zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
910
- zcs->jobs[jobID].srcSize = srcSize;
911
- zcs->jobs[jobID].prefixSize = zcs->dictSize;
912
- assert(zcs->inBuff.filled >= srcSize + zcs->dictSize);
913
- zcs->jobs[jobID].params = zcs->params;
914
- /* do not calculate checksum within sections, but write it in header for first section */
915
- if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
916
- zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
917
- zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
918
- zcs->jobs[jobID].dstBuff = g_nullBuffer;
919
- zcs->jobs[jobID].cctxPool = zcs->cctxPool;
920
- zcs->jobs[jobID].bufPool = zcs->bufPool;
921
- zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
922
- zcs->jobs[jobID].lastChunk = endFrame;
923
- zcs->jobs[jobID].jobCompleted = 0;
924
- zcs->jobs[jobID].dstFlushed = 0;
925
- zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
926
- zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
927
-
928
- if (zcs->params.fParams.checksumFlag)
929
- XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->dictSize, srcSize);
930
-
931
- /* get a new buffer for next input */
932
- if (!endFrame) {
933
- size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
934
- zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
935
- if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
936
- zcs->jobs[jobID].jobCompleted = 1;
937
- zcs->nextJobID++;
938
- ZSTDMT_waitForAllJobsCompleted(zcs);
939
- ZSTDMT_releaseAllJobResources(zcs);
940
- return ERROR(memory_allocation);
1535
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
1536
+ }
1537
+
1538
+
1539
+ /* ZSTDMT_writeLastEmptyBlock()
1540
+ * Write a single empty block with an end-of-frame to finish a frame.
1541
+ * Job must be created from streaming variant.
1542
+ * This function is always successful if expected conditions are fulfilled.
1543
+ */
1544
+ static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
1545
+ {
1546
+ assert(job->lastJob == 1);
1547
+ assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */
1548
+ assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */
1549
+ assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */
1550
+ job->dstBuff = ZSTDMT_getBuffer(job->bufPool);
1551
+ if (job->dstBuff.start == NULL) {
1552
+ job->cSize = ERROR(memory_allocation);
1553
+ return;
1554
+ }
1555
+ assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */
1556
+ job->src = kNullRange;
1557
+ job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity);
1558
+ assert(!ZSTD_isError(job->cSize));
1559
+ assert(job->consumed == 0);
1560
+ }
1561
+
1562
+ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp)
1563
+ {
1564
+ unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask;
1565
+ int const endFrame = (endOp == ZSTD_e_end);
1566
+
1567
+ if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) {
1568
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full");
1569
+ assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask));
1570
+ return 0;
1571
+ }
1572
+
1573
+ if (!mtctx->jobReady) {
1574
+ BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start;
1575
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
1576
+ mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size);
1577
+ mtctx->jobs[jobID].src.start = src;
1578
+ mtctx->jobs[jobID].src.size = srcSize;
1579
+ assert(mtctx->inBuff.filled >= srcSize);
1580
+ mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix;
1581
+ mtctx->jobs[jobID].consumed = 0;
1582
+ mtctx->jobs[jobID].cSize = 0;
1583
+ mtctx->jobs[jobID].params = mtctx->params;
1584
+ mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ? mtctx->cdict : NULL;
1585
+ mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize;
1586
+ mtctx->jobs[jobID].dstBuff = g_nullBuffer;
1587
+ mtctx->jobs[jobID].cctxPool = mtctx->cctxPool;
1588
+ mtctx->jobs[jobID].bufPool = mtctx->bufPool;
1589
+ mtctx->jobs[jobID].seqPool = mtctx->seqPool;
1590
+ mtctx->jobs[jobID].serial = &mtctx->serial;
1591
+ mtctx->jobs[jobID].jobID = mtctx->nextJobID;
1592
+ mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0);
1593
+ mtctx->jobs[jobID].lastJob = endFrame;
1594
+ mtctx->jobs[jobID].frameChecksumNeeded = mtctx->params.fParams.checksumFlag && endFrame && (mtctx->nextJobID>0);
1595
+ mtctx->jobs[jobID].dstFlushed = 0;
1596
+
1597
+ /* Update the round buffer pos and clear the input buffer to be reset */
1598
+ mtctx->roundBuff.pos += srcSize;
1599
+ mtctx->inBuff.buffer = g_nullBuffer;
1600
+ mtctx->inBuff.filled = 0;
1601
+ /* Set the prefix */
1602
+ if (!endFrame) {
1603
+ size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize);
1604
+ mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize;
1605
+ mtctx->inBuff.prefix.size = newPrefixSize;
1606
+ } else { /* endFrame==1 => no need for another input buffer */
1607
+ mtctx->inBuff.prefix = kNullRange;
1608
+ mtctx->frameEnded = endFrame;
1609
+ if (mtctx->nextJobID == 0) {
1610
+ /* single job exception : checksum is already calculated directly within worker thread */
1611
+ mtctx->params.fParams.checksumFlag = 0;
1612
+ } }
1613
+
1614
+ if ( (srcSize == 0)
1615
+ && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) {
1616
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame");
1617
+ assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */
1618
+ ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID);
1619
+ mtctx->nextJobID++;
1620
+ return 0;
941
1621
  }
942
- zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
943
- memmove(zcs->inBuff.buffer.start,
944
- (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
945
- zcs->inBuff.filled);
946
- zcs->dictSize = newDictSize;
947
- } else { /* if (endFrame==1) */
948
- zcs->inBuff.buffer = g_nullBuffer;
949
- zcs->inBuff.filled = 0;
950
- zcs->dictSize = 0;
951
- zcs->frameEnded = 1;
952
- if (zcs->nextJobID == 0) {
953
- /* single chunk exception : checksum is calculated directly within worker thread */
954
- zcs->params.fParams.checksumFlag = 0;
955
- } }
1622
+ }
956
1623
 
957
- DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)",
958
- zcs->nextJobID,
959
- (U32)zcs->jobs[jobID].srcSize,
960
- zcs->jobs[jobID].lastChunk,
961
- zcs->doneJobID,
962
- zcs->doneJobID & zcs->jobIDMask);
963
- POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */
964
- zcs->nextJobID++;
1624
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))",
1625
+ mtctx->nextJobID,
1626
+ (U32)mtctx->jobs[jobID].src.size,
1627
+ mtctx->jobs[jobID].lastJob,
1628
+ mtctx->nextJobID,
1629
+ jobID);
1630
+ if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) {
1631
+ mtctx->nextJobID++;
1632
+ mtctx->jobReady = 0;
1633
+ } else {
1634
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID);
1635
+ mtctx->jobReady = 1;
1636
+ }
965
1637
  return 0;
966
1638
  }
967
1639
 
968
1640
 
969
- /* ZSTDMT_flushNextJob() :
970
- * output : will be updated with amount of data flushed .
971
- * blockToFlush : if >0, the function will block and wait if there is no data available to flush .
972
- * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
973
- static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
1641
+ /*! ZSTDMT_flushProduced() :
1642
+ * flush whatever data has been produced but not yet flushed in current job.
1643
+ * move to next job if current one is fully flushed.
1644
+ * `output` : `pos` will be updated with amount of data flushed .
1645
+ * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
1646
+ * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
1647
+ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end)
974
1648
  {
975
- unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
976
- DEBUGLOG(5, "ZSTDMT_flushNextJob");
977
- if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */
978
- ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
979
- while (zcs->jobs[wJobID].jobCompleted==0) {
980
- DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
981
- if (!blockToFlush) { ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */
982
- ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); /* block when nothing available to flush */
1649
+ unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask;
1650
+ DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)",
1651
+ blockToFlush, mtctx->doneJobID, mtctx->nextJobID);
1652
+ assert(output->size >= output->pos);
1653
+
1654
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
1655
+ if ( blockToFlush
1656
+ && (mtctx->doneJobID < mtctx->nextJobID) ) {
1657
+ assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize);
1658
+ while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */
1659
+ if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) {
1660
+ DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none",
1661
+ mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size);
1662
+ break;
1663
+ }
1664
+ DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
1665
+ mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
1666
+ ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */
1667
+ } }
1668
+
1669
+ /* try to flush something */
1670
+ { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */
1671
+ size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */
1672
+ size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */
1673
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1674
+ if (ZSTD_isError(cSize)) {
1675
+ DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
1676
+ mtctx->doneJobID, ZSTD_getErrorName(cSize));
1677
+ ZSTDMT_waitForAllJobsCompleted(mtctx);
1678
+ ZSTDMT_releaseAllJobResources(mtctx);
1679
+ return cSize;
1680
+ }
1681
+ /* add frame checksum if necessary (can only happen once) */
1682
+ assert(srcConsumed <= srcSize);
1683
+ if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */
1684
+ && mtctx->jobs[wJobID].frameChecksumNeeded ) {
1685
+ U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
1686
+ DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
1687
+ MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum);
1688
+ cSize += 4;
1689
+ mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */
1690
+ mtctx->jobs[wJobID].frameChecksumNeeded = 0;
1691
+ }
1692
+
1693
+ if (cSize > 0) { /* compression is ongoing or completed */
1694
+ size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
1695
+ DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
1696
+ (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize);
1697
+ assert(mtctx->doneJobID < mtctx->nextJobID);
1698
+ assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
1699
+ assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
1700
+ memcpy((char*)output->dst + output->pos,
1701
+ (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
1702
+ toFlush);
1703
+ output->pos += toFlush;
1704
+ mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
1705
+
1706
+ if ( (srcConsumed == srcSize) /* job is completed */
1707
+ && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */
1708
+ DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
1709
+ mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
1710
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff);
1711
+ DEBUGLOG(5, "dstBuffer released");
1712
+ mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
1713
+ mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */
1714
+ mtctx->consumed += srcSize;
1715
+ mtctx->produced += cSize;
1716
+ mtctx->doneJobID++;
1717
+ } }
1718
+
1719
+ /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */
1720
+ if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed);
1721
+ if (srcSize > srcConsumed) return 1; /* current job not completely compressed */
983
1722
  }
984
- ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
985
- /* compression job completed : output can be flushed */
986
- { ZSTDMT_jobDescription job = zcs->jobs[wJobID];
987
- if (!job.jobScanned) {
988
- if (ZSTD_isError(job.cSize)) {
989
- DEBUGLOG(5, "job %u : compression error detected : %s",
990
- zcs->doneJobID, ZSTD_getErrorName(job.cSize));
991
- ZSTDMT_waitForAllJobsCompleted(zcs);
992
- ZSTDMT_releaseAllJobResources(zcs);
993
- return job.cSize;
1723
+ if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */
1724
+ if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */
1725
+ if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */
1726
+ mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */
1727
+ if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */
1728
+ return 0; /* internal buffers fully flushed */
1729
+ }
1730
+
1731
+ /**
1732
+ * Returns the range of data used by the earliest job that is not yet complete.
1733
+ * If the data of the first job is broken up into two segments, we cover both
1734
+ * sections.
1735
+ */
1736
+ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
1737
+ {
1738
+ unsigned const firstJobID = mtctx->doneJobID;
1739
+ unsigned const lastJobID = mtctx->nextJobID;
1740
+ unsigned jobID;
1741
+
1742
+ for (jobID = firstJobID; jobID < lastJobID; ++jobID) {
1743
+ unsigned const wJobID = jobID & mtctx->jobIDMask;
1744
+ size_t consumed;
1745
+
1746
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
1747
+ consumed = mtctx->jobs[wJobID].consumed;
1748
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1749
+
1750
+ if (consumed < mtctx->jobs[wJobID].src.size) {
1751
+ range_t range = mtctx->jobs[wJobID].prefix;
1752
+ if (range.size == 0) {
1753
+ /* Empty prefix */
1754
+ range = mtctx->jobs[wJobID].src;
994
1755
  }
995
- DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
996
- if (zcs->params.fParams.checksumFlag) {
997
- if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
998
- U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
999
- DEBUGLOG(5, "writing checksum : %08X \n", checksum);
1000
- MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
1001
- job.cSize += 4;
1002
- zcs->jobs[wJobID].cSize += 4;
1003
- } }
1004
- zcs->jobs[wJobID].jobScanned = 1;
1756
+ /* Job source in multiple segments not supported yet */
1757
+ assert(range.start <= mtctx->jobs[wJobID].src.start);
1758
+ return range;
1005
1759
  }
1006
- { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
1007
- DEBUGLOG(5, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
1008
- memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
1009
- output->pos += toWrite;
1010
- job.dstFlushed += toWrite;
1760
+ }
1761
+ return kNullRange;
1762
+ }
1763
+
1764
+ /**
1765
+ * Returns non-zero iff buffer and range overlap.
1766
+ */
1767
+ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
1768
+ {
1769
+ BYTE const* const bufferStart = (BYTE const*)buffer.start;
1770
+ BYTE const* const bufferEnd = bufferStart + buffer.capacity;
1771
+ BYTE const* const rangeStart = (BYTE const*)range.start;
1772
+ BYTE const* const rangeEnd = rangeStart + range.size;
1773
+
1774
+ if (rangeStart == NULL || bufferStart == NULL)
1775
+ return 0;
1776
+ /* Empty ranges cannot overlap */
1777
+ if (bufferStart == bufferEnd || rangeStart == rangeEnd)
1778
+ return 0;
1779
+
1780
+ return bufferStart < rangeEnd && rangeStart < bufferEnd;
1781
+ }
1782
+
1783
+ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
1784
+ {
1785
+ range_t extDict;
1786
+ range_t prefix;
1787
+
1788
+ DEBUGLOG(5, "ZSTDMT_doesOverlapWindow");
1789
+ extDict.start = window.dictBase + window.lowLimit;
1790
+ extDict.size = window.dictLimit - window.lowLimit;
1791
+
1792
+ prefix.start = window.base + window.dictLimit;
1793
+ prefix.size = window.nextSrc - (window.base + window.dictLimit);
1794
+ DEBUGLOG(5, "extDict [0x%zx, 0x%zx)",
1795
+ (size_t)extDict.start,
1796
+ (size_t)extDict.start + extDict.size);
1797
+ DEBUGLOG(5, "prefix [0x%zx, 0x%zx)",
1798
+ (size_t)prefix.start,
1799
+ (size_t)prefix.start + prefix.size);
1800
+
1801
+ return ZSTDMT_isOverlapped(buffer, extDict)
1802
+ || ZSTDMT_isOverlapped(buffer, prefix);
1803
+ }
1804
+
1805
+ static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
1806
+ {
1807
+ if (mtctx->params.ldmParams.enableLdm) {
1808
+ ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
1809
+ DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
1810
+ DEBUGLOG(5, "source [0x%zx, 0x%zx)",
1811
+ (size_t)buffer.start,
1812
+ (size_t)buffer.start + buffer.capacity);
1813
+ ZSTD_PTHREAD_MUTEX_LOCK(mutex);
1814
+ while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) {
1815
+ DEBUGLOG(5, "Waiting for LDM to finish...");
1816
+ ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex);
1011
1817
  }
1012
- if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */
1013
- ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
1014
- zcs->jobs[wJobID].dstBuff = g_nullBuffer;
1015
- zcs->jobs[wJobID].jobCompleted = 0;
1016
- zcs->doneJobID++;
1017
- } else {
1018
- zcs->jobs[wJobID].dstFlushed = job.dstFlushed;
1818
+ DEBUGLOG(6, "Done waiting for LDM to finish");
1819
+ ZSTD_pthread_mutex_unlock(mutex);
1820
+ }
1821
+ }
1822
+
1823
+ /**
1824
+ * Attempts to set the inBuff to the next section to fill.
1825
+ * If any part of the new section is still in use we give up.
1826
+ * Returns non-zero if the buffer is filled.
1827
+ */
1828
+ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
1829
+ {
1830
+ range_t const inUse = ZSTDMT_getInputDataInUse(mtctx);
1831
+ size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos;
1832
+ size_t const target = mtctx->targetSectionSize;
1833
+ buffer_t buffer;
1834
+
1835
+ DEBUGLOG(5, "ZSTDMT_tryGetInputRange");
1836
+ assert(mtctx->inBuff.buffer.start == NULL);
1837
+ assert(mtctx->roundBuff.capacity >= target);
1838
+
1839
+ if (spaceLeft < target) {
1840
+ /* ZSTD_invalidateRepCodes() doesn't work for extDict variants.
1841
+ * Simply copy the prefix to the beginning in that case.
1842
+ */
1843
+ BYTE* const start = (BYTE*)mtctx->roundBuff.buffer;
1844
+ size_t const prefixSize = mtctx->inBuff.prefix.size;
1845
+
1846
+ buffer.start = start;
1847
+ buffer.capacity = prefixSize;
1848
+ if (ZSTDMT_isOverlapped(buffer, inUse)) {
1849
+ DEBUGLOG(5, "Waiting for buffer...");
1850
+ return 0;
1019
1851
  }
1020
- /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
1021
- if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
1022
- if (zcs->doneJobID < zcs->nextJobID) return 1; /* still some buffer to flush */
1023
- zcs->allJobsCompleted = zcs->frameEnded; /* frame completed and entirely flushed */
1024
- return 0; /* everything flushed */
1025
- } }
1852
+ ZSTDMT_waitForLdmComplete(mtctx, buffer);
1853
+ memmove(start, mtctx->inBuff.prefix.start, prefixSize);
1854
+ mtctx->inBuff.prefix.start = start;
1855
+ mtctx->roundBuff.pos = prefixSize;
1856
+ }
1857
+ buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos;
1858
+ buffer.capacity = target;
1859
+
1860
+ if (ZSTDMT_isOverlapped(buffer, inUse)) {
1861
+ DEBUGLOG(5, "Waiting for buffer...");
1862
+ return 0;
1863
+ }
1864
+ assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix));
1026
1865
 
1866
+ ZSTDMT_waitForLdmComplete(mtctx, buffer);
1867
+
1868
+ DEBUGLOG(5, "Using prefix range [%zx, %zx)",
1869
+ (size_t)mtctx->inBuff.prefix.start,
1870
+ (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size);
1871
+ DEBUGLOG(5, "Using source range [%zx, %zx)",
1872
+ (size_t)buffer.start,
1873
+ (size_t)buffer.start + buffer.capacity);
1874
+
1875
+
1876
+ mtctx->inBuff.buffer = buffer;
1877
+ mtctx->inBuff.filled = 0;
1878
+ assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity);
1879
+ return 1;
1880
+ }
+
+typedef struct {
+    size_t toLoad;   /* The number of bytes to load from the input. */
+    int    flush;    /* Boolean declaring if we must flush because we found a synchronization point. */
+} syncPoint_t;
+
+/**
+ * Searches through the input for a synchronization point. If one is found, we
+ * will instruct the caller to flush, and return the number of bytes to load.
+ * Otherwise, we will load as many bytes as possible and instruct the caller
+ * to continue as normal.
+ */
+static syncPoint_t
+findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
+{
+    BYTE const* const istart = (BYTE const*)input.src + input.pos;
+    U64 const primePower = mtctx->rsync.primePower;
+    U64 const hitMask = mtctx->rsync.hitMask;
+
+    syncPoint_t syncPoint;
+    U64 hash;
+    BYTE const* prev;
+    size_t pos;
+
+    syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
+    syncPoint.flush = 0;
+    if (!mtctx->params.rsyncable)
+        /* Rsync is disabled. */
+        return syncPoint;
+    if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
+        /* Not enough to compute the hash.
+         * We will miss any synchronization points in this RSYNC_LENGTH byte
+         * window. However, since it depends only on the internal buffers, if the
+         * state is already synchronized, we will remain synchronized.
+         * Additionally, the probability that we miss a synchronization point is
+         * low: RSYNC_LENGTH / targetSectionSize.
+         */
+        return syncPoint;
+    /* Initialize the loop variables. */
+    if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
+        /* We have enough bytes buffered to initialize the hash.
+         * Start scanning at the beginning of the input.
+         */
+        pos = 0;
+        prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
+        hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
+    } else {
+        /* We don't have enough bytes buffered to initialize the hash, but
+         * we know we have at least RSYNC_LENGTH bytes total.
+         * Start scanning after the first RSYNC_LENGTH bytes less the bytes
+         * already buffered.
+         */
+        pos = RSYNC_LENGTH - mtctx->inBuff.filled;
+        prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
+        hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
+        hash = ZSTD_rollingHash_append(hash, istart, pos);
+    }
+    /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
+     * through the input. If we hit a synchronization point, then cut the
+     * job off, and tell the compressor to flush the job. Otherwise, load
+     * all the bytes and continue as normal.
+     * If we go too long without a synchronization point (targetSectionSize)
+     * then a block will be emitted anyway, but this is okay, since if we
+     * are already synchronized we will remain synchronized.
+     */
+    for (; pos < syncPoint.toLoad; ++pos) {
+        BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
+        /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
+        hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
+        if ((hash & hitMask) == hitMask) {
+            syncPoint.toLoad = pos + 1;
+            syncPoint.flush = 1;
+            break;
+        }
+    }
+    return syncPoint;
+}
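The search above is a Rabin-Karp style rolling hash: a boundary is declared whenever the low bits of the window hash are all ones, which on random input fires roughly once every `hitMask + 1` positions, so identical content tends to be cut at identical places regardless of where a job happened to start. A self-contained toy version of the same idea (illustrative window length, prime, and mask, not the values zstd derives from its parameters):

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative constants only -- zstd derives its own prime, window
 * length (RSYNC_LENGTH) and hitMask from the compression parameters. */
#define WINDOW  32
#define PRIME   0x9E3779B185EBCA87ULL   /* any odd 64-bit multiplier works */
#define HITMASK 0xFFULL                 /* expect a cut roughly every 256 bytes */

/* PRIME^(WINDOW-1) mod 2^64, needed to remove the outgoing byte. */
static uint64_t primePower(void)
{
    uint64_t p = 1;
    for (int i = 0; i < WINDOW - 1; ++i) p *= PRIME;
    return p;
}

int main(void)
{
    unsigned char data[4096];
    for (size_t i = 0; i < sizeof(data); ++i)
        data[i] = (unsigned char)((i * 2654435761u) >> 13);   /* arbitrary test pattern */

    uint64_t const pw = primePower();
    uint64_t hash = 0;
    for (size_t i = 0; i < WINDOW; ++i) hash = hash * PRIME + data[i];

    for (size_t pos = WINDOW; pos < sizeof(data); ++pos) {
        if ((hash & HITMASK) == HITMASK)
            printf("synchronization point ending at offset %zu\n", pos);
        /* roll: drop data[pos-WINDOW], append data[pos] */
        hash = (hash - data[pos - WINDOW] * pw) * PRIME + data[pos];
    }
    return 0;
}
```

Because the decision depends only on the last WINDOW bytes, an edit early in a file perturbs at most the cut points near the edit; after the next hit the boundaries realign, which is what makes the output rsync-friendly.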
+
+size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx)
+{
+    size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled;
+    if (hintInSize==0) hintInSize = mtctx->targetSectionSize;
+    return hintInSize;
+}
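The hint is simply "whatever would top up the current job"; callers that honor it avoid dispatching fractional jobs. The public streaming API surfaces the same recommendation as the return value of `ZSTD_compressStream()`; a sketch of a read loop driven by it (error handling and frame termination elided, buffer sizes arbitrary):

```c
#include <stdio.h>
#include <zstd.h>

/* Feed a file to a compression stream, sizing each read by the hint
 * the previous call returned. ZSTD_endStream() and error checks are
 * omitted to keep the sketch short. */
static void feed_stream(FILE* fin, FILE* fout, ZSTD_CStream* zcs)
{
    char inBuf[1 << 15], outBuf[1 << 15];
    size_t hint = ZSTD_CStreamInSize();        /* initial recommendation */

    for (;;) {
        size_t const toRead = hint < sizeof(inBuf) ? hint : sizeof(inBuf);
        size_t const readSz = fread(inBuf, 1, toRead, fin);
        ZSTD_inBuffer input = { inBuf, readSz, 0 };
        if (readSz == 0) break;                /* EOF */
        while (input.pos < input.size) {
            ZSTD_outBuffer output = { outBuf, sizeof(outBuf), 0 };
            hint = ZSTD_compressStream(zcs, &output, &input);
            fwrite(outBuf, 1, output.pos, fout);
        }
        if (hint == 0) hint = ZSTD_CStreamInSize();  /* hint may be 0 when the job buffer is full */
    }
}
```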
 
 /** ZSTDMT_compressStream_generic() :
  *  internal use only - exposed to be invoked from zstd_compress.c
@@ -1034,14 +1972,14 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
                                      ZSTD_inBuffer* input,
                                      ZSTD_EndDirective endOp)
 {
-    size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize;
     unsigned forwardInputProgress = 0;
-    DEBUGLOG(5, "ZSTDMT_compressStream_generic ");
+    DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)",
+             (U32)endOp, (U32)(input->size - input->pos));
     assert(output->pos <= output->size);
     assert(input->pos <= input->size);
 
-    if (mtctx->singleThreaded) { /* delegate to single-thread (synchronous) */
-        return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
+    if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */
+        return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp);
     }
 
     if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
@@ -1050,10 +1988,12 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     }
 
     /* single-pass shortcut (note : synchronous-mode) */
-    if ( (mtctx->nextJobID == 0)        /* just started */
-      && (mtctx->inBuff.filled == 0)    /* nothing buffered */
-      && (endOp == ZSTD_e_end)          /* end order */
-      && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough room */
+    if ( (!mtctx->params.rsyncable)     /* rsyncable mode is disabled */
+      && (mtctx->nextJobID == 0)        /* just started */
+      && (mtctx->inBuff.filled == 0)    /* nothing buffered */
+      && (!mtctx->jobReady)             /* no job already created */
+      && (endOp == ZSTD_e_end)          /* end order */
+      && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */
         size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
                 (char*)output->dst + output->pos, output->size - output->pos,
                 (const char*)input->src + input->pos, input->size - input->pos,
@@ -1061,89 +2001,99 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
         if (ZSTD_isError(cSize)) return cSize;
         input->pos = input->size;
         output->pos += cSize;
-        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);   /* was allocated in initStream */
         mtctx->allJobsCompleted = 1;
         mtctx->frameEnded = 1;
         return 0;
     }
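The last condition is the interesting one for callers: a destination sized with `ZSTD_compressBound()` guarantees the whole input can be compressed synchronously in one `ZSTD_e_end` call. A hedged sketch against the public API (the convention of returning 0 on failure is this example's, not zstd's):

```c
#include <zstd.h>

/* One-shot streaming compression. With dstCapacity >=
 * ZSTD_compressBound(srcSize) and a fresh context (rsyncable off,
 * nothing buffered), the single call below consumes all input and
 * finishes the frame. Returns the compressed size, or 0 on failure. */
static size_t compress_oneshot(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
{
    ZSTD_outBuffer out = { dst, dstCapacity, 0 };
    ZSTD_inBuffer  in  = { src, srcSize, 0 };
    size_t const remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
    if (ZSTD_isError(remaining) || remaining != 0) return 0;
    return out.pos;   /* whole frame produced in one call */
}
```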
 
     /* fill input buffer */
-    if (input->size > input->pos) {   /* support NULL input */
+    if ( (!mtctx->jobReady)
+      && (input->size > input->pos) ) {   /* support NULL input */
         if (mtctx->inBuff.buffer.start == NULL) {
-            mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool);  /* note : may fail, in which case, no forward input progress */
-            mtctx->inBuff.filled = 0;
+            assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */
+            if (!ZSTDMT_tryGetInputRange(mtctx)) {
+                /* It is only possible for this operation to fail if there are
+                 * still compression jobs ongoing.
+                 */
+                DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed");
+                assert(mtctx->doneJobID != mtctx->nextJobID);
+            } else
+                DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
         }
-        if (mtctx->inBuff.buffer.start) {
-            size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
-            DEBUGLOG(5, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
-            memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
-            input->pos += toLoad;
-            mtctx->inBuff.filled += toLoad;
-            forwardInputProgress = toLoad>0;
-    }   }
+        if (mtctx->inBuff.buffer.start != NULL) {
+            syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input);
+            if (syncPoint.flush && endOp == ZSTD_e_continue) {
+                endOp = ZSTD_e_flush;
+            }
+            assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
+            DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
+                     (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
+            memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
+            input->pos += syncPoint.toLoad;
+            mtctx->inBuff.filled += syncPoint.toLoad;
+            forwardInputProgress = syncPoint.toLoad>0;
+        }
+        if ((input->pos < input->size) && (endOp == ZSTD_e_end))
+            endOp = ZSTD_e_flush;   /* can't end now : not all input consumed */
+    }
 
-    if ( (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
-      && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {   /* avoid overwriting job round buffer */
-        CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
+    if ( (mtctx->jobReady)
+      || (mtctx->inBuff.filled >= mtctx->targetSectionSize)  /* filled enough : let's compress */
+      || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0))  /* something to flush : let's go */
+      || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) {   /* must finish the frame with a zero-size block */
+        size_t const jobSize = mtctx->inBuff.filled;
+        assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
+        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
     }
 
     /* check for potential compressed data ready to be flushed */
-    CHECK_F( ZSTDMT_flushNextJob(mtctx, output, !forwardInputProgress /* blockToFlush */) ); /* block if there was no forward input progress */
-
-    if (input->pos < input->size)  /* input not consumed : do not flush yet */
-        endOp = ZSTD_e_continue;
-
-    switch(endOp)
-    {
-        case ZSTD_e_flush:
-            return ZSTDMT_flushStream(mtctx, output);
-        case ZSTD_e_end:
-            return ZSTDMT_endStream(mtctx, output);
-        case ZSTD_e_continue:
-            return 1;
-        default:
-            return ERROR(GENERIC);   /* invalid endDirective */
+    {   size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */
+        if (input->pos < input->size) return MAX(remainingToFlush, 1);  /* input not consumed : do not end flush yet */
+        DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush);
+        return remainingToFlush;
     }
 }
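The return value now doubles as a progress indicator: strictly positive while compressed data remains buffered inside the context, 0 once the frame is fully flushed. The public `ZSTD_compressStream2()` exposes the same contract, so a caller-side drain loop is straightforward (sketch; assumes the cctx was configured with nbWorkers >= 1, minimal error handling):

```c
#include <stdio.h>
#include <zstd.h>

/* Finish a frame: keep calling with ZSTD_e_end until the context
 * reports 0 bytes remaining to flush. With no new input to forward,
 * each call may block until a worker job completes. */
static int finish_frame(ZSTD_CCtx* cctx, FILE* fout)
{
    char buf[4096];
    ZSTD_inBuffer noInput = { NULL, 0, 0 };   /* NULL input is supported */
    size_t remaining;
    do {
        ZSTD_outBuffer out = { buf, sizeof(buf), 0 };
        remaining = ZSTD_compressStream2(cctx, &out, &noInput, ZSTD_e_end);
        if (ZSTD_isError(remaining)) return -1;
        if (fwrite(buf, 1, out.pos, fout) != out.pos) return -1;
    } while (remaining != 0);
    return 0;
}
```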
 
 
-size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
 {
-    CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
+    FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
 
     /* recommended next input size : fill current input buffer */
-    return zcs->inBuffSize - zcs->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
+    return mtctx->targetSectionSize - mtctx->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
 }
 
 
-static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned endFrame)
+static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame)
 {
-    size_t const srcSize = mtctx->inBuff.filled - mtctx->dictSize;
+    size_t const srcSize = mtctx->inBuff.filled;
     DEBUGLOG(5, "ZSTDMT_flushStream_internal");
 
-    if ( ((srcSize > 0) || (endFrame && !mtctx->frameEnded))
-       && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {
-        DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job");
-        CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
+    if ( mtctx->jobReady                                   /* one job ready for a worker to pick up */
+      || (srcSize > 0)                                     /* still some data within input buffer */
+      || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
+        DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
+                 (U32)srcSize, (U32)endFrame);
+        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
     }
 
     /* check if there is any data available to flush */
-    return ZSTDMT_flushNextJob(mtctx, output, 1 /* blockToFlush */);
+    return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame);
 }
 
 
 size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
 {
     DEBUGLOG(5, "ZSTDMT_flushStream");
-    if (mtctx->singleThreaded)
+    if (mtctx->singleBlockingThread)
         return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
-    return ZSTDMT_flushStream_internal(mtctx, output, 0 /* endFrame */);
+    return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush);
 }
 
 size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
 {
     DEBUGLOG(4, "ZSTDMT_endStream");
-    if (mtctx->singleThreaded)
+    if (mtctx->singleBlockingThread)
         return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
-    return ZSTDMT_flushStream_internal(mtctx, output, 1 /* endFrame */);
+    return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end);
 }
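These entry points are normally reached through the public streaming API once multithreading is enabled on an ordinary `ZSTD_CCtx`. A sketch of a context configured to exercise the rsyncable multithreaded paths above (the `ZSTD_c_*` names are the public parameters of zstd 1.4+; setter return codes are ignored here for brevity):

```c
#include <zstd.h>

/* Create a compression context that routes through ZSTDMT with
 * content-defined (rsyncable) job boundaries. */
static ZSTD_CCtx* make_rsyncable_cctx(int nbWorkers, int level)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    if (cctx == NULL) return NULL;
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbWorkers);  /* >= 1 enables ZSTDMT */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1);          /* content-defined job cuts */
    return cctx;
}
```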