zstd-ruby 1.3.0.0 → 1.3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/common/bitstream.h +40 -41
  4. data/ext/zstdruby/libzstd/common/compiler.h +85 -0
  5. data/ext/zstdruby/libzstd/common/error_private.c +8 -10
  6. data/ext/zstdruby/libzstd/common/error_private.h +4 -4
  7. data/ext/zstdruby/libzstd/common/fse.h +11 -5
  8. data/ext/zstdruby/libzstd/common/fse_decompress.c +3 -22
  9. data/ext/zstdruby/libzstd/common/huf.h +5 -6
  10. data/ext/zstdruby/libzstd/common/mem.h +6 -6
  11. data/ext/zstdruby/libzstd/common/pool.c +61 -27
  12. data/ext/zstdruby/libzstd/common/pool.h +10 -10
  13. data/ext/zstdruby/libzstd/common/threading.h +5 -6
  14. data/ext/zstdruby/libzstd/common/xxhash.c +28 -22
  15. data/ext/zstdruby/libzstd/common/zstd_common.c +4 -4
  16. data/ext/zstdruby/libzstd/common/zstd_errors.h +30 -32
  17. data/ext/zstdruby/libzstd/common/zstd_internal.h +57 -56
  18. data/ext/zstdruby/libzstd/compress/fse_compress.c +4 -22
  19. data/ext/zstdruby/libzstd/compress/huf_compress.c +4 -3
  20. data/ext/zstdruby/libzstd/compress/zstd_compress.c +314 -304
  21. data/ext/zstdruby/libzstd/compress/zstd_opt.h +118 -116
  22. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +223 -156
  23. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +10 -9
  24. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +8 -24
  25. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +95 -115
  26. data/ext/zstdruby/libzstd/deprecated/zbuff.h +4 -4
  27. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +4 -5
  28. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +4 -4
  29. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +4 -4
  30. data/ext/zstdruby/libzstd/dictBuilder/cover.c +7 -9
  31. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
  32. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +4 -4
  33. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  34. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +4 -4
  35. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +4 -4
  36. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +4 -4
  37. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +4 -4
  38. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +4 -4
  39. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +4 -4
  40. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +5 -5
  41. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +4 -4
  42. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +5 -5
  43. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +4 -4
  44. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +5 -5
  45. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +4 -4
  46. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +4 -4
  47. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +4 -4
  48. data/ext/zstdruby/libzstd/zstd.h +16 -14
  49. data/lib/zstd-ruby/version.rb +1 -1
  50. metadata +3 -2
@@ -1,15 +1,16 @@
1
- /**
1
+ /*
2
2
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
- * This source code is licensed under the BSD-style license found in the
6
- * LICENSE file in the root directory of this source tree. An additional grant
7
- * of patent rights can be found in the PATENTS file in the same directory.
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
8
  */
9
9
 
10
10
 
11
11
  /* ====== Tuning parameters ====== */
12
- #define ZSTDMT_NBTHREADS_MAX 128
12
+ #define ZSTDMT_NBTHREADS_MAX 256
13
+ #define ZSTDMT_OVERLAPLOG_DEFAULT 6
13
14
 
14
15
 
15
16
  /* ====== Compiler specifics ====== */
@@ -73,6 +74,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
73
74
 
74
75
 
75
76
  /* ===== Buffer Pool ===== */
77
+ /* a single Buffer Pool can be invoked from multiple threads in parallel */
76
78
 
77
79
  typedef struct buffer_s {
78
80
  void* start;
@@ -82,6 +84,8 @@ typedef struct buffer_s {
82
84
  static const buffer_t g_nullBuffer = { NULL, 0 };
83
85
 
84
86
  typedef struct ZSTDMT_bufferPool_s {
87
+ pthread_mutex_t poolMutex;
88
+ size_t bufferSize;
85
89
  unsigned totalBuffers;
86
90
  unsigned nbBuffers;
87
91
  ZSTD_customMem cMem;
@@ -90,10 +94,15 @@ typedef struct ZSTDMT_bufferPool_s {
90
94
 
91
95
  static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
92
96
  {
93
- unsigned const maxNbBuffers = 2*nbThreads + 2;
97
+ unsigned const maxNbBuffers = 2*nbThreads + 3;
94
98
  ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
95
99
  sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
96
100
  if (bufPool==NULL) return NULL;
101
+ if (pthread_mutex_init(&bufPool->poolMutex, NULL)) {
102
+ ZSTD_free(bufPool, cMem);
103
+ return NULL;
104
+ }
105
+ bufPool->bufferSize = 64 KB;
97
106
  bufPool->totalBuffers = maxNbBuffers;
98
107
  bufPool->nbBuffers = 0;
99
108
  bufPool->cMem = cMem;
@@ -106,6 +115,7 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
106
115
  if (!bufPool) return; /* compatibility with free on NULL */
107
116
  for (u=0; u<bufPool->totalBuffers; u++)
108
117
  ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
118
+ pthread_mutex_destroy(&bufPool->poolMutex);
109
119
  ZSTD_free(bufPool, bufPool->cMem);
110
120
  }
111
121
 
@@ -116,65 +126,85 @@ static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
116
126
  + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
117
127
  unsigned u;
118
128
  size_t totalBufferSize = 0;
129
+ pthread_mutex_lock(&bufPool->poolMutex);
119
130
  for (u=0; u<bufPool->totalBuffers; u++)
120
131
  totalBufferSize += bufPool->bTable[u].size;
132
+ pthread_mutex_unlock(&bufPool->poolMutex);
121
133
 
122
134
  return poolSize + totalBufferSize;
123
135
  }
124
136
 
125
- /** ZSTDMT_getBuffer() :
126
- * assumption : invocation from main thread only ! */
127
- static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
137
+ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* bufPool, size_t bSize)
128
138
  {
129
- if (pool->nbBuffers) { /* try to use an existing buffer */
130
- buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
139
+ bufPool->bufferSize = bSize;
140
+ }
141
+
142
+ /** ZSTDMT_getBuffer() :
143
+ * assumption : bufPool must be valid */
144
+ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
145
+ {
146
+ size_t const bSize = bufPool->bufferSize;
147
+ DEBUGLOG(5, "ZSTDMT_getBuffer");
148
+ pthread_mutex_lock(&bufPool->poolMutex);
149
+ if (bufPool->nbBuffers) { /* try to use an existing buffer */
150
+ buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
131
151
  size_t const availBufferSize = buf.size;
132
- if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
152
+ if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) {
133
153
  /* large enough, but not too much */
154
+ pthread_mutex_unlock(&bufPool->poolMutex);
134
155
  return buf;
156
+ }
135
157
  /* size conditions not respected : scratch this buffer, create new one */
136
- ZSTD_free(buf.start, pool->cMem);
158
+ DEBUGLOG(5, "existing buffer does not meet size conditions => freeing");
159
+ ZSTD_free(buf.start, bufPool->cMem);
137
160
  }
161
+ pthread_mutex_unlock(&bufPool->poolMutex);
138
162
  /* create new buffer */
163
+ DEBUGLOG(5, "create a new buffer");
139
164
  { buffer_t buffer;
140
- void* const start = ZSTD_malloc(bSize, pool->cMem);
141
- if (start==NULL) bSize = 0;
165
+ void* const start = ZSTD_malloc(bSize, bufPool->cMem);
142
166
  buffer.start = start; /* note : start can be NULL if malloc fails ! */
143
- buffer.size = bSize;
167
+ buffer.size = (start==NULL) ? 0 : bSize;
144
168
  return buffer;
145
169
  }
146
170
  }
147
171
 
148
172
  /* store buffer for later re-use, up to pool capacity */
149
- static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
150
- {
151
- if (buf.start == NULL) return; /* release on NULL */
152
- if (pool->nbBuffers < pool->totalBuffers) {
153
- pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */
173
+ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
174
+ {
175
+ if (buf.start == NULL) return; /* compatible with release on NULL */
176
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer");
177
+ pthread_mutex_lock(&bufPool->poolMutex);
178
+ if (bufPool->nbBuffers < bufPool->totalBuffers) {
179
+ bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
180
+ pthread_mutex_unlock(&bufPool->poolMutex);
154
181
  return;
155
182
  }
183
+ pthread_mutex_unlock(&bufPool->poolMutex);
156
184
  /* Reached bufferPool capacity (should not happen) */
157
- ZSTD_free(buf.start, pool->cMem);
185
+ DEBUGLOG(5, "buffer pool capacity reached => freeing ");
186
+ ZSTD_free(buf.start, bufPool->cMem);
158
187
  }
159
188
 
160
189
 
161
190
  /* ===== CCtx Pool ===== */
191
+ /* a single CCtx Pool can be invoked from multiple threads in parallel */
162
192
 
163
193
  typedef struct {
194
+ pthread_mutex_t poolMutex;
164
195
  unsigned totalCCtx;
165
196
  unsigned availCCtx;
166
197
  ZSTD_customMem cMem;
167
198
  ZSTD_CCtx* cctx[1]; /* variable size */
168
199
  } ZSTDMT_CCtxPool;
169
200
 
170
- /* assumption : CCtxPool invocation only from main thread */
171
-
172
201
  /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
173
202
  static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
174
203
  {
175
204
  unsigned u;
176
205
  for (u=0; u<pool->totalCCtx; u++)
177
206
  ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
207
+ pthread_mutex_destroy(&pool->poolMutex);
178
208
  ZSTD_free(pool, pool->cMem);
179
209
  }
180
210
 
@@ -186,6 +216,10 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
186
216
  ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
187
217
  sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
188
218
  if (!cctxPool) return NULL;
219
+ if (pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
220
+ ZSTD_free(cctxPool, cMem);
221
+ return NULL;
222
+ }
189
223
  cctxPool->cMem = cMem;
190
224
  cctxPool->totalCCtx = nbThreads;
191
225
  cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
@@ -198,50 +232,57 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
198
232
  /* only works during initialization phase, not during compression */
199
233
  static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
200
234
  {
201
- unsigned const nbThreads = cctxPool->totalCCtx;
202
- size_t const poolSize = sizeof(*cctxPool)
203
- + (nbThreads-1)*sizeof(ZSTD_CCtx*);
204
- unsigned u;
205
- size_t totalCCtxSize = 0;
206
- for (u=0; u<nbThreads; u++)
207
- totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
208
-
209
- return poolSize + totalCCtxSize;
235
+ pthread_mutex_lock(&cctxPool->poolMutex);
236
+ { unsigned const nbThreads = cctxPool->totalCCtx;
237
+ size_t const poolSize = sizeof(*cctxPool)
238
+ + (nbThreads-1)*sizeof(ZSTD_CCtx*);
239
+ unsigned u;
240
+ size_t totalCCtxSize = 0;
241
+ for (u=0; u<nbThreads; u++) {
242
+ totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
243
+ }
244
+ pthread_mutex_unlock(&cctxPool->poolMutex);
245
+ return poolSize + totalCCtxSize;
246
+ }
210
247
  }
211
248
 
212
- static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
249
+ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
213
250
  {
214
- if (pool->availCCtx) {
215
- pool->availCCtx--;
216
- return pool->cctx[pool->availCCtx];
217
- }
218
- return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */
251
+ DEBUGLOG(5, "ZSTDMT_getCCtx");
252
+ pthread_mutex_lock(&cctxPool->poolMutex);
253
+ if (cctxPool->availCCtx) {
254
+ cctxPool->availCCtx--;
255
+ { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
256
+ pthread_mutex_unlock(&cctxPool->poolMutex);
257
+ return cctx;
258
+ } }
259
+ pthread_mutex_unlock(&cctxPool->poolMutex);
260
+ DEBUGLOG(5, "create one more CCtx");
261
+ return ZSTD_createCCtx_advanced(cctxPool->cMem); /* note : can be NULL, when creation fails ! */
219
262
  }
220
263
 
221
264
  static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
222
265
  {
223
266
  if (cctx==NULL) return; /* compatibility with release on NULL */
267
+ pthread_mutex_lock(&pool->poolMutex);
224
268
  if (pool->availCCtx < pool->totalCCtx)
225
269
  pool->cctx[pool->availCCtx++] = cctx;
226
- else
270
+ else {
227
271
  /* pool overflow : should not happen, since totalCCtx==nbThreads */
272
+ DEBUGLOG(5, "CCtx pool overflow : free cctx");
228
273
  ZSTD_freeCCtx(cctx);
274
+ }
275
+ pthread_mutex_unlock(&pool->poolMutex);
229
276
  }
230
277
 
231
278
 
232
279
  /* ===== Thread worker ===== */
233
280
 
234
281
  typedef struct {
235
- buffer_t buffer;
236
- size_t filled;
237
- } inBuff_t;
238
-
239
- typedef struct {
240
- ZSTD_CCtx* cctx;
241
282
  buffer_t src;
242
283
  const void* srcStart;
243
- size_t srcSize;
244
284
  size_t dictSize;
285
+ size_t srcSize;
245
286
  buffer_t dstBuff;
246
287
  size_t cSize;
247
288
  size_t dstFlushed;
@@ -253,6 +294,8 @@ typedef struct {
253
294
  pthread_cond_t* jobCompleted_cond;
254
295
  ZSTD_parameters params;
255
296
  const ZSTD_CDict* cdict;
297
+ ZSTDMT_CCtxPool* cctxPool;
298
+ ZSTDMT_bufferPool* bufPool;
256
299
  unsigned long long fullFrameSize;
257
300
  } ZSTDMT_jobDescription;
258
301
 
@@ -260,37 +303,56 @@ typedef struct {
260
303
  void ZSTDMT_compressChunk(void* jobDescription)
261
304
  {
262
305
  ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
306
+ ZSTD_CCtx* cctx = ZSTDMT_getCCtx(job->cctxPool);
263
307
  const void* const src = (const char*)job->srcStart + job->dictSize;
264
- buffer_t const dstBuff = job->dstBuff;
308
+ buffer_t dstBuff = job->dstBuff;
265
309
  DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
266
310
  job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
311
+
312
+ if (cctx==NULL) {
313
+ job->cSize = ERROR(memory_allocation);
314
+ goto _endJob;
315
+ }
316
+
317
+ if (dstBuff.start == NULL) {
318
+ dstBuff = ZSTDMT_getBuffer(job->bufPool);
319
+ if (dstBuff.start==NULL) {
320
+ job->cSize = ERROR(memory_allocation);
321
+ goto _endJob;
322
+ }
323
+ job->dstBuff = dstBuff;
324
+ }
325
+
267
326
  if (job->cdict) { /* should only happen for first segment */
268
- size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
327
+ size_t const initError = ZSTD_compressBegin_usingCDict_advanced(cctx, job->cdict, job->params.fParams, job->fullFrameSize);
269
328
  DEBUGLOG(5, "using CDict");
270
329
  if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
271
330
  } else { /* srcStart points at reloaded section */
272
331
  if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */
273
- { size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
274
- size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
332
+ { size_t const dictModeError = ZSTD_setCCtxParameter(cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
333
+ size_t const initError = ZSTD_compressBegin_advanced(cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
275
334
  if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
276
- ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
335
+ ZSTD_setCCtxParameter(cctx, ZSTD_p_forceWindow, 1);
277
336
  } }
278
337
  if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */
279
- size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
338
+ size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
280
339
  if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
281
- ZSTD_invalidateRepCodes(job->cctx);
340
+ ZSTD_invalidateRepCodes(cctx);
282
341
  }
283
342
 
284
343
  DEBUGLOG(5, "Compressing : ");
285
344
  DEBUG_PRINTHEX(4, job->srcStart, 12);
286
345
  job->cSize = (job->lastChunk) ?
287
- ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
288
- ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
346
+ ZSTD_compressEnd (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
347
+ ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
289
348
  DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u)",
290
349
  (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
291
350
  DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
292
351
 
293
352
  _endJob:
353
+ ZSTDMT_releaseCCtx(job->cctxPool, cctx);
354
+ ZSTDMT_releaseBuffer(job->bufPool, job->src);
355
+ job->src = g_nullBuffer; job->srcStart = NULL;
294
356
  PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
295
357
  job->jobCompleted = 1;
296
358
  job->jobScanned = 0;
@@ -303,15 +365,19 @@ _endJob:
303
365
  /* ===== Multi-threaded compression ===== */
304
366
  /* ------------------------------------------ */
305
367
 
368
+ typedef struct {
369
+ buffer_t buffer;
370
+ size_t filled;
371
+ } inBuff_t;
372
+
306
373
  struct ZSTDMT_CCtx_s {
307
374
  POOL_ctx* factory;
308
375
  ZSTDMT_jobDescription* jobs;
309
- ZSTDMT_bufferPool* buffPool;
376
+ ZSTDMT_bufferPool* bufPool;
310
377
  ZSTDMT_CCtxPool* cctxPool;
311
378
  pthread_mutex_t jobCompleted_mutex;
312
379
  pthread_cond_t jobCompleted_cond;
313
380
  size_t targetSectionSize;
314
- size_t marginSize;
315
381
  size_t inBuffSize;
316
382
  size_t dictSize;
317
383
  size_t targetDictSize;
@@ -324,7 +390,7 @@ struct ZSTDMT_CCtx_s {
324
390
  unsigned nextJobID;
325
391
  unsigned frameEnded;
326
392
  unsigned allJobsCompleted;
327
- unsigned overlapRLog;
393
+ unsigned overlapLog;
328
394
  unsigned long long frameContentSize;
329
395
  size_t sectionSize;
330
396
  ZSTD_customMem cMem;
@@ -347,7 +413,8 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
347
413
  U32 nbJobs = nbThreads + 2;
348
414
  DEBUGLOG(3, "ZSTDMT_createCCtx_advanced");
349
415
 
350
- if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
416
+ if (nbThreads < 1) return NULL;
417
+ nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX);
351
418
  if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
352
419
  /* invalid custom allocator */
353
420
  return NULL;
@@ -358,18 +425,24 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
358
425
  mtctx->nbThreads = nbThreads;
359
426
  mtctx->allJobsCompleted = 1;
360
427
  mtctx->sectionSize = 0;
361
- mtctx->overlapRLog = 3;
362
- mtctx->factory = POOL_create(nbThreads, 1);
428
+ mtctx->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
429
+ mtctx->factory = POOL_create(nbThreads, 0);
363
430
  mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
364
431
  mtctx->jobIDMask = nbJobs - 1;
365
- mtctx->buffPool = ZSTDMT_createBufferPool(nbThreads, cMem);
432
+ mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem);
366
433
  mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
367
- if (!mtctx->factory | !mtctx->jobs | !mtctx->buffPool | !mtctx->cctxPool) {
434
+ if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool) {
435
+ ZSTDMT_freeCCtx(mtctx);
436
+ return NULL;
437
+ }
438
+ if (pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL)) {
439
+ ZSTDMT_freeCCtx(mtctx);
440
+ return NULL;
441
+ }
442
+ if (pthread_cond_init(&mtctx->jobCompleted_cond, NULL)) {
368
443
  ZSTDMT_freeCCtx(mtctx);
369
444
  return NULL;
370
445
  }
371
- pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
372
- pthread_cond_init(&mtctx->jobCompleted_cond, NULL);
373
446
  DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
374
447
  return mtctx;
375
448
  }
@@ -386,15 +459,13 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
386
459
  unsigned jobID;
387
460
  DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
388
461
  for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
389
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
462
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
390
463
  mtctx->jobs[jobID].dstBuff = g_nullBuffer;
391
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
464
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].src);
392
465
  mtctx->jobs[jobID].src = g_nullBuffer;
393
- ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
394
- mtctx->jobs[jobID].cctx = NULL;
395
466
  }
396
467
  memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
397
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
468
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);
398
469
  mtctx->inBuff.buffer = g_nullBuffer;
399
470
  mtctx->allJobsCompleted = 1;
400
471
  }
@@ -404,7 +475,7 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
404
475
  if (mtctx==NULL) return 0; /* compatible with free on NULL */
405
476
  POOL_free(mtctx->factory);
406
477
  if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
407
- ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */
478
+ ZSTDMT_freeBufferPool(mtctx->bufPool); /* release job resources into pools first */
408
479
  ZSTD_free(mtctx->jobs, mtctx->cMem);
409
480
  ZSTDMT_freeCCtxPool(mtctx->cctxPool);
410
481
  ZSTD_freeCDict(mtctx->cdictLocal);
@@ -418,11 +489,11 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
418
489
  {
419
490
  if (mtctx == NULL) return 0; /* supports sizeof NULL */
420
491
  return sizeof(*mtctx)
421
- + POOL_sizeof(mtctx->factory)
422
- + ZSTDMT_sizeof_bufferPool(mtctx->buffPool)
423
- + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
424
- + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
425
- + ZSTD_sizeof_CDict(mtctx->cdictLocal);
492
+ + POOL_sizeof(mtctx->factory)
493
+ + ZSTDMT_sizeof_bufferPool(mtctx->bufPool)
494
+ + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
495
+ + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
496
+ + ZSTD_sizeof_CDict(mtctx->cdictLocal);
426
497
  }
427
498
 
428
499
  size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
@@ -434,10 +505,10 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
434
505
  return 0;
435
506
  case ZSTDMT_p_overlapSectionLog :
436
507
  DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value);
437
- mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
508
+ mtctx->overlapLog = (value >= 9) ? 9 : value;
438
509
  return 0;
439
510
  default :
440
- return ERROR(compressionParameter_unsupported);
511
+ return ERROR(parameter_unsupported);
441
512
  }
442
513
  }
443
514
 
@@ -459,12 +530,13 @@ static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbT
459
530
 
460
531
 
461
532
  size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
462
- void* dst, size_t dstCapacity,
463
- const void* src, size_t srcSize,
464
- const ZSTD_CDict* cdict,
465
- ZSTD_parameters const params,
466
- unsigned overlapRLog)
533
+ void* dst, size_t dstCapacity,
534
+ const void* src, size_t srcSize,
535
+ const ZSTD_CDict* cdict,
536
+ ZSTD_parameters const params,
537
+ unsigned overlapLog)
467
538
  {
539
+ unsigned const overlapRLog = (overlapLog>9) ? 0 : 9-overlapLog;
468
540
  size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
469
541
  unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads);
470
542
  size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
@@ -473,6 +545,7 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
473
545
  size_t remainingSrcSize = srcSize;
474
546
  unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
475
547
  size_t frameStartPos = 0, dstBufferPos = 0;
548
+ XXH64_state_t xxh64;
476
549
 
477
550
  DEBUGLOG(4, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
478
551
  if (nbChunks==1) { /* fallback to single-thread mode */
@@ -480,7 +553,9 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
480
553
  if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams);
481
554
  return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
482
555
  }
483
- assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is useful to avoid allocating extra buffers */
556
+ assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */
557
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
558
+ XXH64_reset(&xxh64, 0);
484
559
 
485
560
  if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */
486
561
  U32 nbJobs = nbChunks;
@@ -496,17 +571,10 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
496
571
  size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
497
572
  size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
498
573
  buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
499
- buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity);
500
- ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);
574
+ buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
501
575
  size_t dictSize = u ? overlapSize : 0;
502
576
 
503
- if ((cctx==NULL) || (dstBuffer.start==NULL)) {
504
- mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */
505
- mtctx->jobs[u].jobCompleted = 1;
506
- nbChunks = u+1; /* only wait and free u jobs, instead of initially expected nbChunks ones */
507
- break; /* let's wait for previous jobs to complete, but don't start new ones */
508
- }
509
-
577
+ mtctx->jobs[u].src = g_nullBuffer;
510
578
  mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
511
579
  mtctx->jobs[u].dictSize = dictSize;
512
580
  mtctx->jobs[u].srcSize = chunkSize;
@@ -516,13 +584,18 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
516
584
  /* do not calculate checksum within sections, but write it in header for first section */
517
585
  if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
518
586
  mtctx->jobs[u].dstBuff = dstBuffer;
519
- mtctx->jobs[u].cctx = cctx;
587
+ mtctx->jobs[u].cctxPool = mtctx->cctxPool;
588
+ mtctx->jobs[u].bufPool = mtctx->bufPool;
520
589
  mtctx->jobs[u].firstChunk = (u==0);
521
590
  mtctx->jobs[u].lastChunk = (u==nbChunks-1);
522
591
  mtctx->jobs[u].jobCompleted = 0;
523
592
  mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
524
593
  mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
525
594
 
595
+ if (params.fParams.checksumFlag) {
596
+ XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
597
+ }
598
+
526
599
  DEBUGLOG(5, "posting job %u (%u bytes)", u, (U32)chunkSize);
527
600
  DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
528
601
  POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
@@ -533,8 +606,8 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
533
606
  } }
534
607
 
535
608
  /* collect result */
536
- { unsigned chunkID;
537
- size_t error = 0, dstPos = 0;
609
+ { size_t error = 0, dstPos = 0;
610
+ unsigned chunkID;
538
611
  for (chunkID=0; chunkID<nbChunks; chunkID++) {
539
612
  DEBUGLOG(5, "waiting for chunk %u ", chunkID);
540
613
  PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
@@ -545,8 +618,6 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
545
618
  pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
546
619
  DEBUGLOG(5, "ready to write chunk %u ", chunkID);
547
620
 
548
- ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
549
- mtctx->jobs[chunkID].cctx = NULL;
550
621
  mtctx->jobs[chunkID].srcStart = NULL;
551
622
  { size_t const cSize = mtctx->jobs[chunkID].cSize;
552
623
  if (ZSTD_isError(cSize)) error = cSize;
@@ -556,13 +627,25 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
556
627
  memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */
557
628
  if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */
558
629
  DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
559
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
630
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff);
560
631
  }
561
632
  mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
562
633
  }
563
634
  dstPos += cSize ;
564
635
  }
565
- }
636
+ } /* for (chunkID=0; chunkID<nbChunks; chunkID++) */
637
+
638
+ DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
639
+ if (params.fParams.checksumFlag) {
640
+ U32 const checksum = (U32)XXH64_digest(&xxh64);
641
+ if (dstPos + 4 > dstCapacity) {
642
+ error = ERROR(dstSize_tooSmall);
643
+ } else {
644
+ DEBUGLOG(4, "writing checksum : %08X \n", checksum);
645
+ MEM_writeLE32((char*)dst + dstPos, checksum);
646
+ dstPos += 4;
647
+ } }
648
+
566
649
  if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos);
567
650
  return error ? error : dstPos;
568
651
  }
@@ -574,10 +657,10 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
574
657
  const void* src, size_t srcSize,
575
658
  int compressionLevel)
576
659
  {
577
- U32 const overlapRLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
660
+ U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
578
661
  ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
579
662
  params.fParams.contentSizeFlag = 1;
580
- return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapRLog);
663
+ return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
581
664
  }
582
665
 
583
666
 
@@ -615,8 +698,8 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
615
698
  if (zcs->nbThreads==1) {
616
699
  DEBUGLOG(4, "single thread mode");
617
700
  return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
618
- dict, dictSize, cdict,
619
- params, pledgedSrcSize);
701
+ dict, dictSize, cdict,
702
+ params, pledgedSrcSize);
620
703
  }
621
704
 
622
705
  if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */
@@ -642,18 +725,16 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
642
725
  zcs->cdict = cdict;
643
726
  }
644
727
 
645
- zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
646
- DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
728
+ zcs->targetDictSize = (zcs->overlapLog==0) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - (9 - zcs->overlapLog));
729
+ DEBUGLOG(4, "overlapLog : %u ", zcs->overlapLog);
647
730
  DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
648
731
  zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
649
732
  zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
650
733
  zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
651
734
  DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
652
- zcs->marginSize = zcs->targetSectionSize >> 2;
653
- zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
654
- zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
655
- if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
656
- zcs->inBuff.filled = 0;
735
+ zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize;
736
+ ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) );
737
+ zcs->inBuff.buffer = g_nullBuffer;
657
738
  zcs->dictSize = 0;
658
739
  zcs->doneJobID = 0;
659
740
  zcs->nextJobID = 0;
@@ -664,8 +745,9 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
664
745
  }
665
746
 
666
747
  size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
667
- const void* dict, size_t dictSize,
668
- ZSTD_parameters params, unsigned long long pledgedSrcSize)
748
+ const void* dict, size_t dictSize,
749
+ ZSTD_parameters params,
750
+ unsigned long long pledgedSrcSize)
669
751
  {
670
752
  DEBUGLOG(5, "ZSTDMT_initCStream_advanced");
671
753
  return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize);
@@ -701,19 +783,8 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
701
783
 
702
784
  static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
703
785
  {
704
- size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
705
- buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
706
- ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
707
786
  unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
708
787
 
709
- if ((cctx==NULL) || (dstBuffer.start==NULL)) {
710
- zcs->jobs[jobID].jobCompleted = 1;
711
- zcs->nextJobID++;
712
- ZSTDMT_waitForAllJobsCompleted(zcs);
713
- ZSTDMT_releaseAllJobResources(zcs);
714
- return ERROR(memory_allocation);
715
- }
716
-
717
788
  DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
718
789
  zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
719
790
  zcs->jobs[jobID].src = zcs->inBuff.buffer;
@@ -726,8 +797,9 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
726
797
  if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
727
798
  zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
728
799
  zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
729
- zcs->jobs[jobID].dstBuff = dstBuffer;
730
- zcs->jobs[jobID].cctx = cctx;
800
+ zcs->jobs[jobID].dstBuff = g_nullBuffer;
801
+ zcs->jobs[jobID].cctxPool = zcs->cctxPool;
802
+ zcs->jobs[jobID].bufPool = zcs->bufPool;
731
803
  zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
732
804
  zcs->jobs[jobID].lastChunk = endFrame;
733
805
  zcs->jobs[jobID].jobCompleted = 0;
@@ -735,11 +807,13 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
735
807
  zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
736
808
  zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
737
809
 
810
+ if (zcs->params.fParams.checksumFlag)
811
+ XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->dictSize, srcSize);
812
+
738
813
  /* get a new buffer for next input */
739
814
  if (!endFrame) {
740
815
  size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
741
- DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
742
- zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
816
+ zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
743
817
  if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
744
818
  zcs->jobs[jobID].jobCompleted = 1;
745
819
  zcs->nextJobID++;
@@ -747,26 +821,20 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
747
821
  ZSTDMT_releaseAllJobResources(zcs);
748
822
  return ERROR(memory_allocation);
749
823
  }
750
- DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled);
751
824
  zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
752
- DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src",
753
- (U32)zcs->inBuff.filled, (U32)newDictSize,
754
- (U32)(zcs->inBuff.filled - newDictSize));
755
825
  memmove(zcs->inBuff.buffer.start,
756
826
  (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
757
827
  zcs->inBuff.filled);
758
- DEBUGLOG(5, "new inBuff pre-filled");
759
828
  zcs->dictSize = newDictSize;
760
829
  } else { /* if (endFrame==1) */
761
- DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
762
830
  zcs->inBuff.buffer = g_nullBuffer;
763
831
  zcs->inBuff.filled = 0;
764
832
  zcs->dictSize = 0;
765
833
  zcs->frameEnded = 1;
766
- if (zcs->nextJobID == 0)
834
+ if (zcs->nextJobID == 0) {
767
835
  /* single chunk exception : checksum is calculated directly within worker thread */
768
836
  zcs->params.fParams.checksumFlag = 0;
769
- }
837
+ } }
770
838
 
771
839
  DEBUGLOG(4, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)",
772
840
  zcs->nextJobID,
@@ -804,11 +872,8 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
804
872
  ZSTDMT_releaseAllJobResources(zcs);
805
873
  return job.cSize;
806
874
  }
807
- ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
808
- zcs->jobs[wJobID].cctx = NULL;
809
875
  DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
810
876
  if (zcs->params.fParams.checksumFlag) {
811
- XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
812
877
  if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
813
878
  U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
814
879
  DEBUGLOG(5, "writing checksum : %08X \n", checksum);
@@ -816,9 +881,6 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
816
881
  job.cSize += 4;
817
882
  zcs->jobs[wJobID].cSize += 4;
818
883
  } }
819
- ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
820
- zcs->jobs[wJobID].srcStart = NULL;
821
- zcs->jobs[wJobID].src = g_nullBuffer;
822
884
  zcs->jobs[wJobID].jobScanned = 1;
823
885
  }
824
886
  { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
@@ -828,7 +890,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
828
890
  job.dstFlushed += toWrite;
829
891
  }
830
892
  if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */
831
- ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
893
+ ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
832
894
  zcs->jobs[wJobID].dstBuff = g_nullBuffer;
833
895
  zcs->jobs[wJobID].jobCompleted = 0;
834
896
  zcs->doneJobID++;
@@ -852,18 +914,18 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
852
914
  ZSTD_inBuffer* input,
853
915
  ZSTD_EndDirective endOp)
854
916
  {
855
- size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize + mtctx->marginSize;
917
+ size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize;
856
918
  assert(output->pos <= output->size);
857
919
  assert(input->pos <= input->size);
858
920
  if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
859
921
  /* current frame being ended. Only flush/end are allowed. Or start new frame with init */
860
922
  return ERROR(stage_wrong);
861
923
  }
862
- if (mtctx->nbThreads==1) {
924
+ if (mtctx->nbThreads==1) { /* delegate to single-thread (synchronous) */
863
925
  return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
864
926
  }
865
927
 
866
- /* single-pass shortcut (note : this is blocking-mode) */
928
+ /* single-pass shortcut (note : this is synchronous-mode) */
867
929
  if ( (mtctx->nextJobID==0) /* just started */
868
930
  && (mtctx->inBuff.filled==0) /* nothing buffered */
869
931
  && (endOp==ZSTD_e_end) /* end order */
@@ -871,24 +933,29 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
871
933
  size_t const cSize = ZSTDMT_compress_advanced(mtctx,
872
934
  (char*)output->dst + output->pos, output->size - output->pos,
873
935
  (const char*)input->src + input->pos, input->size - input->pos,
874
- mtctx->cdict, mtctx->params, mtctx->overlapRLog);
936
+ mtctx->cdict, mtctx->params, mtctx->overlapLog);
875
937
  if (ZSTD_isError(cSize)) return cSize;
876
938
  input->pos = input->size;
877
939
  output->pos += cSize;
878
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); /* was allocated in initStream */
940
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer); /* was allocated in initStream */
879
941
  mtctx->allJobsCompleted = 1;
880
942
  mtctx->frameEnded = 1;
881
943
  return 0;
882
944
  }
883
945
 
884
946
  /* fill input buffer */
885
- if ((input->src) && (mtctx->inBuff.buffer.start)) { /* support NULL input */
886
- size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
887
- DEBUGLOG(2, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
888
- memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
889
- input->pos += toLoad;
890
- mtctx->inBuff.filled += toLoad;
891
- }
947
+ if (input->size > input->pos) { /* support NULL input */
948
+ if (mtctx->inBuff.buffer.start == NULL) {
949
+ mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool);
950
+ if (mtctx->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
951
+ mtctx->inBuff.filled = 0;
952
+ }
953
+ { size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
954
+ DEBUGLOG(5, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
955
+ memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
956
+ input->pos += toLoad;
957
+ mtctx->inBuff.filled += toLoad;
958
+ } }
892
959
 
893
960
  if ( (mtctx->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
894
961
  && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { /* avoid overwriting job round buffer */