zstd-ruby 1.3.0.0 → 1.3.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/common/bitstream.h +40 -41
  4. data/ext/zstdruby/libzstd/common/compiler.h +85 -0
  5. data/ext/zstdruby/libzstd/common/error_private.c +8 -10
  6. data/ext/zstdruby/libzstd/common/error_private.h +4 -4
  7. data/ext/zstdruby/libzstd/common/fse.h +11 -5
  8. data/ext/zstdruby/libzstd/common/fse_decompress.c +3 -22
  9. data/ext/zstdruby/libzstd/common/huf.h +5 -6
  10. data/ext/zstdruby/libzstd/common/mem.h +6 -6
  11. data/ext/zstdruby/libzstd/common/pool.c +61 -27
  12. data/ext/zstdruby/libzstd/common/pool.h +10 -10
  13. data/ext/zstdruby/libzstd/common/threading.h +5 -6
  14. data/ext/zstdruby/libzstd/common/xxhash.c +28 -22
  15. data/ext/zstdruby/libzstd/common/zstd_common.c +4 -4
  16. data/ext/zstdruby/libzstd/common/zstd_errors.h +30 -32
  17. data/ext/zstdruby/libzstd/common/zstd_internal.h +57 -56
  18. data/ext/zstdruby/libzstd/compress/fse_compress.c +4 -22
  19. data/ext/zstdruby/libzstd/compress/huf_compress.c +4 -3
  20. data/ext/zstdruby/libzstd/compress/zstd_compress.c +314 -304
  21. data/ext/zstdruby/libzstd/compress/zstd_opt.h +118 -116
  22. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +223 -156
  23. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +10 -9
  24. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +8 -24
  25. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +95 -115
  26. data/ext/zstdruby/libzstd/deprecated/zbuff.h +4 -4
  27. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +4 -5
  28. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +4 -4
  29. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +4 -4
  30. data/ext/zstdruby/libzstd/dictBuilder/cover.c +7 -9
  31. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
  32. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +4 -4
  33. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  34. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +4 -4
  35. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +4 -4
  36. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +4 -4
  37. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +4 -4
  38. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +4 -4
  39. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +4 -4
  40. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +5 -5
  41. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +4 -4
  42. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +5 -5
  43. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +4 -4
  44. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +5 -5
  45. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +4 -4
  46. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +4 -4
  47. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +4 -4
  48. data/ext/zstdruby/libzstd/zstd.h +16 -14
  49. data/lib/zstd-ruby/version.rb +1 -1
  50. metadata +3 -2
@@ -1,15 +1,16 @@
1
- /**
1
+ /*
2
2
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
- * This source code is licensed under the BSD-style license found in the
6
- * LICENSE file in the root directory of this source tree. An additional grant
7
- * of patent rights can be found in the PATENTS file in the same directory.
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
8
  */
9
9
 
10
10
 
11
11
  /* ====== Tuning parameters ====== */
12
- #define ZSTDMT_NBTHREADS_MAX 128
12
+ #define ZSTDMT_NBTHREADS_MAX 256
13
+ #define ZSTDMT_OVERLAPLOG_DEFAULT 6
13
14
 
14
15
 
15
16
  /* ====== Compiler specifics ====== */
@@ -73,6 +74,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
73
74
 
74
75
 
75
76
  /* ===== Buffer Pool ===== */
77
+ /* a single Buffer Pool can be invoked from multiple threads in parallel */
76
78
 
77
79
  typedef struct buffer_s {
78
80
  void* start;
@@ -82,6 +84,8 @@ typedef struct buffer_s {
82
84
  static const buffer_t g_nullBuffer = { NULL, 0 };
83
85
 
84
86
  typedef struct ZSTDMT_bufferPool_s {
87
+ pthread_mutex_t poolMutex;
88
+ size_t bufferSize;
85
89
  unsigned totalBuffers;
86
90
  unsigned nbBuffers;
87
91
  ZSTD_customMem cMem;
@@ -90,10 +94,15 @@ typedef struct ZSTDMT_bufferPool_s {
90
94
 
91
95
  static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
92
96
  {
93
- unsigned const maxNbBuffers = 2*nbThreads + 2;
97
+ unsigned const maxNbBuffers = 2*nbThreads + 3;
94
98
  ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
95
99
  sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
96
100
  if (bufPool==NULL) return NULL;
101
+ if (pthread_mutex_init(&bufPool->poolMutex, NULL)) {
102
+ ZSTD_free(bufPool, cMem);
103
+ return NULL;
104
+ }
105
+ bufPool->bufferSize = 64 KB;
97
106
  bufPool->totalBuffers = maxNbBuffers;
98
107
  bufPool->nbBuffers = 0;
99
108
  bufPool->cMem = cMem;
@@ -106,6 +115,7 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
106
115
  if (!bufPool) return; /* compatibility with free on NULL */
107
116
  for (u=0; u<bufPool->totalBuffers; u++)
108
117
  ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
118
+ pthread_mutex_destroy(&bufPool->poolMutex);
109
119
  ZSTD_free(bufPool, bufPool->cMem);
110
120
  }
111
121
 
@@ -116,65 +126,85 @@ static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
116
126
  + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
117
127
  unsigned u;
118
128
  size_t totalBufferSize = 0;
129
+ pthread_mutex_lock(&bufPool->poolMutex);
119
130
  for (u=0; u<bufPool->totalBuffers; u++)
120
131
  totalBufferSize += bufPool->bTable[u].size;
132
+ pthread_mutex_unlock(&bufPool->poolMutex);
121
133
 
122
134
  return poolSize + totalBufferSize;
123
135
  }
124
136
 
125
- /** ZSTDMT_getBuffer() :
126
- * assumption : invocation from main thread only ! */
127
- static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
137
+ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* bufPool, size_t bSize)
128
138
  {
129
- if (pool->nbBuffers) { /* try to use an existing buffer */
130
- buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
139
+ bufPool->bufferSize = bSize;
140
+ }
141
+
142
+ /** ZSTDMT_getBuffer() :
143
+ * assumption : bufPool must be valid */
144
+ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
145
+ {
146
+ size_t const bSize = bufPool->bufferSize;
147
+ DEBUGLOG(5, "ZSTDMT_getBuffer");
148
+ pthread_mutex_lock(&bufPool->poolMutex);
149
+ if (bufPool->nbBuffers) { /* try to use an existing buffer */
150
+ buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
131
151
  size_t const availBufferSize = buf.size;
132
- if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
152
+ if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) {
133
153
  /* large enough, but not too much */
154
+ pthread_mutex_unlock(&bufPool->poolMutex);
134
155
  return buf;
156
+ }
135
157
  /* size conditions not respected : scratch this buffer, create new one */
136
- ZSTD_free(buf.start, pool->cMem);
158
+ DEBUGLOG(5, "existing buffer does not meet size conditions => freeing");
159
+ ZSTD_free(buf.start, bufPool->cMem);
137
160
  }
161
+ pthread_mutex_unlock(&bufPool->poolMutex);
138
162
  /* create new buffer */
163
+ DEBUGLOG(5, "create a new buffer");
139
164
  { buffer_t buffer;
140
- void* const start = ZSTD_malloc(bSize, pool->cMem);
141
- if (start==NULL) bSize = 0;
165
+ void* const start = ZSTD_malloc(bSize, bufPool->cMem);
142
166
  buffer.start = start; /* note : start can be NULL if malloc fails ! */
143
- buffer.size = bSize;
167
+ buffer.size = (start==NULL) ? 0 : bSize;
144
168
  return buffer;
145
169
  }
146
170
  }
147
171
 
148
172
  /* store buffer for later re-use, up to pool capacity */
149
- static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
150
- {
151
- if (buf.start == NULL) return; /* release on NULL */
152
- if (pool->nbBuffers < pool->totalBuffers) {
153
- pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */
173
+ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
174
+ {
175
+ if (buf.start == NULL) return; /* compatible with release on NULL */
176
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer");
177
+ pthread_mutex_lock(&bufPool->poolMutex);
178
+ if (bufPool->nbBuffers < bufPool->totalBuffers) {
179
+ bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
180
+ pthread_mutex_unlock(&bufPool->poolMutex);
154
181
  return;
155
182
  }
183
+ pthread_mutex_unlock(&bufPool->poolMutex);
156
184
  /* Reached bufferPool capacity (should not happen) */
157
- ZSTD_free(buf.start, pool->cMem);
185
+ DEBUGLOG(5, "buffer pool capacity reached => freeing ");
186
+ ZSTD_free(buf.start, bufPool->cMem);
158
187
  }
159
188
 
160
189
 
161
190
  /* ===== CCtx Pool ===== */
191
+ /* a single CCtx Pool can be invoked from multiple threads in parallel */
162
192
 
163
193
  typedef struct {
194
+ pthread_mutex_t poolMutex;
164
195
  unsigned totalCCtx;
165
196
  unsigned availCCtx;
166
197
  ZSTD_customMem cMem;
167
198
  ZSTD_CCtx* cctx[1]; /* variable size */
168
199
  } ZSTDMT_CCtxPool;
169
200
 
170
- /* assumption : CCtxPool invocation only from main thread */
171
-
172
201
  /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
173
202
  static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
174
203
  {
175
204
  unsigned u;
176
205
  for (u=0; u<pool->totalCCtx; u++)
177
206
  ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
207
+ pthread_mutex_destroy(&pool->poolMutex);
178
208
  ZSTD_free(pool, pool->cMem);
179
209
  }
180
210
 
@@ -186,6 +216,10 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
186
216
  ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
187
217
  sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
188
218
  if (!cctxPool) return NULL;
219
+ if (pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
220
+ ZSTD_free(cctxPool, cMem);
221
+ return NULL;
222
+ }
189
223
  cctxPool->cMem = cMem;
190
224
  cctxPool->totalCCtx = nbThreads;
191
225
  cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
@@ -198,50 +232,57 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
198
232
  /* only works during initialization phase, not during compression */
199
233
  static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
200
234
  {
201
- unsigned const nbThreads = cctxPool->totalCCtx;
202
- size_t const poolSize = sizeof(*cctxPool)
203
- + (nbThreads-1)*sizeof(ZSTD_CCtx*);
204
- unsigned u;
205
- size_t totalCCtxSize = 0;
206
- for (u=0; u<nbThreads; u++)
207
- totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
208
-
209
- return poolSize + totalCCtxSize;
235
+ pthread_mutex_lock(&cctxPool->poolMutex);
236
+ { unsigned const nbThreads = cctxPool->totalCCtx;
237
+ size_t const poolSize = sizeof(*cctxPool)
238
+ + (nbThreads-1)*sizeof(ZSTD_CCtx*);
239
+ unsigned u;
240
+ size_t totalCCtxSize = 0;
241
+ for (u=0; u<nbThreads; u++) {
242
+ totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
243
+ }
244
+ pthread_mutex_unlock(&cctxPool->poolMutex);
245
+ return poolSize + totalCCtxSize;
246
+ }
210
247
  }
211
248
 
212
- static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
249
+ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
213
250
  {
214
- if (pool->availCCtx) {
215
- pool->availCCtx--;
216
- return pool->cctx[pool->availCCtx];
217
- }
218
- return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */
251
+ DEBUGLOG(5, "ZSTDMT_getCCtx");
252
+ pthread_mutex_lock(&cctxPool->poolMutex);
253
+ if (cctxPool->availCCtx) {
254
+ cctxPool->availCCtx--;
255
+ { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
256
+ pthread_mutex_unlock(&cctxPool->poolMutex);
257
+ return cctx;
258
+ } }
259
+ pthread_mutex_unlock(&cctxPool->poolMutex);
260
+ DEBUGLOG(5, "create one more CCtx");
261
+ return ZSTD_createCCtx_advanced(cctxPool->cMem); /* note : can be NULL, when creation fails ! */
219
262
  }
220
263
 
221
264
  static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
222
265
  {
223
266
  if (cctx==NULL) return; /* compatibility with release on NULL */
267
+ pthread_mutex_lock(&pool->poolMutex);
224
268
  if (pool->availCCtx < pool->totalCCtx)
225
269
  pool->cctx[pool->availCCtx++] = cctx;
226
- else
270
+ else {
227
271
  /* pool overflow : should not happen, since totalCCtx==nbThreads */
272
+ DEBUGLOG(5, "CCtx pool overflow : free cctx");
228
273
  ZSTD_freeCCtx(cctx);
274
+ }
275
+ pthread_mutex_unlock(&pool->poolMutex);
229
276
  }
230
277
 
231
278
 
232
279
  /* ===== Thread worker ===== */
233
280
 
234
281
  typedef struct {
235
- buffer_t buffer;
236
- size_t filled;
237
- } inBuff_t;
238
-
239
- typedef struct {
240
- ZSTD_CCtx* cctx;
241
282
  buffer_t src;
242
283
  const void* srcStart;
243
- size_t srcSize;
244
284
  size_t dictSize;
285
+ size_t srcSize;
245
286
  buffer_t dstBuff;
246
287
  size_t cSize;
247
288
  size_t dstFlushed;
@@ -253,6 +294,8 @@ typedef struct {
253
294
  pthread_cond_t* jobCompleted_cond;
254
295
  ZSTD_parameters params;
255
296
  const ZSTD_CDict* cdict;
297
+ ZSTDMT_CCtxPool* cctxPool;
298
+ ZSTDMT_bufferPool* bufPool;
256
299
  unsigned long long fullFrameSize;
257
300
  } ZSTDMT_jobDescription;
258
301
 
@@ -260,37 +303,56 @@ typedef struct {
260
303
  void ZSTDMT_compressChunk(void* jobDescription)
261
304
  {
262
305
  ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
306
+ ZSTD_CCtx* cctx = ZSTDMT_getCCtx(job->cctxPool);
263
307
  const void* const src = (const char*)job->srcStart + job->dictSize;
264
- buffer_t const dstBuff = job->dstBuff;
308
+ buffer_t dstBuff = job->dstBuff;
265
309
  DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
266
310
  job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
311
+
312
+ if (cctx==NULL) {
313
+ job->cSize = ERROR(memory_allocation);
314
+ goto _endJob;
315
+ }
316
+
317
+ if (dstBuff.start == NULL) {
318
+ dstBuff = ZSTDMT_getBuffer(job->bufPool);
319
+ if (dstBuff.start==NULL) {
320
+ job->cSize = ERROR(memory_allocation);
321
+ goto _endJob;
322
+ }
323
+ job->dstBuff = dstBuff;
324
+ }
325
+
267
326
  if (job->cdict) { /* should only happen for first segment */
268
- size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
327
+ size_t const initError = ZSTD_compressBegin_usingCDict_advanced(cctx, job->cdict, job->params.fParams, job->fullFrameSize);
269
328
  DEBUGLOG(5, "using CDict");
270
329
  if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
271
330
  } else { /* srcStart points at reloaded section */
272
331
  if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */
273
- { size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
274
- size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
332
+ { size_t const dictModeError = ZSTD_setCCtxParameter(cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
333
+ size_t const initError = ZSTD_compressBegin_advanced(cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
275
334
  if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
276
- ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
335
+ ZSTD_setCCtxParameter(cctx, ZSTD_p_forceWindow, 1);
277
336
  } }
278
337
  if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */
279
- size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
338
+ size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
280
339
  if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
281
- ZSTD_invalidateRepCodes(job->cctx);
340
+ ZSTD_invalidateRepCodes(cctx);
282
341
  }
283
342
 
284
343
  DEBUGLOG(5, "Compressing : ");
285
344
  DEBUG_PRINTHEX(4, job->srcStart, 12);
286
345
  job->cSize = (job->lastChunk) ?
287
- ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
288
- ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
346
+ ZSTD_compressEnd (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
347
+ ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
289
348
  DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u)",
290
349
  (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
291
350
  DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
292
351
 
293
352
  _endJob:
353
+ ZSTDMT_releaseCCtx(job->cctxPool, cctx);
354
+ ZSTDMT_releaseBuffer(job->bufPool, job->src);
355
+ job->src = g_nullBuffer; job->srcStart = NULL;
294
356
  PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
295
357
  job->jobCompleted = 1;
296
358
  job->jobScanned = 0;
@@ -303,15 +365,19 @@ _endJob:
303
365
  /* ===== Multi-threaded compression ===== */
304
366
  /* ------------------------------------------ */
305
367
 
368
+ typedef struct {
369
+ buffer_t buffer;
370
+ size_t filled;
371
+ } inBuff_t;
372
+
306
373
  struct ZSTDMT_CCtx_s {
307
374
  POOL_ctx* factory;
308
375
  ZSTDMT_jobDescription* jobs;
309
- ZSTDMT_bufferPool* buffPool;
376
+ ZSTDMT_bufferPool* bufPool;
310
377
  ZSTDMT_CCtxPool* cctxPool;
311
378
  pthread_mutex_t jobCompleted_mutex;
312
379
  pthread_cond_t jobCompleted_cond;
313
380
  size_t targetSectionSize;
314
- size_t marginSize;
315
381
  size_t inBuffSize;
316
382
  size_t dictSize;
317
383
  size_t targetDictSize;
@@ -324,7 +390,7 @@ struct ZSTDMT_CCtx_s {
324
390
  unsigned nextJobID;
325
391
  unsigned frameEnded;
326
392
  unsigned allJobsCompleted;
327
- unsigned overlapRLog;
393
+ unsigned overlapLog;
328
394
  unsigned long long frameContentSize;
329
395
  size_t sectionSize;
330
396
  ZSTD_customMem cMem;
@@ -347,7 +413,8 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
347
413
  U32 nbJobs = nbThreads + 2;
348
414
  DEBUGLOG(3, "ZSTDMT_createCCtx_advanced");
349
415
 
350
- if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
416
+ if (nbThreads < 1) return NULL;
417
+ nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX);
351
418
  if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
352
419
  /* invalid custom allocator */
353
420
  return NULL;
@@ -358,18 +425,24 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
358
425
  mtctx->nbThreads = nbThreads;
359
426
  mtctx->allJobsCompleted = 1;
360
427
  mtctx->sectionSize = 0;
361
- mtctx->overlapRLog = 3;
362
- mtctx->factory = POOL_create(nbThreads, 1);
428
+ mtctx->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
429
+ mtctx->factory = POOL_create(nbThreads, 0);
363
430
  mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
364
431
  mtctx->jobIDMask = nbJobs - 1;
365
- mtctx->buffPool = ZSTDMT_createBufferPool(nbThreads, cMem);
432
+ mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem);
366
433
  mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
367
- if (!mtctx->factory | !mtctx->jobs | !mtctx->buffPool | !mtctx->cctxPool) {
434
+ if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool) {
435
+ ZSTDMT_freeCCtx(mtctx);
436
+ return NULL;
437
+ }
438
+ if (pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL)) {
439
+ ZSTDMT_freeCCtx(mtctx);
440
+ return NULL;
441
+ }
442
+ if (pthread_cond_init(&mtctx->jobCompleted_cond, NULL)) {
368
443
  ZSTDMT_freeCCtx(mtctx);
369
444
  return NULL;
370
445
  }
371
- pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
372
- pthread_cond_init(&mtctx->jobCompleted_cond, NULL);
373
446
  DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
374
447
  return mtctx;
375
448
  }
@@ -386,15 +459,13 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
386
459
  unsigned jobID;
387
460
  DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
388
461
  for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
389
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
462
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
390
463
  mtctx->jobs[jobID].dstBuff = g_nullBuffer;
391
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
464
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].src);
392
465
  mtctx->jobs[jobID].src = g_nullBuffer;
393
- ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
394
- mtctx->jobs[jobID].cctx = NULL;
395
466
  }
396
467
  memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
397
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
468
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);
398
469
  mtctx->inBuff.buffer = g_nullBuffer;
399
470
  mtctx->allJobsCompleted = 1;
400
471
  }
@@ -404,7 +475,7 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
404
475
  if (mtctx==NULL) return 0; /* compatible with free on NULL */
405
476
  POOL_free(mtctx->factory);
406
477
  if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
407
- ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */
478
+ ZSTDMT_freeBufferPool(mtctx->bufPool); /* release job resources into pools first */
408
479
  ZSTD_free(mtctx->jobs, mtctx->cMem);
409
480
  ZSTDMT_freeCCtxPool(mtctx->cctxPool);
410
481
  ZSTD_freeCDict(mtctx->cdictLocal);
@@ -418,11 +489,11 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
418
489
  {
419
490
  if (mtctx == NULL) return 0; /* supports sizeof NULL */
420
491
  return sizeof(*mtctx)
421
- + POOL_sizeof(mtctx->factory)
422
- + ZSTDMT_sizeof_bufferPool(mtctx->buffPool)
423
- + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
424
- + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
425
- + ZSTD_sizeof_CDict(mtctx->cdictLocal);
492
+ + POOL_sizeof(mtctx->factory)
493
+ + ZSTDMT_sizeof_bufferPool(mtctx->bufPool)
494
+ + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
495
+ + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
496
+ + ZSTD_sizeof_CDict(mtctx->cdictLocal);
426
497
  }
427
498
 
428
499
  size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
@@ -434,10 +505,10 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
434
505
  return 0;
435
506
  case ZSTDMT_p_overlapSectionLog :
436
507
  DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value);
437
- mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
508
+ mtctx->overlapLog = (value >= 9) ? 9 : value;
438
509
  return 0;
439
510
  default :
440
- return ERROR(compressionParameter_unsupported);
511
+ return ERROR(parameter_unsupported);
441
512
  }
442
513
  }
443
514
 
@@ -459,12 +530,13 @@ static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbT
459
530
 
460
531
 
461
532
  size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
462
- void* dst, size_t dstCapacity,
463
- const void* src, size_t srcSize,
464
- const ZSTD_CDict* cdict,
465
- ZSTD_parameters const params,
466
- unsigned overlapRLog)
533
+ void* dst, size_t dstCapacity,
534
+ const void* src, size_t srcSize,
535
+ const ZSTD_CDict* cdict,
536
+ ZSTD_parameters const params,
537
+ unsigned overlapLog)
467
538
  {
539
+ unsigned const overlapRLog = (overlapLog>9) ? 0 : 9-overlapLog;
468
540
  size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
469
541
  unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads);
470
542
  size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
@@ -473,6 +545,7 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
473
545
  size_t remainingSrcSize = srcSize;
474
546
  unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
475
547
  size_t frameStartPos = 0, dstBufferPos = 0;
548
+ XXH64_state_t xxh64;
476
549
 
477
550
  DEBUGLOG(4, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
478
551
  if (nbChunks==1) { /* fallback to single-thread mode */
@@ -480,7 +553,9 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
480
553
  if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams);
481
554
  return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
482
555
  }
483
- assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is useful to avoid allocating extra buffers */
556
+ assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */
557
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
558
+ XXH64_reset(&xxh64, 0);
484
559
 
485
560
  if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */
486
561
  U32 nbJobs = nbChunks;
@@ -496,17 +571,10 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
496
571
  size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
497
572
  size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
498
573
  buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
499
- buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity);
500
- ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);
574
+ buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
501
575
  size_t dictSize = u ? overlapSize : 0;
502
576
 
503
- if ((cctx==NULL) || (dstBuffer.start==NULL)) {
504
- mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */
505
- mtctx->jobs[u].jobCompleted = 1;
506
- nbChunks = u+1; /* only wait and free u jobs, instead of initially expected nbChunks ones */
507
- break; /* let's wait for previous jobs to complete, but don't start new ones */
508
- }
509
-
577
+ mtctx->jobs[u].src = g_nullBuffer;
510
578
  mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
511
579
  mtctx->jobs[u].dictSize = dictSize;
512
580
  mtctx->jobs[u].srcSize = chunkSize;
@@ -516,13 +584,18 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
516
584
  /* do not calculate checksum within sections, but write it in header for first section */
517
585
  if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
518
586
  mtctx->jobs[u].dstBuff = dstBuffer;
519
- mtctx->jobs[u].cctx = cctx;
587
+ mtctx->jobs[u].cctxPool = mtctx->cctxPool;
588
+ mtctx->jobs[u].bufPool = mtctx->bufPool;
520
589
  mtctx->jobs[u].firstChunk = (u==0);
521
590
  mtctx->jobs[u].lastChunk = (u==nbChunks-1);
522
591
  mtctx->jobs[u].jobCompleted = 0;
523
592
  mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
524
593
  mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
525
594
 
595
+ if (params.fParams.checksumFlag) {
596
+ XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
597
+ }
598
+
526
599
  DEBUGLOG(5, "posting job %u (%u bytes)", u, (U32)chunkSize);
527
600
  DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
528
601
  POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
@@ -533,8 +606,8 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
533
606
  } }
534
607
 
535
608
  /* collect result */
536
- { unsigned chunkID;
537
- size_t error = 0, dstPos = 0;
609
+ { size_t error = 0, dstPos = 0;
610
+ unsigned chunkID;
538
611
  for (chunkID=0; chunkID<nbChunks; chunkID++) {
539
612
  DEBUGLOG(5, "waiting for chunk %u ", chunkID);
540
613
  PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
@@ -545,8 +618,6 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
545
618
  pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
546
619
  DEBUGLOG(5, "ready to write chunk %u ", chunkID);
547
620
 
548
- ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
549
- mtctx->jobs[chunkID].cctx = NULL;
550
621
  mtctx->jobs[chunkID].srcStart = NULL;
551
622
  { size_t const cSize = mtctx->jobs[chunkID].cSize;
552
623
  if (ZSTD_isError(cSize)) error = cSize;
@@ -556,13 +627,25 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
556
627
  memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */
557
628
  if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */
558
629
  DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
559
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
630
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff);
560
631
  }
561
632
  mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
562
633
  }
563
634
  dstPos += cSize ;
564
635
  }
565
- }
636
+ } /* for (chunkID=0; chunkID<nbChunks; chunkID++) */
637
+
638
+ DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
639
+ if (params.fParams.checksumFlag) {
640
+ U32 const checksum = (U32)XXH64_digest(&xxh64);
641
+ if (dstPos + 4 > dstCapacity) {
642
+ error = ERROR(dstSize_tooSmall);
643
+ } else {
644
+ DEBUGLOG(4, "writing checksum : %08X \n", checksum);
645
+ MEM_writeLE32((char*)dst + dstPos, checksum);
646
+ dstPos += 4;
647
+ } }
648
+
566
649
  if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos);
567
650
  return error ? error : dstPos;
568
651
  }
@@ -574,10 +657,10 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
574
657
  const void* src, size_t srcSize,
575
658
  int compressionLevel)
576
659
  {
577
- U32 const overlapRLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
660
+ U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
578
661
  ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
579
662
  params.fParams.contentSizeFlag = 1;
580
- return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapRLog);
663
+ return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
581
664
  }
582
665
 
583
666
 
@@ -615,8 +698,8 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
615
698
  if (zcs->nbThreads==1) {
616
699
  DEBUGLOG(4, "single thread mode");
617
700
  return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
618
- dict, dictSize, cdict,
619
- params, pledgedSrcSize);
701
+ dict, dictSize, cdict,
702
+ params, pledgedSrcSize);
620
703
  }
621
704
 
622
705
  if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */
@@ -642,18 +725,16 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
642
725
  zcs->cdict = cdict;
643
726
  }
644
727
 
645
- zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
646
- DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
728
+ zcs->targetDictSize = (zcs->overlapLog==0) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - (9 - zcs->overlapLog));
729
+ DEBUGLOG(4, "overlapLog : %u ", zcs->overlapLog);
647
730
  DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
648
731
  zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
649
732
  zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
650
733
  zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
651
734
  DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
652
- zcs->marginSize = zcs->targetSectionSize >> 2;
653
- zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
654
- zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
655
- if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
656
- zcs->inBuff.filled = 0;
735
+ zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize;
736
+ ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) );
737
+ zcs->inBuff.buffer = g_nullBuffer;
657
738
  zcs->dictSize = 0;
658
739
  zcs->doneJobID = 0;
659
740
  zcs->nextJobID = 0;
@@ -664,8 +745,9 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
664
745
  }
665
746
 
666
747
  size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
667
- const void* dict, size_t dictSize,
668
- ZSTD_parameters params, unsigned long long pledgedSrcSize)
748
+ const void* dict, size_t dictSize,
749
+ ZSTD_parameters params,
750
+ unsigned long long pledgedSrcSize)
669
751
  {
670
752
  DEBUGLOG(5, "ZSTDMT_initCStream_advanced");
671
753
  return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize);
@@ -701,19 +783,8 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
701
783
 
702
784
  static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
703
785
  {
704
- size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
705
- buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
706
- ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
707
786
  unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
708
787
 
709
- if ((cctx==NULL) || (dstBuffer.start==NULL)) {
710
- zcs->jobs[jobID].jobCompleted = 1;
711
- zcs->nextJobID++;
712
- ZSTDMT_waitForAllJobsCompleted(zcs);
713
- ZSTDMT_releaseAllJobResources(zcs);
714
- return ERROR(memory_allocation);
715
- }
716
-
717
788
  DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
718
789
  zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
719
790
  zcs->jobs[jobID].src = zcs->inBuff.buffer;
@@ -726,8 +797,9 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
726
797
  if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
727
798
  zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
728
799
  zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
729
- zcs->jobs[jobID].dstBuff = dstBuffer;
730
- zcs->jobs[jobID].cctx = cctx;
800
+ zcs->jobs[jobID].dstBuff = g_nullBuffer;
801
+ zcs->jobs[jobID].cctxPool = zcs->cctxPool;
802
+ zcs->jobs[jobID].bufPool = zcs->bufPool;
731
803
  zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
732
804
  zcs->jobs[jobID].lastChunk = endFrame;
733
805
  zcs->jobs[jobID].jobCompleted = 0;
@@ -735,11 +807,13 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
735
807
  zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
736
808
  zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
737
809
 
810
+ if (zcs->params.fParams.checksumFlag)
811
+ XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->dictSize, srcSize);
812
+
738
813
  /* get a new buffer for next input */
739
814
  if (!endFrame) {
740
815
  size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
741
- DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
742
- zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
816
+ zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
743
817
  if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
744
818
  zcs->jobs[jobID].jobCompleted = 1;
745
819
  zcs->nextJobID++;
@@ -747,26 +821,20 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
747
821
  ZSTDMT_releaseAllJobResources(zcs);
748
822
  return ERROR(memory_allocation);
749
823
  }
750
- DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled);
751
824
  zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
752
- DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src",
753
- (U32)zcs->inBuff.filled, (U32)newDictSize,
754
- (U32)(zcs->inBuff.filled - newDictSize));
755
825
  memmove(zcs->inBuff.buffer.start,
756
826
  (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
757
827
  zcs->inBuff.filled);
758
- DEBUGLOG(5, "new inBuff pre-filled");
759
828
  zcs->dictSize = newDictSize;
760
829
  } else { /* if (endFrame==1) */
761
- DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
762
830
  zcs->inBuff.buffer = g_nullBuffer;
763
831
  zcs->inBuff.filled = 0;
764
832
  zcs->dictSize = 0;
765
833
  zcs->frameEnded = 1;
766
- if (zcs->nextJobID == 0)
834
+ if (zcs->nextJobID == 0) {
767
835
  /* single chunk exception : checksum is calculated directly within worker thread */
768
836
  zcs->params.fParams.checksumFlag = 0;
769
- }
837
+ } }
770
838
 
771
839
  DEBUGLOG(4, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)",
772
840
  zcs->nextJobID,
@@ -804,11 +872,8 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
804
872
  ZSTDMT_releaseAllJobResources(zcs);
805
873
  return job.cSize;
806
874
  }
807
- ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
808
- zcs->jobs[wJobID].cctx = NULL;
809
875
  DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
810
876
  if (zcs->params.fParams.checksumFlag) {
811
- XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
812
877
  if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
813
878
  U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
814
879
  DEBUGLOG(5, "writing checksum : %08X \n", checksum);
@@ -816,9 +881,6 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
816
881
  job.cSize += 4;
817
882
  zcs->jobs[wJobID].cSize += 4;
818
883
  } }
819
- ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
820
- zcs->jobs[wJobID].srcStart = NULL;
821
- zcs->jobs[wJobID].src = g_nullBuffer;
822
884
  zcs->jobs[wJobID].jobScanned = 1;
823
885
  }
824
886
  { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
@@ -828,7 +890,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
828
890
  job.dstFlushed += toWrite;
829
891
  }
830
892
  if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */
831
- ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
893
+ ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
832
894
  zcs->jobs[wJobID].dstBuff = g_nullBuffer;
833
895
  zcs->jobs[wJobID].jobCompleted = 0;
834
896
  zcs->doneJobID++;
@@ -852,18 +914,18 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
852
914
  ZSTD_inBuffer* input,
853
915
  ZSTD_EndDirective endOp)
854
916
  {
855
- size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize + mtctx->marginSize;
917
+ size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize;
856
918
  assert(output->pos <= output->size);
857
919
  assert(input->pos <= input->size);
858
920
  if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
859
921
  /* current frame being ended. Only flush/end are allowed. Or start new frame with init */
860
922
  return ERROR(stage_wrong);
861
923
  }
862
- if (mtctx->nbThreads==1) {
924
+ if (mtctx->nbThreads==1) { /* delegate to single-thread (synchronous) */
863
925
  return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
864
926
  }
865
927
 
866
- /* single-pass shortcut (note : this is blocking-mode) */
928
+ /* single-pass shortcut (note : this is synchronous-mode) */
867
929
  if ( (mtctx->nextJobID==0) /* just started */
868
930
  && (mtctx->inBuff.filled==0) /* nothing buffered */
869
931
  && (endOp==ZSTD_e_end) /* end order */
@@ -871,24 +933,29 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
871
933
  size_t const cSize = ZSTDMT_compress_advanced(mtctx,
872
934
  (char*)output->dst + output->pos, output->size - output->pos,
873
935
  (const char*)input->src + input->pos, input->size - input->pos,
874
- mtctx->cdict, mtctx->params, mtctx->overlapRLog);
936
+ mtctx->cdict, mtctx->params, mtctx->overlapLog);
875
937
  if (ZSTD_isError(cSize)) return cSize;
876
938
  input->pos = input->size;
877
939
  output->pos += cSize;
878
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); /* was allocated in initStream */
940
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer); /* was allocated in initStream */
879
941
  mtctx->allJobsCompleted = 1;
880
942
  mtctx->frameEnded = 1;
881
943
  return 0;
882
944
  }
883
945
 
884
946
  /* fill input buffer */
885
- if ((input->src) && (mtctx->inBuff.buffer.start)) { /* support NULL input */
886
- size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
887
- DEBUGLOG(2, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
888
- memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
889
- input->pos += toLoad;
890
- mtctx->inBuff.filled += toLoad;
891
- }
947
+ if (input->size > input->pos) { /* support NULL input */
948
+ if (mtctx->inBuff.buffer.start == NULL) {
949
+ mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool);
950
+ if (mtctx->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
951
+ mtctx->inBuff.filled = 0;
952
+ }
953
+ { size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
954
+ DEBUGLOG(5, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
955
+ memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
956
+ input->pos += toLoad;
957
+ mtctx->inBuff.filled += toLoad;
958
+ } }
892
959
 
893
960
  if ( (mtctx->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
894
961
  && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { /* avoid overwriting job round buffer */