extzstd 0.1.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. checksums.yaml +5 -5
  2. data/HISTORY.ja.md +18 -0
  3. data/README.md +15 -50
  4. data/contrib/zstd/CONTRIBUTING.md +1 -1
  5. data/contrib/zstd/COPYING +339 -0
  6. data/contrib/zstd/Makefile +82 -51
  7. data/contrib/zstd/NEWS +92 -5
  8. data/contrib/zstd/README.md +50 -41
  9. data/contrib/zstd/appveyor.yml +164 -102
  10. data/contrib/zstd/circle.yml +10 -22
  11. data/contrib/zstd/lib/BUCK +31 -10
  12. data/contrib/zstd/lib/Makefile +57 -31
  13. data/contrib/zstd/lib/README.md +68 -37
  14. data/contrib/zstd/lib/common/bitstream.h +130 -76
  15. data/contrib/zstd/lib/common/compiler.h +86 -0
  16. data/contrib/zstd/lib/common/error_private.c +15 -11
  17. data/contrib/zstd/lib/common/error_private.h +8 -8
  18. data/contrib/zstd/lib/common/fse.h +19 -9
  19. data/contrib/zstd/lib/common/fse_decompress.c +3 -22
  20. data/contrib/zstd/lib/common/huf.h +68 -26
  21. data/contrib/zstd/lib/common/mem.h +23 -35
  22. data/contrib/zstd/lib/common/pool.c +123 -63
  23. data/contrib/zstd/lib/common/pool.h +19 -10
  24. data/contrib/zstd/lib/common/threading.c +11 -16
  25. data/contrib/zstd/lib/common/threading.h +52 -33
  26. data/contrib/zstd/lib/common/xxhash.c +28 -22
  27. data/contrib/zstd/lib/common/zstd_common.c +40 -27
  28. data/contrib/zstd/lib/common/zstd_errors.h +43 -34
  29. data/contrib/zstd/lib/common/zstd_internal.h +131 -123
  30. data/contrib/zstd/lib/compress/fse_compress.c +17 -33
  31. data/contrib/zstd/lib/compress/huf_compress.c +15 -9
  32. data/contrib/zstd/lib/compress/zstd_compress.c +2096 -2363
  33. data/contrib/zstd/lib/compress/zstd_compress_internal.h +462 -0
  34. data/contrib/zstd/lib/compress/zstd_double_fast.c +309 -0
  35. data/contrib/zstd/lib/compress/zstd_double_fast.h +29 -0
  36. data/contrib/zstd/lib/compress/zstd_fast.c +243 -0
  37. data/contrib/zstd/lib/compress/zstd_fast.h +31 -0
  38. data/contrib/zstd/lib/compress/zstd_lazy.c +765 -0
  39. data/contrib/zstd/lib/compress/zstd_lazy.h +39 -0
  40. data/contrib/zstd/lib/compress/zstd_ldm.c +707 -0
  41. data/contrib/zstd/lib/compress/zstd_ldm.h +68 -0
  42. data/contrib/zstd/lib/compress/zstd_opt.c +785 -0
  43. data/contrib/zstd/lib/compress/zstd_opt.h +19 -908
  44. data/contrib/zstd/lib/compress/zstdmt_compress.c +737 -327
  45. data/contrib/zstd/lib/compress/zstdmt_compress.h +88 -26
  46. data/contrib/zstd/lib/decompress/huf_decompress.c +158 -50
  47. data/contrib/zstd/lib/decompress/zstd_decompress.c +884 -699
  48. data/contrib/zstd/lib/deprecated/zbuff.h +5 -4
  49. data/contrib/zstd/lib/deprecated/zbuff_common.c +5 -5
  50. data/contrib/zstd/lib/deprecated/zbuff_compress.c +6 -4
  51. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +5 -4
  52. data/contrib/zstd/lib/dictBuilder/cover.c +93 -77
  53. data/contrib/zstd/lib/dictBuilder/zdict.c +107 -92
  54. data/contrib/zstd/lib/dictBuilder/zdict.h +112 -102
  55. data/contrib/zstd/lib/legacy/zstd_legacy.h +9 -4
  56. data/contrib/zstd/lib/legacy/zstd_v01.c +7 -6
  57. data/contrib/zstd/lib/legacy/zstd_v01.h +5 -4
  58. data/contrib/zstd/lib/legacy/zstd_v02.c +27 -99
  59. data/contrib/zstd/lib/legacy/zstd_v02.h +5 -4
  60. data/contrib/zstd/lib/legacy/zstd_v03.c +26 -98
  61. data/contrib/zstd/lib/legacy/zstd_v03.h +5 -4
  62. data/contrib/zstd/lib/legacy/zstd_v04.c +22 -91
  63. data/contrib/zstd/lib/legacy/zstd_v04.h +5 -4
  64. data/contrib/zstd/lib/legacy/zstd_v05.c +23 -99
  65. data/contrib/zstd/lib/legacy/zstd_v05.h +5 -4
  66. data/contrib/zstd/lib/legacy/zstd_v06.c +22 -96
  67. data/contrib/zstd/lib/legacy/zstd_v06.h +5 -4
  68. data/contrib/zstd/lib/legacy/zstd_v07.c +19 -95
  69. data/contrib/zstd/lib/legacy/zstd_v07.h +5 -4
  70. data/contrib/zstd/lib/zstd.h +895 -271
  71. data/ext/extconf.rb +11 -2
  72. data/ext/extzstd.c +45 -128
  73. data/ext/extzstd.h +74 -31
  74. data/ext/extzstd_stream.c +401 -142
  75. data/ext/zstd_common.c +5 -0
  76. data/ext/zstd_compress.c +8 -0
  77. data/ext/zstd_decompress.c +1 -0
  78. data/ext/zstd_dictbuilder.c +2 -0
  79. data/lib/extzstd/version.rb +1 -1
  80. data/lib/extzstd.rb +48 -1
  81. data/test/test_basic.rb +9 -1
  82. metadata +17 -7
  83. data/HISTORY.ja +0 -10
  84. data/contrib/zstd/LICENSE-examples +0 -11
  85. data/contrib/zstd/PATENTS +0 -33
@@ -1,81 +1,83 @@
1
- /**
1
+ /*
2
2
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
- * This source code is licensed under the BSD-style license found in the
6
- * LICENSE file in the root directory of this source tree. An additional grant
7
- * of patent rights can be found in the PATENTS file in the same directory.
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
8
9
  */
9
10
 
10
11
 
11
12
  /* ====== Tuning parameters ====== */
12
- #define ZSTDMT_NBTHREADS_MAX 128
13
+ #define ZSTDMT_NBTHREADS_MAX 200
14
+ #define ZSTDMT_OVERLAPLOG_DEFAULT 6
13
15
 
14
16
 
15
17
  /* ====== Compiler specifics ====== */
16
18
  #if defined(_MSC_VER)
17
- # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
19
+ # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
18
20
  #endif
19
21
 
20
22
 
21
23
  /* ====== Dependencies ====== */
22
- #include <stdlib.h> /* malloc */
23
- #include <string.h> /* memcpy */
24
- #include "pool.h" /* threadpool */
25
- #include "threading.h" /* mutex */
26
- #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
24
+ #include <string.h> /* memcpy, memset */
25
+ #include "pool.h" /* threadpool */
26
+ #include "threading.h" /* mutex */
27
+ #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
27
28
  #include "zstdmt_compress.h"
28
29
 
29
30
 
30
31
  /* ====== Debug ====== */
31
- #if 0
32
+ #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
32
33
 
33
34
  # include <stdio.h>
34
35
  # include <unistd.h>
35
36
  # include <sys/times.h>
36
- static unsigned g_debugLevel = 3;
37
- # define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
38
- # define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
37
+ # define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
39
38
 
40
- # define DEBUG_PRINTHEX(l,p,n) { \
41
- unsigned debug_u; \
42
- for (debug_u=0; debug_u<(n); debug_u++) \
39
+ # define DEBUG_PRINTHEX(l,p,n) { \
40
+ unsigned debug_u; \
41
+ for (debug_u=0; debug_u<(n); debug_u++) \
43
42
  DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
44
- DEBUGLOGRAW(l, " \n"); \
43
+ DEBUGLOGRAW(l, " \n"); \
45
44
  }
46
45
 
47
- static unsigned long long GetCurrentClockTimeMicroseconds()
46
+ static unsigned long long GetCurrentClockTimeMicroseconds(void)
48
47
  {
49
48
  static clock_t _ticksPerSecond = 0;
50
49
  if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
51
50
 
52
- struct tms junk; clock_t newTicks = (clock_t) times(&junk);
53
- return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
51
+ { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
52
+ return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
54
53
  }
55
54
 
56
- #define MUTEX_WAIT_TIME_DLEVEL 5
57
- #define PTHREAD_MUTEX_LOCK(mutex) \
58
- if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
59
- unsigned long long beforeTime = GetCurrentClockTimeMicroseconds(); \
60
- pthread_mutex_lock(mutex); \
61
- unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \
62
- unsigned long long elapsedTime = (afterTime-beforeTime); \
63
- if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
64
- DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
65
- elapsedTime, #mutex); \
66
- } \
67
- } else pthread_mutex_lock(mutex);
55
+ #define MUTEX_WAIT_TIME_DLEVEL 6
56
+ #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
57
+ if (ZSTD_DEBUG >= MUTEX_WAIT_TIME_DLEVEL) { \
58
+ unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
59
+ ZSTD_pthread_mutex_lock(mutex); \
60
+ { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
61
+ unsigned long long const elapsedTime = (afterTime-beforeTime); \
62
+ if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
63
+ DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
64
+ elapsedTime, #mutex); \
65
+ } } \
66
+ } else { \
67
+ ZSTD_pthread_mutex_lock(mutex); \
68
+ } \
69
+ }
68
70
 
69
71
  #else
70
72
 
71
- # define DEBUGLOG(l, ...) {} /* disabled */
72
- # define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
73
+ # define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
73
74
  # define DEBUG_PRINTHEX(l,p,n) {}
74
75
 
75
76
  #endif
76
77
 
77
78
 
78
79
  /* ===== Buffer Pool ===== */
80
+ /* a single Buffer Pool can be invoked from multiple threads in parallel */
79
81
 
80
82
  typedef struct buffer_s {
81
83
  void* start;
@@ -85,129 +87,231 @@ typedef struct buffer_s {
85
87
  static const buffer_t g_nullBuffer = { NULL, 0 };
86
88
 
87
89
  typedef struct ZSTDMT_bufferPool_s {
90
+ ZSTD_pthread_mutex_t poolMutex;
91
+ size_t bufferSize;
88
92
  unsigned totalBuffers;
89
93
  unsigned nbBuffers;
94
+ ZSTD_customMem cMem;
90
95
  buffer_t bTable[1]; /* variable size */
91
96
  } ZSTDMT_bufferPool;
92
97
 
93
- static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
98
+ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
94
99
  {
95
- unsigned const maxNbBuffers = 2*nbThreads + 2;
96
- ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t));
100
+ unsigned const maxNbBuffers = 2*nbThreads + 3;
101
+ ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
102
+ sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
97
103
  if (bufPool==NULL) return NULL;
104
+ if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
105
+ ZSTD_free(bufPool, cMem);
106
+ return NULL;
107
+ }
108
+ bufPool->bufferSize = 64 KB;
98
109
  bufPool->totalBuffers = maxNbBuffers;
99
110
  bufPool->nbBuffers = 0;
111
+ bufPool->cMem = cMem;
100
112
  return bufPool;
101
113
  }
102
114
 
103
115
  static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
104
116
  {
105
117
  unsigned u;
118
+ DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
106
119
  if (!bufPool) return; /* compatibility with free on NULL */
120
+ for (u=0; u<bufPool->totalBuffers; u++) {
121
+ DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
122
+ ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
123
+ }
124
+ ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
125
+ ZSTD_free(bufPool, bufPool->cMem);
126
+ }
127
+
128
+ /* only works at initialization, not during compression */
129
+ static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
130
+ {
131
+ size_t const poolSize = sizeof(*bufPool)
132
+ + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
133
+ unsigned u;
134
+ size_t totalBufferSize = 0;
135
+ ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
107
136
  for (u=0; u<bufPool->totalBuffers; u++)
108
- free(bufPool->bTable[u].start);
109
- free(bufPool);
137
+ totalBufferSize += bufPool->bTable[u].size;
138
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
139
+
140
+ return poolSize + totalBufferSize;
141
+ }
142
+
143
+ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize)
144
+ {
145
+ ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
146
+ DEBUGLOG(4, "ZSTDMT_setBufferSize: bSize = %u", (U32)bSize);
147
+ bufPool->bufferSize = bSize;
148
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
110
149
  }
111
150
 
112
- /* assumption : invocation from main thread only ! */
113
- static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
151
+ /** ZSTDMT_getBuffer() :
152
+ * assumption : bufPool must be valid */
153
+ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
114
154
  {
115
- if (pool->nbBuffers) { /* try to use an existing buffer */
116
- buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
155
+ size_t const bSize = bufPool->bufferSize;
156
+ DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
157
+ ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
158
+ if (bufPool->nbBuffers) { /* try to use an existing buffer */
159
+ buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
117
160
  size_t const availBufferSize = buf.size;
118
- if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */
161
+ bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
162
+ if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
163
+ /* large enough, but not too much */
164
+ DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
165
+ bufPool->nbBuffers, (U32)buf.size);
166
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
119
167
  return buf;
120
- free(buf.start); /* size conditions not respected : scratch this buffer and create a new one */
168
+ }
169
+ /* size conditions not respected : scratch this buffer, create new one */
170
+ DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing");
171
+ ZSTD_free(buf.start, bufPool->cMem);
121
172
  }
173
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
122
174
  /* create new buffer */
175
+ DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer");
123
176
  { buffer_t buffer;
124
- void* const start = malloc(bSize);
125
- if (start==NULL) bSize = 0;
177
+ void* const start = ZSTD_malloc(bSize, bufPool->cMem);
126
178
  buffer.start = start; /* note : start can be NULL if malloc fails ! */
127
- buffer.size = bSize;
179
+ buffer.size = (start==NULL) ? 0 : bSize;
180
+ DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
128
181
  return buffer;
129
182
  }
130
183
  }
131
184
 
132
185
  /* store buffer for later re-use, up to pool capacity */
133
- static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
186
+ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
134
187
  {
135
- if (buf.start == NULL) return; /* release on NULL */
136
- if (pool->nbBuffers < pool->totalBuffers) {
137
- pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */
188
+ if (buf.start == NULL) return; /* compatible with release on NULL */
189
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer");
190
+ ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
191
+ if (bufPool->nbBuffers < bufPool->totalBuffers) {
192
+ bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
193
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
194
+ (U32)buf.size, (U32)(bufPool->nbBuffers-1));
195
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
138
196
  return;
139
197
  }
198
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
140
199
  /* Reached bufferPool capacity (should not happen) */
141
- free(buf.start);
200
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
201
+ ZSTD_free(buf.start, bufPool->cMem);
142
202
  }
143
203
 
204
+ /* Sets parameters relevant to the compression job, initializing others to
205
+ * default values. Notably, nbThreads should probably be zero. */
206
+ static ZSTD_CCtx_params ZSTDMT_makeJobCCtxParams(ZSTD_CCtx_params const params)
207
+ {
208
+ ZSTD_CCtx_params jobParams;
209
+ memset(&jobParams, 0, sizeof(jobParams));
210
+
211
+ jobParams.cParams = params.cParams;
212
+ jobParams.fParams = params.fParams;
213
+ jobParams.compressionLevel = params.compressionLevel;
214
+
215
+ jobParams.ldmParams = params.ldmParams;
216
+ return jobParams;
217
+ }
144
218
 
145
219
  /* ===== CCtx Pool ===== */
220
+ /* a single CCtx Pool can be invoked from multiple threads in parallel */
146
221
 
147
222
  typedef struct {
223
+ ZSTD_pthread_mutex_t poolMutex;
148
224
  unsigned totalCCtx;
149
225
  unsigned availCCtx;
226
+ ZSTD_customMem cMem;
150
227
  ZSTD_CCtx* cctx[1]; /* variable size */
151
228
  } ZSTDMT_CCtxPool;
152
229
 
153
- /* assumption : CCtxPool invocation only from main thread */
154
-
155
230
  /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
156
231
  static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
157
232
  {
158
233
  unsigned u;
159
234
  for (u=0; u<pool->totalCCtx; u++)
160
235
  ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
161
- free(pool);
236
+ ZSTD_pthread_mutex_destroy(&pool->poolMutex);
237
+ ZSTD_free(pool, pool->cMem);
162
238
  }
163
239
 
164
240
  /* ZSTDMT_createCCtxPool() :
165
241
  * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
166
- static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads)
242
+ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
243
+ ZSTD_customMem cMem)
167
244
  {
168
- ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*));
245
+ ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
246
+ sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
169
247
  if (!cctxPool) return NULL;
248
+ if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
249
+ ZSTD_free(cctxPool, cMem);
250
+ return NULL;
251
+ }
252
+ cctxPool->cMem = cMem;
170
253
  cctxPool->totalCCtx = nbThreads;
171
254
  cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
172
- cctxPool->cctx[0] = ZSTD_createCCtx();
255
+ cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
173
256
  if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
174
- DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads);
257
+ DEBUGLOG(3, "cctxPool created, with %u threads", nbThreads);
175
258
  return cctxPool;
176
259
  }
177
260
 
178
- static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
261
+ /* only works during initialization phase, not during compression */
262
+ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
179
263
  {
180
- if (pool->availCCtx) {
181
- pool->availCCtx--;
182
- return pool->cctx[pool->availCCtx];
264
+ ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
265
+ { unsigned const nbThreads = cctxPool->totalCCtx;
266
+ size_t const poolSize = sizeof(*cctxPool)
267
+ + (nbThreads-1)*sizeof(ZSTD_CCtx*);
268
+ unsigned u;
269
+ size_t totalCCtxSize = 0;
270
+ for (u=0; u<nbThreads; u++) {
271
+ totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
272
+ }
273
+ ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
274
+ return poolSize + totalCCtxSize;
183
275
  }
184
- return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */
276
+ }
277
+
278
+ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
279
+ {
280
+ DEBUGLOG(5, "ZSTDMT_getCCtx");
281
+ ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
282
+ if (cctxPool->availCCtx) {
283
+ cctxPool->availCCtx--;
284
+ { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
285
+ ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
286
+ return cctx;
287
+ } }
288
+ ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
289
+ DEBUGLOG(5, "create one more CCtx");
290
+ return ZSTD_createCCtx_advanced(cctxPool->cMem); /* note : can be NULL, when creation fails ! */
185
291
  }
186
292
 
187
293
  static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
188
294
  {
189
295
  if (cctx==NULL) return; /* compatibility with release on NULL */
296
+ ZSTD_pthread_mutex_lock(&pool->poolMutex);
190
297
  if (pool->availCCtx < pool->totalCCtx)
191
298
  pool->cctx[pool->availCCtx++] = cctx;
192
- else
299
+ else {
193
300
  /* pool overflow : should not happen, since totalCCtx==nbThreads */
301
+ DEBUGLOG(5, "CCtx pool overflow : free cctx");
194
302
  ZSTD_freeCCtx(cctx);
303
+ }
304
+ ZSTD_pthread_mutex_unlock(&pool->poolMutex);
195
305
  }
196
306
 
197
307
 
198
308
  /* ===== Thread worker ===== */
199
309
 
200
310
  typedef struct {
201
- buffer_t buffer;
202
- size_t filled;
203
- } inBuff_t;
204
-
205
- typedef struct {
206
- ZSTD_CCtx* cctx;
207
311
  buffer_t src;
208
312
  const void* srcStart;
313
+ size_t prefixSize;
209
314
  size_t srcSize;
210
- size_t dictSize;
211
315
  buffer_t dstBuff;
212
316
  size_t cSize;
213
317
  size_t dstFlushed;
@@ -215,10 +319,12 @@ typedef struct {
215
319
  unsigned lastChunk;
216
320
  unsigned jobCompleted;
217
321
  unsigned jobScanned;
218
- pthread_mutex_t* jobCompleted_mutex;
219
- pthread_cond_t* jobCompleted_cond;
220
- ZSTD_parameters params;
221
- ZSTD_CDict* cdict;
322
+ ZSTD_pthread_mutex_t* jobCompleted_mutex;
323
+ ZSTD_pthread_cond_t* jobCompleted_cond;
324
+ ZSTD_CCtx_params params;
325
+ const ZSTD_CDict* cdict;
326
+ ZSTDMT_CCtxPool* cctxPool;
327
+ ZSTDMT_bufferPool* bufPool;
222
328
  unsigned long long fullFrameSize;
223
329
  } ZSTDMT_jobDescription;
224
330
 
@@ -226,38 +332,76 @@ typedef struct {
226
332
  void ZSTDMT_compressChunk(void* jobDescription)
227
333
  {
228
334
  ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
229
- const void* const src = (const char*)job->srcStart + job->dictSize;
230
- buffer_t const dstBuff = job->dstBuff;
231
- DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
232
- if (job->cdict) { /* should only happen for first segment */
233
- size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
234
- if (job->cdict) DEBUGLOG(3, "using CDict ");
335
+ ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
336
+ const void* const src = (const char*)job->srcStart + job->prefixSize;
337
+ buffer_t dstBuff = job->dstBuff;
338
+ DEBUGLOG(5, "ZSTDMT_compressChunk: job (first:%u) (last:%u) : prefixSize %u, srcSize %u ",
339
+ job->firstChunk, job->lastChunk, (U32)job->prefixSize, (U32)job->srcSize);
340
+
341
+ if (cctx==NULL) {
342
+ job->cSize = ERROR(memory_allocation);
343
+ goto _endJob;
344
+ }
345
+
346
+ if (dstBuff.start == NULL) {
347
+ dstBuff = ZSTDMT_getBuffer(job->bufPool);
348
+ if (dstBuff.start==NULL) {
349
+ job->cSize = ERROR(memory_allocation);
350
+ goto _endJob;
351
+ }
352
+ job->dstBuff = dstBuff;
353
+ DEBUGLOG(5, "ZSTDMT_compressChunk: received dstBuff of size %u", (U32)dstBuff.size);
354
+ }
355
+
356
+ if (job->cdict) {
357
+ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dm_auto, job->cdict, job->params, job->fullFrameSize);
358
+ DEBUGLOG(4, "ZSTDMT_compressChunk: init using CDict (windowLog=%u)", job->params.cParams.windowLog);
359
+ assert(job->firstChunk); /* only allowed for first job */
235
360
  if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
236
361
  } else { /* srcStart points at reloaded section */
237
- size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
238
- size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, 0);
239
- if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
240
- ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
362
+ U64 const pledgedSrcSize = job->firstChunk ? job->fullFrameSize : ZSTD_CONTENTSIZE_UNKNOWN;
363
+ ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */
364
+ size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstChunk);
365
+ if (ZSTD_isError(forceWindowError)) {
366
+ DEBUGLOG(5, "ZSTD_CCtxParam_setParameter error : %s ", ZSTD_getErrorName(forceWindowError));
367
+ job->cSize = forceWindowError;
368
+ goto _endJob;
369
+ }
370
+ DEBUGLOG(5, "ZSTDMT_compressChunk: invoking ZSTD_compressBegin_advanced_internal with windowLog = %u ", jobParams.cParams.windowLog);
371
+ { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
372
+ job->srcStart, job->prefixSize, ZSTD_dm_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
373
+ NULL,
374
+ jobParams, pledgedSrcSize);
375
+ if (ZSTD_isError(initError)) {
376
+ DEBUGLOG(5, "ZSTD_compressBegin_advanced_internal error : %s ", ZSTD_getErrorName(initError));
377
+ job->cSize = initError;
378
+ goto _endJob;
379
+ } }
241
380
  }
242
- if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */
243
- size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
244
- if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
245
- ZSTD_invalidateRepCodes(job->cctx);
381
+ if (!job->firstChunk) { /* flush and overwrite frame header when it's not first job */
382
+ size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
383
+ if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
384
+ ZSTD_invalidateRepCodes(cctx);
246
385
  }
247
386
 
248
- DEBUGLOG(4, "Compressing : ");
249
- DEBUG_PRINTHEX(4, job->srcStart, 12);
387
+ DEBUGLOG(5, "Compressing into dstBuff of size %u", (U32)dstBuff.size);
388
+ DEBUG_PRINTHEX(6, job->srcStart, 12);
250
389
  job->cSize = (job->lastChunk) ?
251
- ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
252
- ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
253
- DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
390
+ ZSTD_compressEnd (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
391
+ ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
392
+ DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u) ",
393
+ (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
394
+ DEBUGLOG(5, "dstBuff.size : %u ; => %s ", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
254
395
 
255
396
  _endJob:
256
- PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
397
+ ZSTDMT_releaseCCtx(job->cctxPool, cctx);
398
+ ZSTDMT_releaseBuffer(job->bufPool, job->src);
399
+ job->src = g_nullBuffer; job->srcStart = NULL;
400
+ ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
257
401
  job->jobCompleted = 1;
258
402
  job->jobScanned = 0;
259
- pthread_cond_signal(job->jobCompleted_cond);
260
- pthread_mutex_unlock(job->jobCompleted_mutex);
403
+ ZSTD_pthread_cond_signal(job->jobCompleted_cond);
404
+ ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex);
261
405
  }
262
406
 
263
407
 
@@ -265,213 +409,376 @@ _endJob:
265
409
  /* ===== Multi-threaded compression ===== */
266
410
  /* ------------------------------------------ */
267
411
 
412
+ typedef struct {
413
+ buffer_t buffer;
414
+ size_t filled;
415
+ } inBuff_t;
416
+
268
417
  struct ZSTDMT_CCtx_s {
269
418
  POOL_ctx* factory;
270
- ZSTDMT_bufferPool* buffPool;
419
+ ZSTDMT_jobDescription* jobs;
420
+ ZSTDMT_bufferPool* bufPool;
271
421
  ZSTDMT_CCtxPool* cctxPool;
272
- pthread_mutex_t jobCompleted_mutex;
273
- pthread_cond_t jobCompleted_cond;
422
+ ZSTD_pthread_mutex_t jobCompleted_mutex;
423
+ ZSTD_pthread_cond_t jobCompleted_cond;
424
+ ZSTD_CCtx_params params;
274
425
  size_t targetSectionSize;
275
- size_t marginSize;
276
426
  size_t inBuffSize;
277
427
  size_t dictSize;
278
428
  size_t targetDictSize;
279
429
  inBuff_t inBuff;
280
- ZSTD_parameters params;
281
430
  XXH64_state_t xxhState;
282
- unsigned nbThreads;
431
+ unsigned singleThreaded;
283
432
  unsigned jobIDMask;
284
433
  unsigned doneJobID;
285
434
  unsigned nextJobID;
286
435
  unsigned frameEnded;
287
436
  unsigned allJobsCompleted;
288
- unsigned overlapRLog;
289
437
  unsigned long long frameContentSize;
290
- size_t sectionSize;
291
- ZSTD_CDict* cdict;
292
- ZSTD_CStream* cstream;
293
- ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */
438
+ ZSTD_customMem cMem;
439
+ ZSTD_CDict* cdictLocal;
440
+ const ZSTD_CDict* cdict;
294
441
  };
295
442
 
296
- ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads)
443
+ static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
297
444
  {
298
- ZSTDMT_CCtx* cctx;
299
- U32 const minNbJobs = nbThreads + 2;
300
- U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
445
+ U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
301
446
  U32 const nbJobs = 1 << nbJobsLog2;
302
- DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n",
303
- nbThreads, minNbJobs, nbJobsLog2, nbJobs);
304
- if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
305
- cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription));
306
- if (!cctx) return NULL;
307
- cctx->nbThreads = nbThreads;
308
- cctx->jobIDMask = nbJobs - 1;
309
- cctx->allJobsCompleted = 1;
310
- cctx->sectionSize = 0;
311
- cctx->overlapRLog = 3;
312
- cctx->factory = POOL_create(nbThreads, 1);
313
- cctx->buffPool = ZSTDMT_createBufferPool(nbThreads);
314
- cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads);
315
- if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) { /* one object was not created */
316
- ZSTDMT_freeCCtx(cctx);
447
+ *nbJobsPtr = nbJobs;
448
+ return (ZSTDMT_jobDescription*) ZSTD_calloc(
449
+ nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
450
+ }
451
+
452
+ /* ZSTDMT_CCtxParam_setNbThreads():
453
+ * Internal use only */
454
+ size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads)
455
+ {
456
+ if (nbThreads > ZSTDMT_NBTHREADS_MAX) nbThreads = ZSTDMT_NBTHREADS_MAX;
457
+ if (nbThreads < 1) nbThreads = 1;
458
+ params->nbThreads = nbThreads;
459
+ params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT;
460
+ params->jobSize = 0;
461
+ return nbThreads;
462
+ }
463
+
464
+ /* ZSTDMT_getNbThreads():
465
+ * @return nb threads currently active in mtctx.
466
+ * mtctx must be valid */
467
+ size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx)
468
+ {
469
+ assert(mtctx != NULL);
470
+ return mtctx->params.nbThreads;
471
+ }
472
+
473
+ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
474
+ {
475
+ ZSTDMT_CCtx* mtctx;
476
+ U32 nbJobs = nbThreads + 2;
477
+ DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbThreads = %u)", nbThreads);
478
+
479
+ if (nbThreads < 1) return NULL;
480
+ nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX);
481
+ if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
482
+ /* invalid custom allocator */
483
+ return NULL;
484
+
485
+ mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
486
+ if (!mtctx) return NULL;
487
+ ZSTDMT_CCtxParam_setNbThreads(&mtctx->params, nbThreads);
488
+ mtctx->cMem = cMem;
489
+ mtctx->allJobsCompleted = 1;
490
+ mtctx->factory = POOL_create_advanced(nbThreads, 0, cMem);
491
+ mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
492
+ mtctx->jobIDMask = nbJobs - 1;
493
+ mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem);
494
+ mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
495
+ if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool) {
496
+ ZSTDMT_freeCCtx(mtctx);
317
497
  return NULL;
318
498
  }
319
- if (nbThreads==1) {
320
- cctx->cstream = ZSTD_createCStream();
321
- if (!cctx->cstream) {
322
- ZSTDMT_freeCCtx(cctx); return NULL;
323
- } }
324
- pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
325
- pthread_cond_init(&cctx->jobCompleted_cond, NULL);
326
- DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads);
327
- return cctx;
499
+ if (ZSTD_pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL)) {
500
+ ZSTDMT_freeCCtx(mtctx);
501
+ return NULL;
502
+ }
503
+ if (ZSTD_pthread_cond_init(&mtctx->jobCompleted_cond, NULL)) {
504
+ ZSTDMT_freeCCtx(mtctx);
505
+ return NULL;
506
+ }
507
+ DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
508
+ return mtctx;
509
+ }
510
+
511
+ ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads)
512
+ {
513
+ return ZSTDMT_createCCtx_advanced(nbThreads, ZSTD_defaultCMem);
328
514
  }
329
515
 
330
516
  /* ZSTDMT_releaseAllJobResources() :
331
- * Ensure all workers are killed first. */
517
+ * note : ensure all workers are killed first ! */
332
518
  static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
333
519
  {
334
520
  unsigned jobID;
521
+ DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
335
522
  for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
336
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
523
+ DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
524
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
337
525
  mtctx->jobs[jobID].dstBuff = g_nullBuffer;
338
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
526
+ DEBUGLOG(4, "job%02u: release src address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].src.start);
527
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].src);
339
528
  mtctx->jobs[jobID].src = g_nullBuffer;
340
- ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
341
- mtctx->jobs[jobID].cctx = NULL;
342
529
  }
343
530
  memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
344
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
531
+ DEBUGLOG(4, "input: release address %08X", (U32)(size_t)mtctx->inBuff.buffer.start);
532
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);
345
533
  mtctx->inBuff.buffer = g_nullBuffer;
346
534
  mtctx->allJobsCompleted = 1;
347
535
  }
348
536
 
537
+ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
538
+ {
539
+ DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
540
+ while (zcs->doneJobID < zcs->nextJobID) {
541
+ unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
542
+ ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
543
+ while (zcs->jobs[jobID].jobCompleted==0) {
544
+ DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
545
+ ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
546
+ }
547
+ ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
548
+ zcs->doneJobID++;
549
+ }
550
+ }
551
+
349
552
  size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
350
553
  {
351
554
  if (mtctx==NULL) return 0; /* compatible with free on NULL */
352
- POOL_free(mtctx->factory);
353
- if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
354
- ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */
555
+ POOL_free(mtctx->factory); /* stop and free worker threads */
556
+ ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */
557
+ ZSTD_free(mtctx->jobs, mtctx->cMem);
558
+ ZSTDMT_freeBufferPool(mtctx->bufPool);
355
559
  ZSTDMT_freeCCtxPool(mtctx->cctxPool);
356
- ZSTD_freeCDict(mtctx->cdict);
357
- ZSTD_freeCStream(mtctx->cstream);
358
- pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
359
- pthread_cond_destroy(&mtctx->jobCompleted_cond);
360
- free(mtctx);
560
+ ZSTD_freeCDict(mtctx->cdictLocal);
561
+ ZSTD_pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
562
+ ZSTD_pthread_cond_destroy(&mtctx->jobCompleted_cond);
563
+ ZSTD_free(mtctx, mtctx->cMem);
361
564
  return 0;
362
565
  }
363
566
 
364
- size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
567
+ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
365
568
  {
569
+ if (mtctx == NULL) return 0; /* supports sizeof NULL */
570
+ return sizeof(*mtctx)
571
+ + POOL_sizeof(mtctx->factory)
572
+ + ZSTDMT_sizeof_bufferPool(mtctx->bufPool)
573
+ + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
574
+ + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
575
+ + ZSTD_sizeof_CDict(mtctx->cdictLocal);
576
+ }
577
+
578
+ /* Internal only */
579
+ size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
580
+ ZSTDMT_parameter parameter, unsigned value) {
581
+ DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
366
582
  switch(parameter)
367
583
  {
368
- case ZSTDMT_p_sectionSize :
369
- mtctx->sectionSize = value;
370
- return 0;
584
+ case ZSTDMT_p_jobSize :
585
+ DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value);
586
+ if ( (value > 0) /* value==0 => automatic job size */
587
+ & (value < ZSTDMT_JOBSIZE_MIN) )
588
+ value = ZSTDMT_JOBSIZE_MIN;
589
+ params->jobSize = value;
590
+ return value;
371
591
  case ZSTDMT_p_overlapSectionLog :
372
- DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
373
- mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
374
- return 0;
592
+ if (value > 9) value = 9;
593
+ DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
594
+ params->overlapSizeLog = (value >= 9) ? 9 : value;
595
+ return value;
375
596
  default :
376
- return ERROR(compressionParameter_unsupported);
597
+ return ERROR(parameter_unsupported);
377
598
  }
378
599
  }
379
600
 
601
+ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value)
602
+ {
603
+ DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
604
+ switch(parameter)
605
+ {
606
+ case ZSTDMT_p_jobSize :
607
+ return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
608
+ case ZSTDMT_p_overlapSectionLog :
609
+ return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
610
+ default :
611
+ return ERROR(parameter_unsupported);
612
+ }
613
+ }
380
614
 
381
615
  /* ------------------------------------------ */
382
616
  /* ===== Multi-threaded compression ===== */
383
617
  /* ------------------------------------------ */
384
618
 
385
- size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
386
- void* dst, size_t dstCapacity,
387
- const void* src, size_t srcSize,
388
- int compressionLevel)
619
+ static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
620
+ size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
621
+ size_t const chunkMaxSize = chunkSizeTarget << 2;
622
+ size_t const passSizeMax = chunkMaxSize * nbThreads;
623
+ unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
624
+ unsigned const nbChunksLarge = multiplier * nbThreads;
625
+ unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
626
+ unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
627
+ return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
628
+ }
629
+
630
+ static size_t ZSTDMT_compress_advanced_internal(
631
+ ZSTDMT_CCtx* mtctx,
632
+ void* dst, size_t dstCapacity,
633
+ const void* src, size_t srcSize,
634
+ const ZSTD_CDict* cdict,
635
+ ZSTD_CCtx_params const params)
389
636
  {
390
- ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
391
- size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);
392
- unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + (srcSize < chunkTargetSize) /* min 1 */;
393
- unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
637
+ ZSTD_CCtx_params const jobParams = ZSTDMT_makeJobCCtxParams(params);
638
+ unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
639
+ size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
640
+ unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, params.nbThreads);
394
641
  size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
395
- size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
396
- size_t remainingSrcSize = srcSize;
642
+ size_t const avgChunkSize = (((proposedChunkSize-1) & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
397
643
  const char* const srcStart = (const char*)src;
398
- size_t frameStartPos = 0;
399
-
400
- DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize);
401
- DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
402
- params.fParams.contentSizeFlag = 1;
403
-
644
+ size_t remainingSrcSize = srcSize;
645
+ unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
646
+ size_t frameStartPos = 0, dstBufferPos = 0;
647
+ XXH64_state_t xxh64;
648
+ assert(jobParams.nbThreads == 0);
649
+ assert(mtctx->cctxPool->totalCCtx == params.nbThreads);
650
+
651
+ DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbChunks=%2u (rawSize=%u bytes; fixedSize=%u) ",
652
+ nbChunks, (U32)proposedChunkSize, (U32)avgChunkSize);
404
653
  if (nbChunks==1) { /* fallback to single-thread mode */
405
654
  ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
406
- return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
655
+ if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
656
+ return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
657
+ }
658
+ assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */
659
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
660
+ XXH64_reset(&xxh64, 0);
661
+
662
+ if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */
663
+ U32 nbJobs = nbChunks;
664
+ ZSTD_free(mtctx->jobs, mtctx->cMem);
665
+ mtctx->jobIDMask = 0;
666
+ mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
667
+ if (mtctx->jobs==NULL) return ERROR(memory_allocation);
668
+ mtctx->jobIDMask = nbJobs - 1;
407
669
  }
408
670
 
409
671
  { unsigned u;
410
672
  for (u=0; u<nbChunks; u++) {
411
673
  size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
412
- size_t const dstBufferCapacity = u ? ZSTD_compressBound(chunkSize) : dstCapacity;
413
- buffer_t const dstAsBuffer = { dst, dstCapacity };
414
- buffer_t const dstBuffer = u ? ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity) : dstAsBuffer;
415
- ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);
416
-
417
- if ((cctx==NULL) || (dstBuffer.start==NULL)) {
418
- mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */
419
- mtctx->jobs[u].jobCompleted = 1;
420
- nbChunks = u+1;
421
- break; /* let's wait for previous jobs to complete, but don't start new ones */
422
- }
423
-
424
- mtctx->jobs[u].srcStart = srcStart + frameStartPos;
674
+ size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
675
+ buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
676
+ buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
677
+ size_t dictSize = u ? overlapSize : 0;
678
+
679
+ mtctx->jobs[u].src = g_nullBuffer;
680
+ mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
681
+ mtctx->jobs[u].prefixSize = dictSize;
425
682
  mtctx->jobs[u].srcSize = chunkSize;
683
+ mtctx->jobs[u].cdict = (u==0) ? cdict : NULL;
426
684
  mtctx->jobs[u].fullFrameSize = srcSize;
427
- mtctx->jobs[u].params = params;
685
+ mtctx->jobs[u].params = jobParams;
686
+ /* do not calculate checksum within sections, but write it in header for first section */
687
+ if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
428
688
  mtctx->jobs[u].dstBuff = dstBuffer;
429
- mtctx->jobs[u].cctx = cctx;
689
+ mtctx->jobs[u].cctxPool = mtctx->cctxPool;
690
+ mtctx->jobs[u].bufPool = mtctx->bufPool;
430
691
  mtctx->jobs[u].firstChunk = (u==0);
431
692
  mtctx->jobs[u].lastChunk = (u==nbChunks-1);
432
693
  mtctx->jobs[u].jobCompleted = 0;
433
694
  mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
434
695
  mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
435
696
 
436
- DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize);
437
- DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12);
697
+ if (params.fParams.checksumFlag) {
698
+ XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
699
+ }
700
+
701
+ DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)chunkSize);
702
+ DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
438
703
  POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
439
704
 
440
705
  frameStartPos += chunkSize;
706
+ dstBufferPos += dstBufferCapacity;
441
707
  remainingSrcSize -= chunkSize;
442
708
  } }
443
- /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */
444
709
 
445
- { unsigned chunkID;
446
- size_t error = 0, dstPos = 0;
710
+ /* collect result */
711
+ { size_t error = 0, dstPos = 0;
712
+ unsigned chunkID;
447
713
  for (chunkID=0; chunkID<nbChunks; chunkID++) {
448
- DEBUGLOG(3, "waiting for chunk %u ", chunkID);
449
- PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
714
+ DEBUGLOG(5, "waiting for chunk %u ", chunkID);
715
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
450
716
  while (mtctx->jobs[chunkID].jobCompleted==0) {
451
- DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID);
452
- pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
717
+ DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
718
+ ZSTD_pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
453
719
  }
454
- pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
455
- DEBUGLOG(3, "ready to write chunk %u ", chunkID);
720
+ ZSTD_pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
721
+ DEBUGLOG(5, "ready to write chunk %u ", chunkID);
456
722
 
457
- ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
458
- mtctx->jobs[chunkID].cctx = NULL;
459
723
  mtctx->jobs[chunkID].srcStart = NULL;
460
724
  { size_t const cSize = mtctx->jobs[chunkID].cSize;
461
725
  if (ZSTD_isError(cSize)) error = cSize;
462
726
  if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
463
- if (chunkID) { /* note : chunk 0 is already written directly into dst */
464
- if (!error) memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);
465
- ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
466
- mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
467
- }
727
+ if (chunkID) { /* note : chunk 0 is written directly at dst, which is correct position */
728
+ if (!error)
729
+ memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */
730
+ if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */
731
+ DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
732
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff);
733
+ } }
734
+ mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
468
735
  dstPos += cSize ;
469
736
  }
470
- }
471
- if (!error) DEBUGLOG(3, "compressed size : %u ", (U32)dstPos);
737
+ } /* for (chunkID=0; chunkID<nbChunks; chunkID++) */
738
+
739
+ DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
740
+ if (params.fParams.checksumFlag) {
741
+ U32 const checksum = (U32)XXH64_digest(&xxh64);
742
+ if (dstPos + 4 > dstCapacity) {
743
+ error = ERROR(dstSize_tooSmall);
744
+ } else {
745
+ DEBUGLOG(4, "writing checksum : %08X \n", checksum);
746
+ MEM_writeLE32((char*)dst + dstPos, checksum);
747
+ dstPos += 4;
748
+ } }
749
+
750
+ if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos);
472
751
  return error ? error : dstPos;
473
752
  }
753
+ }
474
754
 
755
+ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
756
+ void* dst, size_t dstCapacity,
757
+ const void* src, size_t srcSize,
758
+ const ZSTD_CDict* cdict,
759
+ ZSTD_parameters const params,
760
+ unsigned overlapLog)
761
+ {
762
+ ZSTD_CCtx_params cctxParams = mtctx->params;
763
+ cctxParams.cParams = params.cParams;
764
+ cctxParams.fParams = params.fParams;
765
+ cctxParams.overlapSizeLog = overlapLog;
766
+ return ZSTDMT_compress_advanced_internal(mtctx,
767
+ dst, dstCapacity,
768
+ src, srcSize,
769
+ cdict, cctxParams);
770
+ }
771
+
772
+
773
+ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
774
+ void* dst, size_t dstCapacity,
775
+ const void* src, size_t srcSize,
776
+ int compressionLevel)
777
+ {
778
+ U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
779
+ ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
780
+ params.fParams.contentSizeFlag = 1;
781
+ return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
475
782
  }
476
783
 
477
784
 
@@ -479,52 +786,61 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
479
786
  /* ======= Streaming API ======= */
480
787
  /* ====================================== */
481
788
 
482
- static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
483
- while (zcs->doneJobID < zcs->nextJobID) {
484
- unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
485
- PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
486
- while (zcs->jobs[jobID].jobCompleted==0) {
487
- DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
488
- pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
489
- }
490
- pthread_mutex_unlock(&zcs->jobCompleted_mutex);
491
- zcs->doneJobID++;
789
+ size_t ZSTDMT_initCStream_internal(
790
+ ZSTDMT_CCtx* zcs,
791
+ const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
792
+ const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
793
+ unsigned long long pledgedSrcSize)
794
+ {
795
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
796
+ /* params are supposed to be fully validated at this point */
797
+ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
798
+ assert(!((dict) && (cdict))); /* either dict or cdict, not both */
799
+ assert(zcs->cctxPool->totalCCtx == params.nbThreads);
800
+ zcs->singleThreaded = (params.nbThreads==1) | (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
801
+
802
+ if (zcs->singleThreaded) {
803
+ ZSTD_CCtx_params const singleThreadParams = ZSTDMT_makeJobCCtxParams(params);
804
+ DEBUGLOG(4, "single thread mode");
805
+ assert(singleThreadParams.nbThreads == 0);
806
+ return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
807
+ dict, dictSize, cdict,
808
+ singleThreadParams, pledgedSrcSize);
492
809
  }
493
- }
810
+ DEBUGLOG(4, "multi-threading mode (%u threads)", params.nbThreads);
494
811
 
495
-
496
- static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
497
- const void* dict, size_t dictSize, unsigned updateDict,
498
- ZSTD_parameters params, unsigned long long pledgedSrcSize)
499
- {
500
- ZSTD_customMem const cmem = { NULL, NULL, NULL };
501
- DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog);
502
- if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize);
503
- if (zcs->allJobsCompleted == 0) { /* previous job not correctly finished */
812
+ if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */
504
813
  ZSTDMT_waitForAllJobsCompleted(zcs);
505
814
  ZSTDMT_releaseAllJobResources(zcs);
506
815
  zcs->allJobsCompleted = 1;
507
816
  }
817
+
508
818
  zcs->params = params;
509
- if (updateDict) {
510
- ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
511
- if (dict && dictSize) {
512
- zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params, cmem);
513
- if (zcs->cdict == NULL) return ERROR(memory_allocation);
514
- } }
515
819
  zcs->frameContentSize = pledgedSrcSize;
516
- zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
517
- DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
518
- DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
519
- zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
520
- zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
521
- zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
522
- DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
523
- zcs->marginSize = zcs->targetSectionSize >> 2;
524
- zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
525
- zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
526
- if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
527
- zcs->inBuff.filled = 0;
820
+ if (dict) {
821
+ ZSTD_freeCDict(zcs->cdictLocal);
822
+ zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
823
+ ZSTD_dlm_byCopy, dictMode, /* note : a loadPrefix becomes an internal CDict */
824
+ params.cParams, zcs->cMem);
825
+ zcs->cdict = zcs->cdictLocal;
826
+ if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
827
+ } else {
828
+ ZSTD_freeCDict(zcs->cdictLocal);
829
+ zcs->cdictLocal = NULL;
830
+ zcs->cdict = cdict;
831
+ }
832
+
833
+ assert(params.overlapSizeLog <= 9);
834
+ zcs->targetDictSize = (params.overlapSizeLog==0) ? 0 : (size_t)1 << (params.cParams.windowLog - (9 - params.overlapSizeLog));
835
+ DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(zcs->targetDictSize>>10));
836
+ zcs->targetSectionSize = params.jobSize ? params.jobSize : (size_t)1 << (params.cParams.windowLog + 2);
837
+ if (zcs->targetSectionSize < ZSTDMT_JOBSIZE_MIN) zcs->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
838
+ if (zcs->targetSectionSize < zcs->targetDictSize) zcs->targetSectionSize = zcs->targetDictSize; /* job size must be >= overlap size */
839
+ DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(zcs->targetSectionSize>>10), params.jobSize);
840
+ zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize;
841
+ DEBUGLOG(4, "inBuff Size : %u KB", (U32)(zcs->inBuffSize>>10));
842
+ ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) );
843
+ zcs->inBuff.buffer = g_nullBuffer;
528
844
  zcs->dictSize = 0;
529
845
  zcs->doneJobID = 0;
530
846
  zcs->nextJobID = 0;
@@ -534,53 +850,74 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
534
850
  return 0;
535
851
  }
536
852
 
537
- size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs,
538
- const void* dict, size_t dictSize,
539
- ZSTD_parameters params, unsigned long long pledgedSrcSize)
853
+ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
854
+ const void* dict, size_t dictSize,
855
+ ZSTD_parameters params,
856
+ unsigned long long pledgedSrcSize)
540
857
  {
541
- return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize);
858
+ ZSTD_CCtx_params cctxParams = mtctx->params;
859
+ DEBUGLOG(5, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
860
+ cctxParams.cParams = params.cParams;
861
+ cctxParams.fParams = params.fParams;
862
+ return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dm_auto, NULL,
863
+ cctxParams, pledgedSrcSize);
542
864
  }
543
865
 
866
+ size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
867
+ const ZSTD_CDict* cdict,
868
+ ZSTD_frameParameters fParams,
869
+ unsigned long long pledgedSrcSize)
870
+ {
871
+ ZSTD_CCtx_params cctxParams = mtctx->params;
872
+ cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict);
873
+ cctxParams.fParams = fParams;
874
+ if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
875
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dm_auto, cdict,
876
+ cctxParams, pledgedSrcSize);
877
+ }
878
+
879
+
544
880
  /* ZSTDMT_resetCStream() :
545
- * pledgedSrcSize is optional and can be zero == unknown */
881
+ * pledgedSrcSize can be zero == unknown (for the time being)
882
+ * prefer using ZSTD_CONTENTSIZE_UNKNOWN,
883
+ * as `0` might mean "empty" in the future */
546
884
  size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
547
885
  {
548
- if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize);
549
- return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
886
+ if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
887
+ if (zcs->params.nbThreads==1)
888
+ return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
889
+ return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, 0, zcs->params,
890
+ pledgedSrcSize);
550
891
  }
551
892
 
552
893
  size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
553
894
  ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
554
- return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0);
895
+ ZSTD_CCtx_params cctxParams = zcs->params;
896
+ cctxParams.cParams = params.cParams;
897
+ cctxParams.fParams = params.fParams;
898
+ return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
555
899
  }
556
900
 
557
901
 
558
902
  static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
559
903
  {
560
- size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
561
- buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
562
- ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
563
904
  unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
564
905
 
565
- if ((cctx==NULL) || (dstBuffer.start==NULL)) {
566
- zcs->jobs[jobID].jobCompleted = 1;
567
- zcs->nextJobID++;
568
- ZSTDMT_waitForAllJobsCompleted(zcs);
569
- ZSTDMT_releaseAllJobResources(zcs);
570
- return ERROR(memory_allocation);
571
- }
572
-
573
- DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
906
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
907
+ zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
574
908
  zcs->jobs[jobID].src = zcs->inBuff.buffer;
575
909
  zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
576
910
  zcs->jobs[jobID].srcSize = srcSize;
577
- zcs->jobs[jobID].dictSize = zcs->dictSize; /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */
911
+ zcs->jobs[jobID].prefixSize = zcs->dictSize;
912
+ assert(zcs->inBuff.filled >= srcSize + zcs->dictSize);
578
913
  zcs->jobs[jobID].params = zcs->params;
579
- if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */
914
+ /* do not calculate checksum within sections, but write it in header for first section */
915
+ if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
580
916
  zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
581
917
  zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
582
- zcs->jobs[jobID].dstBuff = dstBuffer;
583
- zcs->jobs[jobID].cctx = cctx;
918
+ zcs->jobs[jobID].dstBuff = g_nullBuffer;
919
+ zcs->jobs[jobID].cctxPool = zcs->cctxPool;
920
+ zcs->jobs[jobID].bufPool = zcs->bufPool;
584
921
  zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
585
922
  zcs->jobs[jobID].lastChunk = endFrame;
586
923
  zcs->jobs[jobID].jobCompleted = 0;
@@ -588,10 +925,13 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
588
925
  zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
589
926
  zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
590
927
 
928
+ if (zcs->params.fParams.checksumFlag)
929
+ XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->dictSize, srcSize);
930
+
591
931
  /* get a new buffer for next input */
592
932
  if (!endFrame) {
593
933
  size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
594
- zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
934
+ zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
595
935
  if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
596
936
  zcs->jobs[jobID].jobCompleted = 1;
597
937
  zcs->nextJobID++;
@@ -599,22 +939,27 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
599
939
  ZSTDMT_releaseAllJobResources(zcs);
600
940
  return ERROR(memory_allocation);
601
941
  }
602
- DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
603
942
  zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
604
- DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize));
605
- memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled);
606
- DEBUGLOG(5, "new inBuff pre-filled");
943
+ memmove(zcs->inBuff.buffer.start,
944
+ (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
945
+ zcs->inBuff.filled);
607
946
  zcs->dictSize = newDictSize;
608
- } else {
947
+ } else { /* if (endFrame==1) */
609
948
  zcs->inBuff.buffer = g_nullBuffer;
610
949
  zcs->inBuff.filled = 0;
611
950
  zcs->dictSize = 0;
612
951
  zcs->frameEnded = 1;
613
- if (zcs->nextJobID == 0)
614
- zcs->params.fParams.checksumFlag = 0; /* single chunk : checksum is calculated directly within worker thread */
615
- }
952
+ if (zcs->nextJobID == 0) {
953
+ /* single chunk exception : checksum is calculated directly within worker thread */
954
+ zcs->params.fParams.checksumFlag = 0;
955
+ } }
616
956
 
617
- DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask);
957
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)",
958
+ zcs->nextJobID,
959
+ (U32)zcs->jobs[jobID].srcSize,
960
+ zcs->jobs[jobID].lastChunk,
961
+ zcs->doneJobID,
962
+ zcs->doneJobID & zcs->jobIDMask);
618
963
  POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */
619
964
  zcs->nextJobID++;
620
965
  return 0;
@@ -628,48 +973,44 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
628
973
  static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
629
974
  {
630
975
  unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
976
+ DEBUGLOG(5, "ZSTDMT_flushNextJob");
631
977
  if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */
632
- PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
978
+ ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
633
979
  while (zcs->jobs[wJobID].jobCompleted==0) {
634
980
  DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
635
- if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */
636
- pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); /* block when nothing available to flush */
981
+ if (!blockToFlush) { ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */
982
+ ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); /* block when nothing available to flush */
637
983
  }
638
- pthread_mutex_unlock(&zcs->jobCompleted_mutex);
984
+ ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
639
985
  /* compression job completed : output can be flushed */
640
986
  { ZSTDMT_jobDescription job = zcs->jobs[wJobID];
641
987
  if (!job.jobScanned) {
642
988
  if (ZSTD_isError(job.cSize)) {
643
- DEBUGLOG(5, "compression error detected ");
989
+ DEBUGLOG(5, "job %u : compression error detected : %s",
990
+ zcs->doneJobID, ZSTD_getErrorName(job.cSize));
644
991
  ZSTDMT_waitForAllJobsCompleted(zcs);
645
992
  ZSTDMT_releaseAllJobResources(zcs);
646
993
  return job.cSize;
647
994
  }
648
- ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
649
- zcs->jobs[wJobID].cctx = NULL;
650
995
  DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
651
996
  if (zcs->params.fParams.checksumFlag) {
652
- XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
653
997
  if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
654
998
  U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
655
- DEBUGLOG(4, "writing checksum : %08X \n", checksum);
999
+ DEBUGLOG(5, "writing checksum : %08X \n", checksum);
656
1000
  MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
657
1001
  job.cSize += 4;
658
1002
  zcs->jobs[wJobID].cSize += 4;
659
1003
  } }
660
- ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
661
- zcs->jobs[wJobID].srcStart = NULL;
662
- zcs->jobs[wJobID].src = g_nullBuffer;
663
1004
  zcs->jobs[wJobID].jobScanned = 1;
664
1005
  }
665
1006
  { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
666
- DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
1007
+ DEBUGLOG(5, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
667
1008
  memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
668
1009
  output->pos += toWrite;
669
1010
  job.dstFlushed += toWrite;
670
1011
  }
671
1012
  if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */
672
- ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
1013
+ ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
673
1014
  zcs->jobs[wJobID].dstBuff = g_nullBuffer;
674
1015
  zcs->jobs[wJobID].jobCompleted = 0;
675
1016
  zcs->doneJobID++;
@@ -684,56 +1025,125 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
684
1025
  } }
685
1026
 
686
1027
 
687
- size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
1028
+ /** ZSTDMT_compressStream_generic() :
1029
+ * internal use only - exposed to be invoked from zstd_compress.c
1030
+ * assumption : output and input are valid (pos <= size)
1031
+ * @return : minimum amount of data remaining to flush, 0 if none */
1032
+ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
1033
+ ZSTD_outBuffer* output,
1034
+ ZSTD_inBuffer* input,
1035
+ ZSTD_EndDirective endOp)
688
1036
  {
689
- size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize;
690
- if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Only flush is allowed. Restart with init */
691
- if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input);
1037
+ size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize;
1038
+ unsigned forwardInputProgress = 0;
1039
+ DEBUGLOG(5, "ZSTDMT_compressStream_generic ");
1040
+ assert(output->pos <= output->size);
1041
+ assert(input->pos <= input->size);
1042
+
1043
+ if (mtctx->singleThreaded) { /* delegate to single-thread (synchronous) */
1044
+ return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
1045
+ }
1046
+
1047
+ if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
1048
+ /* current frame being ended. Only flush/end are allowed */
1049
+ return ERROR(stage_wrong);
1050
+ }
1051
+
1052
+ /* single-pass shortcut (note : synchronous-mode) */
1053
+ if ( (mtctx->nextJobID == 0) /* just started */
1054
+ && (mtctx->inBuff.filled == 0) /* nothing buffered */
1055
+ && (endOp == ZSTD_e_end) /* end order */
1056
+ && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough room */
1057
+ size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
1058
+ (char*)output->dst + output->pos, output->size - output->pos,
1059
+ (const char*)input->src + input->pos, input->size - input->pos,
1060
+ mtctx->cdict, mtctx->params);
1061
+ if (ZSTD_isError(cSize)) return cSize;
1062
+ input->pos = input->size;
1063
+ output->pos += cSize;
1064
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer); /* was allocated in initStream */
1065
+ mtctx->allJobsCompleted = 1;
1066
+ mtctx->frameEnded = 1;
1067
+ return 0;
1068
+ }
692
1069
 
693
1070
  /* fill input buffer */
694
- { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled);
695
- memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad);
696
- input->pos += toLoad;
697
- zcs->inBuff.filled += toLoad;
1071
+ if (input->size > input->pos) { /* support NULL input */
1072
+ if (mtctx->inBuff.buffer.start == NULL) {
1073
+ mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool); /* note : may fail, in which case, no forward input progress */
1074
+ mtctx->inBuff.filled = 0;
1075
+ }
1076
+ if (mtctx->inBuff.buffer.start) {
1077
+ size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
1078
+ DEBUGLOG(5, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
1079
+ memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
1080
+ input->pos += toLoad;
1081
+ mtctx->inBuff.filled += toLoad;
1082
+ forwardInputProgress = toLoad>0;
1083
+ } }
1084
+
1085
+ if ( (mtctx->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
1086
+ && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { /* avoid overwriting job round buffer */
1087
+ CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
698
1088
  }
699
1089
 
700
- if ( (zcs->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
701
- && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { /* avoid overwriting job round buffer */
702
- CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) );
1090
+ /* check for potential compressed data ready to be flushed */
1091
+ CHECK_F( ZSTDMT_flushNextJob(mtctx, output, !forwardInputProgress /* blockToFlush */) ); /* block if there was no forward input progress */
1092
+
1093
+ if (input->pos < input->size) /* input not consumed : do not flush yet */
1094
+ endOp = ZSTD_e_continue;
1095
+
1096
+ switch(endOp)
1097
+ {
1098
+ case ZSTD_e_flush:
1099
+ return ZSTDMT_flushStream(mtctx, output);
1100
+ case ZSTD_e_end:
1101
+ return ZSTDMT_endStream(mtctx, output);
1102
+ case ZSTD_e_continue:
1103
+ return 1;
1104
+ default:
1105
+ return ERROR(GENERIC); /* invalid endDirective */
703
1106
  }
1107
+ }
1108
+
704
1109
 
705
- /* check for data to flush */
706
- CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) ); /* block if it wasn't possible to create new job due to saturation */
1110
+ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
1111
+ {
1112
+ CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
707
1113
 
708
1114
  /* recommended next input size : fill current input buffer */
709
1115
  return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
710
1116
  }
711
1117
 
712
1118
 
713
- static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame)
1119
+ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned endFrame)
714
1120
  {
715
- size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;
1121
+ size_t const srcSize = mtctx->inBuff.filled - mtctx->dictSize;
1122
+ DEBUGLOG(5, "ZSTDMT_flushStream_internal");
716
1123
 
717
- if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
718
- if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
719
- && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
720
- CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
1124
+ if ( ((srcSize > 0) || (endFrame && !mtctx->frameEnded))
1125
+ && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {
1126
+ DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job");
1127
+ CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
721
1128
  }
722
1129
 
723
1130
  /* check if there is any data available to flush */
724
- DEBUGLOG(5, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID);
725
- return ZSTDMT_flushNextJob(zcs, output, 1);
1131
+ return ZSTDMT_flushNextJob(mtctx, output, 1 /* blockToFlush */);
726
1132
  }
727
1133
 
728
1134
 
729
- size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
1135
+ size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
730
1136
  {
731
- if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output);
732
- return ZSTDMT_flushStream_internal(zcs, output, 0);
1137
+ DEBUGLOG(5, "ZSTDMT_flushStream");
1138
+ if (mtctx->singleThreaded)
1139
+ return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
1140
+ return ZSTDMT_flushStream_internal(mtctx, output, 0 /* endFrame */);
733
1141
  }
734
1142
 
735
- size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
1143
+ size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
736
1144
  {
737
- if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output);
738
- return ZSTDMT_flushStream_internal(zcs, output, 1);
1145
+ DEBUGLOG(4, "ZSTDMT_endStream");
1146
+ if (mtctx->singleThreaded)
1147
+ return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
1148
+ return ZSTDMT_flushStream_internal(mtctx, output, 1 /* endFrame */);
739
1149
  }