zstd-ruby 1.2.0.0 → 1.3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +7 -5
  4. data/ext/zstdruby/libzstd/common/bitstream.h +23 -9
  5. data/ext/zstdruby/libzstd/common/error_private.c +4 -1
  6. data/ext/zstdruby/libzstd/common/huf.h +20 -0
  7. data/ext/zstdruby/libzstd/common/mem.h +0 -14
  8. data/ext/zstdruby/libzstd/common/pool.c +12 -0
  9. data/ext/zstdruby/libzstd/common/pool.h +5 -0
  10. data/ext/zstdruby/libzstd/common/threading.c +0 -1
  11. data/ext/zstdruby/libzstd/common/zstd_common.c +25 -18
  12. data/ext/zstdruby/libzstd/common/zstd_errors.h +15 -7
  13. data/ext/zstdruby/libzstd/common/zstd_internal.h +59 -9
  14. data/ext/zstdruby/libzstd/compress/huf_compress.c +7 -3
  15. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1082 -487
  16. data/ext/zstdruby/libzstd/compress/zstd_opt.h +30 -15
  17. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +362 -158
  18. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +49 -13
  19. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +150 -26
  20. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +380 -258
  21. data/ext/zstdruby/libzstd/dictBuilder/cover.c +23 -37
  22. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +30 -40
  23. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +104 -95
  24. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -10
  25. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +14 -19
  26. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -12
  27. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -14
  28. data/ext/zstdruby/libzstd/zstd.h +507 -166
  29. data/lib/zstd-ruby/version.rb +1 -1
  30. metadata +2 -2
@@ -43,6 +43,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t src
43
43
  if (ssPtr->litLengthSum == 0) {
44
44
  if (srcSize <= 1024) ssPtr->staticPrices = 1;
45
45
 
46
+ assert(ssPtr->litFreq!=NULL);
46
47
  for (u=0; u<=MaxLit; u++)
47
48
  ssPtr->litFreq[u] = 0;
48
49
  for (u=0; u<srcSize; u++)
@@ -201,6 +202,20 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
201
202
  }
202
203
 
203
204
 
205
+ /* function safe only for comparisons */
206
+ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
207
+ {
208
+ switch (length)
209
+ {
210
+ default :
211
+ case 4 : return MEM_read32(memPtr);
212
+ case 3 : if (MEM_isLittleEndian())
213
+ return MEM_read32(memPtr)<<8;
214
+ else
215
+ return MEM_read32(memPtr)>>8;
216
+ }
217
+ }
218
+
204
219
 
205
220
  /* Update hashTable3 up to ip (excluded)
206
221
  Assumption : always within prefix (i.e. not within extDict) */
@@ -234,12 +249,12 @@ static U32 ZSTD_insertBtAndGetAllMatches (
234
249
  {
235
250
  const BYTE* const base = zc->base;
236
251
  const U32 current = (U32)(ip-base);
237
- const U32 hashLog = zc->params.cParams.hashLog;
252
+ const U32 hashLog = zc->appliedParams.cParams.hashLog;
238
253
  const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
239
254
  U32* const hashTable = zc->hashTable;
240
255
  U32 matchIndex = hashTable[h];
241
256
  U32* const bt = zc->chainTable;
242
- const U32 btLog = zc->params.cParams.chainLog - 1;
257
+ const U32 btLog = zc->appliedParams.cParams.chainLog - 1;
243
258
  const U32 btMask= (1U << btLog) - 1;
244
259
  size_t commonLengthSmaller=0, commonLengthLarger=0;
245
260
  const BYTE* const dictBase = zc->dictBase;
@@ -267,7 +282,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
267
282
  if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
268
283
  } else {
269
284
  match = dictBase + matchIndex3;
270
- if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
285
+ if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
271
286
  currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
272
287
  }
273
288
 
@@ -410,10 +425,10 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
410
425
  const BYTE* const base = ctx->base;
411
426
  const BYTE* const prefixStart = base + ctx->dictLimit;
412
427
 
413
- const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
414
- const U32 sufficient_len = ctx->params.cParams.targetLength;
415
- const U32 mls = ctx->params.cParams.searchLength;
416
- const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
428
+ const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
429
+ const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
430
+ const U32 mls = ctx->appliedParams.cParams.searchLength;
431
+ const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
417
432
 
418
433
  ZSTD_optimal_t* opt = seqStorePtr->priceTable;
419
434
  ZSTD_match_t* matches = seqStorePtr->matchTable;
@@ -439,7 +454,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
439
454
  for (i=(ip == anchor); i<last_i; i++) {
440
455
  const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
441
456
  if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
442
- && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
457
+ && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
443
458
  mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
444
459
  if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
445
460
  best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
@@ -524,7 +539,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
524
539
  for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
525
540
  const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
526
541
  if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
527
- && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
542
+ && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
528
543
  mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
529
544
 
530
545
  if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
@@ -663,10 +678,10 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
663
678
  const BYTE* const dictBase = ctx->dictBase;
664
679
  const BYTE* const dictEnd = dictBase + dictLimit;
665
680
 
666
- const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
667
- const U32 sufficient_len = ctx->params.cParams.targetLength;
668
- const U32 mls = ctx->params.cParams.searchLength;
669
- const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
681
+ const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
682
+ const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
683
+ const U32 mls = ctx->appliedParams.cParams.searchLength;
684
+ const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
670
685
 
671
686
  ZSTD_optimal_t* opt = seqStorePtr->priceTable;
672
687
  ZSTD_match_t* matches = seqStorePtr->matchTable;
@@ -698,7 +713,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
698
713
  const BYTE* const repMatch = repBase + repIndex;
699
714
  if ( (repCur > 0 && repCur <= (S32)current)
700
715
  && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
701
- && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
716
+ && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
702
717
  /* repcode detected we should take it */
703
718
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
704
719
  mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
@@ -794,7 +809,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
794
809
  const BYTE* const repMatch = repBase + repIndex;
795
810
  if ( (repCur > 0 && repCur <= (S32)(current+cur))
796
811
  && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
797
- && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
812
+ && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
798
813
  /* repcode detected */
799
814
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
800
815
  mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
@@ -14,34 +14,31 @@
14
14
 
15
15
  /* ====== Compiler specifics ====== */
16
16
  #if defined(_MSC_VER)
17
- # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
17
+ # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
18
18
  #endif
19
19
 
20
20
 
21
21
  /* ====== Dependencies ====== */
22
- #include <stdlib.h> /* malloc */
23
- #include <string.h> /* memcpy */
24
- #include "pool.h" /* threadpool */
25
- #include "threading.h" /* mutex */
26
- #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
22
+ #include <string.h> /* memcpy, memset */
23
+ #include "pool.h" /* threadpool */
24
+ #include "threading.h" /* mutex */
25
+ #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
27
26
  #include "zstdmt_compress.h"
28
27
 
29
28
 
30
29
  /* ====== Debug ====== */
31
- #if 0
30
+ #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
32
31
 
33
32
  # include <stdio.h>
34
33
  # include <unistd.h>
35
34
  # include <sys/times.h>
36
- static unsigned g_debugLevel = 5;
37
- # define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
38
- # define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
35
+ # define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
39
36
 
40
- # define DEBUG_PRINTHEX(l,p,n) { \
41
- unsigned debug_u; \
42
- for (debug_u=0; debug_u<(n); debug_u++) \
37
+ # define DEBUG_PRINTHEX(l,p,n) { \
38
+ unsigned debug_u; \
39
+ for (debug_u=0; debug_u<(n); debug_u++) \
43
40
  DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
44
- DEBUGLOGRAW(l, " \n"); \
41
+ DEBUGLOGRAW(l, " \n"); \
45
42
  }
46
43
 
47
44
  static unsigned long long GetCurrentClockTimeMicroseconds(void)
@@ -53,22 +50,22 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
53
50
  return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
54
51
  }
55
52
 
56
- #define MUTEX_WAIT_TIME_DLEVEL 5
57
- #define PTHREAD_MUTEX_LOCK(mutex) \
58
- if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
59
- unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
60
- pthread_mutex_lock(mutex); \
61
- { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
62
- unsigned long long const elapsedTime = (afterTime-beforeTime); \
63
- if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
64
- DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
65
- elapsedTime, #mutex); \
66
- } } \
67
- } else pthread_mutex_lock(mutex);
53
+ #define MUTEX_WAIT_TIME_DLEVEL 6
54
+ #define PTHREAD_MUTEX_LOCK(mutex) { \
55
+ if (ZSTD_DEBUG>=MUTEX_WAIT_TIME_DLEVEL) { \
56
+ unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
57
+ pthread_mutex_lock(mutex); \
58
+ { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
59
+ unsigned long long const elapsedTime = (afterTime-beforeTime); \
60
+ if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
61
+ DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
62
+ elapsedTime, #mutex); \
63
+ } } \
64
+ } else pthread_mutex_lock(mutex); \
65
+ }
68
66
 
69
67
  #else
70
68
 
71
- # define DEBUGLOG(l, ...) {} /* disabled */
72
69
  # define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
73
70
  # define DEBUG_PRINTHEX(l,p,n) {}
74
71
 
@@ -87,16 +84,19 @@ static const buffer_t g_nullBuffer = { NULL, 0 };
87
84
  typedef struct ZSTDMT_bufferPool_s {
88
85
  unsigned totalBuffers;
89
86
  unsigned nbBuffers;
87
+ ZSTD_customMem cMem;
90
88
  buffer_t bTable[1]; /* variable size */
91
89
  } ZSTDMT_bufferPool;
92
90
 
93
- static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
91
+ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
94
92
  {
95
93
  unsigned const maxNbBuffers = 2*nbThreads + 2;
96
- ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t));
94
+ ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
95
+ sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
97
96
  if (bufPool==NULL) return NULL;
98
97
  bufPool->totalBuffers = maxNbBuffers;
99
98
  bufPool->nbBuffers = 0;
99
+ bufPool->cMem = cMem;
100
100
  return bufPool;
101
101
  }
102
102
 
@@ -105,23 +105,39 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
105
105
  unsigned u;
106
106
  if (!bufPool) return; /* compatibility with free on NULL */
107
107
  for (u=0; u<bufPool->totalBuffers; u++)
108
- free(bufPool->bTable[u].start);
109
- free(bufPool);
108
+ ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
109
+ ZSTD_free(bufPool, bufPool->cMem);
110
+ }
111
+
112
+ /* only works at initialization, not during compression */
113
+ static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
114
+ {
115
+ size_t const poolSize = sizeof(*bufPool)
116
+ + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
117
+ unsigned u;
118
+ size_t totalBufferSize = 0;
119
+ for (u=0; u<bufPool->totalBuffers; u++)
120
+ totalBufferSize += bufPool->bTable[u].size;
121
+
122
+ return poolSize + totalBufferSize;
110
123
  }
111
124
 
112
- /* assumption : invocation from main thread only ! */
125
+ /** ZSTDMT_getBuffer() :
126
+ * assumption : invocation from main thread only ! */
113
127
  static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
114
128
  {
115
129
  if (pool->nbBuffers) { /* try to use an existing buffer */
116
130
  buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
117
131
  size_t const availBufferSize = buf.size;
118
- if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */
132
+ if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
133
+ /* large enough, but not too much */
119
134
  return buf;
120
- free(buf.start); /* size conditions not respected : scratch this buffer and create a new one */
135
+ /* size conditions not respected : scratch this buffer, create new one */
136
+ ZSTD_free(buf.start, pool->cMem);
121
137
  }
122
138
  /* create new buffer */
123
139
  { buffer_t buffer;
124
- void* const start = malloc(bSize);
140
+ void* const start = ZSTD_malloc(bSize, pool->cMem);
125
141
  if (start==NULL) bSize = 0;
126
142
  buffer.start = start; /* note : start can be NULL if malloc fails ! */
127
143
  buffer.size = bSize;
@@ -138,7 +154,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
138
154
  return;
139
155
  }
140
156
  /* Reached bufferPool capacity (should not happen) */
141
- free(buf.start);
157
+ ZSTD_free(buf.start, pool->cMem);
142
158
  }
143
159
 
144
160
 
@@ -147,6 +163,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
147
163
  typedef struct {
148
164
  unsigned totalCCtx;
149
165
  unsigned availCCtx;
166
+ ZSTD_customMem cMem;
150
167
  ZSTD_CCtx* cctx[1]; /* variable size */
151
168
  } ZSTDMT_CCtxPool;
152
169
 
@@ -158,23 +175,40 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
158
175
  unsigned u;
159
176
  for (u=0; u<pool->totalCCtx; u++)
160
177
  ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
161
- free(pool);
178
+ ZSTD_free(pool, pool->cMem);
162
179
  }
163
180
 
164
181
  /* ZSTDMT_createCCtxPool() :
165
182
  * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
166
- static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads)
183
+ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
184
+ ZSTD_customMem cMem)
167
185
  {
168
- ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*));
186
+ ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
187
+ sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
169
188
  if (!cctxPool) return NULL;
189
+ cctxPool->cMem = cMem;
170
190
  cctxPool->totalCCtx = nbThreads;
171
191
  cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
172
- cctxPool->cctx[0] = ZSTD_createCCtx();
192
+ cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
173
193
  if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
174
- DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads);
194
+ DEBUGLOG(3, "cctxPool created, with %u threads", nbThreads);
175
195
  return cctxPool;
176
196
  }
177
197
 
198
+ /* only works during initialization phase, not during compression */
199
+ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
200
+ {
201
+ unsigned const nbThreads = cctxPool->totalCCtx;
202
+ size_t const poolSize = sizeof(*cctxPool)
203
+ + (nbThreads-1)*sizeof(ZSTD_CCtx*);
204
+ unsigned u;
205
+ size_t totalCCtxSize = 0;
206
+ for (u=0; u<nbThreads; u++)
207
+ totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
208
+
209
+ return poolSize + totalCCtxSize;
210
+ }
211
+
178
212
  static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
179
213
  {
180
214
  if (pool->availCCtx) {
@@ -218,7 +252,7 @@ typedef struct {
218
252
  pthread_mutex_t* jobCompleted_mutex;
219
253
  pthread_cond_t* jobCompleted_cond;
220
254
  ZSTD_parameters params;
221
- ZSTD_CDict* cdict;
255
+ const ZSTD_CDict* cdict;
222
256
  unsigned long long fullFrameSize;
223
257
  } ZSTDMT_jobDescription;
224
258
 
@@ -228,11 +262,11 @@ void ZSTDMT_compressChunk(void* jobDescription)
228
262
  ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
229
263
  const void* const src = (const char*)job->srcStart + job->dictSize;
230
264
  buffer_t const dstBuff = job->dstBuff;
231
- DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
265
+ DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
232
266
  job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
233
267
  if (job->cdict) { /* should only happen for first segment */
234
268
  size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
235
- if (job->cdict) DEBUGLOG(3, "using CDict ");
269
+ DEBUGLOG(5, "using CDict");
236
270
  if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
237
271
  } else { /* srcStart points at reloaded section */
238
272
  if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */
@@ -247,12 +281,12 @@ void ZSTDMT_compressChunk(void* jobDescription)
247
281
  ZSTD_invalidateRepCodes(job->cctx);
248
282
  }
249
283
 
250
- DEBUGLOG(4, "Compressing : ");
284
+ DEBUGLOG(5, "Compressing : ");
251
285
  DEBUG_PRINTHEX(4, job->srcStart, 12);
252
286
  job->cSize = (job->lastChunk) ?
253
287
  ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
254
288
  ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
255
- DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)",
289
+ DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u)",
256
290
  (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
257
291
  DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
258
292
 
@@ -271,6 +305,7 @@ _endJob:
271
305
 
272
306
  struct ZSTDMT_CCtx_s {
273
307
  POOL_ctx* factory;
308
+ ZSTDMT_jobDescription* jobs;
274
309
  ZSTDMT_bufferPool* buffPool;
275
310
  ZSTDMT_CCtxPool* cctxPool;
276
311
  pthread_mutex_t jobCompleted_mutex;
@@ -292,50 +327,64 @@ struct ZSTDMT_CCtx_s {
292
327
  unsigned overlapRLog;
293
328
  unsigned long long frameContentSize;
294
329
  size_t sectionSize;
295
- ZSTD_CDict* cdict;
296
- ZSTD_CStream* cstream;
297
- ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */
330
+ ZSTD_customMem cMem;
331
+ ZSTD_CDict* cdictLocal;
332
+ const ZSTD_CDict* cdict;
298
333
  };
299
334
 
300
- ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads)
335
+ static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
301
336
  {
302
- ZSTDMT_CCtx* cctx;
303
- U32 const minNbJobs = nbThreads + 2;
304
- U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
337
+ U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
305
338
  U32 const nbJobs = 1 << nbJobsLog2;
306
- DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n",
307
- nbThreads, minNbJobs, nbJobsLog2, nbJobs);
339
+ *nbJobsPtr = nbJobs;
340
+ return (ZSTDMT_jobDescription*) ZSTD_calloc(
341
+ nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
342
+ }
343
+
344
+ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
345
+ {
346
+ ZSTDMT_CCtx* mtctx;
347
+ U32 nbJobs = nbThreads + 2;
348
+ DEBUGLOG(3, "ZSTDMT_createCCtx_advanced");
349
+
308
350
  if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
309
- cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription));
310
- if (!cctx) return NULL;
311
- cctx->nbThreads = nbThreads;
312
- cctx->jobIDMask = nbJobs - 1;
313
- cctx->allJobsCompleted = 1;
314
- cctx->sectionSize = 0;
315
- cctx->overlapRLog = 3;
316
- cctx->factory = POOL_create(nbThreads, 1);
317
- cctx->buffPool = ZSTDMT_createBufferPool(nbThreads);
318
- cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads);
319
- if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) { /* one object was not created */
320
- ZSTDMT_freeCCtx(cctx);
351
+ if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
352
+ /* invalid custom allocator */
353
+ return NULL;
354
+
355
+ mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
356
+ if (!mtctx) return NULL;
357
+ mtctx->cMem = cMem;
358
+ mtctx->nbThreads = nbThreads;
359
+ mtctx->allJobsCompleted = 1;
360
+ mtctx->sectionSize = 0;
361
+ mtctx->overlapRLog = 3;
362
+ mtctx->factory = POOL_create(nbThreads, 1);
363
+ mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
364
+ mtctx->jobIDMask = nbJobs - 1;
365
+ mtctx->buffPool = ZSTDMT_createBufferPool(nbThreads, cMem);
366
+ mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
367
+ if (!mtctx->factory | !mtctx->jobs | !mtctx->buffPool | !mtctx->cctxPool) {
368
+ ZSTDMT_freeCCtx(mtctx);
321
369
  return NULL;
322
370
  }
323
- if (nbThreads==1) {
324
- cctx->cstream = ZSTD_createCStream();
325
- if (!cctx->cstream) {
326
- ZSTDMT_freeCCtx(cctx); return NULL;
327
- } }
328
- pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
329
- pthread_cond_init(&cctx->jobCompleted_cond, NULL);
330
- DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads);
331
- return cctx;
371
+ pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
372
+ pthread_cond_init(&mtctx->jobCompleted_cond, NULL);
373
+ DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
374
+ return mtctx;
375
+ }
376
+
377
+ ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads)
378
+ {
379
+ return ZSTDMT_createCCtx_advanced(nbThreads, ZSTD_defaultCMem);
332
380
  }
333
381
 
334
382
  /* ZSTDMT_releaseAllJobResources() :
335
- * Ensure all workers are killed first. */
383
+ * note : ensure all workers are killed first ! */
336
384
  static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
337
385
  {
338
386
  unsigned jobID;
387
+ DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
339
388
  for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
340
389
  ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
341
390
  mtctx->jobs[jobID].dstBuff = g_nullBuffer;
@@ -356,15 +405,26 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
356
405
  POOL_free(mtctx->factory);
357
406
  if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
358
407
  ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */
408
+ ZSTD_free(mtctx->jobs, mtctx->cMem);
359
409
  ZSTDMT_freeCCtxPool(mtctx->cctxPool);
360
- ZSTD_freeCDict(mtctx->cdict);
361
- ZSTD_freeCStream(mtctx->cstream);
410
+ ZSTD_freeCDict(mtctx->cdictLocal);
362
411
  pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
363
412
  pthread_cond_destroy(&mtctx->jobCompleted_cond);
364
- free(mtctx);
413
+ ZSTD_free(mtctx, mtctx->cMem);
365
414
  return 0;
366
415
  }
367
416
 
417
+ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
418
+ {
419
+ if (mtctx == NULL) return 0; /* supports sizeof NULL */
420
+ return sizeof(*mtctx)
421
+ + POOL_sizeof(mtctx->factory)
422
+ + ZSTDMT_sizeof_bufferPool(mtctx->buffPool)
423
+ + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
424
+ + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
425
+ + ZSTD_sizeof_CDict(mtctx->cdictLocal);
426
+ }
427
+
368
428
  size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
369
429
  {
370
430
  switch(parameter)
@@ -373,7 +433,7 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
373
433
  mtctx->sectionSize = value;
374
434
  return 0;
375
435
  case ZSTDMT_p_overlapSectionLog :
376
- DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
436
+ DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value);
377
437
  mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
378
438
  return 0;
379
439
  default :
@@ -386,31 +446,49 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
386
446
  /* ===== Multi-threaded compression ===== */
387
447
  /* ------------------------------------------ */
388
448
 
389
- size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
449
+ static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
450
+ size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
451
+ size_t const chunkMaxSize = chunkSizeTarget << 2;
452
+ size_t const passSizeMax = chunkMaxSize * nbThreads;
453
+ unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
454
+ unsigned const nbChunksLarge = multiplier * nbThreads;
455
+ unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
456
+ unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
457
+ return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
458
+ }
459
+
460
+
461
+ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
390
462
  void* dst, size_t dstCapacity,
391
463
  const void* src, size_t srcSize,
392
- int compressionLevel)
464
+ const ZSTD_CDict* cdict,
465
+ ZSTD_parameters const params,
466
+ unsigned overlapRLog)
393
467
  {
394
- ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
395
- U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
396
- size_t const overlapSize = (size_t)1 << (params.cParams.windowLog - overlapLog);
397
- size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);
398
- unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + 1;
399
- unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
468
+ size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
469
+ unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads);
400
470
  size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
401
- size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
402
- size_t remainingSrcSize = srcSize;
471
+ size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
403
472
  const char* const srcStart = (const char*)src;
473
+ size_t remainingSrcSize = srcSize;
404
474
  unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
405
475
  size_t frameStartPos = 0, dstBufferPos = 0;
406
476
 
407
- DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize);
408
- DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
409
- params.fParams.contentSizeFlag = 1;
410
-
477
+ DEBUGLOG(4, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
411
478
  if (nbChunks==1) { /* fallback to single-thread mode */
412
479
  ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
413
- return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
480
+ if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams);
481
+ return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
482
+ }
483
+ assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is useful to avoid allocating extra buffers */
484
+
485
+ if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */
486
+ U32 nbJobs = nbChunks;
487
+ ZSTD_free(mtctx->jobs, mtctx->cMem);
488
+ mtctx->jobIDMask = 0;
489
+ mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
490
+ if (mtctx->jobs==NULL) return ERROR(memory_allocation);
491
+ mtctx->jobIDMask = nbJobs - 1;
414
492
  }
415
493
 
416
494
  { unsigned u;
@@ -425,15 +503,18 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
425
503
  if ((cctx==NULL) || (dstBuffer.start==NULL)) {
426
504
  mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */
427
505
  mtctx->jobs[u].jobCompleted = 1;
428
- nbChunks = u+1;
506
+ nbChunks = u+1; /* only wait and free u jobs, instead of initially expected nbChunks ones */
429
507
  break; /* let's wait for previous jobs to complete, but don't start new ones */
430
508
  }
431
509
 
432
510
  mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
433
511
  mtctx->jobs[u].dictSize = dictSize;
434
512
  mtctx->jobs[u].srcSize = chunkSize;
513
+ mtctx->jobs[u].cdict = mtctx->nextJobID==0 ? cdict : NULL;
435
514
  mtctx->jobs[u].fullFrameSize = srcSize;
436
515
  mtctx->jobs[u].params = params;
516
+ /* do not calculate checksum within sections, but write it in header for first section */
517
+ if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
437
518
  mtctx->jobs[u].dstBuff = dstBuffer;
438
519
  mtctx->jobs[u].cctx = cctx;
439
520
  mtctx->jobs[u].firstChunk = (u==0);
@@ -442,27 +523,27 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
442
523
  mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
443
524
  mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
444
525
 
445
- DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize);
446
- DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12);
526
+ DEBUGLOG(5, "posting job %u (%u bytes)", u, (U32)chunkSize);
527
+ DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
447
528
  POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
448
529
 
449
530
  frameStartPos += chunkSize;
450
531
  dstBufferPos += dstBufferCapacity;
451
532
  remainingSrcSize -= chunkSize;
452
533
  } }
453
- /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */
454
534
 
535
+ /* collect result */
455
536
  { unsigned chunkID;
456
537
  size_t error = 0, dstPos = 0;
457
538
  for (chunkID=0; chunkID<nbChunks; chunkID++) {
458
- DEBUGLOG(3, "waiting for chunk %u ", chunkID);
539
+ DEBUGLOG(5, "waiting for chunk %u ", chunkID);
459
540
  PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
460
541
  while (mtctx->jobs[chunkID].jobCompleted==0) {
461
- DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID);
542
+ DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
462
543
  pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
463
544
  }
464
545
  pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
465
- DEBUGLOG(3, "ready to write chunk %u ", chunkID);
546
+ DEBUGLOG(5, "ready to write chunk %u ", chunkID);
466
547
 
467
548
  ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
468
549
  mtctx->jobs[chunkID].cctx = NULL;
@@ -470,20 +551,33 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
470
551
  { size_t const cSize = mtctx->jobs[chunkID].cSize;
471
552
  if (ZSTD_isError(cSize)) error = cSize;
472
553
  if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
473
- if (chunkID) { /* note : chunk 0 is already written directly into dst */
554
+ if (chunkID) { /* note : chunk 0 is written directly at dst, which is correct position */
474
555
  if (!error)
475
- memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap if chunk decompressed within dst */
476
- if (chunkID >= compressWithinDst) /* otherwise, it decompresses within dst */
556
+ memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */
557
+ if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */
558
+ DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
477
559
  ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
560
+ }
478
561
  mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
479
562
  }
480
563
  dstPos += cSize ;
481
564
  }
482
565
  }
483
- if (!error) DEBUGLOG(3, "compressed size : %u ", (U32)dstPos);
566
+ if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos);
484
567
  return error ? error : dstPos;
485
568
  }
569
+ }
570
+
486
571
 
572
+ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
573
+ void* dst, size_t dstCapacity,
574
+ const void* src, size_t srcSize,
575
+ int compressionLevel)
576
+ {
577
+ U32 const overlapRLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
578
+ ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
579
+ params.fParams.contentSizeFlag = 1;
580
+ return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapRLog);
487
581
  }
488
582
 
489
583
 
@@ -491,12 +585,14 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
491
585
  /* ======= Streaming API ======= */
492
586
  /* ====================================== */
493
587
 
494
- static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
588
+ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
589
+ {
590
+ DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
495
591
  while (zcs->doneJobID < zcs->nextJobID) {
496
592
  unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
497
593
  PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
498
594
  while (zcs->jobs[jobID].jobCompleted==0) {
499
- DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
595
+ DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
500
596
  pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
501
597
  }
502
598
  pthread_mutex_unlock(&zcs->jobCompleted_mutex);
@@ -505,33 +601,54 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
505
601
  }
506
602
 
507
603
 
508
- static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
509
- const void* dict, size_t dictSize, unsigned updateDict,
510
- ZSTD_parameters params, unsigned long long pledgedSrcSize)
604
+ /** ZSTDMT_initCStream_internal() :
605
+ * internal usage only */
606
+ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
607
+ const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
608
+ ZSTD_parameters params, unsigned long long pledgedSrcSize)
511
609
  {
512
- ZSTD_customMem const cmem = { NULL, NULL, NULL };
513
- DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog);
514
- if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize);
515
- if (zcs->allJobsCompleted == 0) { /* previous job not correctly finished */
610
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal");
611
+ /* params are supposed to be fully validated at this point */
612
+ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
613
+ assert(!((dict) && (cdict))); /* either dict or cdict, not both */
614
+
615
+ if (zcs->nbThreads==1) {
616
+ DEBUGLOG(4, "single thread mode");
617
+ return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
618
+ dict, dictSize, cdict,
619
+ params, pledgedSrcSize);
620
+ }
621
+
622
+ if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */
516
623
  ZSTDMT_waitForAllJobsCompleted(zcs);
517
624
  ZSTDMT_releaseAllJobResources(zcs);
518
625
  zcs->allJobsCompleted = 1;
519
626
  }
627
+
520
628
  zcs->params = params;
521
- if (updateDict) {
522
- ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
523
- if (dict && dictSize) {
524
- zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params.cParams, cmem);
525
- if (zcs->cdict == NULL) return ERROR(memory_allocation);
526
- } }
527
629
  zcs->frameContentSize = pledgedSrcSize;
630
+ if (dict) {
631
+ DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
632
+ ZSTD_freeCDict(zcs->cdictLocal);
633
+ zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
634
+ 0 /* byRef */, ZSTD_dm_auto, /* note : a loadPrefix becomes an internal CDict */
635
+ params.cParams, zcs->cMem);
636
+ zcs->cdict = zcs->cdictLocal;
637
+ if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
638
+ } else {
639
+ DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
640
+ ZSTD_freeCDict(zcs->cdictLocal);
641
+ zcs->cdictLocal = NULL;
642
+ zcs->cdict = cdict;
643
+ }
644
+
528
645
  zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
529
646
  DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
530
- DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
647
+ DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
531
648
  zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
532
649
  zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
533
650
  zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
534
- DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
651
+ DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
535
652
  zcs->marginSize = zcs->targetSectionSize >> 2;
536
653
  zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
537
654
  zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
@@ -546,24 +663,39 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
546
663
  return 0;
547
664
  }
548
665
 
549
- size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs,
666
+ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
550
667
  const void* dict, size_t dictSize,
551
668
  ZSTD_parameters params, unsigned long long pledgedSrcSize)
552
669
  {
553
- return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize);
670
+ DEBUGLOG(5, "ZSTDMT_initCStream_advanced");
671
+ return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize);
554
672
  }
555
673
 
674
+ size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
675
+ const ZSTD_CDict* cdict,
676
+ ZSTD_frameParameters fParams,
677
+ unsigned long long pledgedSrcSize)
678
+ {
679
+ ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
680
+ if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
681
+ params.fParams = fParams;
682
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, cdict,
683
+ params, pledgedSrcSize);
684
+ }
685
+
686
+
556
687
  /* ZSTDMT_resetCStream() :
557
688
  * pledgedSrcSize is optional and can be zero == unknown */
558
689
  size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
559
690
  {
560
- if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize);
691
+ if (zcs->nbThreads==1)
692
+ return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
561
693
  return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
562
694
  }
563
695
 
564
696
  size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
565
697
  ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
566
- return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0);
698
+ return ZSTDMT_initCStream_internal(zcs, NULL, 0, NULL, params, 0);
567
699
  }
568
700
 
569
701
 
@@ -582,13 +714,16 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
582
714
  return ERROR(memory_allocation);
583
715
  }
584
716
 
585
- DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
717
+ DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
718
+ zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
586
719
  zcs->jobs[jobID].src = zcs->inBuff.buffer;
587
720
  zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
588
721
  zcs->jobs[jobID].srcSize = srcSize;
589
- zcs->jobs[jobID].dictSize = zcs->dictSize; /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */
722
+ zcs->jobs[jobID].dictSize = zcs->dictSize;
723
+ assert(zcs->inBuff.filled >= srcSize + zcs->dictSize);
590
724
  zcs->jobs[jobID].params = zcs->params;
591
- if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */
725
+ /* do not calculate checksum within sections, but write it in header for first section */
726
+ if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
592
727
  zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
593
728
  zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
594
729
  zcs->jobs[jobID].dstBuff = dstBuffer;
@@ -603,6 +738,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
603
738
  /* get a new buffer for next input */
604
739
  if (!endFrame) {
605
740
  size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
741
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
606
742
  zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
607
743
  if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
608
744
  zcs->jobs[jobID].jobCompleted = 1;
@@ -611,22 +747,33 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
611
747
  ZSTDMT_releaseAllJobResources(zcs);
612
748
  return ERROR(memory_allocation);
613
749
  }
614
- DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
750
+ DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled);
615
751
  zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
616
- DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize));
617
- memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled);
752
+ DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src",
753
+ (U32)zcs->inBuff.filled, (U32)newDictSize,
754
+ (U32)(zcs->inBuff.filled - newDictSize));
755
+ memmove(zcs->inBuff.buffer.start,
756
+ (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
757
+ zcs->inBuff.filled);
618
758
  DEBUGLOG(5, "new inBuff pre-filled");
619
759
  zcs->dictSize = newDictSize;
620
- } else {
760
+ } else { /* if (endFrame==1) */
761
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
621
762
  zcs->inBuff.buffer = g_nullBuffer;
622
763
  zcs->inBuff.filled = 0;
623
764
  zcs->dictSize = 0;
624
765
  zcs->frameEnded = 1;
625
766
  if (zcs->nextJobID == 0)
626
- zcs->params.fParams.checksumFlag = 0; /* single chunk : checksum is calculated directly within worker thread */
767
+ /* single chunk exception : checksum is calculated directly within worker thread */
768
+ zcs->params.fParams.checksumFlag = 0;
627
769
  }
628
770
 
629
- DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask);
771
+ DEBUGLOG(4, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)",
772
+ zcs->nextJobID,
773
+ (U32)zcs->jobs[jobID].srcSize,
774
+ zcs->jobs[jobID].lastChunk,
775
+ zcs->doneJobID,
776
+ zcs->doneJobID & zcs->jobIDMask);
630
777
  POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */
631
778
  zcs->nextJobID++;
632
779
  return 0;
@@ -664,7 +811,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
664
811
  XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
665
812
  if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
666
813
  U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
667
- DEBUGLOG(4, "writing checksum : %08X \n", checksum);
814
+ DEBUGLOG(5, "writing checksum : %08X \n", checksum);
668
815
  MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
669
816
  job.cSize += 4;
670
817
  zcs->jobs[wJobID].cSize += 4;
@@ -675,7 +822,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
675
822
  zcs->jobs[wJobID].jobScanned = 1;
676
823
  }
677
824
  { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
678
- DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
825
+ DEBUGLOG(5, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
679
826
  memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
680
827
  output->pos += toWrite;
681
828
  job.dstFlushed += toWrite;
@@ -696,26 +843,81 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
696
843
  } }
697
844
 
698
845
 
699
- size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
846
+ /** ZSTDMT_compressStream_generic() :
847
+ * internal use only
848
+ * assumption : output and input are valid (pos <= size)
849
+ * @return : minimum amount of data remaining to flush, 0 if none */
850
+ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
851
+ ZSTD_outBuffer* output,
852
+ ZSTD_inBuffer* input,
853
+ ZSTD_EndDirective endOp)
700
854
  {
701
- size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize;
702
- if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Only flush is allowed. Restart with init */
703
- if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input);
855
+ size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize + mtctx->marginSize;
856
+ assert(output->pos <= output->size);
857
+ assert(input->pos <= input->size);
858
+ if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
859
+ /* current frame being ended. Only flush/end are allowed. Or start new frame with init */
860
+ return ERROR(stage_wrong);
861
+ }
862
+ if (mtctx->nbThreads==1) {
863
+ return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
864
+ }
865
+
866
+ /* single-pass shortcut (note : this is blocking-mode) */
867
+ if ( (mtctx->nextJobID==0) /* just started */
868
+ && (mtctx->inBuff.filled==0) /* nothing buffered */
869
+ && (endOp==ZSTD_e_end) /* end order */
870
+ && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough room */
871
+ size_t const cSize = ZSTDMT_compress_advanced(mtctx,
872
+ (char*)output->dst + output->pos, output->size - output->pos,
873
+ (const char*)input->src + input->pos, input->size - input->pos,
874
+ mtctx->cdict, mtctx->params, mtctx->overlapRLog);
875
+ if (ZSTD_isError(cSize)) return cSize;
876
+ input->pos = input->size;
877
+ output->pos += cSize;
878
+ ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); /* was allocated in initStream */
879
+ mtctx->allJobsCompleted = 1;
880
+ mtctx->frameEnded = 1;
881
+ return 0;
882
+ }
704
883
 
705
884
  /* fill input buffer */
706
- { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled);
707
- memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad);
885
+ if ((input->src) && (mtctx->inBuff.buffer.start)) { /* support NULL input */
886
+ size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
887
+ DEBUGLOG(2, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
888
+ memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
708
889
  input->pos += toLoad;
709
- zcs->inBuff.filled += toLoad;
890
+ mtctx->inBuff.filled += toLoad;
710
891
  }
711
892
 
712
- if ( (zcs->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
713
- && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { /* avoid overwriting job round buffer */
714
- CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) );
893
+ if ( (mtctx->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
894
+ && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { /* avoid overwriting job round buffer */
895
+ CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
715
896
  }
716
897
 
717
- /* check for data to flush */
718
- CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) ); /* block if it wasn't possible to create new job due to saturation */
898
+ /* check for potential compressed data ready to be flushed */
899
+ CHECK_F( ZSTDMT_flushNextJob(mtctx, output, (mtctx->inBuff.filled == mtctx->inBuffSize) /* blockToFlush */) ); /* block if it wasn't possible to create new job due to saturation */
900
+
901
+ if (input->pos < input->size) /* input not consumed : do not flush yet */
902
+ endOp = ZSTD_e_continue;
903
+
904
+ switch(endOp)
905
+ {
906
+ case ZSTD_e_flush:
907
+ return ZSTDMT_flushStream(mtctx, output);
908
+ case ZSTD_e_end:
909
+ return ZSTDMT_endStream(mtctx, output);
910
+ case ZSTD_e_continue:
911
+ return 1;
912
+ default:
913
+ return ERROR(GENERIC); /* invalid endDirective */
914
+ }
915
+ }
916
+
917
+
918
+ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
919
+ {
920
+ CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
719
921
 
720
922
  /* recommended next input size : fill current input buffer */
721
923
  return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
@@ -726,26 +928,28 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp
726
928
  {
727
929
  size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;
728
930
 
729
- if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
730
931
  if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
731
932
  && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
732
933
  CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
733
934
  }
734
935
 
735
936
  /* check if there is any data available to flush */
736
- DEBUGLOG(5, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID);
737
- return ZSTDMT_flushNextJob(zcs, output, 1);
937
+ return ZSTDMT_flushNextJob(zcs, output, 1 /* blockToFlush */);
738
938
  }
739
939
 
740
940
 
741
941
  size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
742
942
  {
743
- if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output);
744
- return ZSTDMT_flushStream_internal(zcs, output, 0);
943
+ DEBUGLOG(5, "ZSTDMT_flushStream");
944
+ if (zcs->nbThreads==1)
945
+ return ZSTD_flushStream(zcs->cctxPool->cctx[0], output);
946
+ return ZSTDMT_flushStream_internal(zcs, output, 0 /* endFrame */);
745
947
  }
746
948
 
747
949
  size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
748
950
  {
749
- if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output);
750
- return ZSTDMT_flushStream_internal(zcs, output, 1);
951
+ DEBUGLOG(4, "ZSTDMT_endStream");
952
+ if (zcs->nbThreads==1)
953
+ return ZSTD_endStream(zcs->cctxPool->cctx[0], output);
954
+ return ZSTDMT_flushStream_internal(zcs, output, 1 /* endFrame */);
751
955
  }