zstd-ruby 1.2.0.0 → 1.3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +7 -5
  4. data/ext/zstdruby/libzstd/common/bitstream.h +23 -9
  5. data/ext/zstdruby/libzstd/common/error_private.c +4 -1
  6. data/ext/zstdruby/libzstd/common/huf.h +20 -0
  7. data/ext/zstdruby/libzstd/common/mem.h +0 -14
  8. data/ext/zstdruby/libzstd/common/pool.c +12 -0
  9. data/ext/zstdruby/libzstd/common/pool.h +5 -0
  10. data/ext/zstdruby/libzstd/common/threading.c +0 -1
  11. data/ext/zstdruby/libzstd/common/zstd_common.c +25 -18
  12. data/ext/zstdruby/libzstd/common/zstd_errors.h +15 -7
  13. data/ext/zstdruby/libzstd/common/zstd_internal.h +59 -9
  14. data/ext/zstdruby/libzstd/compress/huf_compress.c +7 -3
  15. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1082 -487
  16. data/ext/zstdruby/libzstd/compress/zstd_opt.h +30 -15
  17. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +362 -158
  18. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +49 -13
  19. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +150 -26
  20. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +380 -258
  21. data/ext/zstdruby/libzstd/dictBuilder/cover.c +23 -37
  22. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +30 -40
  23. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +104 -95
  24. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -10
  25. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +14 -19
  26. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -12
  27. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -14
  28. data/ext/zstdruby/libzstd/zstd.h +507 -166
  29. data/lib/zstd-ruby/version.rb +1 -1
  30. metadata +2 -2
data/ext/zstdruby/libzstd/compress/zstd_opt.h

@@ -43,6 +43,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t src
     if (ssPtr->litLengthSum == 0) {
         if (srcSize <= 1024) ssPtr->staticPrices = 1;
 
+        assert(ssPtr->litFreq!=NULL);
         for (u=0; u<=MaxLit; u++)
             ssPtr->litFreq[u] = 0;
         for (u=0; u<srcSize; u++)
@@ -201,6 +202,20 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
     }
 
 
+/* function safe only for comparisons */
+MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+{
+    switch (length)
+    {
+    default :
+    case 4 : return MEM_read32(memPtr);
+    case 3 : if (MEM_isLittleEndian())
+                return MEM_read32(memPtr)<<8;
+             else
+                return MEM_read32(memPtr)>>8;
+    }
+}
+
 
 /* Update hashTable3 up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
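
For context on the hunk above: ZSTD_readMINMATCH takes over from the old MEM_readMINMATCH (dropped from mem.h in this release) and, as its comment says, its result is only safe for equality comparisons. The endianness-dependent shift exists because the don't-care fourth byte lands at opposite ends of the loaded 32-bit word. A minimal standalone sketch of the same trick, with hypothetical read32/isLittleEndian helpers standing in for MEM_read32/MEM_isLittleEndian:

    #include <stdint.h>
    #include <string.h>

    /* hypothetical stand-ins for MEM_read32 / MEM_isLittleEndian */
    static uint32_t read32(const void* p) { uint32_t v; memcpy(&v, p, sizeof v); return v; }
    static int isLittleEndian(void)
    { const union { uint32_t u; uint8_t c[4]; } one = { 1 }; return one.c[0]; }

    /* Compare a 3-byte prefix via one 32-bit load. The 4th loaded byte is the
     * high byte of the word on little-endian (shift left to discard it) and
     * the low byte on big-endian (shift right). The value is only meaningful
     * for == tests, and the caller must guarantee a readable 4th byte, as the
     * zstd code does by table construction. */
    static uint32_t readMinMatch3(const void* p)
    {
        return isLittleEndian() ? read32(p) << 8 : read32(p) >> 8;
    }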
@@ -234,12 +249,12 @@ static U32 ZSTD_insertBtAndGetAllMatches (
 {
     const BYTE* const base = zc->base;
     const U32 current = (U32)(ip-base);
-    const U32 hashLog = zc->params.cParams.hashLog;
+    const U32 hashLog = zc->appliedParams.cParams.hashLog;
     const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
     U32* const hashTable = zc->hashTable;
     U32 matchIndex  = hashTable[h];
     U32* const bt   = zc->chainTable;
-    const U32 btLog = zc->params.cParams.chainLog - 1;
+    const U32 btLog = zc->appliedParams.cParams.chainLog - 1;
     const U32 btMask= (1U << btLog) - 1;
     size_t commonLengthSmaller=0, commonLengthLarger=0;
     const BYTE* const dictBase = zc->dictBase;
@@ -267,7 +282,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
             if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
         } else {
             match = dictBase + matchIndex3;
-            if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH))   /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
+            if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH))   /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
                 currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
         }
 
@@ -410,10 +425,10 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
     const BYTE* const base = ctx->base;
     const BYTE* const prefixStart = base + ctx->dictLimit;
 
-    const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-    const U32 sufficient_len = ctx->params.cParams.targetLength;
-    const U32 mls = ctx->params.cParams.searchLength;
-    const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
+    const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
+    const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
+    const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
 
     ZSTD_optimal_t* opt = seqStorePtr->priceTable;
     ZSTD_match_t* matches = seqStorePtr->matchTable;
@@ -439,7 +454,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
         for (i=(ip == anchor); i<last_i; i++) {
             const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
             if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
-                && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
+                && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
                 mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
                 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
                     best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
@@ -524,7 +539,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
             for (i=(opt[cur].mlen != 1); i<last_i; i++) {  /* check rep */
                 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
                 if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
-                    && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
+                    && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
                     mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
 
                     if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
@@ -663,10 +678,10 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictBase = ctx->dictBase;
     const BYTE* const dictEnd  = dictBase + dictLimit;
 
-    const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-    const U32 sufficient_len = ctx->params.cParams.targetLength;
-    const U32 mls = ctx->params.cParams.searchLength;
-    const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
+    const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
+    const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
+    const U32 mls = ctx->appliedParams.cParams.searchLength;
+    const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
 
     ZSTD_optimal_t* opt = seqStorePtr->priceTable;
     ZSTD_match_t* matches = seqStorePtr->matchTable;
@@ -698,7 +713,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
             const BYTE* const repMatch = repBase + repIndex;
             if ( (repCur > 0 && repCur <= (S32)current)
                && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
-               && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
+               && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                 mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
@@ -794,7 +809,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                 const BYTE* const repMatch = repBase + repIndex;
                 if ( (repCur > 0 && repCur <= (S32)(current+cur))
                   && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
-                  && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
+                  && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                     mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
data/ext/zstdruby/libzstd/compress/zstdmt_compress.c

@@ -14,34 +14,31 @@
 
 /* ======   Compiler specifics   ====== */
 #if defined(_MSC_VER)
-#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
+#  pragma warning(disable : 4204)   /* disable: C4204: non-constant aggregate initializer */
 #endif
 
 
 /* ======   Dependencies   ====== */
-#include <stdlib.h>   /* malloc */
-#include <string.h>   /* memcpy */
-#include "pool.h"     /* threadpool */
-#include "threading.h"   /* mutex */
-#include "zstd_internal.h"   /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include <string.h>      /* memcpy, memset */
+#include "pool.h"        /* threadpool */
+#include "threading.h"   /* mutex */
+#include "zstd_internal.h"   /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
 #include "zstdmt_compress.h"
 
 
 /* ======   Debug   ====== */
-#if 0
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
 
 #  include <stdio.h>
 #  include <unistd.h>
 #  include <sys/times.h>
-   static unsigned g_debugLevel = 5;
-#  define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
-#  define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
+#  define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
 
-#  define DEBUG_PRINTHEX(l,p,n) {           \
-    unsigned debug_u;                       \
-    for (debug_u=0; debug_u<(n); debug_u++) \
+#  define DEBUG_PRINTHEX(l,p,n) {            \
+    unsigned debug_u;                        \
+    for (debug_u=0; debug_u<(n); debug_u++)  \
        DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
-    DEBUGLOGRAW(l, " \n");                  \
+    DEBUGLOGRAW(l, " \n");                   \
 }
 
 static unsigned long long GetCurrentClockTimeMicroseconds(void)
@@ -53,22 +50,22 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
         return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
 }
 
-#define MUTEX_WAIT_TIME_DLEVEL 5
-#define PTHREAD_MUTEX_LOCK(mutex) \
-if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
-    unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
-    pthread_mutex_lock(mutex); \
-    { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
-      unsigned long long const elapsedTime = (afterTime-beforeTime); \
-      if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
-        DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
-           elapsedTime, #mutex); \
-    } } \
-} else pthread_mutex_lock(mutex);
+#define MUTEX_WAIT_TIME_DLEVEL 6
+#define PTHREAD_MUTEX_LOCK(mutex) { \
+    if (ZSTD_DEBUG>=MUTEX_WAIT_TIME_DLEVEL) { \
+        unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+        pthread_mutex_lock(mutex); \
+        { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+          unsigned long long const elapsedTime = (afterTime-beforeTime); \
+          if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
+            DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
+               elapsedTime, #mutex); \
+        } } \
+    } else pthread_mutex_lock(mutex); \
+}
 
 #else
 
-#  define DEBUGLOG(l, ...) {}    /* disabled */
 #  define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
 #  define DEBUG_PRINTHEX(l,p,n) {}
 
@@ -87,16 +84,19 @@ static const buffer_t g_nullBuffer = { NULL, 0 };
 typedef struct ZSTDMT_bufferPool_s {
     unsigned totalBuffers;
     unsigned nbBuffers;
+    ZSTD_customMem cMem;
     buffer_t bTable[1];   /* variable size */
 } ZSTDMT_bufferPool;
 
-static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
 {
     unsigned const maxNbBuffers = 2*nbThreads + 2;
-    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t));
+    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
+        sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
     if (bufPool==NULL) return NULL;
     bufPool->totalBuffers = maxNbBuffers;
     bufPool->nbBuffers = 0;
+    bufPool->cMem = cMem;
     return bufPool;
 }
 
@@ -105,23 +105,39 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
     unsigned u;
     if (!bufPool) return;   /* compatibility with free on NULL */
     for (u=0; u<bufPool->totalBuffers; u++)
-        free(bufPool->bTable[u].start);
-    free(bufPool);
+        ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
+    ZSTD_free(bufPool, bufPool->cMem);
+}
+
+/* only works at initialization, not during compression */
+static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    size_t const poolSize = sizeof(*bufPool)
+                            + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+    unsigned u;
+    size_t totalBufferSize = 0;
+    for (u=0; u<bufPool->totalBuffers; u++)
+        totalBufferSize += bufPool->bTable[u].size;
+
+    return poolSize + totalBufferSize;
 }
 
-/* assumption : invocation from main thread only ! */
+/** ZSTDMT_getBuffer() :
+ *  assumption : invocation from main thread only ! */
 static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
 {
     if (pool->nbBuffers) {   /* try to use an existing buffer */
         buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
         size_t const availBufferSize = buf.size;
-        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))   /* large enough, but not too much */
+        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
+            /* large enough, but not too much */
             return buf;
-        free(buf.start);   /* size conditions not respected : scratch this buffer and create a new one */
+        /* size conditions not respected : scratch this buffer, create new one */
+        ZSTD_free(buf.start, pool->cMem);
     }
     /* create new buffer */
     {   buffer_t buffer;
-        void* const start = malloc(bSize);
+        void* const start = ZSTD_malloc(bSize, pool->cMem);
         if (start==NULL) bSize = 0;
         buffer.start = start;   /* note : start can be NULL if malloc fails ! */
         buffer.size = bSize;
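
The reuse condition in ZSTDMT_getBuffer above is worth spelling out: a pooled buffer is recycled only when it is large enough for the request yet wastes at most 10x the requested size; anything else is freed and reallocated. An illustrative restatement in isolation (not part of the library):

    /* illustrative restatement of the ZSTDMT_getBuffer reuse test */
    static int bufferReusable(size_t availBufferSize, size_t bSize)
    {
        /* large enough, but not too much : avoids pinning a huge buffer
         * on a small request while still amortizing allocations */
        return (availBufferSize >= bSize) & (availBufferSize <= 10*bSize);
    }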
@@ -138,7 +154,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
         return;
     }
     /* Reached bufferPool capacity (should not happen) */
-    free(buf.start);
+    ZSTD_free(buf.start, pool->cMem);
 }
 
 
@@ -147,6 +163,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
 typedef struct {
     unsigned totalCCtx;
     unsigned availCCtx;
+    ZSTD_customMem cMem;
     ZSTD_CCtx* cctx[1];   /* variable size */
 } ZSTDMT_CCtxPool;
 
@@ -158,23 +175,40 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
     unsigned u;
     for (u=0; u<pool->totalCCtx; u++)
         ZSTD_freeCCtx(pool->cctx[u]);   /* note : compatible with free on NULL */
-    free(pool);
+    ZSTD_free(pool, pool->cMem);
 }
 
 /* ZSTDMT_createCCtxPool() :
  * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
-static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads)
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
+                                              ZSTD_customMem cMem)
 {
-    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*));
+    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
+        sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
     if (!cctxPool) return NULL;
+    cctxPool->cMem = cMem;
     cctxPool->totalCCtx = nbThreads;
     cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
-    cctxPool->cctx[0] = ZSTD_createCCtx();
+    cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
     if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
-    DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads);
+    DEBUGLOG(3, "cctxPool created, with %u threads", nbThreads);
     return cctxPool;
 }
 
+/* only works during initialization phase, not during compression */
+static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
+{
+    unsigned const nbThreads = cctxPool->totalCCtx;
+    size_t const poolSize = sizeof(*cctxPool)
+                            + (nbThreads-1)*sizeof(ZSTD_CCtx*);
+    unsigned u;
+    size_t totalCCtxSize = 0;
+    for (u=0; u<nbThreads; u++)
+        totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+
+    return poolSize + totalCCtxSize;
+}
+
 static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
 {
     if (pool->availCCtx) {
@@ -218,7 +252,7 @@ typedef struct {
     pthread_mutex_t* jobCompleted_mutex;
     pthread_cond_t* jobCompleted_cond;
     ZSTD_parameters params;
-    ZSTD_CDict* cdict;
+    const ZSTD_CDict* cdict;
     unsigned long long fullFrameSize;
 } ZSTDMT_jobDescription;
 
@@ -228,11 +262,11 @@ void ZSTDMT_compressChunk(void* jobDescription)
     ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
     const void* const src = (const char*)job->srcStart + job->dictSize;
     buffer_t const dstBuff = job->dstBuff;
-    DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
+    DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
                 job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
     if (job->cdict) {  /* should only happen for first segment */
         size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
-        if (job->cdict) DEBUGLOG(3, "using CDict ");
+        DEBUGLOG(5, "using CDict");
         if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
     } else {  /* srcStart points at reloaded section */
         if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0;  /* ensure no srcSize control */
@@ -247,12 +281,12 @@ void ZSTDMT_compressChunk(void* jobDescription)
         ZSTD_invalidateRepCodes(job->cctx);
     }
 
-    DEBUGLOG(4, "Compressing : ");
+    DEBUGLOG(5, "Compressing : ");
     DEBUG_PRINTHEX(4, job->srcStart, 12);
     job->cSize = (job->lastChunk) ?
                  ZSTD_compressEnd     (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
                  ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
-    DEBUGLOG(3, "compressed %u bytes into %u bytes   (first:%u) (last:%u)",
+    DEBUGLOG(5, "compressed %u bytes into %u bytes   (first:%u) (last:%u)",
                 (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
     DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
 
@@ -271,6 +305,7 @@ _endJob:
 
 struct ZSTDMT_CCtx_s {
     POOL_ctx* factory;
+    ZSTDMT_jobDescription* jobs;
     ZSTDMT_bufferPool* buffPool;
     ZSTDMT_CCtxPool* cctxPool;
     pthread_mutex_t jobCompleted_mutex;
@@ -292,50 +327,64 @@ struct ZSTDMT_CCtx_s {
     unsigned overlapRLog;
     unsigned long long frameContentSize;
     size_t sectionSize;
-    ZSTD_CDict* cdict;
-    ZSTD_CStream* cstream;
-    ZSTDMT_jobDescription jobs[1];   /* variable size (must lies at the end) */
+    ZSTD_customMem cMem;
+    ZSTD_CDict* cdictLocal;
+    const ZSTD_CDict* cdict;
 };
 
-ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads)
+static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
 {
-    ZSTDMT_CCtx* cctx;
-    U32 const minNbJobs = nbThreads + 2;
-    U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
+    U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
     U32 const nbJobs = 1 << nbJobsLog2;
-    DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ;  nbJobsLog2 : %u ; nbJobs : %u  \n",
-            nbThreads, minNbJobs, nbJobsLog2, nbJobs);
+    *nbJobsPtr = nbJobs;
+    return (ZSTDMT_jobDescription*) ZSTD_calloc(
+        nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+}
+
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
+{
+    ZSTDMT_CCtx* mtctx;
+    U32 nbJobs = nbThreads + 2;
+    DEBUGLOG(3, "ZSTDMT_createCCtx_advanced");
+
     if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
-    cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription));
-    if (!cctx) return NULL;
-    cctx->nbThreads = nbThreads;
-    cctx->jobIDMask = nbJobs - 1;
-    cctx->allJobsCompleted = 1;
-    cctx->sectionSize = 0;
-    cctx->overlapRLog = 3;
-    cctx->factory = POOL_create(nbThreads, 1);
-    cctx->buffPool = ZSTDMT_createBufferPool(nbThreads);
-    cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads);
-    if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) {  /* one object was not created */
-        ZSTDMT_freeCCtx(cctx);
+    if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
+        /* invalid custom allocator */
+        return NULL;
+
+    mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
+    if (!mtctx) return NULL;
+    mtctx->cMem = cMem;
+    mtctx->nbThreads = nbThreads;
+    mtctx->allJobsCompleted = 1;
+    mtctx->sectionSize = 0;
+    mtctx->overlapRLog = 3;
+    mtctx->factory = POOL_create(nbThreads, 1);
+    mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
+    mtctx->jobIDMask = nbJobs - 1;
+    mtctx->buffPool = ZSTDMT_createBufferPool(nbThreads, cMem);
+    mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
+    if (!mtctx->factory | !mtctx->jobs | !mtctx->buffPool | !mtctx->cctxPool) {
+        ZSTDMT_freeCCtx(mtctx);
         return NULL;
     }
-    if (nbThreads==1) {
-        cctx->cstream = ZSTD_createCStream();
-        if (!cctx->cstream) {
-            ZSTDMT_freeCCtx(cctx); return NULL;
-    }   }
-    pthread_mutex_init(&cctx->jobCompleted_mutex, NULL);   /* Todo : check init function return */
-    pthread_cond_init(&cctx->jobCompleted_cond, NULL);
-    DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads);
-    return cctx;
+    pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL);   /* Todo : check init function return */
+    pthread_cond_init(&mtctx->jobCompleted_cond, NULL);
+    DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
+    return mtctx;
+}
+
+ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads)
+{
+    return ZSTDMT_createCCtx_advanced(nbThreads, ZSTD_defaultCMem);
 }
 
 /* ZSTDMT_releaseAllJobResources() :
- * Ensure all workers are killed first. */
+ * note : ensure all workers are killed first ! */
 static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
 {
     unsigned jobID;
+    DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
     for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
         ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
         mtctx->jobs[jobID].dstBuff = g_nullBuffer;
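
The new *_advanced entry point threads a ZSTD_customMem through every allocation (the ZSTD_calloc/ZSTD_malloc/ZSTD_free calls above). A minimal usage sketch, assuming the static-linking-only declarations of ZSTD_customMem, ZSTDMT_createCCtx_advanced and ZSTDMT_sizeof_CCtx are visible from zstd.h / zstdmt_compress.h; traceAlloc/traceFree are hypothetical helpers, not part of the library:

    #include <stdio.h>
    #include <stdlib.h>
    #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_customMem lives in the experimental section */
    #include "zstd.h"
    #include "zstdmt_compress.h"

    static void* traceAlloc(void* opaque, size_t size)
    {   /* count requested bytes, then defer to malloc */
        *(size_t*)opaque += size;
        return malloc(size);
    }
    static void traceFree(void* opaque, void* address)
    {   (void)opaque; free(address); }

    int main(void)
    {
        size_t requested = 0;
        ZSTD_customMem const cMem = { traceAlloc, traceFree, &requested };
        ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx_advanced(2 /* nbThreads */, cMem);
        if (mtctx == NULL) return 1;
        /* ZSTDMT_sizeof_CCtx (added in this version) is valid at init time */
        printf("bytes requested : %zu ; ZSTDMT_sizeof_CCtx : %zu\n",
               requested, ZSTDMT_sizeof_CCtx(mtctx));
        ZSTDMT_freeCCtx(mtctx);
        return 0;
    }

Note the guard in ZSTDMT_createCCtx_advanced: supplying only one of customAlloc/customFree is rejected as an invalid allocator.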
@@ -356,15 +405,26 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
     POOL_free(mtctx->factory);
     if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx);   /* stop workers first */
     ZSTDMT_freeBufferPool(mtctx->buffPool);   /* release job resources into pools first */
+    ZSTD_free(mtctx->jobs, mtctx->cMem);
     ZSTDMT_freeCCtxPool(mtctx->cctxPool);
-    ZSTD_freeCDict(mtctx->cdict);
-    ZSTD_freeCStream(mtctx->cstream);
+    ZSTD_freeCDict(mtctx->cdictLocal);
     pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
     pthread_cond_destroy(&mtctx->jobCompleted_cond);
-    free(mtctx);
+    ZSTD_free(mtctx, mtctx->cMem);
     return 0;
 }
 
+size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
+{
+    if (mtctx == NULL) return 0;   /* supports sizeof NULL */
+    return sizeof(*mtctx)
+            + POOL_sizeof(mtctx->factory)
+            + ZSTDMT_sizeof_bufferPool(mtctx->buffPool)
+            + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
+            + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
+            + ZSTD_sizeof_CDict(mtctx->cdictLocal);
+}
+
 size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
 {
     switch(parameter)
@@ -373,7 +433,7 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
         mtctx->sectionSize = value;
         return 0;
     case ZSTDMT_p_overlapSectionLog :
-        DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
+        DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value);
         mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
         return 0;
     default :
@@ -386,31 +446,49 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
 /* =====   Multi-threaded compression   ===== */
 /* ------------------------------------------ */
 
-size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
+static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
+    size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
+    size_t const chunkMaxSize = chunkSizeTarget << 2;
+    size_t const passSizeMax = chunkMaxSize * nbThreads;
+    unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
+    unsigned const nbChunksLarge = multiplier * nbThreads;
+    unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
+    unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
+    return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
+}
+
+
+size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
-                               int compressionLevel)
+                               const ZSTD_CDict* cdict,
+                               ZSTD_parameters const params,
+                               unsigned overlapRLog)
 {
-    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
-    U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
-    size_t const overlapSize = (size_t)1 << (params.cParams.windowLog - overlapLog);
-    size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);
-    unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + 1;
-    unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
+    size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
+    unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads);
     size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
-    size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
-    size_t remainingSrcSize = srcSize;
+    size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
     const char* const srcStart = (const char*)src;
+    size_t remainingSrcSize = srcSize;
     unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize));  /* presumes avgChunkSize >= 256 KB, which should be the case */
     size_t frameStartPos = 0, dstBufferPos = 0;
 
-    DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes  ", params.cParams.windowLog, (U32)chunkTargetSize);
-    DEBUGLOG(2, "nbChunks  : %2u   (chunkSize : %u bytes)   ", nbChunks, (U32)avgChunkSize);
-    params.fParams.contentSizeFlag = 1;
-
+    DEBUGLOG(4, "nbChunks  : %2u   (chunkSize : %u bytes)   ", nbChunks, (U32)avgChunkSize);
     if (nbChunks==1) {   /* fallback to single-thread mode */
         ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
-        return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
+        if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams);
+        return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
+    }
+    assert(avgChunkSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is useful to avoid allocating extra buffers */
+
+    if (nbChunks > mtctx->jobIDMask+1) {  /* enlarge job table */
+        U32 nbJobs = nbChunks;
+        ZSTD_free(mtctx->jobs, mtctx->cMem);
+        mtctx->jobIDMask = 0;
+        mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
+        if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+        mtctx->jobIDMask = nbJobs - 1;
     }
 
     {   unsigned u;
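
The new computeNbChunks targets chunks of 4x the window size, capped at 16x, and for large inputs rounds the count up in whole passes of nbThreads. A worked example with hypothetical numbers (windowLog 22, 4 threads, 1 GiB input):

    #include <stdio.h>

    int main(void)
    {
        size_t const srcSize         = (size_t)1 << 30;         /* 1 GiB input */
        size_t const chunkSizeTarget = (size_t)1 << (22 + 2);   /* 16 MiB : 4x window */
        size_t const chunkMaxSize    = chunkSizeTarget << 2;    /* 64 MiB cap : 16x window */
        size_t const passSizeMax     = chunkMaxSize * 4;        /* 256 MiB per pass of 4 threads */
        unsigned const multiplier    = (unsigned)(srcSize / passSizeMax) + 1;   /* = 5 */
        printf("nbChunks = %u\n", multiplier * 4);   /* 20 chunks of ~51 MiB, under the cap */
        return 0;
    }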
@@ -425,15 +503,18 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
             if ((cctx==NULL) || (dstBuffer.start==NULL)) {
                 mtctx->jobs[u].cSize = ERROR(memory_allocation);   /* job result */
                 mtctx->jobs[u].jobCompleted = 1;
-                nbChunks = u+1;
+                nbChunks = u+1;   /* only wait and free u jobs, instead of initially expected nbChunks ones */
                 break;   /* let's wait for previous jobs to complete, but don't start new ones */
             }
 
             mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
             mtctx->jobs[u].dictSize = dictSize;
             mtctx->jobs[u].srcSize = chunkSize;
+            mtctx->jobs[u].cdict = mtctx->nextJobID==0 ? cdict : NULL;
             mtctx->jobs[u].fullFrameSize = srcSize;
             mtctx->jobs[u].params = params;
+            /* do not calculate checksum within sections, but write it in header for first section */
+            if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
             mtctx->jobs[u].dstBuff = dstBuffer;
             mtctx->jobs[u].cctx = cctx;
             mtctx->jobs[u].firstChunk = (u==0);
@@ -442,27 +523,27 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
             mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
             mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
 
-            DEBUGLOG(3, "posting job %u   (%u bytes)", u, (U32)chunkSize);
-            DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12);
+            DEBUGLOG(5, "posting job %u   (%u bytes)", u, (U32)chunkSize);
+            DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
             POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
 
             frameStartPos += chunkSize;
             dstBufferPos += dstBufferCapacity;
             remainingSrcSize -= chunkSize;
     }   }
-    /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */
 
+    /* collect result */
     {   unsigned chunkID;
         size_t error = 0, dstPos = 0;
         for (chunkID=0; chunkID<nbChunks; chunkID++) {
-            DEBUGLOG(3, "waiting for chunk %u ", chunkID);
+            DEBUGLOG(5, "waiting for chunk %u ", chunkID);
             PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
             while (mtctx->jobs[chunkID].jobCompleted==0) {
-                DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID);
+                DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
                 pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
             }
             pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
-            DEBUGLOG(3, "ready to write chunk %u ", chunkID);
+            DEBUGLOG(5, "ready to write chunk %u ", chunkID);
 
             ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
             mtctx->jobs[chunkID].cctx = NULL;
@@ -470,20 +551,33 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
             {   size_t const cSize = mtctx->jobs[chunkID].cSize;
                 if (ZSTD_isError(cSize)) error = cSize;
                 if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
-                if (chunkID) {   /* note : chunk 0 is already written directly into dst */
+                if (chunkID) {   /* note : chunk 0 is written directly at dst, which is correct position */
                     if (!error)
-                        memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);   /* may overlap if chunk decompressed within dst */
-                    if (chunkID >= compressWithinDst)   /* otherwise, it decompresses within dst */
+                        memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);   /* may overlap when chunk compressed within dst */
+                    if (chunkID >= compressWithinDst) {  /* chunk compressed into its own buffer, which must be released */
+                        DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
                         ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
+                    }
                     mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
                 }
                 dstPos += cSize ;
             }
         }
-        if (!error) DEBUGLOG(3, "compressed size : %u  ", (U32)dstPos);
+        if (!error) DEBUGLOG(4, "compressed size : %u  ", (U32)dstPos);
         return error ? error : dstPos;
     }
+}
+
 
+size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
+                           void* dst, size_t dstCapacity,
+                           const void* src, size_t srcSize,
+                           int compressionLevel)
+{
+    U32 const overlapRLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
+    params.fParams.contentSizeFlag = 1;
+    return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapRLog);
 }
 
 
@@ -491,12 +585,14 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
 /* =======      Streaming API     ======= */
 /* ====================================== */
 
-static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
+static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
+{
+    DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
     while (zcs->doneJobID < zcs->nextJobID) {
         unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
         PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
         while (zcs->jobs[jobID].jobCompleted==0) {
-            DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID);   /* we want to block when waiting for data to flush */
+            DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID);   /* we want to block when waiting for data to flush */
             pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
         }
         pthread_mutex_unlock(&zcs->jobCompleted_mutex);
@@ -505,33 +601,54 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
 }
 
 
-static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
-                            const void* dict, size_t dictSize, unsigned updateDict,
-                            ZSTD_parameters params, unsigned long long pledgedSrcSize)
+/** ZSTDMT_initCStream_internal() :
+ *  internal usage only */
+size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
 {
-    ZSTD_customMem const cmem = { NULL, NULL, NULL };
-    DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog);
-    if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize);
-    if (zcs->allJobsCompleted == 0) {   /* previous job not correctly finished */
+    DEBUGLOG(4, "ZSTDMT_initCStream_internal");
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    if (zcs->nbThreads==1) {
+        DEBUGLOG(4, "single thread mode");
+        return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
+                                         dict, dictSize, cdict,
+                                         params, pledgedSrcSize);
+    }
+
+    if (zcs->allJobsCompleted == 0) {   /* previous compression not correctly finished */
         ZSTDMT_waitForAllJobsCompleted(zcs);
         ZSTDMT_releaseAllJobResources(zcs);
         zcs->allJobsCompleted = 1;
     }
+
     zcs->params = params;
-    if (updateDict) {
-        ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
-        if (dict && dictSize) {
-            zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params.cParams, cmem);
-            if (zcs->cdict == NULL) return ERROR(memory_allocation);
-    }   }
     zcs->frameContentSize = pledgedSrcSize;
+    if (dict) {
+        DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
+        ZSTD_freeCDict(zcs->cdictLocal);
+        zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+                                    0 /* byRef */, ZSTD_dm_auto,   /* note : a loadPrefix becomes an internal CDict */
+                                    params.cParams, zcs->cMem);
+        zcs->cdict = zcs->cdictLocal;
+        if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+    } else {
+        DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
+        ZSTD_freeCDict(zcs->cdictLocal);
+        zcs->cdictLocal = NULL;
+        zcs->cdict = cdict;
+    }
+
     zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
     DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
-    DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
+    DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
     zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
     zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
     zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
-    DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
+    DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
     zcs->marginSize = zcs->targetSectionSize >> 2;
     zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
     zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
@@ -546,24 +663,39 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
     return 0;
 }
 
-size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs,
+size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
 {
-    return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize);
+    DEBUGLOG(5, "ZSTDMT_initCStream_advanced");
+    return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize);
 }
 
+size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
+                                     const ZSTD_CDict* cdict,
+                                     ZSTD_frameParameters fParams,
+                                     unsigned long long pledgedSrcSize)
+{
+    ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
+    if (cdict==NULL) return ERROR(dictionary_wrong);   /* method incompatible with NULL cdict */
+    params.fParams = fParams;
+    return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, cdict,
+                                       params, pledgedSrcSize);
+}
+
+
 /* ZSTDMT_resetCStream() :
  * pledgedSrcSize is optional and can be zero == unknown */
 size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
 {
-    if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize);
+    if (zcs->nbThreads==1)
+        return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
     return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
 }
 
 size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
     ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
-    return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0);
+    return ZSTDMT_initCStream_internal(zcs, NULL, 0, NULL, params, 0);
 }
 
 
@@ -582,13 +714,16 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
         return ERROR(memory_allocation);
     }
 
-    DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
+    DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
+                zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
     zcs->jobs[jobID].src = zcs->inBuff.buffer;
     zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
     zcs->jobs[jobID].srcSize = srcSize;
-    zcs->jobs[jobID].dictSize = zcs->dictSize;   /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */
+    zcs->jobs[jobID].dictSize = zcs->dictSize;
+    assert(zcs->inBuff.filled >= srcSize + zcs->dictSize);
     zcs->jobs[jobID].params = zcs->params;
-    if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;   /* do not calculate checksum within sections, just keep it in header for first section */
+    /* do not calculate checksum within sections, but write it in header for first section */
+    if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
     zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
     zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
     zcs->jobs[jobID].dstBuff = dstBuffer;
@@ -603,6 +738,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
     /* get a new buffer for next input */
     if (!endFrame) {
         size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
+        DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
         zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
         if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
             zcs->jobs[jobID].jobCompleted = 1;
@@ -611,22 +747,33 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
             ZSTDMT_releaseAllJobResources(zcs);
             return ERROR(memory_allocation);
         }
-        DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
+        DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled);
         zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
-        DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize));
-        memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled);
+        DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src",
+                    (U32)zcs->inBuff.filled, (U32)newDictSize,
+                    (U32)(zcs->inBuff.filled - newDictSize));
+        memmove(zcs->inBuff.buffer.start,
+                (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
+                zcs->inBuff.filled);
         DEBUGLOG(5, "new inBuff pre-filled");
         zcs->dictSize = newDictSize;
-    } else {
+    } else {   /* if (endFrame==1) */
+        DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
         zcs->inBuff.buffer = g_nullBuffer;
         zcs->inBuff.filled = 0;
         zcs->dictSize = 0;
         zcs->frameEnded = 1;
         if (zcs->nextJobID == 0)
-            zcs->params.fParams.checksumFlag = 0;   /* single chunk : checksum is calculated directly within worker thread */
+            /* single chunk exception : checksum is calculated directly within worker thread */
+            zcs->params.fParams.checksumFlag = 0;
     }
 
-    DEBUGLOG(3, "posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask);
+    DEBUGLOG(4, "posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)",
+                zcs->nextJobID,
+                (U32)zcs->jobs[jobID].srcSize,
+                zcs->jobs[jobID].lastChunk,
+                zcs->doneJobID,
+                zcs->doneJobID & zcs->jobIDMask);
     POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]);   /* this call is blocking when thread worker pool is exhausted */
     zcs->nextJobID++;
     return 0;
@@ -664,7 +811,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
             XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
             if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) {   /* write checksum at end of last section */
                 U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
-                DEBUGLOG(4, "writing checksum : %08X \n", checksum);
+                DEBUGLOG(5, "writing checksum : %08X \n", checksum);
                 MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
                 job.cSize += 4;
                 zcs->jobs[wJobID].cSize += 4;
@@ -675,7 +822,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
             zcs->jobs[wJobID].jobScanned = 1;
         }
         {   size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
-            DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
+            DEBUGLOG(5, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
             memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
             output->pos += toWrite;
             job.dstFlushed += toWrite;
@@ -696,26 +843,81 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
     }   }
 
 
-size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+/** ZSTDMT_compressStream_generic() :
+ *  internal use only
+ *  assumption : output and input are valid (pos <= size)
+ * @return : minimum amount of data remaining to flush, 0 if none */
+size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+                                     ZSTD_outBuffer* output,
+                                     ZSTD_inBuffer* input,
+                                     ZSTD_EndDirective endOp)
 {
-    size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize;
-    if (zcs->frameEnded) return ERROR(stage_wrong);   /* current frame being ended. Only flush is allowed. Restart with init */
-    if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input);
+    size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize + mtctx->marginSize;
+    assert(output->pos <= output->size);
+    assert(input->pos  <= input->size);
+    if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
+        /* current frame being ended. Only flush/end are allowed. Or start new frame with init */
+        return ERROR(stage_wrong);
+    }
+    if (mtctx->nbThreads==1) {
+        return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
+    }
+
+    /* single-pass shortcut (note : this is blocking-mode) */
+    if ( (mtctx->nextJobID==0)      /* just started */
+      && (mtctx->inBuff.filled==0)  /* nothing buffered */
+      && (endOp==ZSTD_e_end)        /* end order */
+      && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) {  /* enough room */
+        size_t const cSize = ZSTDMT_compress_advanced(mtctx,
+                (char*)output->dst + output->pos, output->size - output->pos,
+                (const char*)input->src + input->pos, input->size - input->pos,
+                mtctx->cdict, mtctx->params, mtctx->overlapRLog);
+        if (ZSTD_isError(cSize)) return cSize;
+        input->pos = input->size;
+        output->pos += cSize;
+        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);  /* was allocated in initStream */
+        mtctx->allJobsCompleted = 1;
+        mtctx->frameEnded = 1;
+        return 0;
+    }
 
     /* fill input buffer */
-    {   size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled);
-        memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad);
+    if ((input->src) && (mtctx->inBuff.buffer.start)) {   /* support NULL input */
+        size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
+        DEBUGLOG(2, "inBuff:%08X;  inBuffSize=%u;  ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
+        memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
         input->pos += toLoad;
-        zcs->inBuff.filled += toLoad;
+        mtctx->inBuff.filled += toLoad;
     }
 
-    if ( (zcs->inBuff.filled >= newJobThreshold)   /* filled enough : let's compress */
-        && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {   /* avoid overwriting job round buffer */
-        CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) );
+    if ( (mtctx->inBuff.filled >= newJobThreshold)   /* filled enough : let's compress */
+      && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {   /* avoid overwriting job round buffer */
+        CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
     }
 
-    /* check for data to flush */
-    CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) );   /* block if it wasn't possible to create new job due to saturation */
+    /* check for potential compressed data ready to be flushed */
+    CHECK_F( ZSTDMT_flushNextJob(mtctx, output, (mtctx->inBuff.filled == mtctx->inBuffSize) /* blockToFlush */) );   /* block if it wasn't possible to create new job due to saturation */
+
+    if (input->pos < input->size)  /* input not consumed : do not flush yet */
+        endOp = ZSTD_e_continue;
+
+    switch(endOp)
+    {
+        case ZSTD_e_flush:
+            return ZSTDMT_flushStream(mtctx, output);
+        case ZSTD_e_end:
+            return ZSTDMT_endStream(mtctx, output);
+        case ZSTD_e_continue:
+            return 1;
+        default:
+            return ERROR(GENERIC);   /* invalid endDirective */
+    }
+}
+
+
+size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
 
     /* recommended next input size : fill current input buffer */
     return zcs->inBuffSize - zcs->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
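
ZSTDMT_compressStream_generic folds the old compressStream/flushStream/endStream trio behind a single ZSTD_EndDirective. A minimal in-memory driver, sketched under the assumption that mtctx was set up with ZSTDMT_createCCtx and ZSTDMT_initCStream, and that the caller owns inBuf/outBuf; compressAll is a hypothetical helper, not a library function:

    /* minimal sketch : compress inBuf into outBuf in one logical pass */
    static size_t compressAll(ZSTDMT_CCtx* mtctx,
                              void* outBuf, size_t outCapacity,
                              const void* inBuf, size_t inSize)
    {
        ZSTD_inBuffer  input  = { inBuf,  inSize,      0 };
        ZSTD_outBuffer output = { outBuf, outCapacity, 0 };
        size_t remaining;
        do {   /* ZSTD_e_end : consume all input, flush everything, close the frame */
            remaining = ZSTDMT_compressStream_generic(mtctx, &output, &input, ZSTD_e_end);
            if (ZSTD_isError(remaining)) return remaining;   /* error code */
        } while (remaining != 0);   /* 0 means frame fully flushed */
        return output.pos;   /* compressed size */
    }

Note the single-pass shortcut in the function above: when nothing is buffered yet, the directive is ZSTD_e_end and the destination is at least ZSTD_compressBound(srcSize), the whole call is routed through ZSTDMT_compress_advanced in one blocking pass.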
@@ -726,26 +928,28 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp
 {
     size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;
 
-    if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
     if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
        && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
         CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
     }
 
     /* check if there is any data available to flush */
-    DEBUGLOG(5, "zcs->doneJobID : %u  ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID);
-    return ZSTDMT_flushNextJob(zcs, output, 1);
+    return ZSTDMT_flushNextJob(zcs, output, 1 /* blockToFlush */);
 }
 
 
 size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
 {
-    if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output);
-    return ZSTDMT_flushStream_internal(zcs, output, 0);
+    DEBUGLOG(5, "ZSTDMT_flushStream");
+    if (zcs->nbThreads==1)
+        return ZSTD_flushStream(zcs->cctxPool->cctx[0], output);
+    return ZSTDMT_flushStream_internal(zcs, output, 0 /* endFrame */);
 }
 
 size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
 {
-    if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output);
-    return ZSTDMT_flushStream_internal(zcs, output, 1);
+    DEBUGLOG(4, "ZSTDMT_endStream");
+    if (zcs->nbThreads==1)
+        return ZSTD_endStream(zcs->cctxPool->cctx[0], output);
+    return ZSTDMT_flushStream_internal(zcs, output, 1 /* endFrame */);
 }