zstd-ruby 1.3.0.0 → 1.3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/common/bitstream.h +40 -41
- data/ext/zstdruby/libzstd/common/compiler.h +85 -0
- data/ext/zstdruby/libzstd/common/error_private.c +8 -10
- data/ext/zstdruby/libzstd/common/error_private.h +4 -4
- data/ext/zstdruby/libzstd/common/fse.h +11 -5
- data/ext/zstdruby/libzstd/common/fse_decompress.c +3 -22
- data/ext/zstdruby/libzstd/common/huf.h +5 -6
- data/ext/zstdruby/libzstd/common/mem.h +6 -6
- data/ext/zstdruby/libzstd/common/pool.c +61 -27
- data/ext/zstdruby/libzstd/common/pool.h +10 -10
- data/ext/zstdruby/libzstd/common/threading.h +5 -6
- data/ext/zstdruby/libzstd/common/xxhash.c +28 -22
- data/ext/zstdruby/libzstd/common/zstd_common.c +4 -4
- data/ext/zstdruby/libzstd/common/zstd_errors.h +30 -32
- data/ext/zstdruby/libzstd/common/zstd_internal.h +57 -56
- data/ext/zstdruby/libzstd/compress/fse_compress.c +4 -22
- data/ext/zstdruby/libzstd/compress/huf_compress.c +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +314 -304
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +118 -116
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +223 -156
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +10 -9
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +8 -24
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +95 -115
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +4 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +4 -5
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +4 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +4 -4
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +7 -9
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +5 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +5 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +5 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +4 -4
- data/ext/zstdruby/libzstd/zstd.h +16 -14
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +3 -2
The hunks below belong to data/ext/zstdruby/libzstd/compress/zstdmt_compress.c (the multi-threaded compression unit), which accounts for most of this release's changes.

@@ -1,15 +1,16 @@
-
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree
- *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */


 /* ====== Tuning parameters ====== */
-#define ZSTDMT_NBTHREADS_MAX
+#define ZSTDMT_NBTHREADS_MAX 256
+#define ZSTDMT_OVERLAPLOG_DEFAULT 6


 /* ====== Compiler specifics ====== */

@@ -73,6 +74,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)


 /* ===== Buffer Pool ===== */
+/* a single Buffer Pool can be invoked from multiple threads in parallel */

 typedef struct buffer_s {
     void* start;

@@ -82,6 +84,8 @@ typedef struct buffer_s {
 static const buffer_t g_nullBuffer = { NULL, 0 };

 typedef struct ZSTDMT_bufferPool_s {
+    pthread_mutex_t poolMutex;
+    size_t bufferSize;
     unsigned totalBuffers;
     unsigned nbBuffers;
     ZSTD_customMem cMem;
@@ -90,10 +94,15 @@ typedef struct ZSTDMT_bufferPool_s {

 static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
 {
-    unsigned const maxNbBuffers = 2*nbThreads +
+    unsigned const maxNbBuffers = 2*nbThreads + 3;
     ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
         sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
     if (bufPool==NULL) return NULL;
+    if (pthread_mutex_init(&bufPool->poolMutex, NULL)) {
+        ZSTD_free(bufPool, cMem);
+        return NULL;
+    }
+    bufPool->bufferSize = 64 KB;
     bufPool->totalBuffers = maxNbBuffers;
     bufPool->nbBuffers = 0;
     bufPool->cMem = cMem;

@@ -106,6 +115,7 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
     if (!bufPool) return;   /* compatibility with free on NULL */
     for (u=0; u<bufPool->totalBuffers; u++)
         ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
+    pthread_mutex_destroy(&bufPool->poolMutex);
     ZSTD_free(bufPool, bufPool->cMem);
 }

@@ -116,65 +126,85 @@ static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
         + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
     unsigned u;
     size_t totalBufferSize = 0;
+    pthread_mutex_lock(&bufPool->poolMutex);
     for (u=0; u<bufPool->totalBuffers; u++)
         totalBufferSize += bufPool->bTable[u].size;
+    pthread_mutex_unlock(&bufPool->poolMutex);

     return poolSize + totalBufferSize;
 }

-
- * assumption : invocation from main thread only ! */
-static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
+static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* bufPool, size_t bSize)
 {
-
-
+    bufPool->bufferSize = bSize;
+}
+
+/** ZSTDMT_getBuffer() :
+ *  assumption : bufPool must be valid */
+static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
+{
+    size_t const bSize = bufPool->bufferSize;
+    DEBUGLOG(5, "ZSTDMT_getBuffer");
+    pthread_mutex_lock(&bufPool->poolMutex);
+    if (bufPool->nbBuffers) {   /* try to use an existing buffer */
+        buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
         size_t const availBufferSize = buf.size;
-        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
+        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) {
             /* large enough, but not too much */
+            pthread_mutex_unlock(&bufPool->poolMutex);
             return buf;
+        }
         /* size conditions not respected : scratch this buffer, create new one */
-
+        DEBUGLOG(5, "existing buffer does not meet size conditions => freeing");
+        ZSTD_free(buf.start, bufPool->cMem);
     }
+    pthread_mutex_unlock(&bufPool->poolMutex);
     /* create new buffer */
+    DEBUGLOG(5, "create a new buffer");
     {   buffer_t buffer;
-        void* const start = ZSTD_malloc(bSize,
-        if (start==NULL) bSize = 0;
+        void* const start = ZSTD_malloc(bSize, bufPool->cMem);
         buffer.start = start;   /* note : start can be NULL if malloc fails ! */
-        buffer.size = bSize;
+        buffer.size = (start==NULL) ? 0 : bSize;
         return buffer;
     }
 }

 /* store buffer for later re-use, up to pool capacity */
-static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool*
-{
-    if (buf.start == NULL) return;   /* release on NULL */
-
-
+static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
+{
+    if (buf.start == NULL) return;   /* compatible with release on NULL */
+    DEBUGLOG(5, "ZSTDMT_releaseBuffer");
+    pthread_mutex_lock(&bufPool->poolMutex);
+    if (bufPool->nbBuffers < bufPool->totalBuffers) {
+        bufPool->bTable[bufPool->nbBuffers++] = buf;   /* stored for later use */
+        pthread_mutex_unlock(&bufPool->poolMutex);
         return;
     }
+    pthread_mutex_unlock(&bufPool->poolMutex);
     /* Reached bufferPool capacity (should not happen) */
-
+    DEBUGLOG(5, "buffer pool capacity reached => freeing ");
+    ZSTD_free(buf.start, bufPool->cMem);
 }


 /* ===== CCtx Pool ===== */
+/* a single CCtx Pool can be invoked from multiple threads in parallel */

 typedef struct {
+    pthread_mutex_t poolMutex;
     unsigned totalCCtx;
     unsigned availCCtx;
     ZSTD_customMem cMem;
     ZSTD_CCtx* cctx[1];   /* variable size */
 } ZSTDMT_CCtxPool;

-/* assumption : CCtxPool invocation only from main thread */
-
 /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 {
     unsigned u;
     for (u=0; u<pool->totalCCtx; u++)
         ZSTD_freeCCtx(pool->cctx[u]);   /* note : compatible with free on NULL */
+    pthread_mutex_destroy(&pool->poolMutex);
     ZSTD_free(pool, pool->cMem);
 }

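The intent of this group of hunks is that the buffer pool stops being a main-thread-only structure: every free-list access is now guarded by poolMutex, and the allocation size is owned by the pool (bufferSize, set through ZSTDMT_setBufferSize) instead of being passed by each caller. Below is a minimal, self-contained sketch of the same locked get/release pattern, for illustration only; simple_pool, pool_get and pool_release are made-up names, not zstd API.

```c
#include <pthread.h>
#include <stdlib.h>

typedef struct { void* start; size_t size; } buf_t;

typedef struct {
    pthread_mutex_t lock;   /* guards bufferSize, nbFree and freeList */
    size_t bufferSize;      /* size served by pool_get() */
    unsigned nbFree;
    buf_t freeList[16];     /* fixed capacity, enough for a sketch */
} simple_pool;

static buf_t pool_get(simple_pool* p)
{
    buf_t b = { NULL, 0 };
    size_t want;
    pthread_mutex_lock(&p->lock);
    want = p->bufferSize;
    if (p->nbFree) {                        /* try to reuse a cached buffer */
        buf_t cached = p->freeList[--p->nbFree];
        if (cached.size >= want) {
            pthread_mutex_unlock(&p->lock); /* large enough : hand it out */
            return cached;
        }
        free(cached.start);                 /* wrong size : discard it */
    }
    pthread_mutex_unlock(&p->lock);         /* allocate outside the critical section */
    b.start = malloc(want);
    b.size  = (b.start == NULL) ? 0 : want;
    return b;
}

static void pool_release(simple_pool* p, buf_t b)
{
    if (b.start == NULL) return;            /* release on NULL is a no-op */
    pthread_mutex_lock(&p->lock);
    if (p->nbFree < 16) {                   /* store for later reuse */
        p->freeList[p->nbFree++] = b;
        pthread_mutex_unlock(&p->lock);
        return;
    }
    pthread_mutex_unlock(&p->lock);
    free(b.start);                          /* pool full : give memory back */
}

int main(void)
{
    simple_pool p = { .lock = PTHREAD_MUTEX_INITIALIZER, .bufferSize = 1024 };
    buf_t b = pool_get(&p);      /* first call allocates a fresh 1 KB buffer */
    pool_release(&p, b);         /* goes back onto the free list */
    b = pool_get(&p);            /* second call reuses the cached buffer */
    free(b.start);
    pthread_mutex_destroy(&p.lock);
    return 0;
}
```

As in the diff, the mutex only protects the free list; the actual malloc/free of a new buffer happens outside the critical section so worker threads do not serialize on allocation.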
@@ -186,6 +216,10 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
     ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
         sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
     if (!cctxPool) return NULL;
+    if (pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
+        ZSTD_free(cctxPool, cMem);
+        return NULL;
+    }
     cctxPool->cMem = cMem;
     cctxPool->totalCCtx = nbThreads;
     cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */

@@ -198,50 +232,57 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
 /* only works during initialization phase, not during compression */
 static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
 {
-
-
-
-
-
-
-
-
-
+    pthread_mutex_lock(&cctxPool->poolMutex);
+    {   unsigned const nbThreads = cctxPool->totalCCtx;
+        size_t const poolSize = sizeof(*cctxPool)
+                                + (nbThreads-1)*sizeof(ZSTD_CCtx*);
+        unsigned u;
+        size_t totalCCtxSize = 0;
+        for (u=0; u<nbThreads; u++) {
+            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+        }
+        pthread_mutex_unlock(&cctxPool->poolMutex);
+        return poolSize + totalCCtxSize;
+    }
 }

-static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool*
+static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
 {
-
-
-
-
-
+    DEBUGLOG(5, "ZSTDMT_getCCtx");
+    pthread_mutex_lock(&cctxPool->poolMutex);
+    if (cctxPool->availCCtx) {
+        cctxPool->availCCtx--;
+        {   ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+            pthread_mutex_unlock(&cctxPool->poolMutex);
+            return cctx;
+    } }
+    pthread_mutex_unlock(&cctxPool->poolMutex);
+    DEBUGLOG(5, "create one more CCtx");
+    return ZSTD_createCCtx_advanced(cctxPool->cMem);   /* note : can be NULL, when creation fails ! */
 }

 static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
 {
     if (cctx==NULL) return;   /* compatibility with release on NULL */
+    pthread_mutex_lock(&pool->poolMutex);
     if (pool->availCCtx < pool->totalCCtx)
         pool->cctx[pool->availCCtx++] = cctx;
-    else
+    else {
         /* pool overflow : should not happen, since totalCCtx==nbThreads */
+        DEBUGLOG(5, "CCtx pool overflow : free cctx");
         ZSTD_freeCCtx(cctx);
+    }
+    pthread_mutex_unlock(&pool->poolMutex);
 }


 /* ===== Thread worker ===== */

 typedef struct {
-    buffer_t buffer;
-    size_t filled;
-} inBuff_t;
-
-typedef struct {
-    ZSTD_CCtx* cctx;
     buffer_t src;
     const void* srcStart;
-    size_t srcSize;
     size_t dictSize;
+    size_t srcSize;
     buffer_t dstBuff;
     size_t cSize;
     size_t dstFlushed;
@@ -260,37 +303,56 @@ typedef struct {
 void ZSTDMT_compressChunk(void* jobDescription)
 {
     ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
+    ZSTD_CCtx* cctx = ZSTDMT_getCCtx(job->cctxPool);
     const void* const src = (const char*)job->srcStart + job->dictSize;
-    buffer_t
+    buffer_t dstBuff = job->dstBuff;
     DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
              job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
+
+    if (cctx==NULL) {
+        job->cSize = ERROR(memory_allocation);
+        goto _endJob;
+    }
+
+    if (dstBuff.start == NULL) {
+        dstBuff = ZSTDMT_getBuffer(job->bufPool);
+        if (dstBuff.start==NULL) {
+            job->cSize = ERROR(memory_allocation);
+            goto _endJob;
+        }
+        job->dstBuff = dstBuff;
+    }
+
     if (job->cdict) { /* should only happen for first segment */
-        size_t const initError = ZSTD_compressBegin_usingCDict_advanced(
+        size_t const initError = ZSTD_compressBegin_usingCDict_advanced(cctx, job->cdict, job->params.fParams, job->fullFrameSize);
         DEBUGLOG(5, "using CDict");
         if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
     } else { /* srcStart points at reloaded section */
         if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */
-        { size_t const dictModeError = ZSTD_setCCtxParameter(
-          size_t const initError = ZSTD_compressBegin_advanced(
+        {   size_t const dictModeError = ZSTD_setCCtxParameter(cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
+            size_t const initError = ZSTD_compressBegin_advanced(cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
            if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
-        ZSTD_setCCtxParameter(
+            ZSTD_setCCtxParameter(cctx, ZSTD_p_forceWindow, 1);
     } }
     if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */
-        size_t const hSize = ZSTD_compressContinue(
+        size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
         if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
-        ZSTD_invalidateRepCodes(
+        ZSTD_invalidateRepCodes(cctx);
     }

     DEBUGLOG(5, "Compressing : ");
     DEBUG_PRINTHEX(4, job->srcStart, 12);
     job->cSize = (job->lastChunk) ?
-        ZSTD_compressEnd (
-        ZSTD_compressContinue(
+        ZSTD_compressEnd (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
+        ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
     DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u)",
              (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
     DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));

 _endJob:
+    ZSTDMT_releaseCCtx(job->cctxPool, cctx);
+    ZSTDMT_releaseBuffer(job->bufPool, job->src);
+    job->src = g_nullBuffer; job->srcStart = NULL;
     PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
     job->jobCompleted = 1;
     job->jobScanned = 0;
@@ -303,15 +365,19 @@ _endJob:
 /* ===== Multi-threaded compression ===== */
 /* ------------------------------------------ */

+typedef struct {
+    buffer_t buffer;
+    size_t filled;
+} inBuff_t;
+
 struct ZSTDMT_CCtx_s {
     POOL_ctx* factory;
     ZSTDMT_jobDescription* jobs;
-    ZSTDMT_bufferPool*
+    ZSTDMT_bufferPool* bufPool;
     ZSTDMT_CCtxPool* cctxPool;
     pthread_mutex_t jobCompleted_mutex;
     pthread_cond_t jobCompleted_cond;
     size_t targetSectionSize;
-    size_t marginSize;
     size_t inBuffSize;
     size_t dictSize;
     size_t targetDictSize;

@@ -324,7 +390,7 @@ struct ZSTDMT_CCtx_s {
     unsigned nextJobID;
     unsigned frameEnded;
     unsigned allJobsCompleted;
-    unsigned
+    unsigned overlapLog;
     unsigned long long frameContentSize;
     size_t sectionSize;
     ZSTD_customMem cMem;

@@ -347,7 +413,8 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
     U32 nbJobs = nbThreads + 2;
     DEBUGLOG(3, "ZSTDMT_createCCtx_advanced");

-    if (
+    if (nbThreads < 1) return NULL;
+    nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX);
     if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
         /* invalid custom allocator */
         return NULL;

@@ -358,18 +425,24 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
     mtctx->nbThreads = nbThreads;
     mtctx->allJobsCompleted = 1;
     mtctx->sectionSize = 0;
-    mtctx->
-    mtctx->factory = POOL_create(nbThreads,
+    mtctx->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
+    mtctx->factory = POOL_create(nbThreads, 0);
     mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
     mtctx->jobIDMask = nbJobs - 1;
-    mtctx->
+    mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem);
     mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
-    if (!mtctx->factory | !mtctx->jobs | !mtctx->
+    if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool) {
+        ZSTDMT_freeCCtx(mtctx);
+        return NULL;
+    }
+    if (pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL)) {
+        ZSTDMT_freeCCtx(mtctx);
+        return NULL;
+    }
+    if (pthread_cond_init(&mtctx->jobCompleted_cond, NULL)) {
         ZSTDMT_freeCCtx(mtctx);
         return NULL;
     }
-    pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
-    pthread_cond_init(&mtctx->jobCompleted_cond, NULL);
     DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
     return mtctx;
 }
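A small but notable fix in ZSTDMT_createCCtx_advanced: the 1.3.0 code carried a "/* Todo : check init function return */" comment, whereas 1.3.1 checks the return values of pthread_mutex_init and pthread_cond_init and tears the context down on failure. A generic, self-contained version of that error-checked initialization idiom is sketched below; the waiter_t type and function names are placeholders, not taken from zstd.

```c
#include <pthread.h>
#include <stdlib.h>

typedef struct {
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
} waiter_t;

/* Returns NULL on any failure; nothing is leaked and no half-initialized
 * synchronization object is left behind. */
static waiter_t* waiter_create(void)
{
    waiter_t* w = calloc(1, sizeof(*w));
    if (w == NULL) return NULL;
    if (pthread_mutex_init(&w->mutex, NULL)) {   /* non-zero means failure */
        free(w);
        return NULL;
    }
    if (pthread_cond_init(&w->cond, NULL)) {
        pthread_mutex_destroy(&w->mutex);        /* undo the step that succeeded */
        free(w);
        return NULL;
    }
    return w;
}

static void waiter_free(waiter_t* w)
{
    if (w == NULL) return;
    pthread_cond_destroy(&w->cond);
    pthread_mutex_destroy(&w->mutex);
    free(w);
}

int main(void)
{
    waiter_t* w = waiter_create();
    if (w == NULL) return 1;
    waiter_free(w);
    return 0;
}
```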
@@ -386,15 +459,13 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
     unsigned jobID;
     DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
     for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
-        ZSTDMT_releaseBuffer(mtctx->
+        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
         mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-        ZSTDMT_releaseBuffer(mtctx->
+        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].src);
         mtctx->jobs[jobID].src = g_nullBuffer;
-        ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
-        mtctx->jobs[jobID].cctx = NULL;
     }
     memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
-    ZSTDMT_releaseBuffer(mtctx->
+    ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);
     mtctx->inBuff.buffer = g_nullBuffer;
     mtctx->allJobsCompleted = 1;
 }

@@ -404,7 +475,7 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
     if (mtctx==NULL) return 0;   /* compatible with free on NULL */
     POOL_free(mtctx->factory);
     if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx);   /* stop workers first */
-    ZSTDMT_freeBufferPool(mtctx->
+    ZSTDMT_freeBufferPool(mtctx->bufPool);   /* release job resources into pools first */
     ZSTD_free(mtctx->jobs, mtctx->cMem);
     ZSTDMT_freeCCtxPool(mtctx->cctxPool);
     ZSTD_freeCDict(mtctx->cdictLocal);

@@ -418,11 +489,11 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
 {
     if (mtctx == NULL) return 0;   /* supports sizeof NULL */
     return sizeof(*mtctx)
-
-
-
-
-
+           + POOL_sizeof(mtctx->factory)
+           + ZSTDMT_sizeof_bufferPool(mtctx->bufPool)
+           + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
+           + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
+           + ZSTD_sizeof_CDict(mtctx->cdictLocal);
 }

 size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)

@@ -434,10 +505,10 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
         return 0;
     case ZSTDMT_p_overlapSectionLog :
         DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value);
-        mtctx->
+        mtctx->overlapLog = (value >= 9) ? 9 : value;
         return 0;
     default :
-        return ERROR(
+        return ERROR(parameter_unsupported);
     }
 }

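The renamed overlapLog field (clamped to 9 above) controls how much of the previous section each new section reloads as a dictionary. Following the expression that appears in ZSTDMT_compress_advanced below, overlapLog 9 reloads a full window, the new ZSTDMT_OVERLAPLOG_DEFAULT of 6 reloads one eighth of the window, and 0 disables overlap. A tiny standalone program working that arithmetic through (illustrative only, not zstd code):

```c
#include <stdio.h>
#include <stddef.h>

/* overlapSize derived from overlapLog and windowLog, mirroring the
 * expression used in ZSTDMT_compress_advanced(). */
static size_t overlap_size(unsigned overlapLog, unsigned windowLog)
{
    unsigned const overlapRLog = (overlapLog > 9) ? 0 : 9 - overlapLog;
    return (overlapRLog >= 9) ? 0 : (size_t)1 << (windowLog - overlapRLog);
}

int main(void)
{
    unsigned const windowLog = 23;   /* 8 MB window, chosen for illustration */
    unsigned const logs[] = { 0, 6, 9 };
    int i;
    for (i = 0; i < 3; i++)
        printf("overlapLog=%u -> overlap=%zu KB\n",
               logs[i], overlap_size(logs[i], windowLog) >> 10);
    /* prints: overlapLog=0 -> overlap=0 KB
     *         overlapLog=6 -> overlap=1024 KB   (window/8)
     *         overlapLog=9 -> overlap=8192 KB   (full window) */
    return 0;
}
```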
@@ -459,12 +530,13 @@ static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbT


 size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
-
-
-
-
-
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict,
+                                ZSTD_parameters const params,
+                                unsigned overlapLog)
 {
+    unsigned const overlapRLog = (overlapLog>9) ? 0 : 9-overlapLog;
     size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
     unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads);
     size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;

@@ -473,6 +545,7 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
     size_t remainingSrcSize = srcSize;
     unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize));  /* presumes avgChunkSize >= 256 KB, which should be the case */
     size_t frameStartPos = 0, dstBufferPos = 0;
+    XXH64_state_t xxh64;

     DEBUGLOG(4, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
     if (nbChunks==1) { /* fallback to single-thread mode */

@@ -480,7 +553,9 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
         if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams);
         return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
     }
-    assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is
+    assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */
+    ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
+    XXH64_reset(&xxh64, 0);

     if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */
         U32 nbJobs = nbChunks;

@@ -496,17 +571,10 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
         size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
         size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
         buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
-        buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer :
-        ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);
+        buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
         size_t dictSize = u ? overlapSize : 0;

-
-            mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */
-            mtctx->jobs[u].jobCompleted = 1;
-            nbChunks = u+1; /* only wait and free u jobs, instead of initially expected nbChunks ones */
-            break; /* let's wait for previous jobs to complete, but don't start new ones */
-        }
-
+        mtctx->jobs[u].src = g_nullBuffer;
         mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
         mtctx->jobs[u].dictSize = dictSize;
         mtctx->jobs[u].srcSize = chunkSize;

@@ -516,13 +584,18 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
         /* do not calculate checksum within sections, but write it in header for first section */
         if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
         mtctx->jobs[u].dstBuff = dstBuffer;
-        mtctx->jobs[u].
+        mtctx->jobs[u].cctxPool = mtctx->cctxPool;
+        mtctx->jobs[u].bufPool = mtctx->bufPool;
         mtctx->jobs[u].firstChunk = (u==0);
         mtctx->jobs[u].lastChunk = (u==nbChunks-1);
         mtctx->jobs[u].jobCompleted = 0;
         mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
         mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
+        if (params.fParams.checksumFlag) {
+            XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
+        }
+
         DEBUGLOG(5, "posting job %u (%u bytes)", u, (U32)chunkSize);
         DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
         POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);

@@ -533,8 +606,8 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
     } }

     /* collect result */
-    {
-
+    {   size_t error = 0, dstPos = 0;
+        unsigned chunkID;
         for (chunkID=0; chunkID<nbChunks; chunkID++) {
             DEBUGLOG(5, "waiting for chunk %u ", chunkID);
             PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);

@@ -545,8 +618,6 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
             pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
             DEBUGLOG(5, "ready to write chunk %u ", chunkID);

-            ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
-            mtctx->jobs[chunkID].cctx = NULL;
             mtctx->jobs[chunkID].srcStart = NULL;
             {   size_t const cSize = mtctx->jobs[chunkID].cSize;
                 if (ZSTD_isError(cSize)) error = cSize;

@@ -556,13 +627,25 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
                 memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */
                 if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */
                     DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
-                    ZSTDMT_releaseBuffer(mtctx->
+                    ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff);
                 }
                 mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
             }
             dstPos += cSize ;
         }
-    }
+        }  /* for (chunkID=0; chunkID<nbChunks; chunkID++) */
+
+        DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
+        if (params.fParams.checksumFlag) {
+            U32 const checksum = (U32)XXH64_digest(&xxh64);
+            if (dstPos + 4 > dstCapacity) {
+                error = ERROR(dstSize_tooSmall);
+            } else {
+                DEBUGLOG(4, "writing checksum : %08X \n", checksum);
+                MEM_writeLE32((char*)dst + dstPos, checksum);
+                dstPos += 4;
+        } }
+
         if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos);
         return error ? error : dstPos;
     }
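This hunk also relocates checksum computation for the buffer-based path: the frame hash is accumulated in a local XXH64_state_t while jobs are posted, and the low 32 bits of the digest are appended once every chunk has been flushed. Below is a self-contained sketch of that accumulate-then-append pattern against the public xxHash streaming API; the parts array and frame layout are invented for the example, and zstd's MEM_writeLE32 helper is replaced by explicit little-endian byte stores.

```c
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#define XXH_STATIC_LINKING_ONLY   /* exposes XXH64_state_t as a by-value type */
#include "xxhash.h"

int main(void)
{
    const char* parts[] = { "first chunk ", "second chunk ", "last chunk" };
    unsigned char frame[256];
    size_t pos = 0;
    int i;

    XXH64_state_t xxh64;
    XXH64_reset(&xxh64, 0);                    /* seed 0, as zstd frames use */

    for (i = 0; i < 3; i++) {
        size_t const len = strlen(parts[i]);
        /* ... each part would be handed to a worker for compression here ... */
        XXH64_update(&xxh64, parts[i], len);   /* hash the *uncompressed* input */
        pos += len;                            /* stand-in for compressed output size */
    }

    {   uint32_t const checksum = (uint32_t)XXH64_digest(&xxh64);
        /* append the low 32 bits, little-endian, after the last block */
        frame[pos+0] = (unsigned char)(checksum      );
        frame[pos+1] = (unsigned char)(checksum >>  8);
        frame[pos+2] = (unsigned char)(checksum >> 16);
        frame[pos+3] = (unsigned char)(checksum >> 24);
        printf("checksum: %08X\n", (unsigned)checksum);
    }
    return 0;
}
```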
@@ -574,10 +657,10 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
                            const void* src, size_t srcSize,
                            int compressionLevel)
 {
-    U32 const
+    U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
     ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
     params.fParams.contentSizeFlag = 1;
-    return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params,
+    return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
 }

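For orientation, ZSTDMT_compressCCtx shown above is the one-shot entry point of this multi-threading unit. The usage sketch below assumes the declarations found in this version's zstdmt_compress.h (ZSTDMT_createCCtx, ZSTDMT_compressCCtx, ZSTDMT_freeCCtx) and trims most error handling, so treat it as an illustration rather than canonical usage.

```c
#include <stdio.h>
#include <stdlib.h>
#include "zstd.h"              /* ZSTD_compressBound, ZSTD_isError, ZSTD_getErrorName */
#include "zstdmt_compress.h"   /* ZSTDMT_* buffer-based API touched in this diff */

int main(void)
{
    const char src[] = "example payload - real input would be much larger";
    size_t const dstCapacity = ZSTD_compressBound(sizeof(src));
    void* const dst = malloc(dstCapacity);
    ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(4);   /* 4 worker threads */
    if (dst == NULL || mtctx == NULL) return 1;

    {   size_t const cSize = ZSTDMT_compressCCtx(mtctx, dst, dstCapacity,
                                                 src, sizeof(src), 3 /* level */);
        if (ZSTD_isError(cSize))
            fprintf(stderr, "compression failed: %s\n", ZSTD_getErrorName(cSize));
        else
            printf("compressed %zu -> %zu bytes\n", sizeof(src), cSize);
    }
    ZSTDMT_freeCCtx(mtctx);
    free(dst);
    return 0;
}
```

Note that tiny inputs like this fall back to the single-thread path (nbChunks==1), exactly as the fallback branch in ZSTDMT_compress_advanced above shows.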
@@ -615,8 +698,8 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
     if (zcs->nbThreads==1) {
         DEBUGLOG(4, "single thread mode");
         return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
-
-
+                                         dict, dictSize, cdict,
+                                         params, pledgedSrcSize);
     }

     if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */

@@ -642,18 +725,16 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
         zcs->cdict = cdict;
     }

-    zcs->targetDictSize = (zcs->
-    DEBUGLOG(4, "
+    zcs->targetDictSize = (zcs->overlapLog==0) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - (9 - zcs->overlapLog));
+    DEBUGLOG(4, "overlapLog : %u ", zcs->overlapLog);
     DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
     zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
     zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
     zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
     DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
-    zcs->
-    zcs->
-    zcs->inBuff.buffer =
-    if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
-    zcs->inBuff.filled = 0;
+    zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize;
+    ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) );
+    zcs->inBuff.buffer = g_nullBuffer;
     zcs->dictSize = 0;
     zcs->doneJobID = 0;
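The ZSTDMT_initCStream_internal setup reworked above feeds the streaming entry points, which are driven with the usual init / compress / end sequence. The sketch below assumes the streaming declarations from this version's zstdmt_compress.h (ZSTDMT_initCStream, ZSTDMT_compressStream, ZSTDMT_endStream) and keeps everything in memory; a real caller would keep looping while ZSTDMT_endStream reports data left to flush.

```c
#include <stdio.h>
#include <stdlib.h>
#include "zstd.h"              /* ZSTD_inBuffer / ZSTD_outBuffer, ZSTD_isError */
#include "zstdmt_compress.h"   /* ZSTDMT_initCStream / compressStream / endStream */

int main(void)
{
    const char src[] = "streaming example - real input would arrive in pieces";
    size_t const dstCapacity = ZSTD_compressBound(sizeof(src));
    void* const dst = malloc(dstCapacity);
    ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(2);
    if (dst == NULL || mtctx == NULL) return 1;

    if (ZSTD_isError(ZSTDMT_initCStream(mtctx, 3))) return 1;   /* level 3 */
    {   ZSTD_inBuffer  in  = { src, sizeof(src), 0 };
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };
        size_t ret = ZSTDMT_compressStream(mtctx, &out, &in);   /* may only buffer input */
        if (!ZSTD_isError(ret)) {
            while ((ret = ZSTDMT_endStream(mtctx, &out)) != 0) {  /* flush until done */
                if (ZSTD_isError(ret)) break;
            }
        }
        if (ZSTD_isError(ret)) fprintf(stderr, "error: %s\n", ZSTD_getErrorName(ret));
        else printf("compressed %zu -> %zu bytes\n", sizeof(src), out.pos);
    }
    ZSTDMT_freeCCtx(mtctx);
    free(dst);
    return 0;
}
```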
@@ -664,8 +745,9 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
 }

 size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
-
-
+                                   const void* dict, size_t dictSize,
+                                   ZSTD_parameters params,
+                                   unsigned long long pledgedSrcSize)
 {
     DEBUGLOG(5, "ZSTDMT_initCStream_advanced");
     return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize);

@@ -701,19 +783,8 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {

 static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
 {
-    size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
-    buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
-    ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
     unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;

-    if ((cctx==NULL) || (dstBuffer.start==NULL)) {
-        zcs->jobs[jobID].jobCompleted = 1;
-        zcs->nextJobID++;
-        ZSTDMT_waitForAllJobsCompleted(zcs);
-        ZSTDMT_releaseAllJobResources(zcs);
-        return ERROR(memory_allocation);
-    }
-
     DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
              zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
     zcs->jobs[jobID].src = zcs->inBuff.buffer;

@@ -726,8 +797,9 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
     if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
     zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
     zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
-    zcs->jobs[jobID].dstBuff =
-    zcs->jobs[jobID].
+    zcs->jobs[jobID].dstBuff = g_nullBuffer;
+    zcs->jobs[jobID].cctxPool = zcs->cctxPool;
+    zcs->jobs[jobID].bufPool = zcs->bufPool;
     zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
     zcs->jobs[jobID].lastChunk = endFrame;
     zcs->jobs[jobID].jobCompleted = 0;

@@ -735,11 +807,13 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
     zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
     zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;

+    if (zcs->params.fParams.checksumFlag)
+        XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->dictSize, srcSize);
+
     /* get a new buffer for next input */
     if (!endFrame) {
         size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
-
-        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
+        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
         if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
             zcs->jobs[jobID].jobCompleted = 1;
             zcs->nextJobID++;

@@ -747,26 +821,20 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
             ZSTDMT_releaseAllJobResources(zcs);
             return ERROR(memory_allocation);
         }
-        DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled);
         zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
-        DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src",
-                 (U32)zcs->inBuff.filled, (U32)newDictSize,
-                 (U32)(zcs->inBuff.filled - newDictSize));
         memmove(zcs->inBuff.buffer.start,
                 (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
                 zcs->inBuff.filled);
-        DEBUGLOG(5, "new inBuff pre-filled");
         zcs->dictSize = newDictSize;
     } else { /* if (endFrame==1) */
-        DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
         zcs->inBuff.buffer = g_nullBuffer;
         zcs->inBuff.filled = 0;
         zcs->dictSize = 0;
         zcs->frameEnded = 1;
-        if (zcs->nextJobID == 0)
+        if (zcs->nextJobID == 0) {
             /* single chunk exception : checksum is calculated directly within worker thread */
             zcs->params.fParams.checksumFlag = 0;
-        }
+    } }

     DEBUGLOG(4, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)",
              zcs->nextJobID,

@@ -804,11 +872,8 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
             ZSTDMT_releaseAllJobResources(zcs);
             return job.cSize;
         }
-        ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
-        zcs->jobs[wJobID].cctx = NULL;
         DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
         if (zcs->params.fParams.checksumFlag) {
-            XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
             if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
                 U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
                 DEBUGLOG(5, "writing checksum : %08X \n", checksum);

@@ -816,9 +881,6 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
                 job.cSize += 4;
                 zcs->jobs[wJobID].cSize += 4;
         } }
-        ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
-        zcs->jobs[wJobID].srcStart = NULL;
-        zcs->jobs[wJobID].src = g_nullBuffer;
         zcs->jobs[wJobID].jobScanned = 1;
     }
     { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);

@@ -828,7 +890,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
         job.dstFlushed += toWrite;
     }
     if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */
-        ZSTDMT_releaseBuffer(zcs->
+        ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
         zcs->jobs[wJobID].dstBuff = g_nullBuffer;
         zcs->jobs[wJobID].jobCompleted = 0;
         zcs->doneJobID++;

@@ -852,18 +914,18 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
                                      ZSTD_inBuffer* input,
                                      ZSTD_EndDirective endOp)
 {
-    size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize
+    size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize;
     assert(output->pos <= output->size);
     assert(input->pos <= input->size);
     if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
         /* current frame being ended. Only flush/end are allowed. Or start new frame with init */
         return ERROR(stage_wrong);
     }
-    if (mtctx->nbThreads==1) {
+    if (mtctx->nbThreads==1) { /* delegate to single-thread (synchronous) */
         return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
     }

-    /* single-pass shortcut (note : this is
+    /* single-pass shortcut (note : this is synchronous-mode) */
     if ( (mtctx->nextJobID==0) /* just started */
       && (mtctx->inBuff.filled==0) /* nothing buffered */
       && (endOp==ZSTD_e_end) /* end order */

@@ -871,24 +933,29 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
         size_t const cSize = ZSTDMT_compress_advanced(mtctx,
                                 (char*)output->dst + output->pos, output->size - output->pos,
                                 (const char*)input->src + input->pos, input->size - input->pos,
-                                mtctx->cdict, mtctx->params, mtctx->
+                                mtctx->cdict, mtctx->params, mtctx->overlapLog);
         if (ZSTD_isError(cSize)) return cSize;
         input->pos = input->size;
         output->pos += cSize;
-        ZSTDMT_releaseBuffer(mtctx->
+        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);   /* was allocated in initStream */
         mtctx->allJobsCompleted = 1;
         mtctx->frameEnded = 1;
         return 0;
     }

     /* fill input buffer */
-    if (
-
-
-
-
-
-
+    if (input->size > input->pos) { /* support NULL input */
+        if (mtctx->inBuff.buffer.start == NULL) {
+            mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool);
+            if (mtctx->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
+            mtctx->inBuff.filled = 0;
+        }
+        {   size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
+            DEBUGLOG(5, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
+            memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
+            input->pos += toLoad;
+            mtctx->inBuff.filled += toLoad;
+    } }

     if ( (mtctx->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
       && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { /* avoid overwriting job round buffer */