zstd-ruby 1.2.0.0 → 1.3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +7 -5
- data/ext/zstdruby/libzstd/common/bitstream.h +23 -9
- data/ext/zstdruby/libzstd/common/error_private.c +4 -1
- data/ext/zstdruby/libzstd/common/huf.h +20 -0
- data/ext/zstdruby/libzstd/common/mem.h +0 -14
- data/ext/zstdruby/libzstd/common/pool.c +12 -0
- data/ext/zstdruby/libzstd/common/pool.h +5 -0
- data/ext/zstdruby/libzstd/common/threading.c +0 -1
- data/ext/zstdruby/libzstd/common/zstd_common.c +25 -18
- data/ext/zstdruby/libzstd/common/zstd_errors.h +15 -7
- data/ext/zstdruby/libzstd/common/zstd_internal.h +59 -9
- data/ext/zstdruby/libzstd/compress/huf_compress.c +7 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1082 -487
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +30 -15
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +362 -158
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +49 -13
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +150 -26
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +380 -258
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +23 -37
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +30 -40
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +104 -95
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +14 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -12
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -14
- data/ext/zstdruby/libzstd/zstd.h +507 -166
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +2 -2
@@ -43,6 +43,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t src
|
|
43
43
|
if (ssPtr->litLengthSum == 0) {
|
44
44
|
if (srcSize <= 1024) ssPtr->staticPrices = 1;
|
45
45
|
|
46
|
+
assert(ssPtr->litFreq!=NULL);
|
46
47
|
for (u=0; u<=MaxLit; u++)
|
47
48
|
ssPtr->litFreq[u] = 0;
|
48
49
|
for (u=0; u<srcSize; u++)
|
@@ -201,6 +202,20 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
|
|
201
202
|
}
|
202
203
|
|
203
204
|
|
205
|
+
/* function safe only for comparisons */
|
206
|
+
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
|
207
|
+
{
|
208
|
+
switch (length)
|
209
|
+
{
|
210
|
+
default :
|
211
|
+
case 4 : return MEM_read32(memPtr);
|
212
|
+
case 3 : if (MEM_isLittleEndian())
|
213
|
+
return MEM_read32(memPtr)<<8;
|
214
|
+
else
|
215
|
+
return MEM_read32(memPtr)>>8;
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
204
219
|
|
205
220
|
/* Update hashTable3 up to ip (excluded)
|
206
221
|
Assumption : always within prefix (i.e. not within extDict) */
|
@@ -234,12 +249,12 @@ static U32 ZSTD_insertBtAndGetAllMatches (
|
|
234
249
|
{
|
235
250
|
const BYTE* const base = zc->base;
|
236
251
|
const U32 current = (U32)(ip-base);
|
237
|
-
const U32 hashLog = zc->
|
252
|
+
const U32 hashLog = zc->appliedParams.cParams.hashLog;
|
238
253
|
const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
|
239
254
|
U32* const hashTable = zc->hashTable;
|
240
255
|
U32 matchIndex = hashTable[h];
|
241
256
|
U32* const bt = zc->chainTable;
|
242
|
-
const U32 btLog = zc->
|
257
|
+
const U32 btLog = zc->appliedParams.cParams.chainLog - 1;
|
243
258
|
const U32 btMask= (1U << btLog) - 1;
|
244
259
|
size_t commonLengthSmaller=0, commonLengthLarger=0;
|
245
260
|
const BYTE* const dictBase = zc->dictBase;
|
@@ -267,7 +282,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
|
|
267
282
|
if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
|
268
283
|
} else {
|
269
284
|
match = dictBase + matchIndex3;
|
270
|
-
if (
|
285
|
+
if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
|
271
286
|
currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
|
272
287
|
}
|
273
288
|
|
@@ -410,10 +425,10 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
|
410
425
|
const BYTE* const base = ctx->base;
|
411
426
|
const BYTE* const prefixStart = base + ctx->dictLimit;
|
412
427
|
|
413
|
-
const U32 maxSearches = 1U << ctx->
|
414
|
-
const U32 sufficient_len = ctx->
|
415
|
-
const U32 mls = ctx->
|
416
|
-
const U32 minMatch = (ctx->
|
428
|
+
const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
|
429
|
+
const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
|
430
|
+
const U32 mls = ctx->appliedParams.cParams.searchLength;
|
431
|
+
const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
|
417
432
|
|
418
433
|
ZSTD_optimal_t* opt = seqStorePtr->priceTable;
|
419
434
|
ZSTD_match_t* matches = seqStorePtr->matchTable;
|
@@ -439,7 +454,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
|
439
454
|
for (i=(ip == anchor); i<last_i; i++) {
|
440
455
|
const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
|
441
456
|
if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
|
442
|
-
&& (
|
457
|
+
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
|
443
458
|
mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
|
444
459
|
if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
|
445
460
|
best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
|
@@ -524,7 +539,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
|
524
539
|
for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
|
525
540
|
const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
|
526
541
|
if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
|
527
|
-
&& (
|
542
|
+
&& (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
|
528
543
|
mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
|
529
544
|
|
530
545
|
if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
|
@@ -663,10 +678,10 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
|
|
663
678
|
const BYTE* const dictBase = ctx->dictBase;
|
664
679
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
665
680
|
|
666
|
-
const U32 maxSearches = 1U << ctx->
|
667
|
-
const U32 sufficient_len = ctx->
|
668
|
-
const U32 mls = ctx->
|
669
|
-
const U32 minMatch = (ctx->
|
681
|
+
const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
|
682
|
+
const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
|
683
|
+
const U32 mls = ctx->appliedParams.cParams.searchLength;
|
684
|
+
const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
|
670
685
|
|
671
686
|
ZSTD_optimal_t* opt = seqStorePtr->priceTable;
|
672
687
|
ZSTD_match_t* matches = seqStorePtr->matchTable;
|
@@ -698,7 +713,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
|
|
698
713
|
const BYTE* const repMatch = repBase + repIndex;
|
699
714
|
if ( (repCur > 0 && repCur <= (S32)current)
|
700
715
|
&& (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
|
701
|
-
&& (
|
716
|
+
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
702
717
|
/* repcode detected we should take it */
|
703
718
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
704
719
|
mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
|
@@ -794,7 +809,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
|
|
794
809
|
const BYTE* const repMatch = repBase + repIndex;
|
795
810
|
if ( (repCur > 0 && repCur <= (S32)(current+cur))
|
796
811
|
&& (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
|
797
|
-
&& (
|
812
|
+
&& (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
798
813
|
/* repcode detected */
|
799
814
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
800
815
|
mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
|
@@ -14,34 +14,31 @@
|
|
14
14
|
|
15
15
|
/* ====== Compiler specifics ====== */
|
16
16
|
#if defined(_MSC_VER)
|
17
|
-
# pragma warning(disable : 4204)
|
17
|
+
# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
|
18
18
|
#endif
|
19
19
|
|
20
20
|
|
21
21
|
/* ====== Dependencies ====== */
|
22
|
-
#include <
|
23
|
-
#include
|
24
|
-
#include "
|
25
|
-
#include "
|
26
|
-
#include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
22
|
+
#include <string.h> /* memcpy, memset */
|
23
|
+
#include "pool.h" /* threadpool */
|
24
|
+
#include "threading.h" /* mutex */
|
25
|
+
#include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
27
26
|
#include "zstdmt_compress.h"
|
28
27
|
|
29
28
|
|
30
29
|
/* ====== Debug ====== */
|
31
|
-
#if
|
30
|
+
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
|
32
31
|
|
33
32
|
# include <stdio.h>
|
34
33
|
# include <unistd.h>
|
35
34
|
# include <sys/times.h>
|
36
|
-
|
37
|
-
# define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
|
38
|
-
# define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
|
35
|
+
# define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
|
39
36
|
|
40
|
-
# define DEBUG_PRINTHEX(l,p,n) {
|
41
|
-
unsigned debug_u;
|
42
|
-
for (debug_u=0; debug_u<(n); debug_u++)
|
37
|
+
# define DEBUG_PRINTHEX(l,p,n) { \
|
38
|
+
unsigned debug_u; \
|
39
|
+
for (debug_u=0; debug_u<(n); debug_u++) \
|
43
40
|
DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
|
44
|
-
DEBUGLOGRAW(l, " \n");
|
41
|
+
DEBUGLOGRAW(l, " \n"); \
|
45
42
|
}
|
46
43
|
|
47
44
|
static unsigned long long GetCurrentClockTimeMicroseconds(void)
|
@@ -53,22 +50,22 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
|
|
53
50
|
return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
|
54
51
|
}
|
55
52
|
|
56
|
-
#define MUTEX_WAIT_TIME_DLEVEL
|
57
|
-
#define PTHREAD_MUTEX_LOCK(mutex) \
|
58
|
-
if (
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
} else pthread_mutex_lock(mutex);
|
53
|
+
#define MUTEX_WAIT_TIME_DLEVEL 6
|
54
|
+
#define PTHREAD_MUTEX_LOCK(mutex) { \
|
55
|
+
if (ZSTD_DEBUG>=MUTEX_WAIT_TIME_DLEVEL) { \
|
56
|
+
unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
|
57
|
+
pthread_mutex_lock(mutex); \
|
58
|
+
{ unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
|
59
|
+
unsigned long long const elapsedTime = (afterTime-beforeTime); \
|
60
|
+
if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
|
61
|
+
DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
|
62
|
+
elapsedTime, #mutex); \
|
63
|
+
} } \
|
64
|
+
} else pthread_mutex_lock(mutex); \
|
65
|
+
}
|
68
66
|
|
69
67
|
#else
|
70
68
|
|
71
|
-
# define DEBUGLOG(l, ...) {} /* disabled */
|
72
69
|
# define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
|
73
70
|
# define DEBUG_PRINTHEX(l,p,n) {}
|
74
71
|
|
@@ -87,16 +84,19 @@ static const buffer_t g_nullBuffer = { NULL, 0 };
|
|
87
84
|
typedef struct ZSTDMT_bufferPool_s {
|
88
85
|
unsigned totalBuffers;
|
89
86
|
unsigned nbBuffers;
|
87
|
+
ZSTD_customMem cMem;
|
90
88
|
buffer_t bTable[1]; /* variable size */
|
91
89
|
} ZSTDMT_bufferPool;
|
92
90
|
|
93
|
-
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
|
91
|
+
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
|
94
92
|
{
|
95
93
|
unsigned const maxNbBuffers = 2*nbThreads + 2;
|
96
|
-
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)
|
94
|
+
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
|
95
|
+
sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
|
97
96
|
if (bufPool==NULL) return NULL;
|
98
97
|
bufPool->totalBuffers = maxNbBuffers;
|
99
98
|
bufPool->nbBuffers = 0;
|
99
|
+
bufPool->cMem = cMem;
|
100
100
|
return bufPool;
|
101
101
|
}
|
102
102
|
|
@@ -105,23 +105,39 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
|
|
105
105
|
unsigned u;
|
106
106
|
if (!bufPool) return; /* compatibility with free on NULL */
|
107
107
|
for (u=0; u<bufPool->totalBuffers; u++)
|
108
|
-
|
109
|
-
|
108
|
+
ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
|
109
|
+
ZSTD_free(bufPool, bufPool->cMem);
|
110
|
+
}
|
111
|
+
|
112
|
+
/* only works at initialization, not during compression */
|
113
|
+
static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
|
114
|
+
{
|
115
|
+
size_t const poolSize = sizeof(*bufPool)
|
116
|
+
+ (bufPool->totalBuffers - 1) * sizeof(buffer_t);
|
117
|
+
unsigned u;
|
118
|
+
size_t totalBufferSize = 0;
|
119
|
+
for (u=0; u<bufPool->totalBuffers; u++)
|
120
|
+
totalBufferSize += bufPool->bTable[u].size;
|
121
|
+
|
122
|
+
return poolSize + totalBufferSize;
|
110
123
|
}
|
111
124
|
|
112
|
-
|
125
|
+
/** ZSTDMT_getBuffer() :
|
126
|
+
* assumption : invocation from main thread only ! */
|
113
127
|
static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
|
114
128
|
{
|
115
129
|
if (pool->nbBuffers) { /* try to use an existing buffer */
|
116
130
|
buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
|
117
131
|
size_t const availBufferSize = buf.size;
|
118
|
-
if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
|
132
|
+
if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
|
133
|
+
/* large enough, but not too much */
|
119
134
|
return buf;
|
120
|
-
|
135
|
+
/* size conditions not respected : scratch this buffer, create new one */
|
136
|
+
ZSTD_free(buf.start, pool->cMem);
|
121
137
|
}
|
122
138
|
/* create new buffer */
|
123
139
|
{ buffer_t buffer;
|
124
|
-
void* const start =
|
140
|
+
void* const start = ZSTD_malloc(bSize, pool->cMem);
|
125
141
|
if (start==NULL) bSize = 0;
|
126
142
|
buffer.start = start; /* note : start can be NULL if malloc fails ! */
|
127
143
|
buffer.size = bSize;
|
@@ -138,7 +154,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
|
|
138
154
|
return;
|
139
155
|
}
|
140
156
|
/* Reached bufferPool capacity (should not happen) */
|
141
|
-
|
157
|
+
ZSTD_free(buf.start, pool->cMem);
|
142
158
|
}
|
143
159
|
|
144
160
|
|
@@ -147,6 +163,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
|
|
147
163
|
typedef struct {
|
148
164
|
unsigned totalCCtx;
|
149
165
|
unsigned availCCtx;
|
166
|
+
ZSTD_customMem cMem;
|
150
167
|
ZSTD_CCtx* cctx[1]; /* variable size */
|
151
168
|
} ZSTDMT_CCtxPool;
|
152
169
|
|
@@ -158,23 +175,40 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
|
|
158
175
|
unsigned u;
|
159
176
|
for (u=0; u<pool->totalCCtx; u++)
|
160
177
|
ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
|
161
|
-
|
178
|
+
ZSTD_free(pool, pool->cMem);
|
162
179
|
}
|
163
180
|
|
164
181
|
/* ZSTDMT_createCCtxPool() :
|
165
182
|
* implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
|
166
|
-
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads
|
183
|
+
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
|
184
|
+
ZSTD_customMem cMem)
|
167
185
|
{
|
168
|
-
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*)
|
186
|
+
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
|
187
|
+
sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
|
169
188
|
if (!cctxPool) return NULL;
|
189
|
+
cctxPool->cMem = cMem;
|
170
190
|
cctxPool->totalCCtx = nbThreads;
|
171
191
|
cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
|
172
|
-
cctxPool->cctx[0] =
|
192
|
+
cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
|
173
193
|
if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
|
174
|
-
DEBUGLOG(
|
194
|
+
DEBUGLOG(3, "cctxPool created, with %u threads", nbThreads);
|
175
195
|
return cctxPool;
|
176
196
|
}
|
177
197
|
|
198
|
+
/* only works during initialization phase, not during compression */
|
199
|
+
static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
|
200
|
+
{
|
201
|
+
unsigned const nbThreads = cctxPool->totalCCtx;
|
202
|
+
size_t const poolSize = sizeof(*cctxPool)
|
203
|
+
+ (nbThreads-1)*sizeof(ZSTD_CCtx*);
|
204
|
+
unsigned u;
|
205
|
+
size_t totalCCtxSize = 0;
|
206
|
+
for (u=0; u<nbThreads; u++)
|
207
|
+
totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
|
208
|
+
|
209
|
+
return poolSize + totalCCtxSize;
|
210
|
+
}
|
211
|
+
|
178
212
|
static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
|
179
213
|
{
|
180
214
|
if (pool->availCCtx) {
|
@@ -218,7 +252,7 @@ typedef struct {
|
|
218
252
|
pthread_mutex_t* jobCompleted_mutex;
|
219
253
|
pthread_cond_t* jobCompleted_cond;
|
220
254
|
ZSTD_parameters params;
|
221
|
-
ZSTD_CDict* cdict;
|
255
|
+
const ZSTD_CDict* cdict;
|
222
256
|
unsigned long long fullFrameSize;
|
223
257
|
} ZSTDMT_jobDescription;
|
224
258
|
|
@@ -228,11 +262,11 @@ void ZSTDMT_compressChunk(void* jobDescription)
|
|
228
262
|
ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
|
229
263
|
const void* const src = (const char*)job->srcStart + job->dictSize;
|
230
264
|
buffer_t const dstBuff = job->dstBuff;
|
231
|
-
DEBUGLOG(
|
265
|
+
DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
|
232
266
|
job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
|
233
267
|
if (job->cdict) { /* should only happen for first segment */
|
234
268
|
size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
|
235
|
-
|
269
|
+
DEBUGLOG(5, "using CDict");
|
236
270
|
if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
|
237
271
|
} else { /* srcStart points at reloaded section */
|
238
272
|
if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */
|
@@ -247,12 +281,12 @@ void ZSTDMT_compressChunk(void* jobDescription)
|
|
247
281
|
ZSTD_invalidateRepCodes(job->cctx);
|
248
282
|
}
|
249
283
|
|
250
|
-
DEBUGLOG(
|
284
|
+
DEBUGLOG(5, "Compressing : ");
|
251
285
|
DEBUG_PRINTHEX(4, job->srcStart, 12);
|
252
286
|
job->cSize = (job->lastChunk) ?
|
253
287
|
ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
|
254
288
|
ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
|
255
|
-
DEBUGLOG(
|
289
|
+
DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u)",
|
256
290
|
(unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
|
257
291
|
DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
|
258
292
|
|
@@ -271,6 +305,7 @@ _endJob:
|
|
271
305
|
|
272
306
|
struct ZSTDMT_CCtx_s {
|
273
307
|
POOL_ctx* factory;
|
308
|
+
ZSTDMT_jobDescription* jobs;
|
274
309
|
ZSTDMT_bufferPool* buffPool;
|
275
310
|
ZSTDMT_CCtxPool* cctxPool;
|
276
311
|
pthread_mutex_t jobCompleted_mutex;
|
@@ -292,50 +327,64 @@ struct ZSTDMT_CCtx_s {
|
|
292
327
|
unsigned overlapRLog;
|
293
328
|
unsigned long long frameContentSize;
|
294
329
|
size_t sectionSize;
|
295
|
-
|
296
|
-
|
297
|
-
|
330
|
+
ZSTD_customMem cMem;
|
331
|
+
ZSTD_CDict* cdictLocal;
|
332
|
+
const ZSTD_CDict* cdict;
|
298
333
|
};
|
299
334
|
|
300
|
-
|
335
|
+
static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
|
301
336
|
{
|
302
|
-
|
303
|
-
U32 const minNbJobs = nbThreads + 2;
|
304
|
-
U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
|
337
|
+
U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
|
305
338
|
U32 const nbJobs = 1 << nbJobsLog2;
|
306
|
-
|
307
|
-
|
339
|
+
*nbJobsPtr = nbJobs;
|
340
|
+
return (ZSTDMT_jobDescription*) ZSTD_calloc(
|
341
|
+
nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
|
342
|
+
}
|
343
|
+
|
344
|
+
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
|
345
|
+
{
|
346
|
+
ZSTDMT_CCtx* mtctx;
|
347
|
+
U32 nbJobs = nbThreads + 2;
|
348
|
+
DEBUGLOG(3, "ZSTDMT_createCCtx_advanced");
|
349
|
+
|
308
350
|
if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
351
|
+
if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
|
352
|
+
/* invalid custom allocator */
|
353
|
+
return NULL;
|
354
|
+
|
355
|
+
mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
|
356
|
+
if (!mtctx) return NULL;
|
357
|
+
mtctx->cMem = cMem;
|
358
|
+
mtctx->nbThreads = nbThreads;
|
359
|
+
mtctx->allJobsCompleted = 1;
|
360
|
+
mtctx->sectionSize = 0;
|
361
|
+
mtctx->overlapRLog = 3;
|
362
|
+
mtctx->factory = POOL_create(nbThreads, 1);
|
363
|
+
mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
|
364
|
+
mtctx->jobIDMask = nbJobs - 1;
|
365
|
+
mtctx->buffPool = ZSTDMT_createBufferPool(nbThreads, cMem);
|
366
|
+
mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
|
367
|
+
if (!mtctx->factory | !mtctx->jobs | !mtctx->buffPool | !mtctx->cctxPool) {
|
368
|
+
ZSTDMT_freeCCtx(mtctx);
|
321
369
|
return NULL;
|
322
370
|
}
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
return
|
371
|
+
pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
|
372
|
+
pthread_cond_init(&mtctx->jobCompleted_cond, NULL);
|
373
|
+
DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
|
374
|
+
return mtctx;
|
375
|
+
}
|
376
|
+
|
377
|
+
ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads)
|
378
|
+
{
|
379
|
+
return ZSTDMT_createCCtx_advanced(nbThreads, ZSTD_defaultCMem);
|
332
380
|
}
|
333
381
|
|
334
382
|
/* ZSTDMT_releaseAllJobResources() :
|
335
|
-
*
|
383
|
+
* note : ensure all workers are killed first ! */
|
336
384
|
static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
|
337
385
|
{
|
338
386
|
unsigned jobID;
|
387
|
+
DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
|
339
388
|
for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
|
340
389
|
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
|
341
390
|
mtctx->jobs[jobID].dstBuff = g_nullBuffer;
|
@@ -356,15 +405,26 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
|
|
356
405
|
POOL_free(mtctx->factory);
|
357
406
|
if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
|
358
407
|
ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */
|
408
|
+
ZSTD_free(mtctx->jobs, mtctx->cMem);
|
359
409
|
ZSTDMT_freeCCtxPool(mtctx->cctxPool);
|
360
|
-
ZSTD_freeCDict(mtctx->
|
361
|
-
ZSTD_freeCStream(mtctx->cstream);
|
410
|
+
ZSTD_freeCDict(mtctx->cdictLocal);
|
362
411
|
pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
|
363
412
|
pthread_cond_destroy(&mtctx->jobCompleted_cond);
|
364
|
-
|
413
|
+
ZSTD_free(mtctx, mtctx->cMem);
|
365
414
|
return 0;
|
366
415
|
}
|
367
416
|
|
417
|
+
size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
|
418
|
+
{
|
419
|
+
if (mtctx == NULL) return 0; /* supports sizeof NULL */
|
420
|
+
return sizeof(*mtctx)
|
421
|
+
+ POOL_sizeof(mtctx->factory)
|
422
|
+
+ ZSTDMT_sizeof_bufferPool(mtctx->buffPool)
|
423
|
+
+ (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
|
424
|
+
+ ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
|
425
|
+
+ ZSTD_sizeof_CDict(mtctx->cdictLocal);
|
426
|
+
}
|
427
|
+
|
368
428
|
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
|
369
429
|
{
|
370
430
|
switch(parameter)
|
@@ -373,7 +433,7 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
|
|
373
433
|
mtctx->sectionSize = value;
|
374
434
|
return 0;
|
375
435
|
case ZSTDMT_p_overlapSectionLog :
|
376
|
-
|
436
|
+
DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value);
|
377
437
|
mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
|
378
438
|
return 0;
|
379
439
|
default :
|
@@ -386,31 +446,49 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
|
|
386
446
|
/* ===== Multi-threaded compression ===== */
|
387
447
|
/* ------------------------------------------ */
|
388
448
|
|
389
|
-
|
449
|
+
static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
|
450
|
+
size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
|
451
|
+
size_t const chunkMaxSize = chunkSizeTarget << 2;
|
452
|
+
size_t const passSizeMax = chunkMaxSize * nbThreads;
|
453
|
+
unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
|
454
|
+
unsigned const nbChunksLarge = multiplier * nbThreads;
|
455
|
+
unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
|
456
|
+
unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
|
457
|
+
return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
|
458
|
+
}
|
459
|
+
|
460
|
+
|
461
|
+
size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
390
462
|
void* dst, size_t dstCapacity,
|
391
463
|
const void* src, size_t srcSize,
|
392
|
-
|
464
|
+
const ZSTD_CDict* cdict,
|
465
|
+
ZSTD_parameters const params,
|
466
|
+
unsigned overlapRLog)
|
393
467
|
{
|
394
|
-
|
395
|
-
|
396
|
-
size_t const overlapSize = (size_t)1 << (params.cParams.windowLog - overlapLog);
|
397
|
-
size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);
|
398
|
-
unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + 1;
|
399
|
-
unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
|
468
|
+
size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
|
469
|
+
unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads);
|
400
470
|
size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
|
401
|
-
size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) <
|
402
|
-
size_t remainingSrcSize = srcSize;
|
471
|
+
size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
|
403
472
|
const char* const srcStart = (const char*)src;
|
473
|
+
size_t remainingSrcSize = srcSize;
|
404
474
|
unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
|
405
475
|
size_t frameStartPos = 0, dstBufferPos = 0;
|
406
476
|
|
407
|
-
DEBUGLOG(
|
408
|
-
DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
|
409
|
-
params.fParams.contentSizeFlag = 1;
|
410
|
-
|
477
|
+
DEBUGLOG(4, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
|
411
478
|
if (nbChunks==1) { /* fallback to single-thread mode */
|
412
479
|
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
|
413
|
-
return
|
480
|
+
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams);
|
481
|
+
return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
|
482
|
+
}
|
483
|
+
assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is useful to avoid allocating extra buffers */
|
484
|
+
|
485
|
+
if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */
|
486
|
+
U32 nbJobs = nbChunks;
|
487
|
+
ZSTD_free(mtctx->jobs, mtctx->cMem);
|
488
|
+
mtctx->jobIDMask = 0;
|
489
|
+
mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
|
490
|
+
if (mtctx->jobs==NULL) return ERROR(memory_allocation);
|
491
|
+
mtctx->jobIDMask = nbJobs - 1;
|
414
492
|
}
|
415
493
|
|
416
494
|
{ unsigned u;
|
@@ -425,15 +503,18 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
425
503
|
if ((cctx==NULL) || (dstBuffer.start==NULL)) {
|
426
504
|
mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */
|
427
505
|
mtctx->jobs[u].jobCompleted = 1;
|
428
|
-
nbChunks = u+1;
|
506
|
+
nbChunks = u+1; /* only wait and free u jobs, instead of initially expected nbChunks ones */
|
429
507
|
break; /* let's wait for previous jobs to complete, but don't start new ones */
|
430
508
|
}
|
431
509
|
|
432
510
|
mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
|
433
511
|
mtctx->jobs[u].dictSize = dictSize;
|
434
512
|
mtctx->jobs[u].srcSize = chunkSize;
|
513
|
+
mtctx->jobs[u].cdict = mtctx->nextJobID==0 ? cdict : NULL;
|
435
514
|
mtctx->jobs[u].fullFrameSize = srcSize;
|
436
515
|
mtctx->jobs[u].params = params;
|
516
|
+
/* do not calculate checksum within sections, but write it in header for first section */
|
517
|
+
if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
|
437
518
|
mtctx->jobs[u].dstBuff = dstBuffer;
|
438
519
|
mtctx->jobs[u].cctx = cctx;
|
439
520
|
mtctx->jobs[u].firstChunk = (u==0);
|
@@ -442,27 +523,27 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
442
523
|
mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
|
443
524
|
mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
|
444
525
|
|
445
|
-
DEBUGLOG(
|
446
|
-
DEBUG_PRINTHEX(
|
526
|
+
DEBUGLOG(5, "posting job %u (%u bytes)", u, (U32)chunkSize);
|
527
|
+
DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
|
447
528
|
POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
|
448
529
|
|
449
530
|
frameStartPos += chunkSize;
|
450
531
|
dstBufferPos += dstBufferCapacity;
|
451
532
|
remainingSrcSize -= chunkSize;
|
452
533
|
} }
|
453
|
-
/* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */
|
454
534
|
|
535
|
+
/* collect result */
|
455
536
|
{ unsigned chunkID;
|
456
537
|
size_t error = 0, dstPos = 0;
|
457
538
|
for (chunkID=0; chunkID<nbChunks; chunkID++) {
|
458
|
-
DEBUGLOG(
|
539
|
+
DEBUGLOG(5, "waiting for chunk %u ", chunkID);
|
459
540
|
PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
|
460
541
|
while (mtctx->jobs[chunkID].jobCompleted==0) {
|
461
|
-
DEBUGLOG(
|
542
|
+
DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
|
462
543
|
pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
|
463
544
|
}
|
464
545
|
pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
|
465
|
-
DEBUGLOG(
|
546
|
+
DEBUGLOG(5, "ready to write chunk %u ", chunkID);
|
466
547
|
|
467
548
|
ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
|
468
549
|
mtctx->jobs[chunkID].cctx = NULL;
|
@@ -470,20 +551,33 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
470
551
|
{ size_t const cSize = mtctx->jobs[chunkID].cSize;
|
471
552
|
if (ZSTD_isError(cSize)) error = cSize;
|
472
553
|
if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
|
473
|
-
if (chunkID) { /* note : chunk 0 is
|
554
|
+
if (chunkID) { /* note : chunk 0 is written directly at dst, which is correct position */
|
474
555
|
if (!error)
|
475
|
-
memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap
|
476
|
-
if (chunkID >= compressWithinDst)
|
556
|
+
memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */
|
557
|
+
if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */
|
558
|
+
DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
|
477
559
|
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
|
560
|
+
}
|
478
561
|
mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
|
479
562
|
}
|
480
563
|
dstPos += cSize ;
|
481
564
|
}
|
482
565
|
}
|
483
|
-
if (!error) DEBUGLOG(
|
566
|
+
if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos);
|
484
567
|
return error ? error : dstPos;
|
485
568
|
}
|
569
|
+
}
|
570
|
+
|
486
571
|
|
572
|
+
size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
573
|
+
void* dst, size_t dstCapacity,
|
574
|
+
const void* src, size_t srcSize,
|
575
|
+
int compressionLevel)
|
576
|
+
{
|
577
|
+
U32 const overlapRLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
|
578
|
+
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
|
579
|
+
params.fParams.contentSizeFlag = 1;
|
580
|
+
return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapRLog);
|
487
581
|
}
|
488
582
|
|
489
583
|
|
@@ -491,12 +585,14 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
491
585
|
/* ======= Streaming API ======= */
|
492
586
|
/* ====================================== */
|
493
587
|
|
494
|
-
static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
|
588
|
+
static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
|
589
|
+
{
|
590
|
+
DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
|
495
591
|
while (zcs->doneJobID < zcs->nextJobID) {
|
496
592
|
unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
|
497
593
|
PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
|
498
594
|
while (zcs->jobs[jobID].jobCompleted==0) {
|
499
|
-
DEBUGLOG(
|
595
|
+
DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
|
500
596
|
pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
|
501
597
|
}
|
502
598
|
pthread_mutex_unlock(&zcs->jobCompleted_mutex);
|
@@ -505,33 +601,54 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
|
|
505
601
|
}
|
506
602
|
|
507
603
|
|
508
|
-
|
509
|
-
|
510
|
-
|
604
|
+
/** ZSTDMT_initCStream_internal() :
|
605
|
+
* internal usage only */
|
606
|
+
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
|
607
|
+
const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
|
608
|
+
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
511
609
|
{
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
610
|
+
DEBUGLOG(4, "ZSTDMT_initCStream_internal");
|
611
|
+
/* params are supposed to be fully validated at this point */
|
612
|
+
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
613
|
+
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
|
614
|
+
|
615
|
+
if (zcs->nbThreads==1) {
|
616
|
+
DEBUGLOG(4, "single thread mode");
|
617
|
+
return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
|
618
|
+
dict, dictSize, cdict,
|
619
|
+
params, pledgedSrcSize);
|
620
|
+
}
|
621
|
+
|
622
|
+
if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */
|
516
623
|
ZSTDMT_waitForAllJobsCompleted(zcs);
|
517
624
|
ZSTDMT_releaseAllJobResources(zcs);
|
518
625
|
zcs->allJobsCompleted = 1;
|
519
626
|
}
|
627
|
+
|
520
628
|
zcs->params = params;
|
521
|
-
if (updateDict) {
|
522
|
-
ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
|
523
|
-
if (dict && dictSize) {
|
524
|
-
zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params.cParams, cmem);
|
525
|
-
if (zcs->cdict == NULL) return ERROR(memory_allocation);
|
526
|
-
} }
|
527
629
|
zcs->frameContentSize = pledgedSrcSize;
|
630
|
+
if (dict) {
|
631
|
+
DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
|
632
|
+
ZSTD_freeCDict(zcs->cdictLocal);
|
633
|
+
zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
|
634
|
+
0 /* byRef */, ZSTD_dm_auto, /* note : a loadPrefix becomes an internal CDict */
|
635
|
+
params.cParams, zcs->cMem);
|
636
|
+
zcs->cdict = zcs->cdictLocal;
|
637
|
+
if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
|
638
|
+
} else {
|
639
|
+
DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
|
640
|
+
ZSTD_freeCDict(zcs->cdictLocal);
|
641
|
+
zcs->cdictLocal = NULL;
|
642
|
+
zcs->cdict = cdict;
|
643
|
+
}
|
644
|
+
|
528
645
|
zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
|
529
646
|
DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
|
530
|
-
DEBUGLOG(
|
647
|
+
DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
|
531
648
|
zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
|
532
649
|
zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
|
533
650
|
zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
|
534
|
-
DEBUGLOG(
|
651
|
+
DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
|
535
652
|
zcs->marginSize = zcs->targetSectionSize >> 2;
|
536
653
|
zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
|
537
654
|
zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
|
@@ -546,24 +663,39 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
|
|
546
663
|
return 0;
|
547
664
|
}
|
548
665
|
|
549
|
-
size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx*
|
666
|
+
size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
550
667
|
const void* dict, size_t dictSize,
|
551
668
|
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
552
669
|
{
|
553
|
-
|
670
|
+
DEBUGLOG(5, "ZSTDMT_initCStream_advanced");
|
671
|
+
return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize);
|
554
672
|
}
|
555
673
|
|
674
|
+
size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
675
|
+
const ZSTD_CDict* cdict,
|
676
|
+
ZSTD_frameParameters fParams,
|
677
|
+
unsigned long long pledgedSrcSize)
|
678
|
+
{
|
679
|
+
ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
|
680
|
+
if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
|
681
|
+
params.fParams = fParams;
|
682
|
+
return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, cdict,
|
683
|
+
params, pledgedSrcSize);
|
684
|
+
}
|
685
|
+
|
686
|
+
|
556
687
|
/* ZSTDMT_resetCStream() :
|
557
688
|
* pledgedSrcSize is optional and can be zero == unknown */
|
558
689
|
size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
|
559
690
|
{
|
560
|
-
if (zcs->nbThreads==1)
|
691
|
+
if (zcs->nbThreads==1)
|
692
|
+
return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
|
561
693
|
return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
|
562
694
|
}
|
563
695
|
|
564
696
|
size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
|
565
697
|
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
|
566
|
-
return ZSTDMT_initCStream_internal(zcs, NULL, 0,
|
698
|
+
return ZSTDMT_initCStream_internal(zcs, NULL, 0, NULL, params, 0);
|
567
699
|
}
|
568
700
|
|
569
701
|
|
@@ -582,13 +714,16 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
|
|
582
714
|
return ERROR(memory_allocation);
|
583
715
|
}
|
584
716
|
|
585
|
-
DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
|
717
|
+
DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
|
718
|
+
zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
|
586
719
|
zcs->jobs[jobID].src = zcs->inBuff.buffer;
|
587
720
|
zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
|
588
721
|
zcs->jobs[jobID].srcSize = srcSize;
|
589
|
-
zcs->jobs[jobID].dictSize = zcs->dictSize;
|
722
|
+
zcs->jobs[jobID].dictSize = zcs->dictSize;
|
723
|
+
assert(zcs->inBuff.filled >= srcSize + zcs->dictSize);
|
590
724
|
zcs->jobs[jobID].params = zcs->params;
|
591
|
-
|
725
|
+
/* do not calculate checksum within sections, but write it in header for first section */
|
726
|
+
if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
|
592
727
|
zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
|
593
728
|
zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
|
594
729
|
zcs->jobs[jobID].dstBuff = dstBuffer;
|
@@ -603,6 +738,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
|
|
603
738
|
/* get a new buffer for next input */
|
604
739
|
if (!endFrame) {
|
605
740
|
size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
|
741
|
+
DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
|
606
742
|
zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
|
607
743
|
if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
|
608
744
|
zcs->jobs[jobID].jobCompleted = 1;
|
@@ -611,22 +747,33 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
|
|
611
747
|
ZSTDMT_releaseAllJobResources(zcs);
|
612
748
|
return ERROR(memory_allocation);
|
613
749
|
}
|
614
|
-
DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
|
750
|
+
DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled);
|
615
751
|
zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
|
616
|
-
DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src",
|
617
|
-
|
752
|
+
DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src",
|
753
|
+
(U32)zcs->inBuff.filled, (U32)newDictSize,
|
754
|
+
(U32)(zcs->inBuff.filled - newDictSize));
|
755
|
+
memmove(zcs->inBuff.buffer.start,
|
756
|
+
(const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
|
757
|
+
zcs->inBuff.filled);
|
618
758
|
DEBUGLOG(5, "new inBuff pre-filled");
|
619
759
|
zcs->dictSize = newDictSize;
|
620
|
-
} else {
|
760
|
+
} else { /* if (endFrame==1) */
|
761
|
+
DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
|
621
762
|
zcs->inBuff.buffer = g_nullBuffer;
|
622
763
|
zcs->inBuff.filled = 0;
|
623
764
|
zcs->dictSize = 0;
|
624
765
|
zcs->frameEnded = 1;
|
625
766
|
if (zcs->nextJobID == 0)
|
626
|
-
|
767
|
+
/* single chunk exception : checksum is calculated directly within worker thread */
|
768
|
+
zcs->params.fParams.checksumFlag = 0;
|
627
769
|
}
|
628
770
|
|
629
|
-
DEBUGLOG(
|
771
|
+
DEBUGLOG(4, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)",
|
772
|
+
zcs->nextJobID,
|
773
|
+
(U32)zcs->jobs[jobID].srcSize,
|
774
|
+
zcs->jobs[jobID].lastChunk,
|
775
|
+
zcs->doneJobID,
|
776
|
+
zcs->doneJobID & zcs->jobIDMask);
|
630
777
|
POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */
|
631
778
|
zcs->nextJobID++;
|
632
779
|
return 0;
|
@@ -664,7 +811,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
|
|
664
811
|
XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
|
665
812
|
if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
|
666
813
|
U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
|
667
|
-
DEBUGLOG(
|
814
|
+
DEBUGLOG(5, "writing checksum : %08X \n", checksum);
|
668
815
|
MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
|
669
816
|
job.cSize += 4;
|
670
817
|
zcs->jobs[wJobID].cSize += 4;
|
@@ -675,7 +822,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
|
|
675
822
|
zcs->jobs[wJobID].jobScanned = 1;
|
676
823
|
}
|
677
824
|
{ size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
|
678
|
-
DEBUGLOG(
|
825
|
+
DEBUGLOG(5, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
|
679
826
|
memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
|
680
827
|
output->pos += toWrite;
|
681
828
|
job.dstFlushed += toWrite;
|
@@ -696,26 +843,81 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
|
|
696
843
|
} }
|
697
844
|
|
698
845
|
|
699
|
-
|
846
|
+
/** ZSTDMT_compressStream_generic() :
|
847
|
+
* internal use only
|
848
|
+
* assumption : output and input are valid (pos <= size)
|
849
|
+
* @return : minimum amount of data remaining to flush, 0 if none */
|
850
|
+
size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
851
|
+
ZSTD_outBuffer* output,
|
852
|
+
ZSTD_inBuffer* input,
|
853
|
+
ZSTD_EndDirective endOp)
|
700
854
|
{
|
701
|
-
size_t const newJobThreshold =
|
702
|
-
|
703
|
-
|
855
|
+
size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize + mtctx->marginSize;
|
856
|
+
assert(output->pos <= output->size);
|
857
|
+
assert(input->pos <= input->size);
|
858
|
+
if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
|
859
|
+
/* current frame being ended. Only flush/end are allowed. Or start new frame with init */
|
860
|
+
return ERROR(stage_wrong);
|
861
|
+
}
|
862
|
+
if (mtctx->nbThreads==1) {
|
863
|
+
return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
|
864
|
+
}
|
865
|
+
|
866
|
+
/* single-pass shortcut (note : this is blocking-mode) */
|
867
|
+
if ( (mtctx->nextJobID==0) /* just started */
|
868
|
+
&& (mtctx->inBuff.filled==0) /* nothing buffered */
|
869
|
+
&& (endOp==ZSTD_e_end) /* end order */
|
870
|
+
&& (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough room */
|
871
|
+
size_t const cSize = ZSTDMT_compress_advanced(mtctx,
|
872
|
+
(char*)output->dst + output->pos, output->size - output->pos,
|
873
|
+
(const char*)input->src + input->pos, input->size - input->pos,
|
874
|
+
mtctx->cdict, mtctx->params, mtctx->overlapRLog);
|
875
|
+
if (ZSTD_isError(cSize)) return cSize;
|
876
|
+
input->pos = input->size;
|
877
|
+
output->pos += cSize;
|
878
|
+
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); /* was allocated in initStream */
|
879
|
+
mtctx->allJobsCompleted = 1;
|
880
|
+
mtctx->frameEnded = 1;
|
881
|
+
return 0;
|
882
|
+
}
|
704
883
|
|
705
884
|
/* fill input buffer */
|
706
|
-
|
707
|
-
|
885
|
+
if ((input->src) && (mtctx->inBuff.buffer.start)) { /* support NULL input */
|
886
|
+
size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
|
887
|
+
DEBUGLOG(2, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
|
888
|
+
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
|
708
889
|
input->pos += toLoad;
|
709
|
-
|
890
|
+
mtctx->inBuff.filled += toLoad;
|
710
891
|
}
|
711
892
|
|
712
|
-
if ( (
|
713
|
-
|
714
|
-
CHECK_F( ZSTDMT_createCompressionJob(
|
893
|
+
if ( (mtctx->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
|
894
|
+
&& (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { /* avoid overwriting job round buffer */
|
895
|
+
CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
|
715
896
|
}
|
716
897
|
|
717
|
-
/* check for data to
|
718
|
-
CHECK_F( ZSTDMT_flushNextJob(
|
898
|
+
/* check for potential compressed data ready to be flushed */
|
899
|
+
CHECK_F( ZSTDMT_flushNextJob(mtctx, output, (mtctx->inBuff.filled == mtctx->inBuffSize) /* blockToFlush */) ); /* block if it wasn't possible to create new job due to saturation */
|
900
|
+
|
901
|
+
if (input->pos < input->size) /* input not consumed : do not flush yet */
|
902
|
+
endOp = ZSTD_e_continue;
|
903
|
+
|
904
|
+
switch(endOp)
|
905
|
+
{
|
906
|
+
case ZSTD_e_flush:
|
907
|
+
return ZSTDMT_flushStream(mtctx, output);
|
908
|
+
case ZSTD_e_end:
|
909
|
+
return ZSTDMT_endStream(mtctx, output);
|
910
|
+
case ZSTD_e_continue:
|
911
|
+
return 1;
|
912
|
+
default:
|
913
|
+
return ERROR(GENERIC); /* invalid endDirective */
|
914
|
+
}
|
915
|
+
}
|
916
|
+
|
917
|
+
|
918
|
+
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
919
|
+
{
|
920
|
+
CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
|
719
921
|
|
720
922
|
/* recommended next input size : fill current input buffer */
|
721
923
|
return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
|
@@ -726,26 +928,28 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp
|
|
726
928
|
{
|
727
929
|
size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;
|
728
930
|
|
729
|
-
if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
|
730
931
|
if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
|
731
932
|
&& (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
|
732
933
|
CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
|
733
934
|
}
|
734
935
|
|
735
936
|
/* check if there is any data available to flush */
|
736
|
-
|
737
|
-
return ZSTDMT_flushNextJob(zcs, output, 1);
|
937
|
+
return ZSTDMT_flushNextJob(zcs, output, 1 /* blockToFlush */);
|
738
938
|
}
|
739
939
|
|
740
940
|
|
741
941
|
size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
|
742
942
|
{
|
743
|
-
|
744
|
-
|
943
|
+
DEBUGLOG(5, "ZSTDMT_flushStream");
|
944
|
+
if (zcs->nbThreads==1)
|
945
|
+
return ZSTD_flushStream(zcs->cctxPool->cctx[0], output);
|
946
|
+
return ZSTDMT_flushStream_internal(zcs, output, 0 /* endFrame */);
|
745
947
|
}
|
746
948
|
|
747
949
|
size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
|
748
950
|
{
|
749
|
-
|
750
|
-
|
951
|
+
DEBUGLOG(4, "ZSTDMT_endStream");
|
952
|
+
if (zcs->nbThreads==1)
|
953
|
+
return ZSTD_endStream(zcs->cctxPool->cctx[0], output);
|
954
|
+
return ZSTDMT_flushStream_internal(zcs, output, 1 /* endFrame */);
|
751
955
|
}
|