zstd-ruby 1.2.0.0 → 1.3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +7 -5
- data/ext/zstdruby/libzstd/common/bitstream.h +23 -9
- data/ext/zstdruby/libzstd/common/error_private.c +4 -1
- data/ext/zstdruby/libzstd/common/huf.h +20 -0
- data/ext/zstdruby/libzstd/common/mem.h +0 -14
- data/ext/zstdruby/libzstd/common/pool.c +12 -0
- data/ext/zstdruby/libzstd/common/pool.h +5 -0
- data/ext/zstdruby/libzstd/common/threading.c +0 -1
- data/ext/zstdruby/libzstd/common/zstd_common.c +25 -18
- data/ext/zstdruby/libzstd/common/zstd_errors.h +15 -7
- data/ext/zstdruby/libzstd/common/zstd_internal.h +59 -9
- data/ext/zstdruby/libzstd/compress/huf_compress.c +7 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1082 -487
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +30 -15
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +362 -158
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +49 -13
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +150 -26
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +380 -258
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +23 -37
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +30 -40
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +104 -95
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +14 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -12
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -14
- data/ext/zstdruby/libzstd/zstd.h +507 -166
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +2 -2
@@ -43,6 +43,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t src
|
|
43
43
|
if (ssPtr->litLengthSum == 0) {
|
44
44
|
if (srcSize <= 1024) ssPtr->staticPrices = 1;
|
45
45
|
|
46
|
+
assert(ssPtr->litFreq!=NULL);
|
46
47
|
for (u=0; u<=MaxLit; u++)
|
47
48
|
ssPtr->litFreq[u] = 0;
|
48
49
|
for (u=0; u<srcSize; u++)
|
@@ -201,6 +202,20 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
|
|
201
202
|
}
|
202
203
|
|
203
204
|
|
205
|
+
/* function safe only for comparisons */
|
206
|
+
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
|
207
|
+
{
|
208
|
+
switch (length)
|
209
|
+
{
|
210
|
+
default :
|
211
|
+
case 4 : return MEM_read32(memPtr);
|
212
|
+
case 3 : if (MEM_isLittleEndian())
|
213
|
+
return MEM_read32(memPtr)<<8;
|
214
|
+
else
|
215
|
+
return MEM_read32(memPtr)>>8;
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
204
219
|
|
205
220
|
/* Update hashTable3 up to ip (excluded)
|
206
221
|
Assumption : always within prefix (i.e. not within extDict) */
|
@@ -234,12 +249,12 @@ static U32 ZSTD_insertBtAndGetAllMatches (
|
|
234
249
|
{
|
235
250
|
const BYTE* const base = zc->base;
|
236
251
|
const U32 current = (U32)(ip-base);
|
237
|
-
const U32 hashLog = zc->
|
252
|
+
const U32 hashLog = zc->appliedParams.cParams.hashLog;
|
238
253
|
const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
|
239
254
|
U32* const hashTable = zc->hashTable;
|
240
255
|
U32 matchIndex = hashTable[h];
|
241
256
|
U32* const bt = zc->chainTable;
|
242
|
-
const U32 btLog = zc->
|
257
|
+
const U32 btLog = zc->appliedParams.cParams.chainLog - 1;
|
243
258
|
const U32 btMask= (1U << btLog) - 1;
|
244
259
|
size_t commonLengthSmaller=0, commonLengthLarger=0;
|
245
260
|
const BYTE* const dictBase = zc->dictBase;
|
@@ -267,7 +282,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
|
|
267
282
|
if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
|
268
283
|
} else {
|
269
284
|
match = dictBase + matchIndex3;
|
270
|
-
if (
|
285
|
+
if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
|
271
286
|
currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
|
272
287
|
}
|
273
288
|
|
@@ -410,10 +425,10 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
|
410
425
|
const BYTE* const base = ctx->base;
|
411
426
|
const BYTE* const prefixStart = base + ctx->dictLimit;
|
412
427
|
|
413
|
-
const U32 maxSearches = 1U << ctx->
|
414
|
-
const U32 sufficient_len = ctx->
|
415
|
-
const U32 mls = ctx->
|
416
|
-
const U32 minMatch = (ctx->
|
428
|
+
const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
|
429
|
+
const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
|
430
|
+
const U32 mls = ctx->appliedParams.cParams.searchLength;
|
431
|
+
const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
|
417
432
|
|
418
433
|
ZSTD_optimal_t* opt = seqStorePtr->priceTable;
|
419
434
|
ZSTD_match_t* matches = seqStorePtr->matchTable;
|
@@ -439,7 +454,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
|
439
454
|
for (i=(ip == anchor); i<last_i; i++) {
|
440
455
|
const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
|
441
456
|
if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
|
442
|
-
&& (
|
457
|
+
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
|
443
458
|
mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
|
444
459
|
if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
|
445
460
|
best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
|
@@ -524,7 +539,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
|
|
524
539
|
for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
|
525
540
|
const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
|
526
541
|
if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
|
527
|
-
&& (
|
542
|
+
&& (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
|
528
543
|
mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
|
529
544
|
|
530
545
|
if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
|
@@ -663,10 +678,10 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
|
|
663
678
|
const BYTE* const dictBase = ctx->dictBase;
|
664
679
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
665
680
|
|
666
|
-
const U32 maxSearches = 1U << ctx->
|
667
|
-
const U32 sufficient_len = ctx->
|
668
|
-
const U32 mls = ctx->
|
669
|
-
const U32 minMatch = (ctx->
|
681
|
+
const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
|
682
|
+
const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
|
683
|
+
const U32 mls = ctx->appliedParams.cParams.searchLength;
|
684
|
+
const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
|
670
685
|
|
671
686
|
ZSTD_optimal_t* opt = seqStorePtr->priceTable;
|
672
687
|
ZSTD_match_t* matches = seqStorePtr->matchTable;
|
@@ -698,7 +713,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
|
|
698
713
|
const BYTE* const repMatch = repBase + repIndex;
|
699
714
|
if ( (repCur > 0 && repCur <= (S32)current)
|
700
715
|
&& (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
|
701
|
-
&& (
|
716
|
+
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
702
717
|
/* repcode detected we should take it */
|
703
718
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
704
719
|
mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
|
@@ -794,7 +809,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
|
|
794
809
|
const BYTE* const repMatch = repBase + repIndex;
|
795
810
|
if ( (repCur > 0 && repCur <= (S32)(current+cur))
|
796
811
|
&& (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
|
797
|
-
&& (
|
812
|
+
&& (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
798
813
|
/* repcode detected */
|
799
814
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
800
815
|
mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
|
@@ -14,34 +14,31 @@
|
|
14
14
|
|
15
15
|
/* ====== Compiler specifics ====== */
|
16
16
|
#if defined(_MSC_VER)
|
17
|
-
# pragma warning(disable : 4204)
|
17
|
+
# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
|
18
18
|
#endif
|
19
19
|
|
20
20
|
|
21
21
|
/* ====== Dependencies ====== */
|
22
|
-
#include <
|
23
|
-
#include
|
24
|
-
#include "
|
25
|
-
#include "
|
26
|
-
#include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
22
|
+
#include <string.h> /* memcpy, memset */
|
23
|
+
#include "pool.h" /* threadpool */
|
24
|
+
#include "threading.h" /* mutex */
|
25
|
+
#include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
27
26
|
#include "zstdmt_compress.h"
|
28
27
|
|
29
28
|
|
30
29
|
/* ====== Debug ====== */
|
31
|
-
#if
|
30
|
+
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
|
32
31
|
|
33
32
|
# include <stdio.h>
|
34
33
|
# include <unistd.h>
|
35
34
|
# include <sys/times.h>
|
36
|
-
|
37
|
-
# define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
|
38
|
-
# define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
|
35
|
+
# define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
|
39
36
|
|
40
|
-
# define DEBUG_PRINTHEX(l,p,n) {
|
41
|
-
unsigned debug_u;
|
42
|
-
for (debug_u=0; debug_u<(n); debug_u++)
|
37
|
+
# define DEBUG_PRINTHEX(l,p,n) { \
|
38
|
+
unsigned debug_u; \
|
39
|
+
for (debug_u=0; debug_u<(n); debug_u++) \
|
43
40
|
DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
|
44
|
-
DEBUGLOGRAW(l, " \n");
|
41
|
+
DEBUGLOGRAW(l, " \n"); \
|
45
42
|
}
|
46
43
|
|
47
44
|
static unsigned long long GetCurrentClockTimeMicroseconds(void)
|
@@ -53,22 +50,22 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
|
|
53
50
|
return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
|
54
51
|
}
|
55
52
|
|
56
|
-
#define MUTEX_WAIT_TIME_DLEVEL
|
57
|
-
#define PTHREAD_MUTEX_LOCK(mutex) \
|
58
|
-
if (
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
} else pthread_mutex_lock(mutex);
|
53
|
+
#define MUTEX_WAIT_TIME_DLEVEL 6
|
54
|
+
#define PTHREAD_MUTEX_LOCK(mutex) { \
|
55
|
+
if (ZSTD_DEBUG>=MUTEX_WAIT_TIME_DLEVEL) { \
|
56
|
+
unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
|
57
|
+
pthread_mutex_lock(mutex); \
|
58
|
+
{ unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
|
59
|
+
unsigned long long const elapsedTime = (afterTime-beforeTime); \
|
60
|
+
if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
|
61
|
+
DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
|
62
|
+
elapsedTime, #mutex); \
|
63
|
+
} } \
|
64
|
+
} else pthread_mutex_lock(mutex); \
|
65
|
+
}
|
68
66
|
|
69
67
|
#else
|
70
68
|
|
71
|
-
# define DEBUGLOG(l, ...) {} /* disabled */
|
72
69
|
# define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
|
73
70
|
# define DEBUG_PRINTHEX(l,p,n) {}
|
74
71
|
|
@@ -87,16 +84,19 @@ static const buffer_t g_nullBuffer = { NULL, 0 };
|
|
87
84
|
typedef struct ZSTDMT_bufferPool_s {
|
88
85
|
unsigned totalBuffers;
|
89
86
|
unsigned nbBuffers;
|
87
|
+
ZSTD_customMem cMem;
|
90
88
|
buffer_t bTable[1]; /* variable size */
|
91
89
|
} ZSTDMT_bufferPool;
|
92
90
|
|
93
|
-
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
|
91
|
+
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
|
94
92
|
{
|
95
93
|
unsigned const maxNbBuffers = 2*nbThreads + 2;
|
96
|
-
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)
|
94
|
+
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
|
95
|
+
sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
|
97
96
|
if (bufPool==NULL) return NULL;
|
98
97
|
bufPool->totalBuffers = maxNbBuffers;
|
99
98
|
bufPool->nbBuffers = 0;
|
99
|
+
bufPool->cMem = cMem;
|
100
100
|
return bufPool;
|
101
101
|
}
|
102
102
|
|
@@ -105,23 +105,39 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
|
|
105
105
|
unsigned u;
|
106
106
|
if (!bufPool) return; /* compatibility with free on NULL */
|
107
107
|
for (u=0; u<bufPool->totalBuffers; u++)
|
108
|
-
|
109
|
-
|
108
|
+
ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
|
109
|
+
ZSTD_free(bufPool, bufPool->cMem);
|
110
|
+
}
|
111
|
+
|
112
|
+
/* only works at initialization, not during compression */
|
113
|
+
static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
|
114
|
+
{
|
115
|
+
size_t const poolSize = sizeof(*bufPool)
|
116
|
+
+ (bufPool->totalBuffers - 1) * sizeof(buffer_t);
|
117
|
+
unsigned u;
|
118
|
+
size_t totalBufferSize = 0;
|
119
|
+
for (u=0; u<bufPool->totalBuffers; u++)
|
120
|
+
totalBufferSize += bufPool->bTable[u].size;
|
121
|
+
|
122
|
+
return poolSize + totalBufferSize;
|
110
123
|
}
|
111
124
|
|
112
|
-
|
125
|
+
/** ZSTDMT_getBuffer() :
|
126
|
+
* assumption : invocation from main thread only ! */
|
113
127
|
static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
|
114
128
|
{
|
115
129
|
if (pool->nbBuffers) { /* try to use an existing buffer */
|
116
130
|
buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
|
117
131
|
size_t const availBufferSize = buf.size;
|
118
|
-
if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
|
132
|
+
if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))
|
133
|
+
/* large enough, but not too much */
|
119
134
|
return buf;
|
120
|
-
|
135
|
+
/* size conditions not respected : scratch this buffer, create new one */
|
136
|
+
ZSTD_free(buf.start, pool->cMem);
|
121
137
|
}
|
122
138
|
/* create new buffer */
|
123
139
|
{ buffer_t buffer;
|
124
|
-
void* const start =
|
140
|
+
void* const start = ZSTD_malloc(bSize, pool->cMem);
|
125
141
|
if (start==NULL) bSize = 0;
|
126
142
|
buffer.start = start; /* note : start can be NULL if malloc fails ! */
|
127
143
|
buffer.size = bSize;
|
@@ -138,7 +154,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
|
|
138
154
|
return;
|
139
155
|
}
|
140
156
|
/* Reached bufferPool capacity (should not happen) */
|
141
|
-
|
157
|
+
ZSTD_free(buf.start, pool->cMem);
|
142
158
|
}
|
143
159
|
|
144
160
|
|
@@ -147,6 +163,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
|
|
147
163
|
typedef struct {
|
148
164
|
unsigned totalCCtx;
|
149
165
|
unsigned availCCtx;
|
166
|
+
ZSTD_customMem cMem;
|
150
167
|
ZSTD_CCtx* cctx[1]; /* variable size */
|
151
168
|
} ZSTDMT_CCtxPool;
|
152
169
|
|
@@ -158,23 +175,40 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
|
|
158
175
|
unsigned u;
|
159
176
|
for (u=0; u<pool->totalCCtx; u++)
|
160
177
|
ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
|
161
|
-
|
178
|
+
ZSTD_free(pool, pool->cMem);
|
162
179
|
}
|
163
180
|
|
164
181
|
/* ZSTDMT_createCCtxPool() :
|
165
182
|
* implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
|
166
|
-
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads
|
183
|
+
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
|
184
|
+
ZSTD_customMem cMem)
|
167
185
|
{
|
168
|
-
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*)
|
186
|
+
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
|
187
|
+
sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
|
169
188
|
if (!cctxPool) return NULL;
|
189
|
+
cctxPool->cMem = cMem;
|
170
190
|
cctxPool->totalCCtx = nbThreads;
|
171
191
|
cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
|
172
|
-
cctxPool->cctx[0] =
|
192
|
+
cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
|
173
193
|
if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
|
174
|
-
DEBUGLOG(
|
194
|
+
DEBUGLOG(3, "cctxPool created, with %u threads", nbThreads);
|
175
195
|
return cctxPool;
|
176
196
|
}
|
177
197
|
|
198
|
+
/* only works during initialization phase, not during compression */
|
199
|
+
static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
|
200
|
+
{
|
201
|
+
unsigned const nbThreads = cctxPool->totalCCtx;
|
202
|
+
size_t const poolSize = sizeof(*cctxPool)
|
203
|
+
+ (nbThreads-1)*sizeof(ZSTD_CCtx*);
|
204
|
+
unsigned u;
|
205
|
+
size_t totalCCtxSize = 0;
|
206
|
+
for (u=0; u<nbThreads; u++)
|
207
|
+
totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
|
208
|
+
|
209
|
+
return poolSize + totalCCtxSize;
|
210
|
+
}
|
211
|
+
|
178
212
|
static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
|
179
213
|
{
|
180
214
|
if (pool->availCCtx) {
|
@@ -218,7 +252,7 @@ typedef struct {
|
|
218
252
|
pthread_mutex_t* jobCompleted_mutex;
|
219
253
|
pthread_cond_t* jobCompleted_cond;
|
220
254
|
ZSTD_parameters params;
|
221
|
-
ZSTD_CDict* cdict;
|
255
|
+
const ZSTD_CDict* cdict;
|
222
256
|
unsigned long long fullFrameSize;
|
223
257
|
} ZSTDMT_jobDescription;
|
224
258
|
|
@@ -228,11 +262,11 @@ void ZSTDMT_compressChunk(void* jobDescription)
|
|
228
262
|
ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
|
229
263
|
const void* const src = (const char*)job->srcStart + job->dictSize;
|
230
264
|
buffer_t const dstBuff = job->dstBuff;
|
231
|
-
DEBUGLOG(
|
265
|
+
DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u",
|
232
266
|
job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
|
233
267
|
if (job->cdict) { /* should only happen for first segment */
|
234
268
|
size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize);
|
235
|
-
|
269
|
+
DEBUGLOG(5, "using CDict");
|
236
270
|
if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
|
237
271
|
} else { /* srcStart points at reloaded section */
|
238
272
|
if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */
|
@@ -247,12 +281,12 @@ void ZSTDMT_compressChunk(void* jobDescription)
|
|
247
281
|
ZSTD_invalidateRepCodes(job->cctx);
|
248
282
|
}
|
249
283
|
|
250
|
-
DEBUGLOG(
|
284
|
+
DEBUGLOG(5, "Compressing : ");
|
251
285
|
DEBUG_PRINTHEX(4, job->srcStart, 12);
|
252
286
|
job->cSize = (job->lastChunk) ?
|
253
287
|
ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
|
254
288
|
ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
|
255
|
-
DEBUGLOG(
|
289
|
+
DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u)",
|
256
290
|
(unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
|
257
291
|
DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
|
258
292
|
|
@@ -271,6 +305,7 @@ _endJob:
|
|
271
305
|
|
272
306
|
struct ZSTDMT_CCtx_s {
|
273
307
|
POOL_ctx* factory;
|
308
|
+
ZSTDMT_jobDescription* jobs;
|
274
309
|
ZSTDMT_bufferPool* buffPool;
|
275
310
|
ZSTDMT_CCtxPool* cctxPool;
|
276
311
|
pthread_mutex_t jobCompleted_mutex;
|
@@ -292,50 +327,64 @@ struct ZSTDMT_CCtx_s {
|
|
292
327
|
unsigned overlapRLog;
|
293
328
|
unsigned long long frameContentSize;
|
294
329
|
size_t sectionSize;
|
295
|
-
|
296
|
-
|
297
|
-
|
330
|
+
ZSTD_customMem cMem;
|
331
|
+
ZSTD_CDict* cdictLocal;
|
332
|
+
const ZSTD_CDict* cdict;
|
298
333
|
};
|
299
334
|
|
300
|
-
|
335
|
+
static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
|
301
336
|
{
|
302
|
-
|
303
|
-
U32 const minNbJobs = nbThreads + 2;
|
304
|
-
U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
|
337
|
+
U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
|
305
338
|
U32 const nbJobs = 1 << nbJobsLog2;
|
306
|
-
|
307
|
-
|
339
|
+
*nbJobsPtr = nbJobs;
|
340
|
+
return (ZSTDMT_jobDescription*) ZSTD_calloc(
|
341
|
+
nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
|
342
|
+
}
|
343
|
+
|
344
|
+
ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
|
345
|
+
{
|
346
|
+
ZSTDMT_CCtx* mtctx;
|
347
|
+
U32 nbJobs = nbThreads + 2;
|
348
|
+
DEBUGLOG(3, "ZSTDMT_createCCtx_advanced");
|
349
|
+
|
308
350
|
if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
351
|
+
if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
|
352
|
+
/* invalid custom allocator */
|
353
|
+
return NULL;
|
354
|
+
|
355
|
+
mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
|
356
|
+
if (!mtctx) return NULL;
|
357
|
+
mtctx->cMem = cMem;
|
358
|
+
mtctx->nbThreads = nbThreads;
|
359
|
+
mtctx->allJobsCompleted = 1;
|
360
|
+
mtctx->sectionSize = 0;
|
361
|
+
mtctx->overlapRLog = 3;
|
362
|
+
mtctx->factory = POOL_create(nbThreads, 1);
|
363
|
+
mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
|
364
|
+
mtctx->jobIDMask = nbJobs - 1;
|
365
|
+
mtctx->buffPool = ZSTDMT_createBufferPool(nbThreads, cMem);
|
366
|
+
mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
|
367
|
+
if (!mtctx->factory | !mtctx->jobs | !mtctx->buffPool | !mtctx->cctxPool) {
|
368
|
+
ZSTDMT_freeCCtx(mtctx);
|
321
369
|
return NULL;
|
322
370
|
}
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
return
|
371
|
+
pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
|
372
|
+
pthread_cond_init(&mtctx->jobCompleted_cond, NULL);
|
373
|
+
DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
|
374
|
+
return mtctx;
|
375
|
+
}
|
376
|
+
|
377
|
+
ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads)
|
378
|
+
{
|
379
|
+
return ZSTDMT_createCCtx_advanced(nbThreads, ZSTD_defaultCMem);
|
332
380
|
}
|
333
381
|
|
334
382
|
/* ZSTDMT_releaseAllJobResources() :
|
335
|
-
*
|
383
|
+
* note : ensure all workers are killed first ! */
|
336
384
|
static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
|
337
385
|
{
|
338
386
|
unsigned jobID;
|
387
|
+
DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
|
339
388
|
for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
|
340
389
|
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
|
341
390
|
mtctx->jobs[jobID].dstBuff = g_nullBuffer;
|
@@ -356,15 +405,26 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
|
|
356
405
|
POOL_free(mtctx->factory);
|
357
406
|
if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
|
358
407
|
ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */
|
408
|
+
ZSTD_free(mtctx->jobs, mtctx->cMem);
|
359
409
|
ZSTDMT_freeCCtxPool(mtctx->cctxPool);
|
360
|
-
ZSTD_freeCDict(mtctx->
|
361
|
-
ZSTD_freeCStream(mtctx->cstream);
|
410
|
+
ZSTD_freeCDict(mtctx->cdictLocal);
|
362
411
|
pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
|
363
412
|
pthread_cond_destroy(&mtctx->jobCompleted_cond);
|
364
|
-
|
413
|
+
ZSTD_free(mtctx, mtctx->cMem);
|
365
414
|
return 0;
|
366
415
|
}
|
367
416
|
|
417
|
+
size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
|
418
|
+
{
|
419
|
+
if (mtctx == NULL) return 0; /* supports sizeof NULL */
|
420
|
+
return sizeof(*mtctx)
|
421
|
+
+ POOL_sizeof(mtctx->factory)
|
422
|
+
+ ZSTDMT_sizeof_bufferPool(mtctx->buffPool)
|
423
|
+
+ (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
|
424
|
+
+ ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
|
425
|
+
+ ZSTD_sizeof_CDict(mtctx->cdictLocal);
|
426
|
+
}
|
427
|
+
|
368
428
|
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
|
369
429
|
{
|
370
430
|
switch(parameter)
|
@@ -373,7 +433,7 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
|
|
373
433
|
mtctx->sectionSize = value;
|
374
434
|
return 0;
|
375
435
|
case ZSTDMT_p_overlapSectionLog :
|
376
|
-
|
436
|
+
DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value);
|
377
437
|
mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
|
378
438
|
return 0;
|
379
439
|
default :
|
@@ -386,31 +446,49 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter,
|
|
386
446
|
/* ===== Multi-threaded compression ===== */
|
387
447
|
/* ------------------------------------------ */
|
388
448
|
|
389
|
-
|
449
|
+
static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
|
450
|
+
size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
|
451
|
+
size_t const chunkMaxSize = chunkSizeTarget << 2;
|
452
|
+
size_t const passSizeMax = chunkMaxSize * nbThreads;
|
453
|
+
unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
|
454
|
+
unsigned const nbChunksLarge = multiplier * nbThreads;
|
455
|
+
unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
|
456
|
+
unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
|
457
|
+
return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
|
458
|
+
}
|
459
|
+
|
460
|
+
|
461
|
+
size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
390
462
|
void* dst, size_t dstCapacity,
|
391
463
|
const void* src, size_t srcSize,
|
392
|
-
|
464
|
+
const ZSTD_CDict* cdict,
|
465
|
+
ZSTD_parameters const params,
|
466
|
+
unsigned overlapRLog)
|
393
467
|
{
|
394
|
-
|
395
|
-
|
396
|
-
size_t const overlapSize = (size_t)1 << (params.cParams.windowLog - overlapLog);
|
397
|
-
size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);
|
398
|
-
unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + 1;
|
399
|
-
unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
|
468
|
+
size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
|
469
|
+
unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads);
|
400
470
|
size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
|
401
|
-
size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) <
|
402
|
-
size_t remainingSrcSize = srcSize;
|
471
|
+
size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
|
403
472
|
const char* const srcStart = (const char*)src;
|
473
|
+
size_t remainingSrcSize = srcSize;
|
404
474
|
unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
|
405
475
|
size_t frameStartPos = 0, dstBufferPos = 0;
|
406
476
|
|
407
|
-
DEBUGLOG(
|
408
|
-
DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
|
409
|
-
params.fParams.contentSizeFlag = 1;
|
410
|
-
|
477
|
+
DEBUGLOG(4, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
|
411
478
|
if (nbChunks==1) { /* fallback to single-thread mode */
|
412
479
|
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
|
413
|
-
return
|
480
|
+
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams);
|
481
|
+
return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
|
482
|
+
}
|
483
|
+
assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is useful to avoid allocating extra buffers */
|
484
|
+
|
485
|
+
if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */
|
486
|
+
U32 nbJobs = nbChunks;
|
487
|
+
ZSTD_free(mtctx->jobs, mtctx->cMem);
|
488
|
+
mtctx->jobIDMask = 0;
|
489
|
+
mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
|
490
|
+
if (mtctx->jobs==NULL) return ERROR(memory_allocation);
|
491
|
+
mtctx->jobIDMask = nbJobs - 1;
|
414
492
|
}
|
415
493
|
|
416
494
|
{ unsigned u;
|
@@ -425,15 +503,18 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
425
503
|
if ((cctx==NULL) || (dstBuffer.start==NULL)) {
|
426
504
|
mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */
|
427
505
|
mtctx->jobs[u].jobCompleted = 1;
|
428
|
-
nbChunks = u+1;
|
506
|
+
nbChunks = u+1; /* only wait and free u jobs, instead of initially expected nbChunks ones */
|
429
507
|
break; /* let's wait for previous jobs to complete, but don't start new ones */
|
430
508
|
}
|
431
509
|
|
432
510
|
mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
|
433
511
|
mtctx->jobs[u].dictSize = dictSize;
|
434
512
|
mtctx->jobs[u].srcSize = chunkSize;
|
513
|
+
mtctx->jobs[u].cdict = mtctx->nextJobID==0 ? cdict : NULL;
|
435
514
|
mtctx->jobs[u].fullFrameSize = srcSize;
|
436
515
|
mtctx->jobs[u].params = params;
|
516
|
+
/* do not calculate checksum within sections, but write it in header for first section */
|
517
|
+
if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
|
437
518
|
mtctx->jobs[u].dstBuff = dstBuffer;
|
438
519
|
mtctx->jobs[u].cctx = cctx;
|
439
520
|
mtctx->jobs[u].firstChunk = (u==0);
|
@@ -442,27 +523,27 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
442
523
|
mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
|
443
524
|
mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
|
444
525
|
|
445
|
-
DEBUGLOG(
|
446
|
-
DEBUG_PRINTHEX(
|
526
|
+
DEBUGLOG(5, "posting job %u (%u bytes)", u, (U32)chunkSize);
|
527
|
+
DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
|
447
528
|
POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
|
448
529
|
|
449
530
|
frameStartPos += chunkSize;
|
450
531
|
dstBufferPos += dstBufferCapacity;
|
451
532
|
remainingSrcSize -= chunkSize;
|
452
533
|
} }
|
453
|
-
/* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */
|
454
534
|
|
535
|
+
/* collect result */
|
455
536
|
{ unsigned chunkID;
|
456
537
|
size_t error = 0, dstPos = 0;
|
457
538
|
for (chunkID=0; chunkID<nbChunks; chunkID++) {
|
458
|
-
DEBUGLOG(
|
539
|
+
DEBUGLOG(5, "waiting for chunk %u ", chunkID);
|
459
540
|
PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
|
460
541
|
while (mtctx->jobs[chunkID].jobCompleted==0) {
|
461
|
-
DEBUGLOG(
|
542
|
+
DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
|
462
543
|
pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
|
463
544
|
}
|
464
545
|
pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
|
465
|
-
DEBUGLOG(
|
546
|
+
DEBUGLOG(5, "ready to write chunk %u ", chunkID);
|
466
547
|
|
467
548
|
ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
|
468
549
|
mtctx->jobs[chunkID].cctx = NULL;
|
@@ -470,20 +551,33 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
470
551
|
{ size_t const cSize = mtctx->jobs[chunkID].cSize;
|
471
552
|
if (ZSTD_isError(cSize)) error = cSize;
|
472
553
|
if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
|
473
|
-
if (chunkID) { /* note : chunk 0 is
|
554
|
+
if (chunkID) { /* note : chunk 0 is written directly at dst, which is correct position */
|
474
555
|
if (!error)
|
475
|
-
memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap
|
476
|
-
if (chunkID >= compressWithinDst)
|
556
|
+
memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */
|
557
|
+
if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */
|
558
|
+
DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
|
477
559
|
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
|
560
|
+
}
|
478
561
|
mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
|
479
562
|
}
|
480
563
|
dstPos += cSize ;
|
481
564
|
}
|
482
565
|
}
|
483
|
-
if (!error) DEBUGLOG(
|
566
|
+
if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos);
|
484
567
|
return error ? error : dstPos;
|
485
568
|
}
|
569
|
+
}
|
570
|
+
|
486
571
|
|
572
|
+
size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
573
|
+
void* dst, size_t dstCapacity,
|
574
|
+
const void* src, size_t srcSize,
|
575
|
+
int compressionLevel)
|
576
|
+
{
|
577
|
+
U32 const overlapRLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3;
|
578
|
+
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
|
579
|
+
params.fParams.contentSizeFlag = 1;
|
580
|
+
return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapRLog);
|
487
581
|
}
|
488
582
|
|
489
583
|
|
@@ -491,12 +585,14 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
491
585
|
/* ======= Streaming API ======= */
|
492
586
|
/* ====================================== */
|
493
587
|
|
494
|
-
static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
|
588
|
+
static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
|
589
|
+
{
|
590
|
+
DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
|
495
591
|
while (zcs->doneJobID < zcs->nextJobID) {
|
496
592
|
unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
|
497
593
|
PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
|
498
594
|
while (zcs->jobs[jobID].jobCompleted==0) {
|
499
|
-
DEBUGLOG(
|
595
|
+
DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
|
500
596
|
pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
|
501
597
|
}
|
502
598
|
pthread_mutex_unlock(&zcs->jobCompleted_mutex);
|
@@ -505,33 +601,54 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
|
|
505
601
|
}
|
506
602
|
|
507
603
|
|
508
|
-
|
509
|
-
|
510
|
-
|
604
|
+
/** ZSTDMT_initCStream_internal() :
|
605
|
+
* internal usage only */
|
606
|
+
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
|
607
|
+
const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
|
608
|
+
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
511
609
|
{
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
610
|
+
DEBUGLOG(4, "ZSTDMT_initCStream_internal");
|
611
|
+
/* params are supposed to be fully validated at this point */
|
612
|
+
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
613
|
+
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
|
614
|
+
|
615
|
+
if (zcs->nbThreads==1) {
|
616
|
+
DEBUGLOG(4, "single thread mode");
|
617
|
+
return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
|
618
|
+
dict, dictSize, cdict,
|
619
|
+
params, pledgedSrcSize);
|
620
|
+
}
|
621
|
+
|
622
|
+
if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */
|
516
623
|
ZSTDMT_waitForAllJobsCompleted(zcs);
|
517
624
|
ZSTDMT_releaseAllJobResources(zcs);
|
518
625
|
zcs->allJobsCompleted = 1;
|
519
626
|
}
|
627
|
+
|
520
628
|
zcs->params = params;
|
521
|
-
if (updateDict) {
|
522
|
-
ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
|
523
|
-
if (dict && dictSize) {
|
524
|
-
zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params.cParams, cmem);
|
525
|
-
if (zcs->cdict == NULL) return ERROR(memory_allocation);
|
526
|
-
} }
|
527
629
|
zcs->frameContentSize = pledgedSrcSize;
|
630
|
+
if (dict) {
|
631
|
+
DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
|
632
|
+
ZSTD_freeCDict(zcs->cdictLocal);
|
633
|
+
zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
|
634
|
+
0 /* byRef */, ZSTD_dm_auto, /* note : a loadPrefix becomes an internal CDict */
|
635
|
+
params.cParams, zcs->cMem);
|
636
|
+
zcs->cdict = zcs->cdictLocal;
|
637
|
+
if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
|
638
|
+
} else {
|
639
|
+
DEBUGLOG(4,"cdictLocal: %08X", (U32)(size_t)zcs->cdictLocal);
|
640
|
+
ZSTD_freeCDict(zcs->cdictLocal);
|
641
|
+
zcs->cdictLocal = NULL;
|
642
|
+
zcs->cdict = cdict;
|
643
|
+
}
|
644
|
+
|
528
645
|
zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
|
529
646
|
DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
|
530
|
-
DEBUGLOG(
|
647
|
+
DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
|
531
648
|
zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
|
532
649
|
zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
|
533
650
|
zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
|
534
|
-
DEBUGLOG(
|
651
|
+
DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
|
535
652
|
zcs->marginSize = zcs->targetSectionSize >> 2;
|
536
653
|
zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
|
537
654
|
zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
|
@@ -546,24 +663,39 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
|
|
546
663
|
return 0;
|
547
664
|
}
|
548
665
|
|
549
|
-
size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx*
|
666
|
+
size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
550
667
|
const void* dict, size_t dictSize,
|
551
668
|
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
552
669
|
{
|
553
|
-
|
670
|
+
DEBUGLOG(5, "ZSTDMT_initCStream_advanced");
|
671
|
+
return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize);
|
554
672
|
}
|
555
673
|
|
674
|
+
size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
675
|
+
const ZSTD_CDict* cdict,
|
676
|
+
ZSTD_frameParameters fParams,
|
677
|
+
unsigned long long pledgedSrcSize)
|
678
|
+
{
|
679
|
+
ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
|
680
|
+
if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
|
681
|
+
params.fParams = fParams;
|
682
|
+
return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, cdict,
|
683
|
+
params, pledgedSrcSize);
|
684
|
+
}
|
685
|
+
|
686
|
+
|
556
687
|
/* ZSTDMT_resetCStream() :
|
557
688
|
* pledgedSrcSize is optional and can be zero == unknown */
|
558
689
|
size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
|
559
690
|
{
|
560
|
-
if (zcs->nbThreads==1)
|
691
|
+
if (zcs->nbThreads==1)
|
692
|
+
return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
|
561
693
|
return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
|
562
694
|
}
|
563
695
|
|
564
696
|
size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
|
565
697
|
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
|
566
|
-
return ZSTDMT_initCStream_internal(zcs, NULL, 0,
|
698
|
+
return ZSTDMT_initCStream_internal(zcs, NULL, 0, NULL, params, 0);
|
567
699
|
}
|
568
700
|
|
569
701
|
|
@@ -582,13 +714,16 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
|
|
582
714
|
return ERROR(memory_allocation);
|
583
715
|
}
|
584
716
|
|
585
|
-
DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
|
717
|
+
DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ",
|
718
|
+
zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
|
586
719
|
zcs->jobs[jobID].src = zcs->inBuff.buffer;
|
587
720
|
zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
|
588
721
|
zcs->jobs[jobID].srcSize = srcSize;
|
589
|
-
zcs->jobs[jobID].dictSize = zcs->dictSize;
|
722
|
+
zcs->jobs[jobID].dictSize = zcs->dictSize;
|
723
|
+
assert(zcs->inBuff.filled >= srcSize + zcs->dictSize);
|
590
724
|
zcs->jobs[jobID].params = zcs->params;
|
591
|
-
|
725
|
+
/* do not calculate checksum within sections, but write it in header for first section */
|
726
|
+
if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
|
592
727
|
zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
|
593
728
|
zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
|
594
729
|
zcs->jobs[jobID].dstBuff = dstBuffer;
|
@@ -603,6 +738,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
|
|
603
738
|
/* get a new buffer for next input */
|
604
739
|
if (!endFrame) {
|
605
740
|
size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
|
741
|
+
DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
|
606
742
|
zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
|
607
743
|
if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
|
608
744
|
zcs->jobs[jobID].jobCompleted = 1;
|
@@ -611,22 +747,33 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
|
|
611
747
|
ZSTDMT_releaseAllJobResources(zcs);
|
612
748
|
return ERROR(memory_allocation);
|
613
749
|
}
|
614
|
-
DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
|
750
|
+
DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled);
|
615
751
|
zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
|
616
|
-
DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src",
|
617
|
-
|
752
|
+
DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src",
|
753
|
+
(U32)zcs->inBuff.filled, (U32)newDictSize,
|
754
|
+
(U32)(zcs->inBuff.filled - newDictSize));
|
755
|
+
memmove(zcs->inBuff.buffer.start,
|
756
|
+
(const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
|
757
|
+
zcs->inBuff.filled);
|
618
758
|
DEBUGLOG(5, "new inBuff pre-filled");
|
619
759
|
zcs->dictSize = newDictSize;
|
620
|
-
} else {
|
760
|
+
} else { /* if (endFrame==1) */
|
761
|
+
DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame);
|
621
762
|
zcs->inBuff.buffer = g_nullBuffer;
|
622
763
|
zcs->inBuff.filled = 0;
|
623
764
|
zcs->dictSize = 0;
|
624
765
|
zcs->frameEnded = 1;
|
625
766
|
if (zcs->nextJobID == 0)
|
626
|
-
|
767
|
+
/* single chunk exception : checksum is calculated directly within worker thread */
|
768
|
+
zcs->params.fParams.checksumFlag = 0;
|
627
769
|
}
|
628
770
|
|
629
|
-
DEBUGLOG(
|
771
|
+
DEBUGLOG(4, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)",
|
772
|
+
zcs->nextJobID,
|
773
|
+
(U32)zcs->jobs[jobID].srcSize,
|
774
|
+
zcs->jobs[jobID].lastChunk,
|
775
|
+
zcs->doneJobID,
|
776
|
+
zcs->doneJobID & zcs->jobIDMask);
|
630
777
|
POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */
|
631
778
|
zcs->nextJobID++;
|
632
779
|
return 0;
|
@@ -664,7 +811,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
|
|
664
811
|
XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
|
665
812
|
if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
|
666
813
|
U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
|
667
|
-
DEBUGLOG(
|
814
|
+
DEBUGLOG(5, "writing checksum : %08X \n", checksum);
|
668
815
|
MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
|
669
816
|
job.cSize += 4;
|
670
817
|
zcs->jobs[wJobID].cSize += 4;
|
@@ -675,7 +822,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
|
|
675
822
|
zcs->jobs[wJobID].jobScanned = 1;
|
676
823
|
}
|
677
824
|
{ size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
|
678
|
-
DEBUGLOG(
|
825
|
+
DEBUGLOG(5, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
|
679
826
|
memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
|
680
827
|
output->pos += toWrite;
|
681
828
|
job.dstFlushed += toWrite;
|
@@ -696,26 +843,81 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
|
|
696
843
|
} }
|
697
844
|
|
698
845
|
|
699
|
-
|
846
|
+
/** ZSTDMT_compressStream_generic() :
|
847
|
+
* internal use only
|
848
|
+
* assumption : output and input are valid (pos <= size)
|
849
|
+
* @return : minimum amount of data remaining to flush, 0 if none */
|
850
|
+
size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
851
|
+
ZSTD_outBuffer* output,
|
852
|
+
ZSTD_inBuffer* input,
|
853
|
+
ZSTD_EndDirective endOp)
|
700
854
|
{
|
701
|
-
size_t const newJobThreshold =
|
702
|
-
|
703
|
-
|
855
|
+
size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize + mtctx->marginSize;
|
856
|
+
assert(output->pos <= output->size);
|
857
|
+
assert(input->pos <= input->size);
|
858
|
+
if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
|
859
|
+
/* current frame being ended. Only flush/end are allowed. Or start new frame with init */
|
860
|
+
return ERROR(stage_wrong);
|
861
|
+
}
|
862
|
+
if (mtctx->nbThreads==1) {
|
863
|
+
return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
|
864
|
+
}
|
865
|
+
|
866
|
+
/* single-pass shortcut (note : this is blocking-mode) */
|
867
|
+
if ( (mtctx->nextJobID==0) /* just started */
|
868
|
+
&& (mtctx->inBuff.filled==0) /* nothing buffered */
|
869
|
+
&& (endOp==ZSTD_e_end) /* end order */
|
870
|
+
&& (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough room */
|
871
|
+
size_t const cSize = ZSTDMT_compress_advanced(mtctx,
|
872
|
+
(char*)output->dst + output->pos, output->size - output->pos,
|
873
|
+
(const char*)input->src + input->pos, input->size - input->pos,
|
874
|
+
mtctx->cdict, mtctx->params, mtctx->overlapRLog);
|
875
|
+
if (ZSTD_isError(cSize)) return cSize;
|
876
|
+
input->pos = input->size;
|
877
|
+
output->pos += cSize;
|
878
|
+
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); /* was allocated in initStream */
|
879
|
+
mtctx->allJobsCompleted = 1;
|
880
|
+
mtctx->frameEnded = 1;
|
881
|
+
return 0;
|
882
|
+
}
|
704
883
|
|
705
884
|
/* fill input buffer */
|
706
|
-
|
707
|
-
|
885
|
+
if ((input->src) && (mtctx->inBuff.buffer.start)) { /* support NULL input */
|
886
|
+
size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
|
887
|
+
DEBUGLOG(2, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
|
888
|
+
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
|
708
889
|
input->pos += toLoad;
|
709
|
-
|
890
|
+
mtctx->inBuff.filled += toLoad;
|
710
891
|
}
|
711
892
|
|
712
|
-
if ( (
|
713
|
-
|
714
|
-
CHECK_F( ZSTDMT_createCompressionJob(
|
893
|
+
if ( (mtctx->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
|
894
|
+
&& (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { /* avoid overwriting job round buffer */
|
895
|
+
CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
|
715
896
|
}
|
716
897
|
|
717
|
-
/* check for data to
|
718
|
-
CHECK_F( ZSTDMT_flushNextJob(
|
898
|
+
/* check for potential compressed data ready to be flushed */
|
899
|
+
CHECK_F( ZSTDMT_flushNextJob(mtctx, output, (mtctx->inBuff.filled == mtctx->inBuffSize) /* blockToFlush */) ); /* block if it wasn't possible to create new job due to saturation */
|
900
|
+
|
901
|
+
if (input->pos < input->size) /* input not consumed : do not flush yet */
|
902
|
+
endOp = ZSTD_e_continue;
|
903
|
+
|
904
|
+
switch(endOp)
|
905
|
+
{
|
906
|
+
case ZSTD_e_flush:
|
907
|
+
return ZSTDMT_flushStream(mtctx, output);
|
908
|
+
case ZSTD_e_end:
|
909
|
+
return ZSTDMT_endStream(mtctx, output);
|
910
|
+
case ZSTD_e_continue:
|
911
|
+
return 1;
|
912
|
+
default:
|
913
|
+
return ERROR(GENERIC); /* invalid endDirective */
|
914
|
+
}
|
915
|
+
}
|
916
|
+
|
917
|
+
|
918
|
+
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
919
|
+
{
|
920
|
+
CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
|
719
921
|
|
720
922
|
/* recommended next input size : fill current input buffer */
|
721
923
|
return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
|
@@ -726,26 +928,28 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp
|
|
726
928
|
{
|
727
929
|
size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;
|
728
930
|
|
729
|
-
if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
|
730
931
|
if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
|
731
932
|
&& (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
|
732
933
|
CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
|
733
934
|
}
|
734
935
|
|
735
936
|
/* check if there is any data available to flush */
|
736
|
-
|
737
|
-
return ZSTDMT_flushNextJob(zcs, output, 1);
|
937
|
+
return ZSTDMT_flushNextJob(zcs, output, 1 /* blockToFlush */);
|
738
938
|
}
|
739
939
|
|
740
940
|
|
741
941
|
size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
|
742
942
|
{
|
743
|
-
|
744
|
-
|
943
|
+
DEBUGLOG(5, "ZSTDMT_flushStream");
|
944
|
+
if (zcs->nbThreads==1)
|
945
|
+
return ZSTD_flushStream(zcs->cctxPool->cctx[0], output);
|
946
|
+
return ZSTDMT_flushStream_internal(zcs, output, 0 /* endFrame */);
|
745
947
|
}
|
746
948
|
|
747
949
|
size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
|
748
950
|
{
|
749
|
-
|
750
|
-
|
951
|
+
DEBUGLOG(4, "ZSTDMT_endStream");
|
952
|
+
if (zcs->nbThreads==1)
|
953
|
+
return ZSTD_endStream(zcs->cctxPool->cctx[0], output);
|
954
|
+
return ZSTDMT_flushStream_internal(zcs, output, 1 /* endFrame */);
|
751
955
|
}
|