extzstd 0.3.2 → 0.4
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
--- a/data/contrib/zstd/lib/compress/zstdmt_compress.c
+++ b/data/contrib/zstd/lib/compress/zstdmt_compress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c)
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,16 +15,13 @@
 #endif
 
 
-/* ====== Constants ====== */
-#define ZSTDMT_OVERLAPLOG_DEFAULT 0
-
-
 /* ====== Dependencies ====== */
+#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
 #include "../common/mem.h"         /* MEM_STATIC */
 #include "../common/pool.h"        /* threadpool */
 #include "../common/threading.h"   /* mutex */
-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
 #include "zstd_ldm.h"
 #include "zstdmt_compress.h"
 
@@ -43,12 +40,13 @@
 #  include <unistd.h>
 #  include <sys/times.h>
 
-#  define DEBUG_PRINTHEX(l,p,n) {            \
-    unsigned debug_u;                        \
-    for (debug_u=0; debug_u<(n); debug_u++)  \
-        RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
-    RAWLOG(l, " \n");                        \
-}
+#  define DEBUG_PRINTHEX(l,p,n)                                   \
+    do {                                                          \
+        unsigned debug_u;                                         \
+        for (debug_u=0; debug_u<(n); debug_u++)                   \
+            RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+        RAWLOG(l, " \n");                                         \
+    } while (0)
 
 static unsigned long long GetCurrentClockTimeMicroseconds(void)
 {
@@ -60,25 +58,28 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
 }   }
 
 #define MUTEX_WAIT_TIME_DLEVEL 6
-#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) {          \
-    if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {   \
-        unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
-        ZSTD_pthread_mutex_lock(mutex);           \
-        {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
-            unsigned long long const elapsedTime = (afterTime-beforeTime); \
-            if (elapsedTime > 1000) {  /* or whatever threshold you like; I'm using 1 millisecond here */ \
-                DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
-                    elapsedTime, #mutex);         \
-        }   }                                     \
-    } else {                                      \
-        ZSTD_pthread_mutex_lock(mutex);           \
-    }                                             \
-}
+#define ZSTD_PTHREAD_MUTEX_LOCK(mutex)                \
+    do {                                              \
+        if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {   \
+            unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+            ZSTD_pthread_mutex_lock(mutex);           \
+            {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+                unsigned long long const elapsedTime = (afterTime-beforeTime); \
+                if (elapsedTime > 1000) {             \
+                    /* or whatever threshold you like; I'm using 1 millisecond here */ \
+                    DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL,  \
+                        "Thread took %llu microseconds to acquire mutex %s \n", \
+                        elapsedTime, #mutex);         \
+            }   }                                     \
+        } else {                                      \
+            ZSTD_pthread_mutex_lock(mutex);           \
+        }                                             \
+    } while (0)
 
 #else
 
 #  define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
-#  define DEBUG_PRINTHEX(l,p,n) {}
+#  define DEBUG_PRINTHEX(l,p,n) do { } while (0)
 
 #endif
 
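Several debug macros in these hunks (`DEBUG_PRINTHEX`, `ZSTD_PTHREAD_MUTEX_LOCK`, and `JOB_ERROR` further down) change from bare `{ ... }` blocks to the `do { ... } while (0)` idiom. The point of the idiom is that the macro expands to exactly one statement that consumes the trailing semicolon, so it composes safely with un-braced `if`/`else`. A minimal sketch of the failure mode the rewrite avoids (the names here are illustrative, not from zstd):

```c
#include <stdio.h>

/* Bare-block version: expands to `{...};`, so the `;` below ends the
 * `if` statement and the following `else` has nothing to attach to. */
#define LOG_TWICE_BAD(msg) { puts(msg); puts(msg); }

/* do/while(0) version: a single statement that swallows one `;`. */
#define LOG_TWICE(msg) do { puts(msg); puts(msg); } while (0)

int main(void)
{
    int err = 0;
    if (err)
        LOG_TWICE("error");   /* compiles; with LOG_TWICE_BAD it would not */
    else
        puts("ok");
    return 0;
}
```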
@@ -99,19 +100,39 @@ typedef struct ZSTDMT_bufferPool_s {
     unsigned totalBuffers;
     unsigned nbBuffers;
     ZSTD_customMem cMem;
-    buffer_t bTable[1];   /* variable size */
+    buffer_t* buffers;
 } ZSTDMT_bufferPool;
 
-static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+    if (!bufPool) return;   /* compatibility with free on NULL */
+    if (bufPool->buffers) {
+        unsigned u;
+        for (u=0; u<bufPool->totalBuffers; u++) {
+            DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
+            ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
+        }
+        ZSTD_customFree(bufPool->buffers, bufPool->cMem);
+    }
+    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+    ZSTD_customFree(bufPool, bufPool->cMem);
+}
+
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
 {
-    unsigned const maxNbBuffers = 2*nbWorkers + 3;
-    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
-        sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+    ZSTDMT_bufferPool* const bufPool =
+        (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
     if (bufPool==NULL) return NULL;
     if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
         ZSTD_customFree(bufPool, cMem);
         return NULL;
     }
+    bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
+    if (bufPool->buffers==NULL) {
+        ZSTDMT_freeBufferPool(bufPool);
+        return NULL;
+    }
     bufPool->bufferSize = 64 KB;
     bufPool->totalBuffers = maxNbBuffers;
     bufPool->nbBuffers = 0;
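The buffer pool changes layout in this hunk: the old struct ended in a variable-length `bTable[1]` member sized by over-allocating the struct, while the new one holds a plain `buffers` pointer filled by a second `ZSTD_customCalloc()`. The two-allocation layout keeps `sizeof(ZSTDMT_bufferPool)` honest and lets `ZSTDMT_sizeof_bufferPool()` account for the array separately. A minimal sketch of the before/after shape, using plain `calloc`/`free` and hypothetical `pool_v1`/`pool_v2` names rather than zstd's allocator:

```c
#include <stdlib.h>

typedef struct { void* start; size_t capacity; } buffer_t;

/* old shape: one over-allocated block, array glued to the struct tail */
typedef struct { unsigned totalBuffers; buffer_t bTable[1]; /* variable size */ } pool_v1;

/* new shape: fixed-size struct plus a separately allocated array */
typedef struct { unsigned totalBuffers; buffer_t* buffers; } pool_v2;

static pool_v2* pool_v2_create(unsigned maxNbBuffers)
{
    pool_v2* const p = (pool_v2*)calloc(1, sizeof(pool_v2));
    if (p == NULL) return NULL;
    p->buffers = (buffer_t*)calloc(maxNbBuffers, sizeof(buffer_t));
    if (p->buffers == NULL) { free(p); return NULL; }  /* teardown is now two frees */
    p->totalBuffers = maxNbBuffers;
    return p;
}
```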
@@ -119,32 +140,19 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_custo
     return bufPool;
 }
 
-static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
-{
-    unsigned u;
-    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
-    if (!bufPool) return;   /* compatibility with free on NULL */
-    for (u=0; u<bufPool->totalBuffers; u++) {
-        DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
-        ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
-    }
-    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
-    ZSTD_customFree(bufPool, bufPool->cMem);
-}
-
 /* only works at initialization, not during compression */
 static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
 {
-    size_t const poolSize = sizeof(*bufPool)
-                          + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+    size_t const poolSize = sizeof(*bufPool);
+    size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
     unsigned u;
     size_t totalBufferSize = 0;
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     for (u=0; u<bufPool->totalBuffers; u++)
-        totalBufferSize += bufPool->bTable[u].capacity;
+        totalBufferSize += bufPool->buffers[u].capacity;
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
 
-    return poolSize + totalBufferSize;
+    return poolSize + arraySize + totalBufferSize;
 }
 
 /* ZSTDMT_setBufferSize() :
@@ -160,9 +168,8 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
 }
 
 
-static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
+static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
 {
-    unsigned const maxNbBuffers = 2*nbWorkers + 3;
     if (srcBufPool==NULL) return NULL;
     if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
         return srcBufPool;
@@ -171,7 +178,7 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
     size_t const bSize = srcBufPool->bufferSize;   /* forward parameters */
     ZSTDMT_bufferPool* newBufPool;
     ZSTDMT_freeBufferPool(srcBufPool);
-    newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+    newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
     if (newBufPool==NULL) return newBufPool;
     ZSTDMT_setBufferSize(newBufPool, bSize);
     return newBufPool;
@@ -188,9 +195,9 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
     DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers) {   /* try to use an existing buffer */
-        buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
+        buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
         size_t const availBufferSize = buf.capacity;
-        bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
+        bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
         if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
             /* large enough, but not too much */
             DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
@@ -251,18 +258,28 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
     if (buf.start == NULL) return;   /* compatible with release on NULL */
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers < bufPool->totalBuffers) {
-        bufPool->bTable[bufPool->nbBuffers++] = buf;  /* stored for later use */
+        bufPool->buffers[bufPool->nbBuffers++] = buf;  /* stored for later use */
         DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
                     (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
         ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
         return;
     }
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-    /* Reached bufferPool capacity (should not happen) */
+    /* Reached bufferPool capacity (note: should not happen) */
    DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
     ZSTD_customFree(buf.start, bufPool->cMem);
 }
 
+/* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.
+ * The 3 additional buffers are as follows:
+ *   1 buffer for input loading
+ *   1 buffer for "next input" when submitting current one
+ *   1 buffer stuck in queue */
+#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
+
+/* After a worker releases its rawSeqStore, it is immediately ready for reuse.
+ * So we only need one seq buffer per worker. */
+#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
 
 /* ===== Seq Pool Wrapper ====== */
 
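The sizing policy that used to be hard-coded inside `ZSTDMT_createBufferPool()`/`ZSTDMT_expandBufferPool()` (`2*nbWorkers + 3`) now lives in these two macros, and every call site passes the result explicitly. A quick worked check of the arithmetic (standalone sketch, not zstd code):

```c
#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)

/* With 4 workers: 2*4 dst buffers in flight, plus 1 for input loading,
 * 1 for the "next input", and 1 stuck in the queue => 11 buffers,
 * but only 4 rawSeqStore buffers, one per worker. */
int main(void)
{
    return (BUF_POOL_MAX_NB_BUFFERS(4) == 11
         && SEQ_POOL_MAX_NB_BUFFERS(4) == 4) ? 0 : 1;
}
```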
@@ -316,7 +333,7 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
 
 static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
 {
-    ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+    ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
     if (seqPool == NULL) return NULL;
     ZSTDMT_setNbSeq(seqPool, 0);
     return seqPool;
@@ -329,7 +346,7 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
 
 static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
 {
-    return ZSTDMT_expandBufferPool(pool, nbWorkers);
+    return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
 }
 
 
@@ -341,16 +358,20 @@ typedef struct {
     int totalCCtx;
     int availCCtx;
     ZSTD_customMem cMem;
-    ZSTD_CCtx* cctx[1];   /* variable size */
+    ZSTD_CCtx** cctxs;
 } ZSTDMT_CCtxPool;
 
-/* note : all CCtx borrowed from the pool
+/* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 {
-    int cid;
-    for (cid=0; cid<pool->totalCCtx; cid++)
-        ZSTD_freeCCtx(pool->cctx[cid]);  /* note : compatible with free on NULL */
+    if (!pool) return;
     ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+    if (pool->cctxs) {
+        int cid;
+        for (cid=0; cid<pool->totalCCtx; cid++)
+            ZSTD_freeCCtx(pool->cctxs[cid]);  /* free compatible with NULL */
+        ZSTD_customFree(pool->cctxs, pool->cMem);
+    }
     ZSTD_customFree(pool, pool->cMem);
 }
 
@@ -359,19 +380,24 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
                                               ZSTD_customMem cMem)
 {
-    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
-        sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+    ZSTDMT_CCtxPool* const cctxPool =
+        (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
     assert(nbWorkers > 0);
     if (!cctxPool) return NULL;
     if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
         ZSTD_customFree(cctxPool, cMem);
         return NULL;
     }
-    cctxPool->cMem = cMem;
     cctxPool->totalCCtx = nbWorkers;
+    cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
+    if (!cctxPool->cctxs) {
+        ZSTDMT_freeCCtxPool(cctxPool);
+        return NULL;
+    }
+    cctxPool->cMem = cMem;
+    cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
+    if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
     cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
-    cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
-    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
     DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
     return cctxPool;
 }
@@ -393,16 +419,16 @@ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
 {
     ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
     {   unsigned const nbWorkers = cctxPool->totalCCtx;
-        size_t const poolSize = sizeof(*cctxPool)
-                              + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
-        unsigned u;
+        size_t const poolSize = sizeof(*cctxPool);
+        size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
         size_t totalCCtxSize = 0;
+        unsigned u;
         for (u=0; u<nbWorkers; u++) {
-            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
         }
         ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
         assert(nbWorkers > 0);
-        return poolSize + totalCCtxSize;
+        return poolSize + arraySize + totalCCtxSize;
     }
 }
 
@@ -412,7 +438,7 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
     ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
     if (cctxPool->availCCtx) {
         cctxPool->availCCtx--;
-        {   ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+        {   ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
             ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
             return cctx;
     }   }
@@ -426,7 +452,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
     if (cctx==NULL) return;   /* compatibility with release on NULL */
     ZSTD_pthread_mutex_lock(&pool->poolMutex);
     if (pool->availCCtx < pool->totalCCtx)
-        pool->cctx[pool->availCCtx++] = cctx;
+        pool->cctxs[pool->availCCtx++] = cctx;
     else {
         /* pool overflow : should not happen, since totalCCtx==nbWorkers */
         DEBUGLOG(4, "CCtx pool overflow : free cctx");
@@ -467,29 +493,27 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
                          ZSTD_dictContentType_e dictContentType)
 {
     /* Adjust parameters */
-    if (params.ldmParams.enableLdm) {
+    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
         DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
         ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
         assert(params.ldmParams.hashRateLog < 32);
-        serialState->ldmState.hashPower =
-            ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
     } else {
         ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
     }
     serialState->nextJobID = 0;
     if (params.fParams.checksumFlag)
         XXH64_reset(&serialState->xxhState, 0);
-    if (params.ldmParams.enableLdm) {
+    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
         ZSTD_customMem cMem = params.customMem;
         unsigned const hashLog = params.ldmParams.hashLog;
         size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
         unsigned const bucketLog =
             params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
-        size_t const bucketSize = (size_t)1 << bucketLog;
         unsigned const prevBucketLog =
             serialState->params.ldmParams.hashLog -
             serialState->params.ldmParams.bucketSizeLog;
+        size_t const numBuckets = (size_t)1 << bucketLog;
         /* Size the seq pool tables */
         ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
         /* Reset the window */
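From here on, every `if (params.ldmParams.enableLdm)` in the file becomes an explicit comparison against `ZSTD_ps_enable`. In zstd 1.5.x, `enableLdm` (like a few other advanced parameters) is no longer a boolean but a tri-state `ZSTD_paramSwitch_e`, so that "auto" can be resolved to a per-level default during parameter setup. A sketch of why the truthiness test had to go (the enum values are as declared in zstd.h; the helper is illustrative):

```c
/* from zstd.h */
typedef enum {
    ZSTD_ps_auto = 0,     /* let the library decide */
    ZSTD_ps_enable = 1,   /* force-enable the feature */
    ZSTD_ps_disable = 2   /* force-disable the feature */
} ZSTD_paramSwitch_e;

/* A plain boolean test would treat ZSTD_ps_disable (2) as "on".
 * Comparing against ZSTD_ps_enable only fires after "auto" has been
 * resolved to an explicit enable/disable earlier in setup. */
static int ldm_is_enabled(ZSTD_paramSwitch_e enableLdm)
{
    return enableLdm == ZSTD_ps_enable;
}
```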
@@ -501,20 +525,20 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
         }
         if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
             ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
-            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem);
+            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
         }
         if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
             return 1;
         /* Zero the tables */
         ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
-        ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+        ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
 
         /* Update window state and fill hash table with dict */
         serialState->ldmState.loadedDictEnd = 0;
         if (dictSize > 0) {
             if (dictContentType == ZSTD_dct_rawContent) {
                 BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
-                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
                 ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
                 serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
             } else {
@@ -566,12 +590,12 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
     /* A future job may error and skip our job */
     if (serialState->nextJobID == jobID) {
         /* It is now our turn, do any processing necessary */
-        if (serialState->params.ldmParams.enableLdm) {
+        if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
             size_t error;
             assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                    seqStore.size == 0 && seqStore.capacity > 0);
             assert(src.size <= serialState->params.jobSize);
-            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
             error = ZSTD_ldm_generateSequences(
                 &serialState->ldmState, &seqStore,
                 &serialState->params.ldmParams, src.start, src.size);
@@ -594,11 +618,8 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
     ZSTD_pthread_mutex_unlock(&serialState->mutex);
 
     if (seqStore.size > 0) {
-        size_t const err = ZSTD_referenceExternalSequences(
-            jobCCtx, seqStore.seq, seqStore.size);
-        assert(serialState->params.ldmParams.enableLdm);
-        assert(!ZSTD_isError(err));
-        (void)err;
+        ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size);
+        assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
     }
 }
 
@@ -650,12 +671,13 @@ typedef struct {
     unsigned frameChecksumNeeded;        /* used only by mtctx */
 } ZSTDMT_jobDescription;
 
-#define JOB_ERROR(e) {                          \
-    ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   \
-    job->cSize = e;                             \
-    ZSTD_pthread_mutex_unlock(&job->job_mutex); \
-    goto _endJob;                               \
-}
+#define JOB_ERROR(e)                                \
+    do {                                            \
+        ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   \
+        job->cSize = e;                             \
+        ZSTD_pthread_mutex_unlock(&job->job_mutex); \
+        goto _endJob;                               \
+    } while (0)
 
 /* ZSTDMT_compressionJob() is a POOL_function type */
 static void ZSTDMT_compressionJob(void* jobDescription)
@@ -674,7 +696,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
         if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
         job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
     }
-    if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
+    if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
         JOB_ERROR(ERROR(memory_allocation));
 
     /* Don't compute the checksum for chunks, since we compute it externally,
@@ -682,7 +704,9 @@ static void ZSTDMT_compressionJob(void* jobDescription)
      */
     if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
     /* Don't run LDM for the chunks, since we handle it externally */
-    jobParams.ldmParams.enableLdm = 0;
+    jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
+    /* Correct nbWorkers to 0. */
+    jobParams.nbWorkers = 0;
 
 
     /* init */
@@ -695,6 +719,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
     {   size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
         if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
     }
+    if (!job->firstJob) {
+        size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
+        if (ZSTD_isError(err)) JOB_ERROR(err);
+    }
     {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                         ZSTD_dtlm_fast,
@@ -707,7 +735,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
     ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
 
     if (!job->firstJob) {  /* flush and overwrite frame header when it's not first job */
-        size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
+        size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
         if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
         DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
         ZSTD_invalidateRepCodes(cctx);
@@ -725,7 +753,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
         DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
         assert(job->cSize == 0);
         for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
-            size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
+            size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
             if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
             ip += chunkSize;
             op += cSize; assert(op < oend);
@@ -745,11 +773,18 @@ static void ZSTDMT_compressionJob(void* jobDescription)
             size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
             size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
             size_t const cSize = (job->lastJob) ?
-                 ZSTD_compressEnd(cctx, op, oend-op, ip, lastBlockSize) :
-                 ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
+                 ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
+                 ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
             if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
             lastCBlockSize = cSize;
     }   }
+    if (!job->firstJob) {
+        /* Double check that we don't have an ext-dict, because then our
+         * repcode invalidation doesn't work.
+         */
+        assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+    }
+    ZSTD_CCtx_trace(cctx, 0);
 
 _endJob:
     ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
@@ -796,6 +831,15 @@ typedef struct {
 static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
 
 #define RSYNC_LENGTH 32
+/* Don't create chunks smaller than the zstd block size.
+ * This stops us from regressing compression ratio too much,
+ * and ensures our output fits in ZSTD_compressBound().
+ *
+ * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
+ * ZSTD_COMPRESSBOUND() will need to be updated.
+ */
+#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
+#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)
 
 typedef struct {
     U64 hash;
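`RSYNC_MIN_BLOCK_LOG` is pinned to `ZSTD_BLOCKSIZELOG_MAX` (17 in zstd.h), so rsyncable mode now refuses to cut a chunk smaller than 128 KB. Together with the reworked `rsyncBits` computation in `ZSTDMT_initCStream_internal()` below, the expected chunk size stays at least 4x that floor. A worked example of the numbers (standalone arithmetic sketch, not zstd code):

```c
#include <assert.h>

#define ZSTD_BLOCKSIZELOG_MAX 17                          /* from zstd.h */
#define RSYNC_MIN_BLOCK_LOG   ZSTD_BLOCKSIZELOG_MAX
#define RSYNC_MIN_BLOCK_SIZE  (1 << RSYNC_MIN_BLOCK_LOG)  /* 128 KB */

int main(void)
{
    /* A sync point fires when (hash & hitMask) == hitMask, i.e. on
     * average once every 2^rsyncBits bytes. For a 32 MB target section:
     * jobSizeKB = 32768, highbit32(32768) = 15, rsyncBits = 15 + 10 = 25,
     * so the average chunk is 2^25 bytes = 32 MB. */
    unsigned const rsyncBits = 15 + 10;
    assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);  /* 25 >= 19: expected
                                                      chunk >= 4x the floor */
    return 0;
}
```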
@@ -916,7 +960,7 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
     mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
     assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0);  /* ensure nbJobs is a power of 2 */
     mtctx->jobIDMask = nbJobs - 1;
-    mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+    mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
     mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
     mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
     initError = ZSTDMT_serialState_init(&mtctx->serial);
@@ -1019,7 +1063,7 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
 {
     if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
     FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
-    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
+    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));
     if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
     mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
     if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
@@ -1062,7 +1106,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
     {   unsigned jobNb;
         unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
         DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
-                    mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+                    mtctx->doneJobID, lastJobNb, mtctx->jobReady);
         for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
             unsigned const wJobID = jobNb & mtctx->jobIDMask;
             ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
@@ -1124,7 +1168,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
 static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
 {
     unsigned jobLog;
-    if (params->ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on cycleLog instead. */
@@ -1168,7 +1212,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
     int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
     int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
     assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params->ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead.
@@ -1239,9 +1283,11 @@ size_t ZSTDMT_initCStream_internal(
 
     if (params.rsyncable) {
         /* Aim for the targetsectionSize as the average job size. */
-        U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
-        U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
-        assert(jobSizeMB >= 1);
+        U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+        U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
+        /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
+         * expected job size is at least 4x larger. */
+        assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
         DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
         mtctx->rsync.hash = 0;
         mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
@@ -1253,7 +1299,7 @@ size_t ZSTDMT_initCStream_internal(
     ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
     {
         /* If ldm is enabled we need windowSize space. */
-        size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+        size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
         /* Two buffers of slack, plus extra space for the overlap
          * This is the minimum slack that LDM works with. One extra because
          * flush might waste up to targetSectionSize-1 bytes. Another extra
@@ -1528,17 +1574,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
 static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
 {
     BYTE const* const bufferStart = (BYTE const*)buffer.start;
-    BYTE const* const bufferEnd = bufferStart + buffer.capacity;
     BYTE const* const rangeStart = (BYTE const*)range.start;
-    BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
 
     if (rangeStart == NULL || bufferStart == NULL)
         return 0;
-    /* Empty ranges cannot overlap */
-    if (bufferStart == bufferEnd || rangeStart == rangeEnd)
-        return 0;
 
-    return bufferStart < rangeEnd && rangeStart < bufferEnd;
+    {
+        BYTE const* const bufferEnd = bufferStart + buffer.capacity;
+        BYTE const* const rangeEnd = rangeStart + range.size;
+
+        /* Empty ranges cannot overlap */
+        if (bufferStart == bufferEnd || rangeStart == rangeEnd)
+            return 0;
+
+        return bufferStart < rangeEnd && rangeStart < bufferEnd;
+    }
 }
 
 static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
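The `ZSTDMT_isOverlapped()` rewrite above is about C's pointer rules: the old code computed `bufferEnd` and `rangeEnd` in the declarations, before the NULL checks ran, and pointer arithmetic on a NULL base is undefined behavior even if the result is never dereferenced. The new version checks for NULL first and only then derives the interval ends inside a nested block. The same predicate, reduced to a self-contained sketch:

```c
#include <stddef.h>

/* Half-open interval overlap test. NULL and empty ranges are rejected
 * before any pointer arithmetic, so no arithmetic ever uses a NULL base
 * (undefined behavior in C, even without a dereference). */
static int ranges_overlap(const char* aStart, size_t aLen,
                          const char* bStart, size_t bLen)
{
    if (aStart == NULL || bStart == NULL) return 0;
    if (aLen == 0 || bLen == 0) return 0;   /* empty ranges cannot overlap */
    {
        const char* const aEnd = aStart + aLen;
        const char* const bEnd = bStart + bLen;
        return aStart < bEnd && bStart < aEnd;
    }
}
```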
@@ -1565,7 +1615,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
 
 static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
 {
-    if (mtctx->params.ldmParams.enableLdm) {
+    if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
         ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
         DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
         DEBUGLOG(5, "source  [0x%zx, 0x%zx)",
@@ -1668,6 +1718,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
     if (!mtctx->params.rsyncable)
         /* Rsync is disabled. */
         return syncPoint;
+    if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
+        /* We don't emit synchronization points if it would produce too small blocks.
+         * We don't have enough input to find a synchronization point, so don't look.
+         */
+        return syncPoint;
     if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
         /* Not enough to compute the hash.
          * We will miss any synchronization points in this RSYNC_LENGTH byte
@@ -1678,10 +1733,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
          */
         return syncPoint;
     /* Initialize the loop variables. */
-    if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
-        /* We have enough bytes buffered to initialize the hash.
+    if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
+        /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
+         * because they can't possibly be a sync point. So we can start
+         * part way through the input buffer.
+         */
+        pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
+        if (pos >= RSYNC_LENGTH) {
+            prev = istart + pos - RSYNC_LENGTH;
+            hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
+        } else {
+            assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
+            prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
+            hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
+            hash = ZSTD_rollingHash_append(hash, istart, pos);
+        }
+    } else {
+        /* We have enough bytes buffered to initialize the hash,
+         * and have processed enough bytes to find a sync point.
          * Start scanning at the beginning of the input.
          */
+        assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
+        assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
         pos = 0;
         prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
         hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
@@ -1695,16 +1768,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
             syncPoint.flush = 1;
             return syncPoint;
         }
-    } else {
-        /* We don't have enough bytes buffered to initialize the hash, but
-         * we know we have at least RSYNC_LENGTH bytes total.
-         * Start scanning after the first RSYNC_LENGTH bytes less the bytes
-         * already buffered.
-         */
-        pos = RSYNC_LENGTH - mtctx->inBuff.filled;
-        prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
-        hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
-        hash = ZSTD_rollingHash_append(hash, istart, pos);
     }
     /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
      * through the input. If we hit a synchronization point, then cut the
@@ -1714,16 +1777,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
      * then a block will be emitted anyways, but this is okay, since if we
      * are already synchronized we will remain synchronized.
      */
+    assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
     for (; pos < syncPoint.toLoad; ++pos) {
         BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
-        /* assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
+        /* This assert is very expensive, and Debian compiles with asserts enabled.
+         * So disable it for now. We can get similar coverage by checking it at the
+         * beginning & end of the loop.
+         * assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
+         */
        hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
+        assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
         if ((hash & hitMask) == hitMask) {
             syncPoint.toLoad = pos + 1;
             syncPoint.flush = 1;
+            ++pos;  /* for assert */
             break;
         }
     }
+    assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
     return syncPoint;
 }
 
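The `findSynchronizationPoint()` changes all revolve around one rolling hash over the last `RSYNC_LENGTH` (32) bytes: `ZSTD_rollingHash_compute()` hashes a full window from scratch, `ZSTD_rollingHash_append()` extends a running hash with more bytes, and `ZSTD_rollingHash_rotate()` slides the window by one byte. The rewritten initialization combines those three operations so the scan can begin `RSYNC_MIN_BLOCK_SIZE` bytes in. A simplified model of such a polynomial rolling hash (the multiplier is illustrative; zstd uses its own constants):

```c
#include <stdint.h>
#include <stddef.h>

#define PRIME  0x9E3779B185EBCA87ULL  /* illustrative multiplier, not zstd's */
#define WINDOW 32                     /* RSYNC_LENGTH */

/* hash(b0..b31) = b0*PRIME^31 + b1*PRIME^30 + ... + b31  (mod 2^64) */
static uint64_t hash_append(uint64_t hash, const uint8_t* buf, size_t size)
{
    size_t i;
    for (i = 0; i < size; i++)
        hash = hash * PRIME + buf[i];
    return hash;
}

static uint64_t hash_compute(const uint8_t* buf)  /* full window from scratch */
{
    return hash_append(0, buf, WINDOW);
}

/* slide by one byte: subtract the outgoing byte (scaled by PRIME^(WINDOW-1),
 * precomputed as primePower), shift, and add the incoming byte */
static uint64_t hash_rotate(uint64_t hash, uint8_t out, uint8_t in,
                            uint64_t primePower)
{
    return (hash - out * primePower) * PRIME + in;
}
```

With these definitions, `hash_rotate(hash_compute(p), p[0], p[32], primePower)` equals `hash_compute(p + 1)`, which is exactly the window-sliding invariant the new asserts check at the loop boundaries.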