extzstd 0.3.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
data/contrib/zstd/lib/compress/zstdmt_compress.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c)
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,16 +15,13 @@
 #endif


-/* ====== Constants ====== */
-#define ZSTDMT_OVERLAPLOG_DEFAULT 0
-
-
 /* ====== Dependencies ====== */
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
 #include "../common/mem.h" /* MEM_STATIC */
 #include "../common/pool.h" /* threadpool */
 #include "../common/threading.h" /* mutex */
-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
 #include "zstd_ldm.h"
 #include "zstdmt_compress.h"

@@ -43,12 +40,13 @@
 # include <unistd.h>
 # include <sys/times.h>

-# define DEBUG_PRINTHEX(l,p,n)
-
-
-
-
-
+# define DEBUG_PRINTHEX(l,p,n) \
+    do { \
+        unsigned debug_u; \
+        for (debug_u=0; debug_u<(n); debug_u++) \
+            RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+        RAWLOG(l, " \n"); \
+    } while (0)

 static unsigned long long GetCurrentClockTimeMicroseconds(void)
 {
@@ -60,25 +58,28 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
 } }

 #define MUTEX_WAIT_TIME_DLEVEL 6
-#define ZSTD_PTHREAD_MUTEX_LOCK(mutex)
-
-
-
-
-    unsigned long long const
-
-
-
-
-
-
-
-}
+#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) \
+    do { \
+        if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
+            unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+            ZSTD_pthread_mutex_lock(mutex); \
+            { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+              unsigned long long const elapsedTime = (afterTime-beforeTime); \
+              if (elapsedTime > 1000) { \
+                  /* or whatever threshold you like; I'm using 1 millisecond here */ \
+                  DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, \
+                      "Thread took %llu microseconds to acquire mutex %s \n", \
+                      elapsedTime, #mutex); \
+            } } \
+        } else { \
+            ZSTD_pthread_mutex_lock(mutex); \
+        } \
+    } while (0)

 #else

 # define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
-# define DEBUG_PRINTHEX(l,p,n) {}
+# define DEBUG_PRINTHEX(l,p,n) do { } while (0)

 #endif

@@ -99,19 +100,39 @@ typedef struct ZSTDMT_bufferPool_s {
     unsigned totalBuffers;
     unsigned nbBuffers;
     ZSTD_customMem cMem;
-    buffer_t
+    buffer_t* buffers;
 } ZSTDMT_bufferPool;

-static ZSTDMT_bufferPool*
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+    if (!bufPool) return; /* compatibility with free on NULL */
+    if (bufPool->buffers) {
+        unsigned u;
+        for (u=0; u<bufPool->totalBuffers; u++) {
+            DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
+            ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
+        }
+        ZSTD_customFree(bufPool->buffers, bufPool->cMem);
+    }
+    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+    ZSTD_customFree(bufPool, bufPool->cMem);
+}
+
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
 {
-
-
-        sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+    ZSTDMT_bufferPool* const bufPool =
+        (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
     if (bufPool==NULL) return NULL;
     if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
         ZSTD_customFree(bufPool, cMem);
         return NULL;
     }
+    bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
+    if (bufPool->buffers==NULL) {
+        ZSTDMT_freeBufferPool(bufPool);
+        return NULL;
+    }
     bufPool->bufferSize = 64 KB;
     bufPool->totalBuffers = maxNbBuffers;
     bufPool->nbBuffers = 0;
@@ -119,32 +140,19 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_custo
     return bufPool;
 }

-static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
-{
-    unsigned u;
-    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
-    if (!bufPool) return; /* compatibility with free on NULL */
-    for (u=0; u<bufPool->totalBuffers; u++) {
-        DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
-        ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
-    }
-    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
-    ZSTD_customFree(bufPool, bufPool->cMem);
-}
-
 /* only works at initialization, not during compression */
 static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
 {
-    size_t const poolSize = sizeof(*bufPool)
-
+    size_t const poolSize = sizeof(*bufPool);
+    size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
     unsigned u;
     size_t totalBufferSize = 0;
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     for (u=0; u<bufPool->totalBuffers; u++)
-        totalBufferSize += bufPool->
+        totalBufferSize += bufPool->buffers[u].capacity;
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);

-    return poolSize + totalBufferSize;
+    return poolSize + arraySize + totalBufferSize;
 }

 /* ZSTDMT_setBufferSize() :
@@ -160,9 +168,8 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
 }


-static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
+static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
 {
-    unsigned const maxNbBuffers = 2*nbWorkers + 3;
     if (srcBufPool==NULL) return NULL;
     if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
         return srcBufPool;
@@ -171,7 +178,7 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
     size_t const bSize = srcBufPool->bufferSize; /* forward parameters */
     ZSTDMT_bufferPool* newBufPool;
     ZSTDMT_freeBufferPool(srcBufPool);
-    newBufPool = ZSTDMT_createBufferPool(
+    newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
     if (newBufPool==NULL) return newBufPool;
     ZSTDMT_setBufferSize(newBufPool, bSize);
     return newBufPool;
@@ -188,9 +195,9 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
     DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers) { /* try to use an existing buffer */
-        buffer_t const buf = bufPool->
+        buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
         size_t const availBufferSize = buf.capacity;
-        bufPool->
+        bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
         if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
             /* large enough, but not too much */
             DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
@@ -251,18 +258,28 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
     if (buf.start == NULL) return; /* compatible with release on NULL */
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers < bufPool->totalBuffers) {
-        bufPool->
+        bufPool->buffers[bufPool->nbBuffers++] = buf; /* stored for later use */
         DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
                     (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
         ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
         return;
     }
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-    /* Reached bufferPool capacity (should not happen) */
+    /* Reached bufferPool capacity (note: should not happen) */
     DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
     ZSTD_customFree(buf.start, bufPool->cMem);
 }

+/* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.
+ * The 3 additional buffers are as follows:
+ *   1 buffer for input loading
+ *   1 buffer for "next input" when submitting current one
+ *   1 buffer stuck in queue */
+#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
+
+/* After a worker releases its rawSeqStore, it is immediately ready for reuse.
+ * So we only need one seq buffer per worker. */
+#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)

 /* ===== Seq Pool Wrapper ====== */

@@ -316,7 +333,7 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)

 static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
 {
-    ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+    ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
     if (seqPool == NULL) return NULL;
     ZSTDMT_setNbSeq(seqPool, 0);
     return seqPool;
@@ -329,7 +346,7 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)

 static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
 {
-    return ZSTDMT_expandBufferPool(pool, nbWorkers);
+    return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
 }


@@ -341,16 +358,20 @@ typedef struct {
     int totalCCtx;
     int availCCtx;
     ZSTD_customMem cMem;
-    ZSTD_CCtx
+    ZSTD_CCtx** cctxs;
 } ZSTDMT_CCtxPool;

-/* note : all CCtx borrowed from the pool
+/* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 {
-
-    for (cid=0; cid<pool->totalCCtx; cid++)
-        ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
+    if (!pool) return;
     ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+    if (pool->cctxs) {
+        int cid;
+        for (cid=0; cid<pool->totalCCtx; cid++)
+            ZSTD_freeCCtx(pool->cctxs[cid]); /* free compatible with NULL */
+        ZSTD_customFree(pool->cctxs, pool->cMem);
+    }
     ZSTD_customFree(pool, pool->cMem);
 }

@@ -359,19 +380,24 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
                                               ZSTD_customMem cMem)
 {
-    ZSTDMT_CCtxPool* const cctxPool =
-
+    ZSTDMT_CCtxPool* const cctxPool =
+        (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
     assert(nbWorkers > 0);
     if (!cctxPool) return NULL;
     if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
         ZSTD_customFree(cctxPool, cMem);
         return NULL;
     }
-    cctxPool->cMem = cMem;
     cctxPool->totalCCtx = nbWorkers;
+    cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
+    if (!cctxPool->cctxs) {
+        ZSTDMT_freeCCtxPool(cctxPool);
+        return NULL;
+    }
+    cctxPool->cMem = cMem;
+    cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
+    if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
     cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
-    cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
-    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
     DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
     return cctxPool;
 }
@@ -393,16 +419,16 @@ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
 {
     ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
     { unsigned const nbWorkers = cctxPool->totalCCtx;
-        size_t const poolSize = sizeof(*cctxPool)
-
-        unsigned u;
+        size_t const poolSize = sizeof(*cctxPool);
+        size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
         size_t totalCCtxSize = 0;
+        unsigned u;
         for (u=0; u<nbWorkers; u++) {
-            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->
+            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
         }
         ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
         assert(nbWorkers > 0);
-        return poolSize + totalCCtxSize;
+        return poolSize + arraySize + totalCCtxSize;
     }
 }

@@ -412,7 +438,7 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
     ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
     if (cctxPool->availCCtx) {
         cctxPool->availCCtx--;
-        { ZSTD_CCtx* const cctx = cctxPool->
+        { ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
          ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
          return cctx;
     } }
@@ -426,7 +452,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
     if (cctx==NULL) return; /* compatibility with release on NULL */
     ZSTD_pthread_mutex_lock(&pool->poolMutex);
     if (pool->availCCtx < pool->totalCCtx)
-        pool->
+        pool->cctxs[pool->availCCtx++] = cctx;
     else {
         /* pool overflow : should not happen, since totalCCtx==nbWorkers */
         DEBUGLOG(4, "CCtx pool overflow : free cctx");
@@ -467,29 +493,27 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
                          ZSTD_dictContentType_e dictContentType)
 {
     /* Adjust parameters */
-    if (params.ldmParams.enableLdm) {
+    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
         DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
         ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
         assert(params.ldmParams.hashRateLog < 32);
-        serialState->ldmState.hashPower =
-            ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
     } else {
         ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
     }
     serialState->nextJobID = 0;
     if (params.fParams.checksumFlag)
         XXH64_reset(&serialState->xxhState, 0);
-    if (params.ldmParams.enableLdm) {
+    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
         ZSTD_customMem cMem = params.customMem;
         unsigned const hashLog = params.ldmParams.hashLog;
         size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
         unsigned const bucketLog =
             params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
-        size_t const bucketSize = (size_t)1 << bucketLog;
         unsigned const prevBucketLog =
             serialState->params.ldmParams.hashLog -
             serialState->params.ldmParams.bucketSizeLog;
+        size_t const numBuckets = (size_t)1 << bucketLog;
         /* Size the seq pool tables */
         ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
         /* Reset the window */
@@ -501,20 +525,20 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
         }
         if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
             ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
-            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(
+            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
         }
         if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
             return 1;
         /* Zero the tables */
         ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
-        ZSTD_memset(serialState->ldmState.bucketOffsets, 0,
+        ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);

         /* Update window state and fill hash table with dict */
         serialState->ldmState.loadedDictEnd = 0;
         if (dictSize > 0) {
             if (dictContentType == ZSTD_dct_rawContent) {
                 BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
-                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
                 ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
                 serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
             } else {
@@ -566,12 +590,12 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
     /* A future job may error and skip our job */
     if (serialState->nextJobID == jobID) {
         /* It is now our turn, do any processing necessary */
-        if (serialState->params.ldmParams.enableLdm) {
+        if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
             size_t error;
             assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                    seqStore.size == 0 && seqStore.capacity > 0);
             assert(src.size <= serialState->params.jobSize);
-            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
             error = ZSTD_ldm_generateSequences(
                 &serialState->ldmState, &seqStore,
                 &serialState->params.ldmParams, src.start, src.size);
@@ -594,11 +618,8 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
     ZSTD_pthread_mutex_unlock(&serialState->mutex);

     if (seqStore.size > 0) {
-
-
-        assert(serialState->params.ldmParams.enableLdm);
-        assert(!ZSTD_isError(err));
-        (void)err;
+        ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size);
+        assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
     }
 }

@@ -650,12 +671,13 @@ typedef struct {
     unsigned frameChecksumNeeded; /* used only by mtctx */
 } ZSTDMT_jobDescription;

-#define JOB_ERROR(e)
-
-
-
-
-
+#define JOB_ERROR(e) \
+    do { \
+        ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
+        job->cSize = e; \
+        ZSTD_pthread_mutex_unlock(&job->job_mutex); \
+        goto _endJob; \
+    } while (0)

 /* ZSTDMT_compressionJob() is a POOL_function type */
 static void ZSTDMT_compressionJob(void* jobDescription)
@@ -674,7 +696,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
         if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
         job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
     }
-    if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
+    if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
         JOB_ERROR(ERROR(memory_allocation));

     /* Don't compute the checksum for chunks, since we compute it externally,
@@ -682,7 +704,9 @@ static void ZSTDMT_compressionJob(void* jobDescription)
      */
     if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
     /* Don't run LDM for the chunks, since we handle it externally */
-    jobParams.ldmParams.enableLdm =
+    jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
+    /* Correct nbWorkers to 0. */
+    jobParams.nbWorkers = 0;


     /* init */
@@ -695,6 +719,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
     { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
         if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
     }
+    if (!job->firstJob) {
+        size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
+        if (ZSTD_isError(err)) JOB_ERROR(err);
+    }
     { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
             job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
             ZSTD_dtlm_fast,
@@ -707,7 +735,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
     ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);

     if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
-        size_t const hSize =
+        size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
         if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
         DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
         ZSTD_invalidateRepCodes(cctx);
@@ -725,7 +753,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
         DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
         assert(job->cSize == 0);
         for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
-            size_t const cSize =
+            size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
             if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
             ip += chunkSize;
             op += cSize; assert(op < oend);
@@ -745,11 +773,18 @@ static void ZSTDMT_compressionJob(void* jobDescription)
             size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
             size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
             size_t const cSize = (job->lastJob) ?
-
-
+                ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
+                ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
             if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
             lastCBlockSize = cSize;
     } }
+    if (!job->firstJob) {
+        /* Double check that we don't have an ext-dict, because then our
+         * repcode invalidation doesn't work.
+         */
+        assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+    }
+    ZSTD_CCtx_trace(cctx, 0);

 _endJob:
     ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
@@ -796,6 +831,15 @@ typedef struct {
 static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};

 #define RSYNC_LENGTH 32
+/* Don't create chunks smaller than the zstd block size.
+ * This stops us from regressing compression ratio too much,
+ * and ensures our output fits in ZSTD_compressBound().
+ *
+ * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
+ * ZSTD_COMPRESSBOUND() will need to be updated.
+ */
+#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
+#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)

 typedef struct {
   U64 hash;
@@ -916,7 +960,7 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
     mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
     assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
     mtctx->jobIDMask = nbJobs - 1;
-    mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+    mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
     mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
     mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
     initError = ZSTDMT_serialState_init(&mtctx->serial);
@@ -1019,7 +1063,7 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
 {
     if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
     FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
-    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
+    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));
     if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
     mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
     if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
@@ -1062,7 +1106,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
     { unsigned jobNb;
         unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
         DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
-                    mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+                    mtctx->doneJobID, lastJobNb, mtctx->jobReady);
         for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
             unsigned const wJobID = jobNb & mtctx->jobIDMask;
             ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
@@ -1124,7 +1168,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
 static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
 {
     unsigned jobLog;
-    if (params->ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on cycleLog instead. */
@@ -1168,7 +1212,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
     int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
     int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
     assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params->ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead.
@@ -1239,9 +1283,11 @@ size_t ZSTDMT_initCStream_internal(

     if (params.rsyncable) {
         /* Aim for the targetsectionSize as the average job size. */
-        U32 const
-        U32 const rsyncBits = ZSTD_highbit32(
-
+        U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+        U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
+        /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
+         * expected job size is at least 4x larger. */
+        assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
         DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
         mtctx->rsync.hash = 0;
         mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
@@ -1253,7 +1299,7 @@ size_t ZSTDMT_initCStream_internal(
     ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
     {
         /* If ldm is enabled we need windowSize space. */
-        size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+        size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
         /* Two buffers of slack, plus extra space for the overlap
          * This is the minimum slack that LDM works with. One extra because
          * flush might waste up to targetSectionSize-1 bytes. Another extra
@@ -1528,17 +1574,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
 static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
 {
     BYTE const* const bufferStart = (BYTE const*)buffer.start;
-    BYTE const* const bufferEnd = bufferStart + buffer.capacity;
     BYTE const* const rangeStart = (BYTE const*)range.start;
-    BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;

     if (rangeStart == NULL || bufferStart == NULL)
         return 0;
-    /* Empty ranges cannot overlap */
-    if (bufferStart == bufferEnd || rangeStart == rangeEnd)
-        return 0;

-
+    {
+        BYTE const* const bufferEnd = bufferStart + buffer.capacity;
+        BYTE const* const rangeEnd = rangeStart + range.size;
+
+        /* Empty ranges cannot overlap */
+        if (bufferStart == bufferEnd || rangeStart == rangeEnd)
+            return 0;
+
+        return bufferStart < rangeEnd && rangeStart < bufferEnd;
+    }
 }

 static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
@@ -1565,7 +1615,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)

 static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
 {
-    if (mtctx->params.ldmParams.enableLdm) {
+    if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
         ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
         DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
         DEBUGLOG(5, "source [0x%zx, 0x%zx)",
@@ -1668,6 +1718,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
     if (!mtctx->params.rsyncable)
         /* Rsync is disabled. */
         return syncPoint;
+    if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
+        /* We don't emit synchronization points if it would produce too small blocks.
+         * We don't have enough input to find a synchronization point, so don't look.
+         */
+        return syncPoint;
     if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
         /* Not enough to compute the hash.
          * We will miss any synchronization points in this RSYNC_LENGTH byte
@@ -1678,10 +1733,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
          */
         return syncPoint;
     /* Initialize the loop variables. */
-    if (mtctx->inBuff.filled
-        /* We
+    if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
+        /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
+         * because they can't possibly be a sync point. So we can start
+         * part way through the input buffer.
+         */
+        pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
+        if (pos >= RSYNC_LENGTH) {
+            prev = istart + pos - RSYNC_LENGTH;
+            hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
+        } else {
+            assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
+            prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
+            hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
+            hash = ZSTD_rollingHash_append(hash, istart, pos);
+        }
+    } else {
+        /* We have enough bytes buffered to initialize the hash,
+         * and have processed enough bytes to find a sync point.
         * Start scanning at the beginning of the input.
         */
+        assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
+        assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
        pos = 0;
        prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
        hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
@@ -1695,16 +1768,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
             syncPoint.flush = 1;
             return syncPoint;
         }
-    } else {
-        /* We don't have enough bytes buffered to initialize the hash, but
-         * we know we have at least RSYNC_LENGTH bytes total.
-         * Start scanning after the first RSYNC_LENGTH bytes less the bytes
-         * already buffered.
-         */
-        pos = RSYNC_LENGTH - mtctx->inBuff.filled;
-        prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
-        hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
-        hash = ZSTD_rollingHash_append(hash, istart, pos);
     }
     /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
      * through the input. If we hit a synchronization point, then cut the
@@ -1714,16 +1777,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
      * then a block will be emitted anyways, but this is okay, since if we
      * are already synchronized we will remain synchronized.
      */
+    assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
     for (; pos < syncPoint.toLoad; ++pos) {
         BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
-        /*
+        /* This assert is very expensive, and Debian compiles with asserts enabled.
+         * So disable it for now. We can get similar coverage by checking it at the
+         * beginning & end of the loop.
+         * assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
+         */
         hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
+        assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
         if ((hash & hitMask) == hitMask) {
             syncPoint.toLoad = pos + 1;
             syncPoint.flush = 1;
+            ++pos; /* for assert */
             break;
         }
     }
+    assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
     return syncPoint;
 }
