extzstd 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/contrib/zstd/CHANGELOG +188 -1
- data/contrib/zstd/CONTRIBUTING.md +157 -74
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +81 -58
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +59 -35
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/appveyor.yml +49 -136
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +87 -181
- data/contrib/zstd/lib/README.md +23 -6
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +33 -59
- data/contrib/zstd/lib/common/compiler.h +115 -45
- data/contrib/zstd/lib/common/cpu.h +1 -1
- data/contrib/zstd/lib/common/debug.c +1 -1
- data/contrib/zstd/lib/common/debug.h +1 -1
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +82 -3
- data/contrib/zstd/lib/common/fse.h +9 -85
- data/contrib/zstd/lib/common/fse_decompress.c +29 -111
- data/contrib/zstd/lib/common/huf.h +84 -172
- data/contrib/zstd/lib/common/mem.h +58 -49
- data/contrib/zstd/lib/common/pool.c +37 -16
- data/contrib/zstd/lib/common/pool.h +9 -3
- data/contrib/zstd/lib/common/portability_macros.h +156 -0
- data/contrib/zstd/lib/common/threading.c +68 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +7 -809
- data/contrib/zstd/lib/common/xxhash.h +5568 -167
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +64 -150
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +69 -150
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +773 -251
- data/contrib/zstd/lib/compress/zstd_compress.c +2650 -826
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +509 -180
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +33 -305
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +266 -85
- data/contrib/zstd/lib/compress/zstd_double_fast.c +369 -132
- data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +722 -258
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1105 -360
- data/contrib/zstd/lib/compress/zstd_lazy.h +41 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +272 -208
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +324 -197
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +109 -53
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1071 -539
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +507 -82
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +962 -310
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +54 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +44 -32
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -5
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +24 -16
- data/contrib/zstd/lib/dictBuilder/zdict.c +88 -95
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +16 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +24 -69
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +25 -72
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +23 -69
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +35 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +42 -87
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +35 -82
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +214 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +922 -293
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +7 -6
- data/ext/extzstd.c +13 -10
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +16 -5
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -20,12 +20,12 @@
|
|
|
20
20
|
#include "../common/mem.h" /* low level memory routines */
|
|
21
21
|
#define FSE_STATIC_LINKING_ONLY
|
|
22
22
|
#include "../common/fse.h"
|
|
23
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
24
23
|
#include "../common/huf.h"
|
|
25
24
|
#include "../common/zstd_internal.h"
|
|
26
25
|
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
|
|
27
26
|
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
|
|
28
27
|
#include "zstd_decompress_block.h"
|
|
28
|
+
#include "../common/bits.h" /* ZSTD_highbit32 */
|
|
29
29
|
|
|
30
30
|
/*_*******************************************************
|
|
31
31
|
* Macros
|
|
@@ -69,15 +69,56 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
71
|
|
|
72
|
+
/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
|
|
73
|
+
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
|
|
74
|
+
const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
|
|
75
|
+
{
|
|
76
|
+
if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
|
|
77
|
+
{
|
|
78
|
+
/* room for litbuffer to fit without read faulting */
|
|
79
|
+
dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
|
|
80
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
|
81
|
+
dctx->litBufferLocation = ZSTD_in_dst;
|
|
82
|
+
}
|
|
83
|
+
else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
|
|
84
|
+
{
|
|
85
|
+
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
|
|
86
|
+
if (splitImmediately) {
|
|
87
|
+
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
|
|
88
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
|
89
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
|
|
90
|
+
}
|
|
91
|
+
else {
|
|
92
|
+
/* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
|
|
93
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
|
|
94
|
+
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
|
|
95
|
+
}
|
|
96
|
+
dctx->litBufferLocation = ZSTD_split;
|
|
97
|
+
}
|
|
98
|
+
else
|
|
99
|
+
{
|
|
100
|
+
/* fits entirely within litExtraBuffer, so no split is necessary */
|
|
101
|
+
dctx->litBuffer = dctx->litExtraBuffer;
|
|
102
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
|
103
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
72
106
|
|
|
73
107
|
/* Hidden declaration for fullbench */
|
|
74
108
|
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
75
|
-
const void* src, size_t srcSize
|
|
109
|
+
const void* src, size_t srcSize,
|
|
110
|
+
void* dst, size_t dstCapacity, const streaming_operation streaming);
|
|
76
111
|
/*! ZSTD_decodeLiteralsBlock() :
|
|
112
|
+
* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
|
|
113
|
+
* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
|
|
114
|
+
* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
|
|
115
|
+
* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
|
|
116
|
+
*
|
|
77
117
|
* @return : nb of bytes read from src (< srcSize )
|
|
78
118
|
* note : symbol not declared but exposed for fullbench */
|
|
79
119
|
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
80
|
-
const void* src, size_t srcSize
|
|
120
|
+
const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
|
|
121
|
+
void* dst, size_t dstCapacity, const streaming_operation streaming)
|
|
81
122
|
{
|
|
82
123
|
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
|
|
83
124
|
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
|
|
@@ -90,15 +131,19 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
90
131
|
case set_repeat:
|
|
91
132
|
DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
|
|
92
133
|
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
|
|
93
|
-
|
|
134
|
+
ZSTD_FALLTHROUGH;
|
|
94
135
|
|
|
95
136
|
case set_compressed:
|
|
96
|
-
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
|
|
137
|
+
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
|
|
97
138
|
{ size_t lhSize, litSize, litCSize;
|
|
98
139
|
U32 singleStream=0;
|
|
99
140
|
U32 const lhlCode = (istart[0] >> 2) & 3;
|
|
100
141
|
U32 const lhc = MEM_readLE32(istart);
|
|
101
142
|
size_t hufSuccess;
|
|
143
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
|
144
|
+
int const flags = 0
|
|
145
|
+
| (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
|
|
146
|
+
| (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
|
|
102
147
|
switch(lhlCode)
|
|
103
148
|
{
|
|
104
149
|
case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
|
|
@@ -121,8 +166,15 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
121
166
|
litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
|
|
122
167
|
break;
|
|
123
168
|
}
|
|
169
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
|
124
170
|
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
|
171
|
+
if (!singleStream)
|
|
172
|
+
RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
|
|
173
|
+
"Not enough literals (%zu) for the 4-streams mode (min %u)",
|
|
174
|
+
litSize, MIN_LITERALS_FOR_4_STREAMS);
|
|
125
175
|
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
|
|
176
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
|
|
177
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
|
|
126
178
|
|
|
127
179
|
/* prefetch huffman table if cold */
|
|
128
180
|
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
|
@@ -131,13 +183,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
131
183
|
|
|
132
184
|
if (litEncType==set_repeat) {
|
|
133
185
|
if (singleStream) {
|
|
134
|
-
hufSuccess =
|
|
186
|
+
hufSuccess = HUF_decompress1X_usingDTable(
|
|
135
187
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
|
136
|
-
dctx->HUFptr,
|
|
188
|
+
dctx->HUFptr, flags);
|
|
137
189
|
} else {
|
|
138
|
-
|
|
190
|
+
assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
|
|
191
|
+
hufSuccess = HUF_decompress4X_usingDTable(
|
|
139
192
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
|
140
|
-
dctx->HUFptr,
|
|
193
|
+
dctx->HUFptr, flags);
|
|
141
194
|
}
|
|
142
195
|
} else {
|
|
143
196
|
if (singleStream) {
|
|
@@ -145,20 +198,27 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
145
198
|
hufSuccess = HUF_decompress1X_DCtx_wksp(
|
|
146
199
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
|
147
200
|
istart+lhSize, litCSize, dctx->workspace,
|
|
148
|
-
sizeof(dctx->workspace));
|
|
201
|
+
sizeof(dctx->workspace), flags);
|
|
149
202
|
#else
|
|
150
|
-
hufSuccess =
|
|
203
|
+
hufSuccess = HUF_decompress1X1_DCtx_wksp(
|
|
151
204
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
|
152
205
|
istart+lhSize, litCSize, dctx->workspace,
|
|
153
|
-
sizeof(dctx->workspace),
|
|
206
|
+
sizeof(dctx->workspace), flags);
|
|
154
207
|
#endif
|
|
155
208
|
} else {
|
|
156
|
-
hufSuccess =
|
|
209
|
+
hufSuccess = HUF_decompress4X_hufOnly_wksp(
|
|
157
210
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
|
158
211
|
istart+lhSize, litCSize, dctx->workspace,
|
|
159
|
-
sizeof(dctx->workspace),
|
|
212
|
+
sizeof(dctx->workspace), flags);
|
|
160
213
|
}
|
|
161
214
|
}
|
|
215
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
216
|
+
{
|
|
217
|
+
ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
|
218
|
+
ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
|
219
|
+
dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
|
220
|
+
dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
|
|
221
|
+
}
|
|
162
222
|
|
|
163
223
|
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
|
|
164
224
|
|
|
@@ -166,13 +226,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
166
226
|
dctx->litSize = litSize;
|
|
167
227
|
dctx->litEntropy = 1;
|
|
168
228
|
if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
|
|
169
|
-
ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
|
170
229
|
return litCSize + lhSize;
|
|
171
230
|
}
|
|
172
231
|
|
|
173
232
|
case set_basic:
|
|
174
233
|
{ size_t litSize, lhSize;
|
|
175
234
|
U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
|
235
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
|
176
236
|
switch(lhlCode)
|
|
177
237
|
{
|
|
178
238
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
|
@@ -185,27 +245,41 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
185
245
|
break;
|
|
186
246
|
case 3:
|
|
187
247
|
lhSize = 3;
|
|
248
|
+
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
|
|
188
249
|
litSize = MEM_readLE24(istart) >> 4;
|
|
189
250
|
break;
|
|
190
251
|
}
|
|
191
252
|
|
|
253
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
|
254
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
|
255
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
|
192
256
|
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
|
193
257
|
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
|
|
194
|
-
|
|
258
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
259
|
+
{
|
|
260
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
|
261
|
+
ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
|
262
|
+
}
|
|
263
|
+
else
|
|
264
|
+
{
|
|
265
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
|
|
266
|
+
}
|
|
195
267
|
dctx->litPtr = dctx->litBuffer;
|
|
196
268
|
dctx->litSize = litSize;
|
|
197
|
-
ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
|
198
269
|
return lhSize+litSize;
|
|
199
270
|
}
|
|
200
271
|
/* direct reference into compressed stream */
|
|
201
272
|
dctx->litPtr = istart+lhSize;
|
|
202
273
|
dctx->litSize = litSize;
|
|
274
|
+
dctx->litBufferEnd = dctx->litPtr + litSize;
|
|
275
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
203
276
|
return lhSize+litSize;
|
|
204
277
|
}
|
|
205
278
|
|
|
206
279
|
case set_rle:
|
|
207
280
|
{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
|
208
281
|
size_t litSize, lhSize;
|
|
282
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
|
209
283
|
switch(lhlCode)
|
|
210
284
|
{
|
|
211
285
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
|
@@ -214,16 +288,28 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
214
288
|
break;
|
|
215
289
|
case 1:
|
|
216
290
|
lhSize = 2;
|
|
291
|
+
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
|
|
217
292
|
litSize = MEM_readLE16(istart) >> 4;
|
|
218
293
|
break;
|
|
219
294
|
case 3:
|
|
220
295
|
lhSize = 3;
|
|
296
|
+
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
|
|
221
297
|
litSize = MEM_readLE24(istart) >> 4;
|
|
222
|
-
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
|
223
298
|
break;
|
|
224
299
|
}
|
|
300
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
|
225
301
|
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
|
226
|
-
|
|
302
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
|
303
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
|
304
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
305
|
+
{
|
|
306
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
|
|
307
|
+
ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
|
|
308
|
+
}
|
|
309
|
+
else
|
|
310
|
+
{
|
|
311
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
|
|
312
|
+
}
|
|
227
313
|
dctx->litPtr = dctx->litBuffer;
|
|
228
314
|
dctx->litSize = litSize;
|
|
229
315
|
return lhSize+1;
|
|
@@ -236,7 +322,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
236
322
|
|
|
237
323
|
/* Default FSE distribution tables.
|
|
238
324
|
* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
|
|
239
|
-
* https://github.com/facebook/zstd/blob/
|
|
325
|
+
* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
|
|
240
326
|
* They were generated programmatically with following method :
|
|
241
327
|
* - start from default distributions, present in /lib/common/zstd_internal.h
|
|
242
328
|
* - generate tables normally, using ZSTD_buildFSETable()
|
|
@@ -343,7 +429,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
|
|
|
343
429
|
}; /* ML_defaultDTable */
|
|
344
430
|
|
|
345
431
|
|
|
346
|
-
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
|
|
432
|
+
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
|
|
347
433
|
{
|
|
348
434
|
void* ptr = dt;
|
|
349
435
|
ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
|
|
@@ -355,7 +441,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
|
355
441
|
cell->nbBits = 0;
|
|
356
442
|
cell->nextState = 0;
|
|
357
443
|
assert(nbAddBits < 255);
|
|
358
|
-
cell->nbAdditionalBits =
|
|
444
|
+
cell->nbAdditionalBits = nbAddBits;
|
|
359
445
|
cell->baseValue = baseValue;
|
|
360
446
|
}
|
|
361
447
|
|
|
@@ -367,7 +453,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
|
367
453
|
FORCE_INLINE_TEMPLATE
|
|
368
454
|
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
369
455
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
370
|
-
const U32* baseValue, const U32* nbAdditionalBits,
|
|
456
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
371
457
|
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
372
458
|
{
|
|
373
459
|
ZSTD_seqSymbol* const tableDecode = dt+1;
|
|
@@ -430,14 +516,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
|
430
516
|
for (i = 8; i < n; i += 8) {
|
|
431
517
|
MEM_write64(spread + pos + i, sv);
|
|
432
518
|
}
|
|
433
|
-
|
|
519
|
+
assert(n>=0);
|
|
520
|
+
pos += (size_t)n;
|
|
434
521
|
}
|
|
435
522
|
}
|
|
436
523
|
/* Now we spread those positions across the table.
|
|
437
|
-
* The benefit of doing it in two stages is that we avoid the
|
|
524
|
+
* The benefit of doing it in two stages is that we avoid the
|
|
438
525
|
* variable size inner loop, which caused lots of branch misses.
|
|
439
526
|
* Now we can run through all the positions without any branch misses.
|
|
440
|
-
* We unroll the loop twice, since that is what
|
|
527
|
+
* We unroll the loop twice, since that is what empirically worked best.
|
|
441
528
|
*/
|
|
442
529
|
{
|
|
443
530
|
size_t position = 0;
|
|
@@ -464,7 +551,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
|
464
551
|
for (i=0; i<n; i++) {
|
|
465
552
|
tableDecode[position].baseValue = s;
|
|
466
553
|
position = (position + step) & tableMask;
|
|
467
|
-
while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
|
|
554
|
+
while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
|
|
468
555
|
} }
|
|
469
556
|
assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
|
|
470
557
|
}
|
|
@@ -475,10 +562,10 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
|
475
562
|
for (u=0; u<tableSize; u++) {
|
|
476
563
|
U32 const symbol = tableDecode[u].baseValue;
|
|
477
564
|
U32 const nextState = symbolNext[symbol]++;
|
|
478
|
-
tableDecode[u].nbBits = (BYTE) (tableLog -
|
|
565
|
+
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
|
|
479
566
|
tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
|
480
567
|
assert(nbAdditionalBits[symbol] < 255);
|
|
481
|
-
tableDecode[u].nbAdditionalBits =
|
|
568
|
+
tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
|
|
482
569
|
tableDecode[u].baseValue = baseValue[symbol];
|
|
483
570
|
}
|
|
484
571
|
}
|
|
@@ -487,7 +574,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
|
487
574
|
/* Avoids the FORCE_INLINE of the _body() function. */
|
|
488
575
|
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
|
489
576
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
490
|
-
const U32* baseValue, const U32* nbAdditionalBits,
|
|
577
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
491
578
|
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
492
579
|
{
|
|
493
580
|
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
|
@@ -495,9 +582,9 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
|
|
495
582
|
}
|
|
496
583
|
|
|
497
584
|
#if DYNAMIC_BMI2
|
|
498
|
-
TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
|
585
|
+
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
|
499
586
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
500
|
-
const U32* baseValue, const U32* nbAdditionalBits,
|
|
587
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
501
588
|
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
502
589
|
{
|
|
503
590
|
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
|
@@ -507,7 +594,7 @@ TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol
|
|
|
507
594
|
|
|
508
595
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
509
596
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
510
|
-
const U32* baseValue, const U32* nbAdditionalBits,
|
|
597
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
511
598
|
unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
|
|
512
599
|
{
|
|
513
600
|
#if DYNAMIC_BMI2
|
|
@@ -529,7 +616,7 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
|
529
616
|
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
|
|
530
617
|
symbolEncodingType_e type, unsigned max, U32 maxLog,
|
|
531
618
|
const void* src, size_t srcSize,
|
|
532
|
-
const U32* baseValue, const U32* nbAdditionalBits,
|
|
619
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
533
620
|
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
|
534
621
|
int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
|
|
535
622
|
int bmi2)
|
|
@@ -541,7 +628,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
541
628
|
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
|
|
542
629
|
{ U32 const symbol = *(const BYTE*)src;
|
|
543
630
|
U32 const baseline = baseValue[symbol];
|
|
544
|
-
|
|
631
|
+
U8 const nbBits = nbAdditionalBits[symbol];
|
|
545
632
|
ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
|
|
546
633
|
}
|
|
547
634
|
*DTablePtr = DTableSpace;
|
|
@@ -577,7 +664,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
577
664
|
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
578
665
|
const void* src, size_t srcSize)
|
|
579
666
|
{
|
|
580
|
-
const BYTE* const istart = (const BYTE*
|
|
667
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
581
668
|
const BYTE* const iend = istart + srcSize;
|
|
582
669
|
const BYTE* ip = istart;
|
|
583
670
|
int nbSeq;
|
|
@@ -620,7 +707,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
620
707
|
LL_defaultDTable, dctx->fseEntropy,
|
|
621
708
|
dctx->ddictIsCold, nbSeq,
|
|
622
709
|
dctx->workspace, sizeof(dctx->workspace),
|
|
623
|
-
dctx
|
|
710
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
|
624
711
|
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
625
712
|
ip += llhSize;
|
|
626
713
|
}
|
|
@@ -632,7 +719,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
632
719
|
OF_defaultDTable, dctx->fseEntropy,
|
|
633
720
|
dctx->ddictIsCold, nbSeq,
|
|
634
721
|
dctx->workspace, sizeof(dctx->workspace),
|
|
635
|
-
dctx
|
|
722
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
|
636
723
|
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
637
724
|
ip += ofhSize;
|
|
638
725
|
}
|
|
@@ -644,7 +731,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
644
731
|
ML_defaultDTable, dctx->fseEntropy,
|
|
645
732
|
dctx->ddictIsCold, nbSeq,
|
|
646
733
|
dctx->workspace, sizeof(dctx->workspace),
|
|
647
|
-
dctx
|
|
734
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
|
648
735
|
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
649
736
|
ip += mlhSize;
|
|
650
737
|
}
|
|
@@ -658,7 +745,6 @@ typedef struct {
|
|
|
658
745
|
size_t litLength;
|
|
659
746
|
size_t matchLength;
|
|
660
747
|
size_t offset;
|
|
661
|
-
const BYTE* match;
|
|
662
748
|
} seq_t;
|
|
663
749
|
|
|
664
750
|
typedef struct {
|
|
@@ -672,9 +758,6 @@ typedef struct {
|
|
|
672
758
|
ZSTD_fseState stateOffb;
|
|
673
759
|
ZSTD_fseState stateML;
|
|
674
760
|
size_t prevOffset[ZSTD_REP_NUM];
|
|
675
|
-
const BYTE* prefixStart;
|
|
676
|
-
const BYTE* dictEnd;
|
|
677
|
-
size_t pos;
|
|
678
761
|
} seqState_t;
|
|
679
762
|
|
|
680
763
|
/*! ZSTD_overlapCopy8() :
|
|
@@ -717,7 +800,7 @@ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
|
|
717
800
|
* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
|
|
718
801
|
* The src buffer must be before the dst buffer.
|
|
719
802
|
*/
|
|
720
|
-
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
|
803
|
+
static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
|
721
804
|
ptrdiff_t const diff = op - ip;
|
|
722
805
|
BYTE* const oend = op + length;
|
|
723
806
|
|
|
@@ -733,6 +816,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
|
733
816
|
/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
|
|
734
817
|
assert(length >= 8);
|
|
735
818
|
ZSTD_overlapCopy8(&op, &ip, diff);
|
|
819
|
+
length -= 8;
|
|
736
820
|
assert(op - ip >= 8);
|
|
737
821
|
assert(op <= oend);
|
|
738
822
|
}
|
|
@@ -747,8 +831,31 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
|
747
831
|
assert(oend > oend_w);
|
|
748
832
|
ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
|
|
749
833
|
ip += oend_w - op;
|
|
750
|
-
op
|
|
834
|
+
op += oend_w - op;
|
|
835
|
+
}
|
|
836
|
+
/* Handle the leftovers. */
|
|
837
|
+
while (op < oend) *op++ = *ip++;
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
/* ZSTD_safecopyDstBeforeSrc():
|
|
841
|
+
* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
|
|
842
|
+
* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
|
|
843
|
+
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
|
|
844
|
+
ptrdiff_t const diff = op - ip;
|
|
845
|
+
BYTE* const oend = op + length;
|
|
846
|
+
|
|
847
|
+
if (length < 8 || diff > -8) {
|
|
848
|
+
/* Handle short lengths, close overlaps, and dst not before src. */
|
|
849
|
+
while (op < oend) *op++ = *ip++;
|
|
850
|
+
return;
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
|
|
854
|
+
ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
|
|
855
|
+
ip += oend - WILDCOPY_OVERLENGTH - op;
|
|
856
|
+
op += oend - WILDCOPY_OVERLENGTH - op;
|
|
751
857
|
}
|
|
858
|
+
|
|
752
859
|
/* Handle the leftovers. */
|
|
753
860
|
while (op < oend) *op++ = *ip++;
|
|
754
861
|
}
|
|
@@ -763,9 +870,9 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
|
763
870
|
*/
|
|
764
871
|
FORCE_NOINLINE
|
|
765
872
|
size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
873
|
+
BYTE* const oend, seq_t sequence,
|
|
874
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
875
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
769
876
|
{
|
|
770
877
|
BYTE* const oLitEnd = op + sequence.litLength;
|
|
771
878
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
@@ -788,27 +895,76 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
|
788
895
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
789
896
|
/* offset beyond prefix */
|
|
790
897
|
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
|
791
|
-
match = dictEnd - (prefixStart-match);
|
|
898
|
+
match = dictEnd - (prefixStart - match);
|
|
792
899
|
if (match + sequence.matchLength <= dictEnd) {
|
|
793
900
|
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
794
901
|
return sequenceLength;
|
|
795
902
|
}
|
|
796
903
|
/* span extDict & currentPrefixSegment */
|
|
797
904
|
{ size_t const length1 = dictEnd - match;
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
905
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
906
|
+
op = oLitEnd + length1;
|
|
907
|
+
sequence.matchLength -= length1;
|
|
908
|
+
match = prefixStart;
|
|
909
|
+
}
|
|
910
|
+
}
|
|
911
|
+
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
|
912
|
+
return sequenceLength;
|
|
913
|
+
}
|
|
914
|
+
|
|
915
|
+
/* ZSTD_execSequenceEndSplitLitBuffer():
|
|
916
|
+
* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
|
|
917
|
+
*/
|
|
918
|
+
FORCE_NOINLINE
|
|
919
|
+
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
|
|
920
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
|
921
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
922
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
923
|
+
{
|
|
924
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
|
925
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
926
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
927
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
|
928
|
+
|
|
929
|
+
|
|
930
|
+
/* bounds checks : careful of address space overflow in 32-bit mode */
|
|
931
|
+
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
|
|
932
|
+
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
|
|
933
|
+
assert(op < op + sequenceLength);
|
|
934
|
+
assert(oLitEnd < op + sequenceLength);
|
|
935
|
+
|
|
936
|
+
/* copy literals */
|
|
937
|
+
RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
|
|
938
|
+
ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
|
|
939
|
+
op = oLitEnd;
|
|
940
|
+
*litPtr = iLitEnd;
|
|
941
|
+
|
|
942
|
+
/* copy Match */
|
|
943
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
944
|
+
/* offset beyond prefix */
|
|
945
|
+
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
|
946
|
+
match = dictEnd - (prefixStart - match);
|
|
947
|
+
if (match + sequence.matchLength <= dictEnd) {
|
|
948
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
949
|
+
return sequenceLength;
|
|
950
|
+
}
|
|
951
|
+
/* span extDict & currentPrefixSegment */
|
|
952
|
+
{ size_t const length1 = dictEnd - match;
|
|
953
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
954
|
+
op = oLitEnd + length1;
|
|
955
|
+
sequence.matchLength -= length1;
|
|
956
|
+
match = prefixStart;
|
|
957
|
+
}
|
|
958
|
+
}
|
|
803
959
|
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
|
804
960
|
return sequenceLength;
|
|
805
961
|
}
|
|
806
962
|
|
|
807
963
|
HINT_INLINE
|
|
808
964
|
size_t ZSTD_execSequence(BYTE* op,
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
965
|
+
BYTE* const oend, seq_t sequence,
|
|
966
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
967
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
812
968
|
{
|
|
813
969
|
BYTE* const oLitEnd = op + sequence.litLength;
|
|
814
970
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
@@ -819,6 +975,103 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
819
975
|
|
|
820
976
|
assert(op != NULL /* Precondition */);
|
|
821
977
|
assert(oend_w < oend /* No underflow */);
|
|
978
|
+
|
|
979
|
+
#if defined(__aarch64__)
|
|
980
|
+
/* prefetch sequence starting from match that will be used for copy later */
|
|
981
|
+
PREFETCH_L1(match);
|
|
982
|
+
#endif
|
|
983
|
+
/* Handle edge cases in a slow path:
|
|
984
|
+
* - Read beyond end of literals
|
|
985
|
+
* - Match end is within WILDCOPY_OVERLENGTH of oend
|
|
986
|
+
* - 32-bit mode and the match length overflows
|
|
987
|
+
*/
|
|
988
|
+
if (UNLIKELY(
|
|
989
|
+
iLitEnd > litLimit ||
|
|
990
|
+
oMatchEnd > oend_w ||
|
|
991
|
+
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
|
992
|
+
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
|
993
|
+
|
|
994
|
+
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
|
995
|
+
assert(op <= oLitEnd /* No overflow */);
|
|
996
|
+
assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
|
|
997
|
+
assert(oMatchEnd <= oend /* No underflow */);
|
|
998
|
+
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
|
|
999
|
+
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
|
|
1000
|
+
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
|
|
1001
|
+
|
|
1002
|
+
/* Copy Literals:
|
|
1003
|
+
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
|
|
1004
|
+
* We likely don't need the full 32-byte wildcopy.
|
|
1005
|
+
*/
|
|
1006
|
+
assert(WILDCOPY_OVERLENGTH >= 16);
|
|
1007
|
+
ZSTD_copy16(op, (*litPtr));
|
|
1008
|
+
if (UNLIKELY(sequence.litLength > 16)) {
|
|
1009
|
+
ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
|
|
1010
|
+
}
|
|
1011
|
+
op = oLitEnd;
|
|
1012
|
+
*litPtr = iLitEnd; /* update for next sequence */
|
|
1013
|
+
|
|
1014
|
+
/* Copy Match */
|
|
1015
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
1016
|
+
/* offset beyond prefix -> go into extDict */
|
|
1017
|
+
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
|
1018
|
+
match = dictEnd + (match - prefixStart);
|
|
1019
|
+
if (match + sequence.matchLength <= dictEnd) {
|
|
1020
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
1021
|
+
return sequenceLength;
|
|
1022
|
+
}
|
|
1023
|
+
/* span extDict & currentPrefixSegment */
|
|
1024
|
+
{ size_t const length1 = dictEnd - match;
|
|
1025
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
1026
|
+
op = oLitEnd + length1;
|
|
1027
|
+
sequence.matchLength -= length1;
|
|
1028
|
+
match = prefixStart;
|
|
1029
|
+
}
|
|
1030
|
+
}
|
|
1031
|
+
/* Match within prefix of 1 or more bytes */
|
|
1032
|
+
assert(op <= oMatchEnd);
|
|
1033
|
+
assert(oMatchEnd <= oend_w);
|
|
1034
|
+
assert(match >= prefixStart);
|
|
1035
|
+
assert(sequence.matchLength >= 1);
|
|
1036
|
+
|
|
1037
|
+
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
|
1038
|
+
* without overlap checking.
|
|
1039
|
+
*/
|
|
1040
|
+
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
|
1041
|
+
/* We bet on a full wildcopy for matches, since we expect matches to be
|
|
1042
|
+
* longer than literals (in general). In silesia, ~10% of matches are longer
|
|
1043
|
+
* than 16 bytes.
|
|
1044
|
+
*/
|
|
1045
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
|
|
1046
|
+
return sequenceLength;
|
|
1047
|
+
}
|
|
1048
|
+
assert(sequence.offset < WILDCOPY_VECLEN);
|
|
1049
|
+
|
|
1050
|
+
/* Copy 8 bytes and spread the offset to be >= 8. */
|
|
1051
|
+
ZSTD_overlapCopy8(&op, &match, sequence.offset);
|
|
1052
|
+
|
|
1053
|
+
/* If the match length is > 8 bytes, then continue with the wildcopy. */
|
|
1054
|
+
if (sequence.matchLength > 8) {
|
|
1055
|
+
assert(op < oMatchEnd);
|
|
1056
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
|
|
1057
|
+
}
|
|
1058
|
+
return sequenceLength;
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
HINT_INLINE
|
|
1062
|
+
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
|
|
1063
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
|
1064
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
1065
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
1066
|
+
{
|
|
1067
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
|
1068
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
1069
|
+
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
1070
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
1071
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
|
1072
|
+
|
|
1073
|
+
assert(op != NULL /* Precondition */);
|
|
1074
|
+
assert(oend_w < oend /* No underflow */);
|
|
822
1075
|
/* Handle edge cases in a slow path:
|
|
823
1076
|
* - Read beyond end of literals
|
|
824
1077
|
* - Match end is within WILDCOPY_OVERLENGTH of oend
|
|
@@ -828,7 +1081,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
828
1081
|
iLitEnd > litLimit ||
|
|
829
1082
|
oMatchEnd > oend_w ||
|
|
830
1083
|
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
|
831
|
-
return
|
|
1084
|
+
return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
|
832
1085
|
|
|
833
1086
|
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
|
834
1087
|
assert(op <= oLitEnd /* No overflow */);
|
|
@@ -896,6 +1149,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
896
1149
|
return sequenceLength;
|
|
897
1150
|
}
|
|
898
1151
|
|
|
1152
|
+
|
|
899
1153
|
static void
|
|
900
1154
|
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
|
|
901
1155
|
{
|
|
@@ -909,24 +1163,14 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS
|
|
|
909
1163
|
}
|
|
910
1164
|
|
|
911
1165
|
FORCE_INLINE_TEMPLATE void
|
|
912
|
-
|
|
913
|
-
{
|
|
914
|
-
ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
|
|
915
|
-
U32 const nbBits = DInfo.nbBits;
|
|
916
|
-
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
|
917
|
-
DStatePtr->state = DInfo.nextState + lowBits;
|
|
918
|
-
}
|
|
919
|
-
|
|
920
|
-
FORCE_INLINE_TEMPLATE void
|
|
921
|
-
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
|
|
1166
|
+
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
|
|
922
1167
|
{
|
|
923
|
-
U32 const nbBits = DInfo.nbBits;
|
|
924
1168
|
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
|
925
|
-
DStatePtr->state =
|
|
1169
|
+
DStatePtr->state = nextState + lowBits;
|
|
926
1170
|
}
|
|
927
1171
|
|
|
928
1172
|
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
|
929
|
-
* offset bits. But we can only read at most
|
|
1173
|
+
* offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
|
|
930
1174
|
* bits before reloading. This value is the maximum number of bytes we read
|
|
931
1175
|
* after reloading when we are decoding long offsets.
|
|
932
1176
|
*/
|
|
@@ -936,116 +1180,118 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD
|
|
|
936
1180
|
: 0)
|
|
937
1181
|
|
|
938
1182
|
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
|
|
939
|
-
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
|
|
940
1183
|
|
|
941
1184
|
FORCE_INLINE_TEMPLATE seq_t
|
|
942
|
-
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets
|
|
1185
|
+
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
|
943
1186
|
{
|
|
944
1187
|
seq_t seq;
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
BYTE const mlBits = mlDInfo.nbAdditionalBits;
|
|
953
|
-
BYTE const ofBits = ofDInfo.nbAdditionalBits;
|
|
954
|
-
BYTE const totalBits = llBits+mlBits+ofBits;
|
|
955
|
-
|
|
956
|
-
/* sequence */
|
|
957
|
-
{ size_t offset;
|
|
958
|
-
if (ofBits > 1) {
|
|
959
|
-
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
|
960
|
-
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
|
961
|
-
assert(ofBits <= MaxOff);
|
|
962
|
-
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
|
|
963
|
-
U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
|
|
964
|
-
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
|
965
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
966
|
-
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
|
967
|
-
assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
|
|
968
|
-
} else {
|
|
969
|
-
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
|
970
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
|
971
|
-
}
|
|
972
|
-
seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
973
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
974
|
-
seqState->prevOffset[0] = offset;
|
|
975
|
-
} else {
|
|
976
|
-
U32 const ll0 = (llBase == 0);
|
|
977
|
-
if (LIKELY((ofBits == 0))) {
|
|
978
|
-
if (LIKELY(!ll0))
|
|
979
|
-
offset = seqState->prevOffset[0];
|
|
980
|
-
else {
|
|
981
|
-
offset = seqState->prevOffset[1];
|
|
982
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
983
|
-
seqState->prevOffset[0] = offset;
|
|
984
|
-
}
|
|
985
|
-
} else {
|
|
986
|
-
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
|
|
987
|
-
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
988
|
-
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
|
989
|
-
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
990
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
991
|
-
seqState->prevOffset[0] = offset = temp;
|
|
992
|
-
} } }
|
|
993
|
-
seq.offset = offset;
|
|
994
|
-
}
|
|
995
|
-
|
|
996
|
-
seq.matchLength = mlBase;
|
|
997
|
-
if (mlBits > 0)
|
|
998
|
-
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
|
999
|
-
|
|
1000
|
-
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
|
1001
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
1002
|
-
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
|
1003
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
1004
|
-
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
|
1005
|
-
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
|
1006
|
-
|
|
1007
|
-
seq.litLength = llBase;
|
|
1008
|
-
if (llBits > 0)
|
|
1009
|
-
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
|
1010
|
-
|
|
1011
|
-
if (MEM_32bits())
|
|
1012
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
1013
|
-
|
|
1014
|
-
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
|
1015
|
-
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
|
1016
|
-
|
|
1017
|
-
if (prefetch == ZSTD_p_prefetch) {
|
|
1018
|
-
size_t const pos = seqState->pos + seq.litLength;
|
|
1019
|
-
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
|
1020
|
-
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
|
1021
|
-
* No consequence though : no memory access will occur, offset is only used for prefetching */
|
|
1022
|
-
seqState->pos = pos + seq.matchLength;
|
|
1023
|
-
}
|
|
1024
|
-
|
|
1025
|
-
/* ANS state update
|
|
1026
|
-
* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
|
|
1027
|
-
* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
|
|
1028
|
-
* Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
|
|
1029
|
-
* better option, so it is the default for other compilers. But, if you
|
|
1030
|
-
* measure that it is worse, please put up a pull request.
|
|
1188
|
+
/*
|
|
1189
|
+
* ZSTD_seqSymbol is a structure that is 64 bits wide in total, so it can be
|
|
1190
|
+
* loaded in one operation and its fields extracted by simply shifting or
|
|
1191
|
+
* bit-extracting on aarch64.
|
|
1192
|
+
* GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
|
|
1193
|
+
* operations that cause performance drop. This can be avoided by using this
|
|
1194
|
+
* ZSTD_memcpy hack.
|
|
1031
1195
|
*/
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1196
|
+
#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
|
|
1197
|
+
ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
|
|
1198
|
+
ZSTD_seqSymbol* const llDInfo = &llDInfoS;
|
|
1199
|
+
ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
|
|
1200
|
+
ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
|
|
1201
|
+
ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
|
|
1202
|
+
ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
|
|
1203
|
+
ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
|
|
1035
1204
|
#else
|
|
1036
|
-
|
|
1205
|
+
const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
|
|
1206
|
+
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
|
|
1207
|
+
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
|
|
1037
1208
|
#endif
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1209
|
+
seq.matchLength = mlDInfo->baseValue;
|
|
1210
|
+
seq.litLength = llDInfo->baseValue;
|
|
1211
|
+
{ U32 const ofBase = ofDInfo->baseValue;
|
|
1212
|
+
BYTE const llBits = llDInfo->nbAdditionalBits;
|
|
1213
|
+
BYTE const mlBits = mlDInfo->nbAdditionalBits;
|
|
1214
|
+
BYTE const ofBits = ofDInfo->nbAdditionalBits;
|
|
1215
|
+
BYTE const totalBits = llBits+mlBits+ofBits;
|
|
1216
|
+
|
|
1217
|
+
U16 const llNext = llDInfo->nextState;
|
|
1218
|
+
U16 const mlNext = mlDInfo->nextState;
|
|
1219
|
+
U16 const ofNext = ofDInfo->nextState;
|
|
1220
|
+
U32 const llnbBits = llDInfo->nbBits;
|
|
1221
|
+
U32 const mlnbBits = mlDInfo->nbBits;
|
|
1222
|
+
U32 const ofnbBits = ofDInfo->nbBits;
|
|
1223
|
+
|
|
1224
|
+
assert(llBits <= MaxLLBits);
|
|
1225
|
+
assert(mlBits <= MaxMLBits);
|
|
1226
|
+
assert(ofBits <= MaxOff);
|
|
1227
|
+
/*
|
|
1228
|
+
* As gcc has better branch and block analyzers, sometimes it is only
|
|
1229
|
+
* valuable to mark likeliness for clang, it gives around 3-4% of
|
|
1230
|
+
* performance.
|
|
1231
|
+
*/
|
|
1232
|
+
|
|
1233
|
+
/* sequence */
|
|
1234
|
+
{ size_t offset;
|
|
1235
|
+
if (ofBits > 1) {
|
|
1236
|
+
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
|
1237
|
+
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
|
1238
|
+
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
|
|
1239
|
+
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
|
|
1240
|
+
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
|
|
1241
|
+
/* Always read extra bits, this keeps the logic simple,
|
|
1242
|
+
* avoids branches, and avoids accidentally reading 0 bits.
|
|
1243
|
+
*/
|
|
1244
|
+
U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
|
|
1245
|
+
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
|
1246
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1247
|
+
offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
|
1248
|
+
} else {
|
|
1249
|
+
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
|
1250
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
|
1251
|
+
}
|
|
1252
|
+
seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
1253
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
1254
|
+
seqState->prevOffset[0] = offset;
|
|
1255
|
+
} else {
|
|
1256
|
+
U32 const ll0 = (llDInfo->baseValue == 0);
|
|
1257
|
+
if (LIKELY((ofBits == 0))) {
|
|
1258
|
+
offset = seqState->prevOffset[ll0];
|
|
1259
|
+
seqState->prevOffset[1] = seqState->prevOffset[!ll0];
|
|
1260
|
+
seqState->prevOffset[0] = offset;
|
|
1261
|
+
} else {
|
|
1262
|
+
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
|
|
1263
|
+
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
1264
|
+
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
|
1265
|
+
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
1266
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
1267
|
+
seqState->prevOffset[0] = offset = temp;
|
|
1268
|
+
} } }
|
|
1269
|
+
seq.offset = offset;
|
|
1048
1270
|
}
|
|
1271
|
+
|
|
1272
|
+
if (mlBits > 0)
|
|
1273
|
+
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
|
1274
|
+
|
|
1275
|
+
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
|
1276
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1277
|
+
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
|
1278
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1279
|
+
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
|
1280
|
+
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
|
1281
|
+
|
|
1282
|
+
if (llBits > 0)
|
|
1283
|
+
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
|
1284
|
+
|
|
1285
|
+
if (MEM_32bits())
|
|
1286
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1287
|
+
|
|
1288
|
+
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
|
1289
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
|
1290
|
+
|
|
1291
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
|
|
1292
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
|
|
1293
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
1294
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
|
|
1049
1295
|
}
|
|
1050
1296
|
|
|
1051
1297
|
return seq;
|
|
@@ -1098,9 +1344,11 @@ MEM_STATIC void ZSTD_assertValidSequence(
|
|
|
1098
1344
|
#endif
|
|
1099
1345
|
|
|
1100
1346
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1347
|
+
|
|
1348
|
+
|
|
1101
1349
|
FORCE_INLINE_TEMPLATE size_t
|
|
1102
1350
|
DONT_VECTORIZE
|
|
1103
|
-
|
|
1351
|
+
ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
|
|
1104
1352
|
void* dst, size_t maxDstSize,
|
|
1105
1353
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1106
1354
|
const ZSTD_longOffset_e isLongOffset,
|
|
@@ -1108,21 +1356,20 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
|
1108
1356
|
{
|
|
1109
1357
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
1110
1358
|
const BYTE* const iend = ip + seqSize;
|
|
1111
|
-
BYTE* const ostart = (BYTE*
|
|
1359
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
1112
1360
|
BYTE* const oend = ostart + maxDstSize;
|
|
1113
1361
|
BYTE* op = ostart;
|
|
1114
1362
|
const BYTE* litPtr = dctx->litPtr;
|
|
1115
|
-
const BYTE*
|
|
1363
|
+
const BYTE* litBufferEnd = dctx->litBufferEnd;
|
|
1116
1364
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
|
1117
1365
|
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
|
|
1118
1366
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
|
1119
|
-
DEBUGLOG(5, "
|
|
1367
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
|
|
1120
1368
|
(void)frame;
|
|
1121
1369
|
|
|
1122
1370
|
/* Regen sequences */
|
|
1123
1371
|
if (nbSeq) {
|
|
1124
1372
|
seqState_t seqState;
|
|
1125
|
-
size_t error = 0;
|
|
1126
1373
|
dctx->fseEntropy = 1;
|
|
1127
1374
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
1128
1375
|
RETURN_ERROR_IF(
|
|
@@ -1138,70 +1385,255 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
|
1138
1385
|
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
|
1139
1386
|
BIT_DStream_completed < BIT_DStream_overflow);
|
|
1140
1387
|
|
|
1388
|
+
/* decompress without overrunning litPtr begins */
|
|
1389
|
+
{
|
|
1390
|
+
seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1391
|
+
/* Align the decompression loop to 32 + 16 bytes.
|
|
1392
|
+
*
|
|
1393
|
+
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
|
|
1394
|
+
* speed swings based on the alignment of the decompression loop. This
|
|
1395
|
+
* performance swing is caused by parts of the decompression loop falling
|
|
1396
|
+
* out of the DSB. The entire decompression loop should fit in the DSB,
|
|
1397
|
+
* when it can't we get much worse performance. You can measure if you've
|
|
1398
|
+
* hit the good case or the bad case with this perf command for some
|
|
1399
|
+
* compressed file test.zst:
|
|
1400
|
+
*
|
|
1401
|
+
* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
|
|
1402
|
+
* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
|
|
1403
|
+
*
|
|
1404
|
+
* If you see most cycles served out of the MITE you've hit the bad case.
|
|
1405
|
+
* If you see most cycles served out of the DSB you've hit the good case.
|
|
1406
|
+
* If it is pretty even then you may be in an okay case.
|
|
1407
|
+
*
|
|
1408
|
+
* This issue has been reproduced on the following CPUs:
|
|
1409
|
+
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
|
1410
|
+
* Use Instruments->Counters to get DSB/MITE cycles.
|
|
1411
|
+
* I never got performance swings, but I was able to
|
|
1412
|
+
* go from the good case of mostly DSB to half of the
|
|
1413
|
+
* cycles served from MITE.
|
|
1414
|
+
* - Coffeelake: Intel i9-9900k
|
|
1415
|
+
* - Coffeelake: Intel i7-9700k
|
|
1416
|
+
*
|
|
1417
|
+
* I haven't been able to reproduce the instability or DSB misses on any
|
|
1418
|
+
* of the following CPUs:
|
|
1419
|
+
* - Haswell
|
|
1420
|
+
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
|
|
1421
|
+
* - Skylake
|
|
1422
|
+
*
|
|
1423
|
+
* Alignment is done for each of the three major decompression loops:
|
|
1424
|
+
* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
|
|
1425
|
+
* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
|
|
1426
|
+
* - ZSTD_decompressSequences_body
|
|
1427
|
+
* Alignment choices are made to minimize large swings on bad cases and influence on performance
|
|
1428
|
+
* from changes external to this code, rather than to overoptimize on the current commit.
|
|
1429
|
+
*
|
|
1430
|
+
* If you are seeing performance instability, this script can help test.
|
|
1431
|
+
* It tests on 4 commits in zstd where I saw performance change.
|
|
1432
|
+
*
|
|
1433
|
+
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
|
1434
|
+
*/
|
|
1141
1435
|
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1436
|
+
__asm__(".p2align 6");
|
|
1437
|
+
# if __GNUC__ >= 7
|
|
1438
|
+
/* good for gcc-7, gcc-9, and gcc-11 */
|
|
1439
|
+
__asm__("nop");
|
|
1440
|
+
__asm__(".p2align 5");
|
|
1441
|
+
__asm__("nop");
|
|
1442
|
+
__asm__(".p2align 4");
|
|
1443
|
+
# if __GNUC__ == 8 || __GNUC__ == 10
|
|
1444
|
+
/* good for gcc-8 and gcc-10 */
|
|
1445
|
+
__asm__("nop");
|
|
1446
|
+
__asm__(".p2align 3");
|
|
1447
|
+
# endif
|
|
1448
|
+
# endif
|
|
1449
|
+
#endif
|
|
1450
|
+
|
|
1451
|
+
/* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
|
|
1452
|
+
for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
|
|
1453
|
+
size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
|
1454
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1455
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1456
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1457
|
+
#endif
|
|
1458
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1459
|
+
return oneSeqSize;
|
|
1460
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1461
|
+
op += oneSeqSize;
|
|
1462
|
+
if (UNLIKELY(!--nbSeq))
|
|
1463
|
+
break;
|
|
1464
|
+
BIT_reloadDStream(&(seqState.DStream));
|
|
1465
|
+
sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1466
|
+
}
|
|
1467
|
+
|
|
1468
|
+
/* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litBufferEnd */
|
|
1469
|
+
if (nbSeq > 0) {
|
|
1470
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
|
1471
|
+
if (leftoverLit)
|
|
1472
|
+
{
|
|
1473
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
|
1474
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
|
1475
|
+
sequence.litLength -= leftoverLit;
|
|
1476
|
+
op += leftoverLit;
|
|
1477
|
+
}
|
|
1478
|
+
litPtr = dctx->litExtraBuffer;
|
|
1479
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1480
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1481
|
+
{
|
|
1482
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
|
1483
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1484
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1485
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1486
|
+
#endif
|
|
1487
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1488
|
+
return oneSeqSize;
|
|
1489
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1490
|
+
op += oneSeqSize;
|
|
1491
|
+
if (--nbSeq)
|
|
1492
|
+
BIT_reloadDStream(&(seqState.DStream));
|
|
1493
|
+
}
|
|
1494
|
+
}
|
|
1495
|
+
}
|
|
1496
|
+
|
|
1497
|
+
if (nbSeq > 0) /* there is remaining lit from extra buffer */
|
|
1498
|
+
{
|
|
1499
|
+
|
|
1500
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1501
|
+
__asm__(".p2align 6");
|
|
1502
|
+
__asm__("nop");
|
|
1503
|
+
# if __GNUC__ != 7
|
|
1504
|
+
/* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
|
|
1505
|
+
__asm__(".p2align 4");
|
|
1506
|
+
__asm__("nop");
|
|
1507
|
+
__asm__(".p2align 3");
|
|
1508
|
+
# elif __GNUC__ >= 11
|
|
1509
|
+
__asm__(".p2align 3");
|
|
1510
|
+
# else
|
|
1511
|
+
__asm__(".p2align 5");
|
|
1512
|
+
__asm__("nop");
|
|
1513
|
+
__asm__(".p2align 3");
|
|
1514
|
+
# endif
|
|
1515
|
+
#endif
|
|
1516
|
+
|
|
1517
|
+
for (; ; ) {
|
|
1518
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1519
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
|
1520
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1521
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1522
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1181
1523
|
#endif
|
|
1524
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1525
|
+
return oneSeqSize;
|
|
1526
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1527
|
+
op += oneSeqSize;
|
|
1528
|
+
if (UNLIKELY(!--nbSeq))
|
|
1529
|
+
break;
|
|
1530
|
+
BIT_reloadDStream(&(seqState.DStream));
|
|
1531
|
+
}
|
|
1532
|
+
}
|
|
1533
|
+
|
|
1534
|
+
/* check if reached exact end */
|
|
1535
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
|
|
1536
|
+
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
|
|
1537
|
+
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
|
|
1538
|
+
/* save reps for next block */
|
|
1539
|
+
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
|
1540
|
+
}
|
|
1541
|
+
|
|
1542
|
+
/* last literal segment */
|
|
1543
|
+
if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
|
|
1544
|
+
{
|
|
1545
|
+
size_t const lastLLSize = litBufferEnd - litPtr;
|
|
1546
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
|
|
1547
|
+
if (op != NULL) {
|
|
1548
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
|
1549
|
+
op += lastLLSize;
|
|
1550
|
+
}
|
|
1551
|
+
litPtr = dctx->litExtraBuffer;
|
|
1552
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1553
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1554
|
+
}
|
|
1555
|
+
{ size_t const lastLLSize = litBufferEnd - litPtr;
|
|
1556
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1557
|
+
if (op != NULL) {
|
|
1558
|
+
ZSTD_memcpy(op, litPtr, lastLLSize);
|
|
1559
|
+
op += lastLLSize;
|
|
1560
|
+
}
|
|
1561
|
+
}
|
|
1562
|
+
|
|
1563
|
+
return op-ostart;
|
|
1564
|
+
}
|
|
1565
|
+
|
|
1566
|
+
FORCE_INLINE_TEMPLATE size_t
|
|
1567
|
+
DONT_VECTORIZE
|
|
1568
|
+
ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
|
|
1569
|
+
void* dst, size_t maxDstSize,
|
|
1570
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1571
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1572
|
+
const int frame)
|
|
1573
|
+
{
|
|
1574
|
+
const BYTE* ip = (const BYTE*)seqStart;
|
|
1575
|
+
const BYTE* const iend = ip + seqSize;
|
|
1576
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
1577
|
+
BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
|
|
1578
|
+
BYTE* op = ostart;
|
|
1579
|
+
const BYTE* litPtr = dctx->litPtr;
|
|
1580
|
+
const BYTE* const litEnd = litPtr + dctx->litSize;
|
|
1581
|
+
const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
|
|
1582
|
+
const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
|
|
1583
|
+
const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
|
|
1584
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
|
|
1585
|
+
(void)frame;
|
|
1586
|
+
|
|
1587
|
+
/* Regen sequences */
|
|
1588
|
+
if (nbSeq) {
|
|
1589
|
+
seqState_t seqState;
|
|
1590
|
+
dctx->fseEntropy = 1;
|
|
1591
|
+
{ U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
1592
|
+
RETURN_ERROR_IF(
|
|
1593
|
+
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
|
|
1594
|
+
corruption_detected, "");
|
|
1595
|
+
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
|
1596
|
+
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
|
1597
|
+
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
|
1598
|
+
assert(dst != NULL);
|
|
1599
|
+
|
|
1600
|
+
ZSTD_STATIC_ASSERT(
|
|
1601
|
+
BIT_DStream_unfinished < BIT_DStream_completed &&
|
|
1602
|
+
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
|
1603
|
+
BIT_DStream_completed < BIT_DStream_overflow);
|
|
1604
|
+
|
|
1605
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1606
|
+
__asm__(".p2align 6");
|
|
1607
|
+
__asm__("nop");
|
|
1608
|
+
# if __GNUC__ >= 7
|
|
1609
|
+
__asm__(".p2align 5");
|
|
1610
|
+
__asm__("nop");
|
|
1611
|
+
__asm__(".p2align 3");
|
|
1612
|
+
# else
|
|
1613
|
+
__asm__(".p2align 4");
|
|
1614
|
+
__asm__("nop");
|
|
1615
|
+
__asm__(".p2align 3");
|
|
1616
|
+
# endif
|
|
1617
|
+
#endif
|
|
1618
|
+
|
|
1182
1619
|
for ( ; ; ) {
|
|
1183
|
-
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset
|
|
1620
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1184
1621
|
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
|
1185
1622
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1186
1623
|
assert(!ZSTD_isError(oneSeqSize));
|
|
1187
1624
|
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1188
1625
|
#endif
|
|
1626
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1627
|
+
return oneSeqSize;
|
|
1189
1628
|
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1190
|
-
BIT_reloadDStream(&(seqState.DStream));
|
|
1191
1629
|
op += oneSeqSize;
|
|
1192
|
-
|
|
1193
|
-
* Instead break and check for an error at the end of the loop.
|
|
1194
|
-
*/
|
|
1195
|
-
if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
|
|
1196
|
-
error = oneSeqSize;
|
|
1630
|
+
if (UNLIKELY(!--nbSeq))
|
|
1197
1631
|
break;
|
|
1198
|
-
|
|
1199
|
-
if (UNLIKELY(!--nbSeq)) break;
|
|
1632
|
+
BIT_reloadDStream(&(seqState.DStream));
|
|
1200
1633
|
}
|
|
1201
1634
|
|
|
1202
1635
|
/* check if reached exact end */
|
|
1203
1636
|
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
|
|
1204
|
-
if (ZSTD_isError(error)) return error;
|
|
1205
1637
|
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
|
|
1206
1638
|
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
|
|
1207
1639
|
/* save reps for next block */
|
|
@@ -1229,9 +1661,37 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
|
|
|
1229
1661
|
{
|
|
1230
1662
|
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1231
1663
|
}
|
|
1664
|
+
|
|
1665
|
+
static size_t
|
|
1666
|
+
ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
|
|
1667
|
+
void* dst, size_t maxDstSize,
|
|
1668
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1669
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1670
|
+
const int frame)
|
|
1671
|
+
{
|
|
1672
|
+
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1673
|
+
}
|
|
1232
1674
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1233
1675
|
|
|
1234
1676
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1677
|
+
|
|
1678
|
+
FORCE_INLINE_TEMPLATE size_t
|
|
1679
|
+
ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
|
|
1680
|
+
const BYTE* const prefixStart, const BYTE* const dictEnd)
|
|
1681
|
+
{
|
|
1682
|
+
prefetchPos += sequence.litLength;
|
|
1683
|
+
{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
|
|
1684
|
+
const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
|
1685
|
+
* No consequence though : memory address is only used for prefetching, not for dereferencing */
|
|
1686
|
+
PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
|
1687
|
+
}
|
|
1688
|
+
return prefetchPos + sequence.matchLength;
|
|
1689
|
+
}
|
|
1690
|
+
|
|
1691
|
+
/* This decoding function employs prefetching
|
|
1692
|
+
* to reduce latency impact of cache misses.
|
|
1693
|
+
* It's generally employed when block contains a significant portion of long-distance matches
|
|
1694
|
+
* or when coupled with a "cold" dictionary */
|
|
1235
1695
|
FORCE_INLINE_TEMPLATE size_t
|
|
1236
1696
|
ZSTD_decompressSequencesLong_body(
|
|
1237
1697
|
ZSTD_DCtx* dctx,
|
|
@@ -1242,11 +1702,11 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1242
1702
|
{
|
|
1243
1703
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
1244
1704
|
const BYTE* const iend = ip + seqSize;
|
|
1245
|
-
BYTE* const ostart = (BYTE*
|
|
1246
|
-
BYTE* const oend = ostart + maxDstSize;
|
|
1705
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
1706
|
+
BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
|
|
1247
1707
|
BYTE* op = ostart;
|
|
1248
1708
|
const BYTE* litPtr = dctx->litPtr;
|
|
1249
|
-
const BYTE*
|
|
1709
|
+
const BYTE* litBufferEnd = dctx->litBufferEnd;
|
|
1250
1710
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
|
1251
1711
|
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
|
|
1252
1712
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
|
@@ -1254,18 +1714,17 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1254
1714
|
|
|
1255
1715
|
/* Regen sequences */
|
|
1256
1716
|
if (nbSeq) {
|
|
1257
|
-
#define STORED_SEQS
|
|
1717
|
+
#define STORED_SEQS 8
|
|
1258
1718
|
#define STORED_SEQS_MASK (STORED_SEQS-1)
|
|
1259
|
-
#define ADVANCED_SEQS
|
|
1719
|
+
#define ADVANCED_SEQS STORED_SEQS
|
|
1260
1720
|
seq_t sequences[STORED_SEQS];
|
|
1261
1721
|
int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
|
|
1262
1722
|
seqState_t seqState;
|
|
1263
1723
|
int seqNb;
|
|
1724
|
+
size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
|
|
1725
|
+
|
|
1264
1726
|
dctx->fseEntropy = 1;
|
|
1265
1727
|
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
1266
|
-
seqState.prefixStart = prefixStart;
|
|
1267
|
-
seqState.pos = (size_t)(op-prefixStart);
|
|
1268
|
-
seqState.dictEnd = dictEnd;
|
|
1269
1728
|
assert(dst != NULL);
|
|
1270
1729
|
assert(iend >= ip);
|
|
1271
1730
|
RETURN_ERROR_IF(
|
|
@@ -1277,36 +1736,100 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1277
1736
|
|
|
1278
1737
|
/* prepare in advance */
|
|
1279
1738
|
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
|
|
1280
|
-
|
|
1281
|
-
|
|
1739
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1740
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
|
1741
|
+
sequences[seqNb] = sequence;
|
|
1282
1742
|
}
|
|
1283
1743
|
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
|
|
1284
1744
|
|
|
1285
|
-
/*
|
|
1286
|
-
for (
|
|
1287
|
-
seq_t
|
|
1288
|
-
size_t
|
|
1745
|
+
/* decompress without stomping litBuffer */
|
|
1746
|
+
for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
|
|
1747
|
+
seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1748
|
+
size_t oneSeqSize;
|
|
1749
|
+
|
|
1750
|
+
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
|
|
1751
|
+
{
|
|
1752
|
+
/* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
|
|
1753
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
|
1754
|
+
if (leftoverLit)
|
|
1755
|
+
{
|
|
1756
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
|
1757
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
|
1758
|
+
sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
|
|
1759
|
+
op += leftoverLit;
|
|
1760
|
+
}
|
|
1761
|
+
litPtr = dctx->litExtraBuffer;
|
|
1762
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1763
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1764
|
+
oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1289
1765
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1290
|
-
|
|
1291
|
-
|
|
1766
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1767
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1292
1768
|
#endif
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1769
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1770
|
+
|
|
1771
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
|
1772
|
+
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
|
1773
|
+
op += oneSeqSize;
|
|
1774
|
+
}
|
|
1775
|
+
else
|
|
1776
|
+
{
|
|
1777
|
+
/* lit buffer is either wholly contained in first or second split, or not split at all*/
|
|
1778
|
+
oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
|
|
1779
|
+
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
|
|
1780
|
+
ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1781
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1782
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1783
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1784
|
+
#endif
|
|
1785
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1786
|
+
|
|
1787
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
|
1788
|
+
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
|
1789
|
+
op += oneSeqSize;
|
|
1790
|
+
}
|
|
1297
1791
|
}
|
|
1298
1792
|
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
|
|
1299
1793
|
|
|
1300
1794
|
/* finish queue */
|
|
1301
1795
|
seqNb -= seqAdvance;
|
|
1302
1796
|
for ( ; seqNb<nbSeq ; seqNb++) {
|
|
1303
|
-
|
|
1797
|
+
seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
|
|
1798
|
+
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
|
|
1799
|
+
{
|
|
1800
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
|
1801
|
+
if (leftoverLit)
|
|
1802
|
+
{
|
|
1803
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
|
1804
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
|
1805
|
+
sequence->litLength -= leftoverLit;
|
|
1806
|
+
op += leftoverLit;
|
|
1807
|
+
}
|
|
1808
|
+
litPtr = dctx->litExtraBuffer;
|
|
1809
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1810
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1811
|
+
{
|
|
1812
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1304
1813
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1305
|
-
|
|
1306
|
-
|
|
1814
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1815
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1307
1816
|
#endif
|
|
1308
|
-
|
|
1309
|
-
|
|
1817
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1818
|
+
op += oneSeqSize;
|
|
1819
|
+
}
|
|
1820
|
+
}
|
|
1821
|
+
else
|
|
1822
|
+
{
|
|
1823
|
+
size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
|
|
1824
|
+
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
|
|
1825
|
+
ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1826
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1827
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1828
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1829
|
+
#endif
|
|
1830
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1831
|
+
op += oneSeqSize;
|
|
1832
|
+
}
|
|
1310
1833
|
}
|
|
1311
1834
|
|
|
1312
1835
|
/* save reps for next block */
|
|
@@ -1314,10 +1837,21 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1314
1837
|
}
|
|
1315
1838
|
|
|
1316
1839
|
/* last literal segment */
|
|
1317
|
-
|
|
1840
|
+
if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
|
|
1841
|
+
{
|
|
1842
|
+
size_t const lastLLSize = litBufferEnd - litPtr;
|
|
1843
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
|
|
1844
|
+
if (op != NULL) {
|
|
1845
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
|
1846
|
+
op += lastLLSize;
|
|
1847
|
+
}
|
|
1848
|
+
litPtr = dctx->litExtraBuffer;
|
|
1849
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1850
|
+
}
|
|
1851
|
+
{ size_t const lastLLSize = litBufferEnd - litPtr;
|
|
1318
1852
|
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1319
1853
|
if (op != NULL) {
|
|
1320
|
-
|
|
1854
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
|
1321
1855
|
op += lastLLSize;
|
|
1322
1856
|
}
|
|
1323
1857
|
}
|
|
@@ -1341,7 +1875,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
|
1341
1875
|
#if DYNAMIC_BMI2
|
|
1342
1876
|
|
|
1343
1877
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1344
|
-
static
|
|
1878
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
1345
1879
|
DONT_VECTORIZE
|
|
1346
1880
|
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
|
1347
1881
|
void* dst, size_t maxDstSize,
|
|
@@ -1351,10 +1885,20 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
|
|
1351
1885
|
{
|
|
1352
1886
|
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1353
1887
|
}
|
|
1888
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
1889
|
+
DONT_VECTORIZE
|
|
1890
|
+
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
|
|
1891
|
+
void* dst, size_t maxDstSize,
|
|
1892
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1893
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1894
|
+
const int frame)
|
|
1895
|
+
{
|
|
1896
|
+
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1897
|
+
}
|
|
1354
1898
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1355
1899
|
|
|
1356
1900
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1357
|
-
static
|
|
1901
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
1358
1902
|
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
|
|
1359
1903
|
void* dst, size_t maxDstSize,
|
|
1360
1904
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
@@ -1383,11 +1927,25 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|
|
1383
1927
|
{
|
|
1384
1928
|
DEBUGLOG(5, "ZSTD_decompressSequences");
|
|
1385
1929
|
#if DYNAMIC_BMI2
|
|
1386
|
-
if (dctx
|
|
1930
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
|
1387
1931
|
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1388
1932
|
}
|
|
1389
1933
|
#endif
|
|
1390
|
-
|
|
1934
|
+
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1935
|
+
}
|
|
1936
|
+
static size_t
|
|
1937
|
+
ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|
1938
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1939
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1940
|
+
const int frame)
|
|
1941
|
+
{
|
|
1942
|
+
DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
|
|
1943
|
+
#if DYNAMIC_BMI2
|
|
1944
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
|
1945
|
+
return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1946
|
+
}
|
|
1947
|
+
#endif
|
|
1948
|
+
return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1391
1949
|
}
|
|
1392
1950
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1393
1951
|
|
|
@@ -1407,7 +1965,7 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|
|
1407
1965
|
{
|
|
1408
1966
|
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
|
|
1409
1967
|
#if DYNAMIC_BMI2
|
|
1410
|
-
if (dctx
|
|
1968
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
|
1411
1969
|
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1412
1970
|
}
|
|
1413
1971
|
#endif
|
|
@@ -1416,55 +1974,101 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|
|
1416
1974
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1417
1975
|
|
|
1418
1976
|
|
|
1977
|
+
/**
|
|
1978
|
+
* @returns The total size of the history referenceable by zstd, including
|
|
1979
|
+
* both the prefix and the extDict. At @p op any offset larger than this
|
|
1980
|
+
* is invalid.
|
|
1981
|
+
*/
|
|
1982
|
+
static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
|
|
1983
|
+
{
|
|
1984
|
+
return (size_t)(op - virtualStart);
|
|
1985
|
+
}
|
|
1419
1986
|
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1987
|
+
typedef struct {
|
|
1988
|
+
unsigned longOffsetShare;
|
|
1989
|
+
unsigned maxNbAdditionalBits;
|
|
1990
|
+
} ZSTD_OffsetInfo;
|
|
1991
|
+
|
|
1992
|
+
/* ZSTD_getOffsetInfo() :
|
|
1423
1993
|
* condition : offTable must be valid
|
|
1424
1994
|
* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
|
|
1425
|
-
* compared to maximum possible of (1<<OffFSELog)
|
|
1426
|
-
|
|
1427
|
-
|
|
1995
|
+
* compared to maximum possible of (1<<OffFSELog),
|
|
1996
|
+
* as well as the maximum number additional bits required.
|
|
1997
|
+
*/
|
|
1998
|
+
static ZSTD_OffsetInfo
|
|
1999
|
+
ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
|
|
1428
2000
|
{
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
2001
|
+
ZSTD_OffsetInfo info = {0, 0};
|
|
2002
|
+
/* If nbSeq == 0, then the offTable is uninitialized, but we have
|
|
2003
|
+
* no sequences, so both values should be 0.
|
|
2004
|
+
*/
|
|
2005
|
+
if (nbSeq != 0) {
|
|
2006
|
+
const void* ptr = offTable;
|
|
2007
|
+
U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
|
|
2008
|
+
const ZSTD_seqSymbol* table = offTable + 1;
|
|
2009
|
+
U32 const max = 1 << tableLog;
|
|
2010
|
+
U32 u;
|
|
2011
|
+
DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
|
|
2012
|
+
|
|
2013
|
+
assert(max <= (1 << OffFSELog)); /* max not too large */
|
|
2014
|
+
for (u=0; u<max; u++) {
|
|
2015
|
+
info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
|
|
2016
|
+
if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
|
|
2017
|
+
}
|
|
2018
|
+
|
|
2019
|
+
assert(tableLog <= OffFSELog);
|
|
2020
|
+
info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
|
|
1439
2021
|
}
|
|
1440
2022
|
|
|
1441
|
-
|
|
1442
|
-
|
|
2023
|
+
return info;
|
|
2024
|
+
}
|
|
1443
2025
|
|
|
1444
|
-
|
|
2026
|
+
/**
|
|
2027
|
+
* @returns The maximum offset we can decode in one read of our bitstream, without
|
|
2028
|
+
* reloading more bits in the middle of the offset bits read. Any offsets larger
|
|
2029
|
+
* than this must use the long offset decoder.
|
|
2030
|
+
*/
|
|
2031
|
+
static size_t ZSTD_maxShortOffset(void)
|
|
2032
|
+
{
|
|
2033
|
+
if (MEM_64bits()) {
|
|
2034
|
+
/* We can decode any offset without reloading bits.
|
|
2035
|
+
* This might change if the max window size grows.
|
|
2036
|
+
*/
|
|
2037
|
+
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
|
|
2038
|
+
return (size_t)-1;
|
|
2039
|
+
} else {
|
|
2040
|
+
/* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
|
|
2041
|
+
* This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
|
|
2042
|
+
* Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
|
|
2043
|
+
*/
|
|
2044
|
+
size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
|
|
2045
|
+
size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
|
|
2046
|
+
assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
|
|
2047
|
+
return maxOffset;
|
|
2048
|
+
}
|
|
1445
2049
|
}
|
|
1446
|
-
#endif
|
|
1447
2050
|
|
|
1448
2051
|
size_t
|
|
1449
2052
|
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1450
2053
|
void* dst, size_t dstCapacity,
|
|
1451
|
-
const void* src, size_t srcSize, const int frame)
|
|
2054
|
+
const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
|
|
1452
2055
|
{ /* blockType == blockCompressed */
|
|
1453
2056
|
const BYTE* ip = (const BYTE*)src;
|
|
1454
|
-
/* isLongOffset must be true if there are long offsets.
|
|
1455
|
-
* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
|
|
1456
|
-
* We don't expect that to be the case in 64-bit mode.
|
|
1457
|
-
* In block mode, window size is not known, so we have to be conservative.
|
|
1458
|
-
* (note: but it could be evaluated from current-lowLimit)
|
|
1459
|
-
*/
|
|
1460
|
-
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
|
|
1461
2057
|
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
|
1462
2058
|
|
|
1463
|
-
|
|
2059
|
+
/* Note : the wording of the specification
|
|
2060
|
+
* allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
|
|
2061
|
+
* This generally does not happen, as it makes little sense,
|
|
2062
|
+
* since an uncompressed block would feature same size and have no decompression cost.
|
|
2063
|
+
* Also, note that decoder from reference libzstd before < v1.5.4
|
|
2064
|
+
* would consider this edge case as an error.
|
|
2065
|
+
* As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
|
|
2066
|
+
* for broader compatibility with the deployed ecosystem of zstd decoders */
|
|
2067
|
+
RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
|
|
1464
2068
|
|
|
1465
2069
|
/* Decode literals section */
|
|
1466
|
-
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
|
1467
|
-
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock :
|
|
2070
|
+
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
|
|
2071
|
+
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
|
|
1468
2072
|
if (ZSTD_isError(litCSize)) return litCSize;
|
|
1469
2073
|
ip += litCSize;
|
|
1470
2074
|
srcSize -= litCSize;
|
|
@@ -1472,6 +2076,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1472
2076
|
|
|
1473
2077
|
/* Build Decoding Tables */
|
|
1474
2078
|
{
|
|
2079
|
+
/* Compute the maximum block size, which must also work when !frame and fParams are unset.
|
|
2080
|
+
* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
|
|
2081
|
+
*/
|
|
2082
|
+
size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
|
|
2083
|
+
size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
|
|
2084
|
+
/* isLongOffset must be true if there are long offsets.
|
|
2085
|
+
* Offsets are long if they are larger than ZSTD_maxShortOffset().
|
|
2086
|
+
* We don't expect that to be the case in 64-bit mode.
|
|
2087
|
+
*
|
|
2088
|
+
* We check here to see if our history is large enough to allow long offsets.
|
|
2089
|
+
* If it isn't, then we can't possible have (valid) long offsets. If the offset
|
|
2090
|
+
* is invalid, then it is okay to read it incorrectly.
|
|
2091
|
+
*
|
|
2092
|
+
* If isLongOffsets is true, then we will later check our decoding table to see
|
|
2093
|
+
* if it is even possible to generate long offsets.
|
|
2094
|
+
*/
|
|
2095
|
+
ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
|
|
1475
2096
|
/* These macros control at build-time which decompressor implementation
|
|
1476
2097
|
* we use. If neither is defined, we do some inspection and dispatch at
|
|
1477
2098
|
* runtime.
|
|
@@ -1479,6 +2100,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1479
2100
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
|
1480
2101
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
|
1481
2102
|
int usePrefetchDecoder = dctx->ddictIsCold;
|
|
2103
|
+
#else
|
|
2104
|
+
/* Set to 1 to avoid computing offset info if we don't need to.
|
|
2105
|
+
* Otherwise this value is ignored.
|
|
2106
|
+
*/
|
|
2107
|
+
int usePrefetchDecoder = 1;
|
|
1482
2108
|
#endif
|
|
1483
2109
|
int nbSeq;
|
|
1484
2110
|
size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
|
|
@@ -1486,40 +2112,57 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1486
2112
|
ip += seqHSize;
|
|
1487
2113
|
srcSize -= seqHSize;
|
|
1488
2114
|
|
|
1489
|
-
RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
|
2115
|
+
RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
|
2116
|
+
RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
|
|
2117
|
+
"invalid dst");
|
|
1490
2118
|
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
2119
|
+
/* If we could potentially have long offsets, or we might want to use the prefetch decoder,
|
|
2120
|
+
* compute information about the share of long offsets, and the maximum nbAdditionalBits.
|
|
2121
|
+
* NOTE: could probably use a larger nbSeq limit
|
|
2122
|
+
*/
|
|
2123
|
+
if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
|
|
2124
|
+
ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
|
|
2125
|
+
if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
|
|
2126
|
+
/* If isLongOffset, but the maximum number of additional bits that we see in our table is small
|
|
2127
|
+
* enough, then we know it is impossible to have too long an offset in this block, so we can
|
|
2128
|
+
* use the regular offset decoder.
|
|
2129
|
+
*/
|
|
2130
|
+
isLongOffset = ZSTD_lo_isRegularOffset;
|
|
2131
|
+
}
|
|
2132
|
+
if (!usePrefetchDecoder) {
|
|
2133
|
+
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
|
|
2134
|
+
usePrefetchDecoder = (info.longOffsetShare >= minShare);
|
|
2135
|
+
}
|
|
1499
2136
|
}
|
|
1500
|
-
#endif
|
|
1501
2137
|
|
|
1502
2138
|
dctx->ddictIsCold = 0;
|
|
1503
2139
|
|
|
1504
2140
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
|
1505
2141
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
|
1506
|
-
if (usePrefetchDecoder)
|
|
2142
|
+
if (usePrefetchDecoder) {
|
|
2143
|
+
#else
|
|
2144
|
+
(void)usePrefetchDecoder;
|
|
2145
|
+
{
|
|
1507
2146
|
#endif
|
|
1508
2147
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1509
2148
|
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
|
1510
2149
|
#endif
|
|
2150
|
+
}
|
|
1511
2151
|
|
|
1512
2152
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1513
2153
|
/* else */
|
|
1514
|
-
|
|
2154
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
2155
|
+
return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
|
2156
|
+
else
|
|
2157
|
+
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
|
1515
2158
|
#endif
|
|
1516
2159
|
}
|
|
1517
2160
|
}
|
|
1518
2161
|
|
|
1519
2162
|
|
|
1520
|
-
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
|
|
2163
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
|
|
1521
2164
|
{
|
|
1522
|
-
if (dst != dctx->previousDstEnd) { /* not contiguous */
|
|
2165
|
+
if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
|
|
1523
2166
|
dctx->dictEnd = dctx->previousDstEnd;
|
|
1524
2167
|
dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
|
|
1525
2168
|
dctx->prefixStart = dst;
|
|
@@ -1528,13 +2171,22 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
|
|
|
1528
2171
|
}
|
|
1529
2172
|
|
|
1530
2173
|
|
|
1531
|
-
size_t
|
|
1532
|
-
|
|
1533
|
-
|
|
2174
|
+
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
|
|
2175
|
+
void* dst, size_t dstCapacity,
|
|
2176
|
+
const void* src, size_t srcSize)
|
|
1534
2177
|
{
|
|
1535
2178
|
size_t dSize;
|
|
1536
|
-
ZSTD_checkContinuity(dctx, dst);
|
|
1537
|
-
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
|
|
2179
|
+
ZSTD_checkContinuity(dctx, dst, dstCapacity);
|
|
2180
|
+
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
|
|
1538
2181
|
dctx->previousDstEnd = (char*)dst + dSize;
|
|
1539
2182
|
return dSize;
|
|
1540
2183
|
}
|
|
2184
|
+
|
|
2185
|
+
|
|
2186
|
+
/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
|
|
2187
|
+
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
|
|
2188
|
+
void* dst, size_t dstCapacity,
|
|
2189
|
+
const void* src, size_t srcSize)
|
|
2190
|
+
{
|
|
2191
|
+
return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
|
|
2192
|
+
}
|