extzstd 0.3.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -20,12 +20,12 @@
|
|
|
20
20
|
#include "../common/mem.h" /* low level memory routines */
|
|
21
21
|
#define FSE_STATIC_LINKING_ONLY
|
|
22
22
|
#include "../common/fse.h"
|
|
23
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
24
23
|
#include "../common/huf.h"
|
|
25
24
|
#include "../common/zstd_internal.h"
|
|
26
25
|
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
|
|
27
26
|
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
|
|
28
27
|
#include "zstd_decompress_block.h"
|
|
28
|
+
#include "../common/bits.h" /* ZSTD_highbit32 */
|
|
29
29
|
|
|
30
30
|
/*_*******************************************************
|
|
31
31
|
* Macros
|
|
@@ -51,6 +51,13 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
|
|
|
51
51
|
* Block decoding
|
|
52
52
|
***************************************************************/
|
|
53
53
|
|
|
54
|
+
static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
|
|
55
|
+
{
|
|
56
|
+
size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
|
|
57
|
+
assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
|
|
58
|
+
return blockSizeMax;
|
|
59
|
+
}
|
|
60
|
+
|
|
54
61
|
/*! ZSTD_getcBlockSize() :
|
|
55
62
|
* Provides the size of compressed block from block header `src` */
|
|
56
63
|
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
@@ -69,36 +76,90 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
|
69
76
|
}
|
|
70
77
|
}
|
|
71
78
|
|
|
79
|
+
/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
|
|
80
|
+
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
|
|
81
|
+
const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
|
|
82
|
+
{
|
|
83
|
+
size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
|
|
84
|
+
assert(litSize <= blockSizeMax);
|
|
85
|
+
assert(dctx->isFrameDecompression || streaming == not_streaming);
|
|
86
|
+
assert(expectedWriteSize <= blockSizeMax);
|
|
87
|
+
if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
|
|
88
|
+
/* If we aren't streaming, we can just put the literals after the output
|
|
89
|
+
* of the current block. We don't need to worry about overwriting the
|
|
90
|
+
* extDict of our window, because it doesn't exist.
|
|
91
|
+
* So if we have space after the end of the block, just put it there.
|
|
92
|
+
*/
|
|
93
|
+
dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
|
|
94
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
|
95
|
+
dctx->litBufferLocation = ZSTD_in_dst;
|
|
96
|
+
} else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
|
|
97
|
+
/* Literals fit entirely within the extra buffer, put them there to avoid
|
|
98
|
+
* having to split the literals.
|
|
99
|
+
*/
|
|
100
|
+
dctx->litBuffer = dctx->litExtraBuffer;
|
|
101
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
|
102
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
103
|
+
} else {
|
|
104
|
+
assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
|
|
105
|
+
/* Literals must be split between the output block and the extra lit
|
|
106
|
+
* buffer. We fill the extra lit buffer with the tail of the literals,
|
|
107
|
+
* and put the rest of the literals at the end of the block, with
|
|
108
|
+
* WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
|
|
109
|
+
* This MUST not write more than our maxBlockSize beyond dst, because in
|
|
110
|
+
* streaming mode, that could overwrite part of our extDict window.
|
|
111
|
+
*/
|
|
112
|
+
if (splitImmediately) {
|
|
113
|
+
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
|
|
114
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
|
115
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
|
|
116
|
+
} else {
|
|
117
|
+
/* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
|
|
118
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
|
|
119
|
+
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
|
|
120
|
+
}
|
|
121
|
+
dctx->litBufferLocation = ZSTD_split;
|
|
122
|
+
assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
|
|
123
|
+
}
|
|
124
|
+
}
|
|
72
125
|
|
|
73
|
-
/* Hidden declaration for fullbench */
|
|
74
|
-
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
75
|
-
const void* src, size_t srcSize);
|
|
76
126
|
/*! ZSTD_decodeLiteralsBlock() :
|
|
127
|
+
* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
|
|
128
|
+
* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
|
|
129
|
+
* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
|
|
130
|
+
* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
|
|
131
|
+
*
|
|
77
132
|
* @return : nb of bytes read from src (< srcSize )
|
|
78
133
|
* note : symbol not declared but exposed for fullbench */
|
|
79
|
-
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
80
|
-
const void* src, size_t srcSize
|
|
134
|
+
static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
135
|
+
const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
|
|
136
|
+
void* dst, size_t dstCapacity, const streaming_operation streaming)
|
|
81
137
|
{
|
|
82
138
|
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
|
|
83
139
|
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
|
|
84
140
|
|
|
85
141
|
{ const BYTE* const istart = (const BYTE*) src;
|
|
86
142
|
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
|
|
143
|
+
size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
|
|
87
144
|
|
|
88
145
|
switch(litEncType)
|
|
89
146
|
{
|
|
90
147
|
case set_repeat:
|
|
91
148
|
DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
|
|
92
149
|
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
|
|
93
|
-
|
|
150
|
+
ZSTD_FALLTHROUGH;
|
|
94
151
|
|
|
95
152
|
case set_compressed:
|
|
96
|
-
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
|
|
153
|
+
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
|
|
97
154
|
{ size_t lhSize, litSize, litCSize;
|
|
98
155
|
U32 singleStream=0;
|
|
99
156
|
U32 const lhlCode = (istart[0] >> 2) & 3;
|
|
100
157
|
U32 const lhc = MEM_readLE32(istart);
|
|
101
158
|
size_t hufSuccess;
|
|
159
|
+
size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
|
|
160
|
+
int const flags = 0
|
|
161
|
+
| (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
|
|
162
|
+
| (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
|
|
102
163
|
switch(lhlCode)
|
|
103
164
|
{
|
|
104
165
|
case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
|
|
@@ -121,8 +182,15 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
121
182
|
litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
|
|
122
183
|
break;
|
|
123
184
|
}
|
|
124
|
-
RETURN_ERROR_IF(litSize >
|
|
185
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
|
186
|
+
RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
|
|
187
|
+
if (!singleStream)
|
|
188
|
+
RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
|
|
189
|
+
"Not enough literals (%zu) for the 4-streams mode (min %u)",
|
|
190
|
+
litSize, MIN_LITERALS_FOR_4_STREAMS);
|
|
125
191
|
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
|
|
192
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
|
|
193
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
|
|
126
194
|
|
|
127
195
|
/* prefetch huffman table if cold */
|
|
128
196
|
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
|
@@ -131,13 +199,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
131
199
|
|
|
132
200
|
if (litEncType==set_repeat) {
|
|
133
201
|
if (singleStream) {
|
|
134
|
-
hufSuccess =
|
|
202
|
+
hufSuccess = HUF_decompress1X_usingDTable(
|
|
135
203
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
|
136
|
-
dctx->HUFptr,
|
|
204
|
+
dctx->HUFptr, flags);
|
|
137
205
|
} else {
|
|
138
|
-
|
|
206
|
+
assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
|
|
207
|
+
hufSuccess = HUF_decompress4X_usingDTable(
|
|
139
208
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
|
140
|
-
dctx->HUFptr,
|
|
209
|
+
dctx->HUFptr, flags);
|
|
141
210
|
}
|
|
142
211
|
} else {
|
|
143
212
|
if (singleStream) {
|
|
@@ -145,20 +214,29 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
145
214
|
hufSuccess = HUF_decompress1X_DCtx_wksp(
|
|
146
215
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
|
147
216
|
istart+lhSize, litCSize, dctx->workspace,
|
|
148
|
-
sizeof(dctx->workspace));
|
|
217
|
+
sizeof(dctx->workspace), flags);
|
|
149
218
|
#else
|
|
150
|
-
hufSuccess =
|
|
219
|
+
hufSuccess = HUF_decompress1X1_DCtx_wksp(
|
|
151
220
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
|
152
221
|
istart+lhSize, litCSize, dctx->workspace,
|
|
153
|
-
sizeof(dctx->workspace),
|
|
222
|
+
sizeof(dctx->workspace), flags);
|
|
154
223
|
#endif
|
|
155
224
|
} else {
|
|
156
|
-
hufSuccess =
|
|
225
|
+
hufSuccess = HUF_decompress4X_hufOnly_wksp(
|
|
157
226
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
|
158
227
|
istart+lhSize, litCSize, dctx->workspace,
|
|
159
|
-
sizeof(dctx->workspace),
|
|
228
|
+
sizeof(dctx->workspace), flags);
|
|
160
229
|
}
|
|
161
230
|
}
|
|
231
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
232
|
+
{
|
|
233
|
+
assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
|
|
234
|
+
ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
|
235
|
+
ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
|
236
|
+
dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
|
237
|
+
dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
|
|
238
|
+
assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
|
|
239
|
+
}
|
|
162
240
|
|
|
163
241
|
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
|
|
164
242
|
|
|
@@ -166,13 +244,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
166
244
|
dctx->litSize = litSize;
|
|
167
245
|
dctx->litEntropy = 1;
|
|
168
246
|
if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
|
|
169
|
-
ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
|
170
247
|
return litCSize + lhSize;
|
|
171
248
|
}
|
|
172
249
|
|
|
173
250
|
case set_basic:
|
|
174
251
|
{ size_t litSize, lhSize;
|
|
175
252
|
U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
|
253
|
+
size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
|
|
176
254
|
switch(lhlCode)
|
|
177
255
|
{
|
|
178
256
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
|
@@ -185,27 +263,42 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
185
263
|
break;
|
|
186
264
|
case 3:
|
|
187
265
|
lhSize = 3;
|
|
266
|
+
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
|
|
188
267
|
litSize = MEM_readLE24(istart) >> 4;
|
|
189
268
|
break;
|
|
190
269
|
}
|
|
191
270
|
|
|
271
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
|
272
|
+
RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
|
|
273
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
|
274
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
|
192
275
|
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
|
193
276
|
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
|
|
194
|
-
|
|
277
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
278
|
+
{
|
|
279
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
|
280
|
+
ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
|
281
|
+
}
|
|
282
|
+
else
|
|
283
|
+
{
|
|
284
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
|
|
285
|
+
}
|
|
195
286
|
dctx->litPtr = dctx->litBuffer;
|
|
196
287
|
dctx->litSize = litSize;
|
|
197
|
-
ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
|
198
288
|
return lhSize+litSize;
|
|
199
289
|
}
|
|
200
290
|
/* direct reference into compressed stream */
|
|
201
291
|
dctx->litPtr = istart+lhSize;
|
|
202
292
|
dctx->litSize = litSize;
|
|
293
|
+
dctx->litBufferEnd = dctx->litPtr + litSize;
|
|
294
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
203
295
|
return lhSize+litSize;
|
|
204
296
|
}
|
|
205
297
|
|
|
206
298
|
case set_rle:
|
|
207
299
|
{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
|
208
300
|
size_t litSize, lhSize;
|
|
301
|
+
size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
|
|
209
302
|
switch(lhlCode)
|
|
210
303
|
{
|
|
211
304
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
|
@@ -214,16 +307,28 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
214
307
|
break;
|
|
215
308
|
case 1:
|
|
216
309
|
lhSize = 2;
|
|
310
|
+
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
|
|
217
311
|
litSize = MEM_readLE16(istart) >> 4;
|
|
218
312
|
break;
|
|
219
313
|
case 3:
|
|
220
314
|
lhSize = 3;
|
|
315
|
+
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
|
|
221
316
|
litSize = MEM_readLE24(istart) >> 4;
|
|
222
|
-
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
|
223
317
|
break;
|
|
224
318
|
}
|
|
225
|
-
RETURN_ERROR_IF(litSize >
|
|
226
|
-
|
|
319
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
|
320
|
+
RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
|
|
321
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
|
322
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
|
323
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
324
|
+
{
|
|
325
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
|
|
326
|
+
ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
|
|
327
|
+
}
|
|
328
|
+
else
|
|
329
|
+
{
|
|
330
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
|
|
331
|
+
}
|
|
227
332
|
dctx->litPtr = dctx->litBuffer;
|
|
228
333
|
dctx->litSize = litSize;
|
|
229
334
|
return lhSize+1;
|
|
@@ -234,9 +339,21 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
234
339
|
}
|
|
235
340
|
}
|
|
236
341
|
|
|
342
|
+
/* Hidden declaration for fullbench */
|
|
343
|
+
size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
|
|
344
|
+
const void* src, size_t srcSize,
|
|
345
|
+
void* dst, size_t dstCapacity);
|
|
346
|
+
size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
|
|
347
|
+
const void* src, size_t srcSize,
|
|
348
|
+
void* dst, size_t dstCapacity)
|
|
349
|
+
{
|
|
350
|
+
dctx->isFrameDecompression = 0;
|
|
351
|
+
return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
|
|
352
|
+
}
|
|
353
|
+
|
|
237
354
|
/* Default FSE distribution tables.
|
|
238
355
|
* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
|
|
239
|
-
* https://github.com/facebook/zstd/blob/
|
|
356
|
+
* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
|
|
240
357
|
* They were generated programmatically with following method :
|
|
241
358
|
* - start from default distributions, present in /lib/common/zstd_internal.h
|
|
242
359
|
* - generate tables normally, using ZSTD_buildFSETable()
|
|
@@ -343,7 +460,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
|
|
|
343
460
|
}; /* ML_defaultDTable */
|
|
344
461
|
|
|
345
462
|
|
|
346
|
-
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
|
|
463
|
+
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
|
|
347
464
|
{
|
|
348
465
|
void* ptr = dt;
|
|
349
466
|
ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
|
|
@@ -355,7 +472,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
|
355
472
|
cell->nbBits = 0;
|
|
356
473
|
cell->nextState = 0;
|
|
357
474
|
assert(nbAddBits < 255);
|
|
358
|
-
cell->nbAdditionalBits =
|
|
475
|
+
cell->nbAdditionalBits = nbAddBits;
|
|
359
476
|
cell->baseValue = baseValue;
|
|
360
477
|
}
|
|
361
478
|
|
|
@@ -367,7 +484,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
|
367
484
|
FORCE_INLINE_TEMPLATE
|
|
368
485
|
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
369
486
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
370
|
-
const U32* baseValue, const U32* nbAdditionalBits,
|
|
487
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
371
488
|
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
372
489
|
{
|
|
373
490
|
ZSTD_seqSymbol* const tableDecode = dt+1;
|
|
@@ -430,14 +547,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
|
430
547
|
for (i = 8; i < n; i += 8) {
|
|
431
548
|
MEM_write64(spread + pos + i, sv);
|
|
432
549
|
}
|
|
433
|
-
|
|
550
|
+
assert(n>=0);
|
|
551
|
+
pos += (size_t)n;
|
|
434
552
|
}
|
|
435
553
|
}
|
|
436
554
|
/* Now we spread those positions across the table.
|
|
437
|
-
* The benefit of doing it in two stages is that we avoid the
|
|
555
|
+
* The benefit of doing it in two stages is that we avoid the
|
|
438
556
|
* variable size inner loop, which caused lots of branch misses.
|
|
439
557
|
* Now we can run through all the positions without any branch misses.
|
|
440
|
-
* We unroll the loop twice, since that is what
|
|
558
|
+
* We unroll the loop twice, since that is what empirically worked best.
|
|
441
559
|
*/
|
|
442
560
|
{
|
|
443
561
|
size_t position = 0;
|
|
@@ -464,7 +582,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
|
464
582
|
for (i=0; i<n; i++) {
|
|
465
583
|
tableDecode[position].baseValue = s;
|
|
466
584
|
position = (position + step) & tableMask;
|
|
467
|
-
while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
|
|
585
|
+
while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
|
|
468
586
|
} }
|
|
469
587
|
assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
|
|
470
588
|
}
|
|
@@ -475,10 +593,10 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
|
475
593
|
for (u=0; u<tableSize; u++) {
|
|
476
594
|
U32 const symbol = tableDecode[u].baseValue;
|
|
477
595
|
U32 const nextState = symbolNext[symbol]++;
|
|
478
|
-
tableDecode[u].nbBits = (BYTE) (tableLog -
|
|
596
|
+
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
|
|
479
597
|
tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
|
480
598
|
assert(nbAdditionalBits[symbol] < 255);
|
|
481
|
-
tableDecode[u].nbAdditionalBits =
|
|
599
|
+
tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
|
|
482
600
|
tableDecode[u].baseValue = baseValue[symbol];
|
|
483
601
|
}
|
|
484
602
|
}
|
|
@@ -487,7 +605,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
|
487
605
|
/* Avoids the FORCE_INLINE of the _body() function. */
|
|
488
606
|
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
|
489
607
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
490
|
-
const U32* baseValue, const U32* nbAdditionalBits,
|
|
608
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
491
609
|
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
492
610
|
{
|
|
493
611
|
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
|
@@ -495,9 +613,9 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
|
|
495
613
|
}
|
|
496
614
|
|
|
497
615
|
#if DYNAMIC_BMI2
|
|
498
|
-
TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
616
|
+
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
|
499
617
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
500
|
-
const U32* baseValue, const U32* nbAdditionalBits,
|
|
618
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
501
619
|
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
502
620
|
{
|
|
503
621
|
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
|
@@ -507,7 +625,7 @@ TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol
|
|
|
507
625
|
|
|
508
626
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
509
627
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
510
|
-
const U32* baseValue, const U32* nbAdditionalBits,
|
|
628
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
511
629
|
unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
|
|
512
630
|
{
|
|
513
631
|
#if DYNAMIC_BMI2
|
|
@@ -529,7 +647,7 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
|
529
647
|
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
|
|
530
648
|
symbolEncodingType_e type, unsigned max, U32 maxLog,
|
|
531
649
|
const void* src, size_t srcSize,
|
|
532
|
-
const U32* baseValue, const U32* nbAdditionalBits,
|
|
650
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
533
651
|
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
|
534
652
|
int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
|
|
535
653
|
int bmi2)
|
|
@@ -541,7 +659,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
541
659
|
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
|
|
542
660
|
{ U32 const symbol = *(const BYTE*)src;
|
|
543
661
|
U32 const baseline = baseValue[symbol];
|
|
544
|
-
|
|
662
|
+
U8 const nbBits = nbAdditionalBits[symbol];
|
|
545
663
|
ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
|
|
546
664
|
}
|
|
547
665
|
*DTablePtr = DTableSpace;
|
|
@@ -577,7 +695,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
577
695
|
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
578
696
|
const void* src, size_t srcSize)
|
|
579
697
|
{
|
|
580
|
-
const BYTE* const istart = (const BYTE*
|
|
698
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
581
699
|
const BYTE* const iend = istart + srcSize;
|
|
582
700
|
const BYTE* ip = istart;
|
|
583
701
|
int nbSeq;
|
|
@@ -588,11 +706,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
588
706
|
|
|
589
707
|
/* SeqHead */
|
|
590
708
|
nbSeq = *ip++;
|
|
591
|
-
if (!nbSeq) {
|
|
592
|
-
*nbSeqPtr=0;
|
|
593
|
-
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
|
|
594
|
-
return 1;
|
|
595
|
-
}
|
|
596
709
|
if (nbSeq > 0x7F) {
|
|
597
710
|
if (nbSeq == 0xFF) {
|
|
598
711
|
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
|
|
@@ -605,8 +718,16 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
605
718
|
}
|
|
606
719
|
*nbSeqPtr = nbSeq;
|
|
607
720
|
|
|
721
|
+
if (nbSeq == 0) {
|
|
722
|
+
/* No sequence : section ends immediately */
|
|
723
|
+
RETURN_ERROR_IF(ip != iend, corruption_detected,
|
|
724
|
+
"extraneous data present in the Sequences section");
|
|
725
|
+
return (size_t)(ip - istart);
|
|
726
|
+
}
|
|
727
|
+
|
|
608
728
|
/* FSE table descriptors */
|
|
609
729
|
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
|
|
730
|
+
RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
|
|
610
731
|
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
|
|
611
732
|
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
|
|
612
733
|
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
|
|
@@ -620,7 +741,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
620
741
|
LL_defaultDTable, dctx->fseEntropy,
|
|
621
742
|
dctx->ddictIsCold, nbSeq,
|
|
622
743
|
dctx->workspace, sizeof(dctx->workspace),
|
|
623
|
-
dctx
|
|
744
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
|
624
745
|
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
625
746
|
ip += llhSize;
|
|
626
747
|
}
|
|
@@ -632,7 +753,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
632
753
|
OF_defaultDTable, dctx->fseEntropy,
|
|
633
754
|
dctx->ddictIsCold, nbSeq,
|
|
634
755
|
dctx->workspace, sizeof(dctx->workspace),
|
|
635
|
-
dctx
|
|
756
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
|
636
757
|
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
637
758
|
ip += ofhSize;
|
|
638
759
|
}
|
|
@@ -644,7 +765,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
644
765
|
ML_defaultDTable, dctx->fseEntropy,
|
|
645
766
|
dctx->ddictIsCold, nbSeq,
|
|
646
767
|
dctx->workspace, sizeof(dctx->workspace),
|
|
647
|
-
dctx
|
|
768
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
|
648
769
|
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
649
770
|
ip += mlhSize;
|
|
650
771
|
}
|
|
@@ -658,7 +779,6 @@ typedef struct {
|
|
|
658
779
|
size_t litLength;
|
|
659
780
|
size_t matchLength;
|
|
660
781
|
size_t offset;
|
|
661
|
-
const BYTE* match;
|
|
662
782
|
} seq_t;
|
|
663
783
|
|
|
664
784
|
typedef struct {
|
|
@@ -672,9 +792,6 @@ typedef struct {
|
|
|
672
792
|
ZSTD_fseState stateOffb;
|
|
673
793
|
ZSTD_fseState stateML;
|
|
674
794
|
size_t prevOffset[ZSTD_REP_NUM];
|
|
675
|
-
const BYTE* prefixStart;
|
|
676
|
-
const BYTE* dictEnd;
|
|
677
|
-
size_t pos;
|
|
678
795
|
} seqState_t;
|
|
679
796
|
|
|
680
797
|
/*! ZSTD_overlapCopy8() :
|
|
@@ -717,7 +834,7 @@ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
|
|
717
834
|
* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
|
|
718
835
|
* The src buffer must be before the dst buffer.
|
|
719
836
|
*/
|
|
720
|
-
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
|
837
|
+
static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
|
721
838
|
ptrdiff_t const diff = op - ip;
|
|
722
839
|
BYTE* const oend = op + length;
|
|
723
840
|
|
|
@@ -733,6 +850,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
|
733
850
|
/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
|
|
734
851
|
assert(length >= 8);
|
|
735
852
|
ZSTD_overlapCopy8(&op, &ip, diff);
|
|
853
|
+
length -= 8;
|
|
736
854
|
assert(op - ip >= 8);
|
|
737
855
|
assert(op <= oend);
|
|
738
856
|
}
|
|
@@ -747,8 +865,31 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
|
747
865
|
assert(oend > oend_w);
|
|
748
866
|
ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
|
|
749
867
|
ip += oend_w - op;
|
|
750
|
-
op
|
|
868
|
+
op += oend_w - op;
|
|
869
|
+
}
|
|
870
|
+
/* Handle the leftovers. */
|
|
871
|
+
while (op < oend) *op++ = *ip++;
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
/* ZSTD_safecopyDstBeforeSrc():
|
|
875
|
+
* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
|
|
876
|
+
* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
|
|
877
|
+
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
|
|
878
|
+
ptrdiff_t const diff = op - ip;
|
|
879
|
+
BYTE* const oend = op + length;
|
|
880
|
+
|
|
881
|
+
if (length < 8 || diff > -8) {
|
|
882
|
+
/* Handle short lengths, close overlaps, and dst not before src. */
|
|
883
|
+
while (op < oend) *op++ = *ip++;
|
|
884
|
+
return;
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
|
|
888
|
+
ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
|
|
889
|
+
ip += oend - WILDCOPY_OVERLENGTH - op;
|
|
890
|
+
op += oend - WILDCOPY_OVERLENGTH - op;
|
|
751
891
|
}
|
|
892
|
+
|
|
752
893
|
/* Handle the leftovers. */
|
|
753
894
|
while (op < oend) *op++ = *ip++;
|
|
754
895
|
}
|
|
@@ -762,10 +903,11 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
|
762
903
|
* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
|
|
763
904
|
*/
|
|
764
905
|
FORCE_NOINLINE
|
|
906
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
|
765
907
|
size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
908
|
+
BYTE* const oend, seq_t sequence,
|
|
909
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
910
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
769
911
|
{
|
|
770
912
|
BYTE* const oLitEnd = op + sequence.litLength;
|
|
771
913
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
@@ -788,27 +930,78 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
|
788
930
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
789
931
|
/* offset beyond prefix */
|
|
790
932
|
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
|
791
|
-
match = dictEnd - (prefixStart-match);
|
|
933
|
+
match = dictEnd - (prefixStart - match);
|
|
792
934
|
if (match + sequence.matchLength <= dictEnd) {
|
|
793
935
|
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
794
936
|
return sequenceLength;
|
|
795
937
|
}
|
|
796
938
|
/* span extDict & currentPrefixSegment */
|
|
797
939
|
{ size_t const length1 = dictEnd - match;
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
940
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
941
|
+
op = oLitEnd + length1;
|
|
942
|
+
sequence.matchLength -= length1;
|
|
943
|
+
match = prefixStart;
|
|
944
|
+
}
|
|
945
|
+
}
|
|
946
|
+
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
|
947
|
+
return sequenceLength;
|
|
948
|
+
}
|
|
949
|
+
|
|
950
|
+
/* ZSTD_execSequenceEndSplitLitBuffer():
|
|
951
|
+
* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
|
|
952
|
+
*/
|
|
953
|
+
FORCE_NOINLINE
|
|
954
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
|
955
|
+
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
|
|
956
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
|
957
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
958
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
959
|
+
{
|
|
960
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
|
961
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
962
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
963
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
|
964
|
+
|
|
965
|
+
|
|
966
|
+
/* bounds checks : careful of address space overflow in 32-bit mode */
|
|
967
|
+
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
|
|
968
|
+
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
|
|
969
|
+
assert(op < op + sequenceLength);
|
|
970
|
+
assert(oLitEnd < op + sequenceLength);
|
|
971
|
+
|
|
972
|
+
/* copy literals */
|
|
973
|
+
RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
|
|
974
|
+
ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
|
|
975
|
+
op = oLitEnd;
|
|
976
|
+
*litPtr = iLitEnd;
|
|
977
|
+
|
|
978
|
+
/* copy Match */
|
|
979
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
980
|
+
/* offset beyond prefix */
|
|
981
|
+
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
|
982
|
+
match = dictEnd - (prefixStart - match);
|
|
983
|
+
if (match + sequence.matchLength <= dictEnd) {
|
|
984
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
985
|
+
return sequenceLength;
|
|
986
|
+
}
|
|
987
|
+
/* span extDict & currentPrefixSegment */
|
|
988
|
+
{ size_t const length1 = dictEnd - match;
|
|
989
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
990
|
+
op = oLitEnd + length1;
|
|
991
|
+
sequence.matchLength -= length1;
|
|
992
|
+
match = prefixStart;
|
|
993
|
+
}
|
|
994
|
+
}
|
|
803
995
|
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
|
804
996
|
return sequenceLength;
|
|
805
997
|
}
|
|
806
998
|
|
|
807
999
|
HINT_INLINE
|
|
1000
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
|
808
1001
|
size_t ZSTD_execSequence(BYTE* op,
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
1002
|
+
BYTE* const oend, seq_t sequence,
|
|
1003
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
1004
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
812
1005
|
{
|
|
813
1006
|
BYTE* const oLitEnd = op + sequence.litLength;
|
|
814
1007
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
@@ -819,6 +1012,104 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
819
1012
|
|
|
820
1013
|
assert(op != NULL /* Precondition */);
|
|
821
1014
|
assert(oend_w < oend /* No underflow */);
|
|
1015
|
+
|
|
1016
|
+
#if defined(__aarch64__)
|
|
1017
|
+
/* prefetch sequence starting from match that will be used for copy later */
|
|
1018
|
+
PREFETCH_L1(match);
|
|
1019
|
+
#endif
|
|
1020
|
+
/* Handle edge cases in a slow path:
|
|
1021
|
+
* - Read beyond end of literals
|
|
1022
|
+
* - Match end is within WILDCOPY_OVERLIMIT of oend
|
|
1023
|
+
* - 32-bit mode and the match length overflows
|
|
1024
|
+
*/
|
|
1025
|
+
if (UNLIKELY(
|
|
1026
|
+
iLitEnd > litLimit ||
|
|
1027
|
+
oMatchEnd > oend_w ||
|
|
1028
|
+
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
|
1029
|
+
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
|
1030
|
+
|
|
1031
|
+
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
|
1032
|
+
assert(op <= oLitEnd /* No overflow */);
|
|
1033
|
+
assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
|
|
1034
|
+
assert(oMatchEnd <= oend /* No underflow */);
|
|
1035
|
+
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
|
|
1036
|
+
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
|
|
1037
|
+
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
|
|
1038
|
+
|
|
1039
|
+
/* Copy Literals:
|
|
1040
|
+
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
|
|
1041
|
+
* We likely don't need the full 32-byte wildcopy.
|
|
1042
|
+
*/
|
|
1043
|
+
assert(WILDCOPY_OVERLENGTH >= 16);
|
|
1044
|
+
ZSTD_copy16(op, (*litPtr));
|
|
1045
|
+
if (UNLIKELY(sequence.litLength > 16)) {
|
|
1046
|
+
ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
|
|
1047
|
+
}
|
|
1048
|
+
op = oLitEnd;
|
|
1049
|
+
*litPtr = iLitEnd; /* update for next sequence */
|
|
1050
|
+
|
|
1051
|
+
/* Copy Match */
|
|
1052
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
1053
|
+
/* offset beyond prefix -> go into extDict */
|
|
1054
|
+
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
|
1055
|
+
match = dictEnd + (match - prefixStart);
|
|
1056
|
+
if (match + sequence.matchLength <= dictEnd) {
|
|
1057
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
1058
|
+
return sequenceLength;
|
|
1059
|
+
}
|
|
1060
|
+
/* span extDict & currentPrefixSegment */
|
|
1061
|
+
{ size_t const length1 = dictEnd - match;
|
|
1062
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
1063
|
+
op = oLitEnd + length1;
|
|
1064
|
+
sequence.matchLength -= length1;
|
|
1065
|
+
match = prefixStart;
|
|
1066
|
+
}
|
|
1067
|
+
}
|
|
1068
|
+
/* Match within prefix of 1 or more bytes */
|
|
1069
|
+
assert(op <= oMatchEnd);
|
|
1070
|
+
assert(oMatchEnd <= oend_w);
|
|
1071
|
+
assert(match >= prefixStart);
|
|
1072
|
+
assert(sequence.matchLength >= 1);
|
|
1073
|
+
|
|
1074
|
+
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
|
1075
|
+
* without overlap checking.
|
|
1076
|
+
*/
|
|
1077
|
+
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
|
1078
|
+
/* We bet on a full wildcopy for matches, since we expect matches to be
|
|
1079
|
+
* longer than literals (in general). In silesia, ~10% of matches are longer
|
|
1080
|
+
* than 16 bytes.
|
|
1081
|
+
*/
|
|
1082
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
|
|
1083
|
+
return sequenceLength;
|
|
1084
|
+
}
|
|
1085
|
+
assert(sequence.offset < WILDCOPY_VECLEN);
|
|
1086
|
+
|
|
1087
|
+
/* Copy 8 bytes and spread the offset to be >= 8. */
|
|
1088
|
+
ZSTD_overlapCopy8(&op, &match, sequence.offset);
|
|
1089
|
+
|
|
1090
|
+
/* If the match length is > 8 bytes, then continue with the wildcopy. */
|
|
1091
|
+
if (sequence.matchLength > 8) {
|
|
1092
|
+
assert(op < oMatchEnd);
|
|
1093
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
|
|
1094
|
+
}
|
|
1095
|
+
return sequenceLength;
|
|
1096
|
+
}
|
|
1097
|
+
|
|
1098
|
+
HINT_INLINE
|
|
1099
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
|
1100
|
+
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
|
|
1101
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
|
1102
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
1103
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
1104
|
+
{
|
|
1105
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
|
1106
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
1107
|
+
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
1108
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
1109
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
|
1110
|
+
|
|
1111
|
+
assert(op != NULL /* Precondition */);
|
|
1112
|
+
assert(oend_w < oend /* No underflow */);
|
|
822
1113
|
/* Handle edge cases in a slow path:
|
|
823
1114
|
* - Read beyond end of literals
|
|
824
1115
|
* - Match end is within WILDCOPY_OVERLIMIT of oend
|
|
@@ -828,7 +1119,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
828
1119
|
iLitEnd > litLimit ||
|
|
829
1120
|
oMatchEnd > oend_w ||
|
|
830
1121
|
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
|
831
|
-
return
|
|
1122
|
+
return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
|
832
1123
|
|
|
833
1124
|
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
|
834
1125
|
assert(op <= oLitEnd /* No overflow */);
|
|
@@ -896,6 +1187,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
896
1187
|
return sequenceLength;
|
|
897
1188
|
}
|
|
898
1189
|
|
|
1190
|
+
|
|
899
1191
|
static void
|
|
900
1192
|
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
|
|
901
1193
|
{
|
|
@@ -909,24 +1201,14 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS
|
|
|
909
1201
|
}
|
|
910
1202
|
|
|
911
1203
|
FORCE_INLINE_TEMPLATE void
|
|
912
|
-
|
|
913
|
-
{
|
|
914
|
-
ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
|
|
915
|
-
U32 const nbBits = DInfo.nbBits;
|
|
916
|
-
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
|
917
|
-
DStatePtr->state = DInfo.nextState + lowBits;
|
|
918
|
-
}
|
|
919
|
-
|
|
920
|
-
FORCE_INLINE_TEMPLATE void
|
|
921
|
-
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
|
|
1204
|
+
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
|
|
922
1205
|
{
|
|
923
|
-
U32 const nbBits = DInfo.nbBits;
|
|
924
1206
|
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
|
925
|
-
DStatePtr->state =
|
|
1207
|
+
DStatePtr->state = nextState + lowBits;
|
|
926
1208
|
}
|
|
927
1209
|
|
|
928
1210
|
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
|
929
|
-
* offset bits. But we can only read at most
|
|
1211
|
+
* offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
|
|
930
1212
|
* bits before reloading. This value is the maximum number of bytes we read
|
|
931
1213
|
* after reloading when we are decoding long offsets.
|
|
932
1214
|
*/
|
|
@@ -936,123 +1218,136 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD
|
|
|
936
1218
|
: 0)
|
|
937
1219
|
|
|
938
1220
|
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
|
|
939
|
-
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
|
|
940
1221
|
|
|
1222
|
+
/**
|
|
1223
|
+
* ZSTD_decodeSequence():
|
|
1224
|
+
* @p longOffsets : tells the decoder to reload more bit while decoding large offsets
|
|
1225
|
+
* only used in 32-bit mode
|
|
1226
|
+
* @return : Sequence (litL + matchL + offset)
|
|
1227
|
+
*/
|
|
941
1228
|
FORCE_INLINE_TEMPLATE seq_t
|
|
942
|
-
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const
|
|
1229
|
+
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
|
|
943
1230
|
{
|
|
944
1231
|
seq_t seq;
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
1232
|
+
/*
|
|
1233
|
+
* ZSTD_seqSymbol is a 64 bits wide structure.
|
|
1234
|
+
* It can be loaded in one operation
|
|
1235
|
+
* and its fields extracted by simply shifting or bit-extracting on aarch64.
|
|
1236
|
+
* GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
|
|
1237
|
+
* operations that cause performance drop. This can be avoided by using this
|
|
1238
|
+
* ZSTD_memcpy hack.
|
|
1239
|
+
*/
|
|
1240
|
+
#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
|
|
1241
|
+
ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
|
|
1242
|
+
ZSTD_seqSymbol* const llDInfo = &llDInfoS;
|
|
1243
|
+
ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
|
|
1244
|
+
ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
|
|
1245
|
+
ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
|
|
1246
|
+
ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
|
|
1247
|
+
ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
|
|
1248
|
+
#else
|
|
1249
|
+
const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
|
|
1250
|
+
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
|
|
1251
|
+
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
|
|
1252
|
+
#endif
|
|
1253
|
+
seq.matchLength = mlDInfo->baseValue;
|
|
1254
|
+
seq.litLength = llDInfo->baseValue;
|
|
1255
|
+
{ U32 const ofBase = ofDInfo->baseValue;
|
|
1256
|
+
BYTE const llBits = llDInfo->nbAdditionalBits;
|
|
1257
|
+
BYTE const mlBits = mlDInfo->nbAdditionalBits;
|
|
1258
|
+
BYTE const ofBits = ofDInfo->nbAdditionalBits;
|
|
1259
|
+
BYTE const totalBits = llBits+mlBits+ofBits;
|
|
1260
|
+
|
|
1261
|
+
U16 const llNext = llDInfo->nextState;
|
|
1262
|
+
U16 const mlNext = mlDInfo->nextState;
|
|
1263
|
+
U16 const ofNext = ofDInfo->nextState;
|
|
1264
|
+
U32 const llnbBits = llDInfo->nbBits;
|
|
1265
|
+
U32 const mlnbBits = mlDInfo->nbBits;
|
|
1266
|
+
U32 const ofnbBits = ofDInfo->nbBits;
|
|
1267
|
+
|
|
1268
|
+
assert(llBits <= MaxLLBits);
|
|
1269
|
+
assert(mlBits <= MaxMLBits);
|
|
1270
|
+
assert(ofBits <= MaxOff);
|
|
1271
|
+
/*
|
|
1272
|
+
* As gcc has better branch and block analyzers, sometimes it is only
|
|
1273
|
+
* valuable to mark likeliness for clang, it gives around 3-4% of
|
|
1274
|
+
* performance.
|
|
1275
|
+
*/
|
|
1276
|
+
|
|
1277
|
+
/* sequence */
|
|
1278
|
+
{ size_t offset;
|
|
1279
|
+
if (ofBits > 1) {
|
|
1280
|
+
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
|
1281
|
+
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
|
1282
|
+
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
|
|
1283
|
+
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
|
|
1284
|
+
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
|
|
1285
|
+
/* Always read extra bits, this keeps the logic simple,
|
|
1286
|
+
* avoids branches, and avoids accidentally reading 0 bits.
|
|
1287
|
+
*/
|
|
1288
|
+
U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
|
|
1289
|
+
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
|
1290
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1291
|
+
offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
|
1292
|
+
} else {
|
|
1293
|
+
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
|
1294
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
|
984
1295
|
}
|
|
1296
|
+
seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
1297
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
1298
|
+
seqState->prevOffset[0] = offset;
|
|
985
1299
|
} else {
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
seqState->prevOffset[
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
1300
|
+
U32 const ll0 = (llDInfo->baseValue == 0);
|
|
1301
|
+
if (LIKELY((ofBits == 0))) {
|
|
1302
|
+
offset = seqState->prevOffset[ll0];
|
|
1303
|
+
seqState->prevOffset[1] = seqState->prevOffset[!ll0];
|
|
1304
|
+
seqState->prevOffset[0] = offset;
|
|
1305
|
+
} else {
|
|
1306
|
+
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
|
|
1307
|
+
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
1308
|
+
temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
|
|
1309
|
+
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
1310
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
1311
|
+
seqState->prevOffset[0] = offset = temp;
|
|
1312
|
+
} } }
|
|
1313
|
+
seq.offset = offset;
|
|
1314
|
+
}
|
|
995
1315
|
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
|
1316
|
+
if (mlBits > 0)
|
|
1317
|
+
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
|
999
1318
|
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1319
|
+
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
|
1320
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1321
|
+
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
|
1322
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1323
|
+
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
|
1324
|
+
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
|
1006
1325
|
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
|
1326
|
+
if (llBits > 0)
|
|
1327
|
+
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
|
1010
1328
|
|
|
1011
|
-
|
|
1012
|
-
|
|
1329
|
+
if (MEM_32bits())
|
|
1330
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1013
1331
|
|
|
1014
|
-
|
|
1015
|
-
|
|
1332
|
+
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
|
1333
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
|
1016
1334
|
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
* No consequence though : no memory access will occur, offset is only used for prefetching */
|
|
1022
|
-
seqState->pos = pos + seq.matchLength;
|
|
1023
|
-
}
|
|
1024
|
-
|
|
1025
|
-
/* ANS state update
|
|
1026
|
-
* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
|
|
1027
|
-
* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
|
|
1028
|
-
* Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
|
|
1029
|
-
* better option, so it is the default for other compilers. But, if you
|
|
1030
|
-
* measure that it is worse, please put up a pull request.
|
|
1031
|
-
*/
|
|
1032
|
-
{
|
|
1033
|
-
#if defined(__GNUC__) && !defined(__clang__)
|
|
1034
|
-
const int kUseUpdateFseState = 1;
|
|
1035
|
-
#else
|
|
1036
|
-
const int kUseUpdateFseState = 0;
|
|
1037
|
-
#endif
|
|
1038
|
-
if (kUseUpdateFseState) {
|
|
1039
|
-
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
|
1040
|
-
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
|
1335
|
+
if (!isLastSeq) {
|
|
1336
|
+
/* don't update FSE state for last Sequence */
|
|
1337
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
|
|
1338
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
|
|
1041
1339
|
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
|
|
1045
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
|
|
1046
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
1047
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
|
|
1340
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
|
|
1341
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1048
1342
|
}
|
|
1049
1343
|
}
|
|
1050
1344
|
|
|
1051
1345
|
return seq;
|
|
1052
1346
|
}
|
|
1053
1347
|
|
|
1054
|
-
#
|
|
1055
|
-
|
|
1348
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1349
|
+
#if DEBUGLEVEL >= 1
|
|
1350
|
+
static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
|
|
1056
1351
|
{
|
|
1057
1352
|
size_t const windowSize = dctx->fParams.windowSize;
|
|
1058
1353
|
/* No dictionary used. */
|
|
@@ -1066,30 +1361,33 @@ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefix
|
|
|
1066
1361
|
/* Dictionary is active. */
|
|
1067
1362
|
return 1;
|
|
1068
1363
|
}
|
|
1364
|
+
#endif
|
|
1069
1365
|
|
|
1070
|
-
|
|
1366
|
+
static void ZSTD_assertValidSequence(
|
|
1071
1367
|
ZSTD_DCtx const* dctx,
|
|
1072
1368
|
BYTE const* op, BYTE const* oend,
|
|
1073
1369
|
seq_t const seq,
|
|
1074
1370
|
BYTE const* prefixStart, BYTE const* virtualStart)
|
|
1075
1371
|
{
|
|
1076
1372
|
#if DEBUGLEVEL >= 1
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1373
|
+
if (dctx->isFrameDecompression) {
|
|
1374
|
+
size_t const windowSize = dctx->fParams.windowSize;
|
|
1375
|
+
size_t const sequenceSize = seq.litLength + seq.matchLength;
|
|
1376
|
+
BYTE const* const oLitEnd = op + seq.litLength;
|
|
1377
|
+
DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
|
|
1378
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
|
1379
|
+
assert(op <= oend);
|
|
1380
|
+
assert((size_t)(oend - op) >= sequenceSize);
|
|
1381
|
+
assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
|
|
1382
|
+
if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
|
|
1383
|
+
size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
|
|
1384
|
+
/* Offset must be within the dictionary. */
|
|
1385
|
+
assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
|
|
1386
|
+
assert(seq.offset <= windowSize + dictSize);
|
|
1387
|
+
} else {
|
|
1388
|
+
/* Offset must be within our window. */
|
|
1389
|
+
assert(seq.offset <= windowSize);
|
|
1390
|
+
}
|
|
1093
1391
|
}
|
|
1094
1392
|
#else
|
|
1095
1393
|
(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
|
|
@@ -1098,31 +1396,30 @@ MEM_STATIC void ZSTD_assertValidSequence(
|
|
|
1098
1396
|
#endif
|
|
1099
1397
|
|
|
1100
1398
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1399
|
+
|
|
1400
|
+
|
|
1101
1401
|
FORCE_INLINE_TEMPLATE size_t
|
|
1102
1402
|
DONT_VECTORIZE
|
|
1103
|
-
|
|
1403
|
+
ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
|
|
1104
1404
|
void* dst, size_t maxDstSize,
|
|
1105
1405
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1106
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1107
|
-
const int frame)
|
|
1406
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1108
1407
|
{
|
|
1109
1408
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
1110
1409
|
const BYTE* const iend = ip + seqSize;
|
|
1111
|
-
BYTE* const ostart = (BYTE*
|
|
1112
|
-
BYTE* const oend = ostart
|
|
1410
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
1411
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
|
|
1113
1412
|
BYTE* op = ostart;
|
|
1114
1413
|
const BYTE* litPtr = dctx->litPtr;
|
|
1115
|
-
const BYTE*
|
|
1414
|
+
const BYTE* litBufferEnd = dctx->litBufferEnd;
|
|
1116
1415
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
|
1117
1416
|
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
|
|
1118
1417
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
|
1119
|
-
DEBUGLOG(5, "
|
|
1120
|
-
(void)frame;
|
|
1418
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
|
|
1121
1419
|
|
|
1122
|
-
/*
|
|
1420
|
+
/* Literals are split between internal buffer & output buffer */
|
|
1123
1421
|
if (nbSeq) {
|
|
1124
1422
|
seqState_t seqState;
|
|
1125
|
-
size_t error = 0;
|
|
1126
1423
|
dctx->fseEntropy = 1;
|
|
1127
1424
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
1128
1425
|
RETURN_ERROR_IF(
|
|
@@ -1138,134 +1435,331 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
|
1138
1435
|
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
|
1139
1436
|
BIT_DStream_completed < BIT_DStream_overflow);
|
|
1140
1437
|
|
|
1438
|
+
/* decompress without overrunning litPtr begins */
|
|
1439
|
+
{ seq_t sequence = {0,0,0}; /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */
|
|
1440
|
+
/* Align the decompression loop to 32 + 16 bytes.
|
|
1441
|
+
*
|
|
1442
|
+
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
|
|
1443
|
+
* speed swings based on the alignment of the decompression loop. This
|
|
1444
|
+
* performance swing is caused by parts of the decompression loop falling
|
|
1445
|
+
* out of the DSB. The entire decompression loop should fit in the DSB,
|
|
1446
|
+
* when it can't we get much worse performance. You can measure if you've
|
|
1447
|
+
* hit the good case or the bad case with this perf command for some
|
|
1448
|
+
* compressed file test.zst:
|
|
1449
|
+
*
|
|
1450
|
+
* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
|
|
1451
|
+
* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
|
|
1452
|
+
*
|
|
1453
|
+
* If you see most cycles served out of the MITE you've hit the bad case.
|
|
1454
|
+
* If you see most cycles served out of the DSB you've hit the good case.
|
|
1455
|
+
* If it is pretty even then you may be in an okay case.
|
|
1456
|
+
*
|
|
1457
|
+
* This issue has been reproduced on the following CPUs:
|
|
1458
|
+
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
|
1459
|
+
* Use Instruments->Counters to get DSB/MITE cycles.
|
|
1460
|
+
* I never got performance swings, but I was able to
|
|
1461
|
+
* go from the good case of mostly DSB to half of the
|
|
1462
|
+
* cycles served from MITE.
|
|
1463
|
+
* - Coffeelake: Intel i9-9900k
|
|
1464
|
+
* - Coffeelake: Intel i7-9700k
|
|
1465
|
+
*
|
|
1466
|
+
* I haven't been able to reproduce the instability or DSB misses on any
|
|
1467
|
+
* of the following CPUS:
|
|
1468
|
+
* - Haswell
|
|
1469
|
+
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
|
|
1470
|
+
* - Skylake
|
|
1471
|
+
*
|
|
1472
|
+
* Alignment is done for each of the three major decompression loops:
|
|
1473
|
+
* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
|
|
1474
|
+
* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
|
|
1475
|
+
* - ZSTD_decompressSequences_body
|
|
1476
|
+
* Alignment choices are made to minimize large swings on bad cases and influence on performance
|
|
1477
|
+
* from changes external to this code, rather than to overoptimize on the current commit.
|
|
1478
|
+
*
|
|
1479
|
+
* If you are seeing performance stability this script can help test.
|
|
1480
|
+
* It tests on 4 commits in zstd where I saw performance change.
|
|
1481
|
+
*
|
|
1482
|
+
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
|
1483
|
+
*/
|
|
1141
1484
|
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
* If you see most cycles served out of the MITE you've hit the bad case.
|
|
1156
|
-
* If you see most cycles served out of the DSB you've hit the good case.
|
|
1157
|
-
* If it is pretty even then you may be in an okay case.
|
|
1158
|
-
*
|
|
1159
|
-
* I've been able to reproduce this issue on the following CPUs:
|
|
1160
|
-
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
|
1161
|
-
* Use Instruments->Counters to get DSB/MITE cycles.
|
|
1162
|
-
* I never got performance swings, but I was able to
|
|
1163
|
-
* go from the good case of mostly DSB to half of the
|
|
1164
|
-
* cycles served from MITE.
|
|
1165
|
-
* - Coffeelake: Intel i9-9900k
|
|
1166
|
-
*
|
|
1167
|
-
* I haven't been able to reproduce the instability or DSB misses on any
|
|
1168
|
-
* of the following CPUS:
|
|
1169
|
-
* - Haswell
|
|
1170
|
-
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
|
|
1171
|
-
* - Skylake
|
|
1172
|
-
*
|
|
1173
|
-
* If you are seeing performance stability this script can help test.
|
|
1174
|
-
* It tests on 4 commits in zstd where I saw performance change.
|
|
1175
|
-
*
|
|
1176
|
-
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
|
1177
|
-
*/
|
|
1178
|
-
__asm__(".p2align 5");
|
|
1179
|
-
__asm__("nop");
|
|
1180
|
-
__asm__(".p2align 4");
|
|
1485
|
+
__asm__(".p2align 6");
|
|
1486
|
+
# if __GNUC__ >= 7
|
|
1487
|
+
/* good for gcc-7, gcc-9, and gcc-11 */
|
|
1488
|
+
__asm__("nop");
|
|
1489
|
+
__asm__(".p2align 5");
|
|
1490
|
+
__asm__("nop");
|
|
1491
|
+
__asm__(".p2align 4");
|
|
1492
|
+
# if __GNUC__ == 8 || __GNUC__ == 10
|
|
1493
|
+
/* good for gcc-8 and gcc-10 */
|
|
1494
|
+
__asm__("nop");
|
|
1495
|
+
__asm__(".p2align 3");
|
|
1496
|
+
# endif
|
|
1497
|
+
# endif
|
|
1181
1498
|
#endif
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1499
|
+
|
|
1500
|
+
/* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
|
|
1501
|
+
for ( ; nbSeq; nbSeq--) {
|
|
1502
|
+
sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
|
|
1503
|
+
if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
|
|
1504
|
+
{ size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
|
1185
1505
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1186
|
-
|
|
1187
|
-
|
|
1506
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1507
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1188
1508
|
#endif
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1509
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1510
|
+
return oneSeqSize;
|
|
1511
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1512
|
+
op += oneSeqSize;
|
|
1513
|
+
} }
|
|
1514
|
+
DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
|
|
1515
|
+
|
|
1516
|
+
/* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
|
|
1517
|
+
if (nbSeq > 0) {
|
|
1518
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
|
1519
|
+
DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
|
|
1520
|
+
if (leftoverLit) {
|
|
1521
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
|
1522
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
|
1523
|
+
sequence.litLength -= leftoverLit;
|
|
1524
|
+
op += leftoverLit;
|
|
1525
|
+
}
|
|
1526
|
+
litPtr = dctx->litExtraBuffer;
|
|
1527
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1528
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1529
|
+
{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
|
1530
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1531
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1532
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1533
|
+
#endif
|
|
1534
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1535
|
+
return oneSeqSize;
|
|
1536
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1537
|
+
op += oneSeqSize;
|
|
1538
|
+
}
|
|
1539
|
+
nbSeq--;
|
|
1540
|
+
}
|
|
1541
|
+
}
|
|
1542
|
+
|
|
1543
|
+
if (nbSeq > 0) {
|
|
1544
|
+
/* there is remaining lit from extra buffer */
|
|
1545
|
+
|
|
1546
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1547
|
+
__asm__(".p2align 6");
|
|
1548
|
+
__asm__("nop");
|
|
1549
|
+
# if __GNUC__ != 7
|
|
1550
|
+
/* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
|
|
1551
|
+
__asm__(".p2align 4");
|
|
1552
|
+
__asm__("nop");
|
|
1553
|
+
__asm__(".p2align 3");
|
|
1554
|
+
# elif __GNUC__ >= 11
|
|
1555
|
+
__asm__(".p2align 3");
|
|
1556
|
+
# else
|
|
1557
|
+
__asm__(".p2align 5");
|
|
1558
|
+
__asm__("nop");
|
|
1559
|
+
__asm__(".p2align 3");
|
|
1560
|
+
# endif
|
|
1561
|
+
#endif
|
|
1562
|
+
|
|
1563
|
+
for ( ; nbSeq ; nbSeq--) {
|
|
1564
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
|
|
1565
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
|
1566
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1567
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1568
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1569
|
+
#endif
|
|
1570
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1571
|
+
return oneSeqSize;
|
|
1572
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1573
|
+
op += oneSeqSize;
|
|
1198
1574
|
}
|
|
1199
|
-
if (UNLIKELY(!--nbSeq)) break;
|
|
1200
1575
|
}
|
|
1201
1576
|
|
|
1202
1577
|
/* check if reached exact end */
|
|
1203
|
-
DEBUGLOG(5, "
|
|
1204
|
-
if (ZSTD_isError(error)) return error;
|
|
1578
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
|
|
1205
1579
|
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
|
|
1206
|
-
|
|
1580
|
+
DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
|
|
1581
|
+
RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
|
|
1207
1582
|
/* save reps for next block */
|
|
1208
1583
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
|
1209
1584
|
}
|
|
1210
1585
|
|
|
1211
1586
|
/* last literal segment */
|
|
1212
|
-
|
|
1587
|
+
if (dctx->litBufferLocation == ZSTD_split) {
|
|
1588
|
+
/* split hasn't been reached yet, first get dst then copy litExtraBuffer */
|
|
1589
|
+
size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
|
|
1590
|
+
DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
|
|
1591
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
|
|
1592
|
+
if (op != NULL) {
|
|
1593
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
|
1594
|
+
op += lastLLSize;
|
|
1595
|
+
}
|
|
1596
|
+
litPtr = dctx->litExtraBuffer;
|
|
1597
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1598
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1599
|
+
}
|
|
1600
|
+
/* copy last literals from internal buffer */
|
|
1601
|
+
{ size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
|
|
1602
|
+
DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
|
|
1213
1603
|
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1214
1604
|
if (op != NULL) {
|
|
1215
1605
|
ZSTD_memcpy(op, litPtr, lastLLSize);
|
|
1216
1606
|
op += lastLLSize;
|
|
1607
|
+
} }
|
|
1608
|
+
|
|
1609
|
+
DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
|
|
1610
|
+
return (size_t)(op - ostart);
|
|
1611
|
+
}
|
|
1612
|
+
|
|
1613
|
+
FORCE_INLINE_TEMPLATE size_t
|
|
1614
|
+
DONT_VECTORIZE
|
|
1615
|
+
ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
|
|
1616
|
+
void* dst, size_t maxDstSize,
|
|
1617
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1618
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1619
|
+
{
|
|
1620
|
+
const BYTE* ip = (const BYTE*)seqStart;
|
|
1621
|
+
const BYTE* const iend = ip + seqSize;
|
|
1622
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
1623
|
+
BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
|
|
1624
|
+
BYTE* op = ostart;
|
|
1625
|
+
const BYTE* litPtr = dctx->litPtr;
|
|
1626
|
+
const BYTE* const litEnd = litPtr + dctx->litSize;
|
|
1627
|
+
const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
|
|
1628
|
+
const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
|
|
1629
|
+
const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
|
|
1630
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
|
|
1631
|
+
|
|
1632
|
+
/* Regen sequences */
|
|
1633
|
+
if (nbSeq) {
|
|
1634
|
+
seqState_t seqState;
|
|
1635
|
+
dctx->fseEntropy = 1;
|
|
1636
|
+
{ U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
1637
|
+
RETURN_ERROR_IF(
|
|
1638
|
+
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
|
|
1639
|
+
corruption_detected, "");
|
|
1640
|
+
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
|
1641
|
+
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
|
1642
|
+
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
|
1643
|
+
assert(dst != NULL);
|
|
1644
|
+
|
|
1645
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1646
|
+
__asm__(".p2align 6");
|
|
1647
|
+
__asm__("nop");
|
|
1648
|
+
# if __GNUC__ >= 7
|
|
1649
|
+
__asm__(".p2align 5");
|
|
1650
|
+
__asm__("nop");
|
|
1651
|
+
__asm__(".p2align 3");
|
|
1652
|
+
# else
|
|
1653
|
+
__asm__(".p2align 4");
|
|
1654
|
+
__asm__("nop");
|
|
1655
|
+
__asm__(".p2align 3");
|
|
1656
|
+
# endif
|
|
1657
|
+
#endif
|
|
1658
|
+
|
|
1659
|
+
for ( ; nbSeq ; nbSeq--) {
|
|
1660
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
|
|
1661
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
|
1662
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1663
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1664
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1665
|
+
#endif
|
|
1666
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1667
|
+
return oneSeqSize;
|
|
1668
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1669
|
+
op += oneSeqSize;
|
|
1217
1670
|
}
|
|
1671
|
+
|
|
1672
|
+
/* check if reached exact end */
|
|
1673
|
+
assert(nbSeq == 0);
|
|
1674
|
+
RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
|
|
1675
|
+
/* save reps for next block */
|
|
1676
|
+
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
|
1218
1677
|
}
|
|
1219
1678
|
|
|
1220
|
-
|
|
1679
|
+
/* last literal segment */
|
|
1680
|
+
{ size_t const lastLLSize = (size_t)(litEnd - litPtr);
|
|
1681
|
+
DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
|
|
1682
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1683
|
+
if (op != NULL) {
|
|
1684
|
+
ZSTD_memcpy(op, litPtr, lastLLSize);
|
|
1685
|
+
op += lastLLSize;
|
|
1686
|
+
} }
|
|
1687
|
+
|
|
1688
|
+
DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
|
|
1689
|
+
return (size_t)(op - ostart);
|
|
1221
1690
|
}
|
|
1222
1691
|
|
|
1223
1692
|
static size_t
|
|
1224
1693
|
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
|
|
1225
1694
|
void* dst, size_t maxDstSize,
|
|
1226
1695
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1227
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1228
|
-
const int frame)
|
|
1696
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1229
1697
|
{
|
|
1230
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
|
1698
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1699
|
+
}
|
|
1700
|
+
|
|
1701
|
+
static size_t
|
|
1702
|
+
ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
|
|
1703
|
+
void* dst, size_t maxDstSize,
|
|
1704
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1705
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1706
|
+
{
|
|
1707
|
+
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1231
1708
|
}
|
|
1232
1709
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1233
1710
|
|
|
1234
1711
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1712
|
+
|
|
1713
|
+
FORCE_INLINE_TEMPLATE
|
|
1714
|
+
|
|
1715
|
+
size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
|
|
1716
|
+
const BYTE* const prefixStart, const BYTE* const dictEnd)
|
|
1717
|
+
{
|
|
1718
|
+
prefetchPos += sequence.litLength;
|
|
1719
|
+
{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
|
|
1720
|
+
/* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
|
1721
|
+
* No consequence though : memory address is only used for prefetching, not for dereferencing */
|
|
1722
|
+
const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
|
|
1723
|
+
PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
|
1724
|
+
}
|
|
1725
|
+
return prefetchPos + sequence.matchLength;
|
|
1726
|
+
}
|
|
1727
|
+
|
|
1728
|
+
/* This decoding function employs prefetching
|
|
1729
|
+
* to reduce latency impact of cache misses.
|
|
1730
|
+
* It's generally employed when block contains a significant portion of long-distance matches
|
|
1731
|
+
* or when coupled with a "cold" dictionary */
|
|
1235
1732
|
FORCE_INLINE_TEMPLATE size_t
|
|
1236
1733
|
ZSTD_decompressSequencesLong_body(
|
|
1237
1734
|
ZSTD_DCtx* dctx,
|
|
1238
1735
|
void* dst, size_t maxDstSize,
|
|
1239
1736
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1240
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1241
|
-
const int frame)
|
|
1737
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1242
1738
|
{
|
|
1243
1739
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
1244
1740
|
const BYTE* const iend = ip + seqSize;
|
|
1245
|
-
BYTE* const ostart = (BYTE*
|
|
1246
|
-
BYTE* const oend = ostart
|
|
1741
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
1742
|
+
BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
|
|
1247
1743
|
BYTE* op = ostart;
|
|
1248
1744
|
const BYTE* litPtr = dctx->litPtr;
|
|
1249
|
-
const BYTE*
|
|
1745
|
+
const BYTE* litBufferEnd = dctx->litBufferEnd;
|
|
1250
1746
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
|
1251
1747
|
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
|
|
1252
1748
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
|
1253
|
-
(void)frame;
|
|
1254
1749
|
|
|
1255
1750
|
/* Regen sequences */
|
|
1256
1751
|
if (nbSeq) {
|
|
1257
|
-
#define STORED_SEQS
|
|
1752
|
+
#define STORED_SEQS 8
|
|
1258
1753
|
#define STORED_SEQS_MASK (STORED_SEQS-1)
|
|
1259
|
-
#define ADVANCED_SEQS
|
|
1754
|
+
#define ADVANCED_SEQS STORED_SEQS
|
|
1260
1755
|
seq_t sequences[STORED_SEQS];
|
|
1261
1756
|
int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
|
|
1262
1757
|
seqState_t seqState;
|
|
1263
1758
|
int seqNb;
|
|
1759
|
+
size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
|
|
1760
|
+
|
|
1264
1761
|
dctx->fseEntropy = 1;
|
|
1265
1762
|
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
1266
|
-
seqState.prefixStart = prefixStart;
|
|
1267
|
-
seqState.pos = (size_t)(op-prefixStart);
|
|
1268
|
-
seqState.dictEnd = dictEnd;
|
|
1269
1763
|
assert(dst != NULL);
|
|
1270
1764
|
assert(iend >= ip);
|
|
1271
1765
|
RETURN_ERROR_IF(
|
|
@@ -1276,37 +1770,95 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1276
1770
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
|
1277
1771
|
|
|
1278
1772
|
/* prepare in advance */
|
|
1279
|
-
for (seqNb=0;
|
|
1280
|
-
|
|
1281
|
-
|
|
1773
|
+
for (seqNb=0; seqNb<seqAdvance; seqNb++) {
|
|
1774
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
|
|
1775
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
|
1776
|
+
sequences[seqNb] = sequence;
|
|
1282
1777
|
}
|
|
1283
|
-
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
|
|
1284
1778
|
|
|
1285
|
-
/*
|
|
1286
|
-
for (
|
|
1287
|
-
seq_t
|
|
1288
|
-
|
|
1779
|
+
/* decompress without stomping litBuffer */
|
|
1780
|
+
for (; seqNb < nbSeq; seqNb++) {
|
|
1781
|
+
seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
|
|
1782
|
+
|
|
1783
|
+
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
|
|
1784
|
+
/* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
|
|
1785
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
|
1786
|
+
if (leftoverLit)
|
|
1787
|
+
{
|
|
1788
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
|
1789
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
|
1790
|
+
sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
|
|
1791
|
+
op += leftoverLit;
|
|
1792
|
+
}
|
|
1793
|
+
litPtr = dctx->litExtraBuffer;
|
|
1794
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1795
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1796
|
+
{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1289
1797
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1290
|
-
|
|
1291
|
-
|
|
1798
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1799
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1292
1800
|
#endif
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1801
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1802
|
+
|
|
1803
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
|
1804
|
+
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
|
1805
|
+
op += oneSeqSize;
|
|
1806
|
+
} }
|
|
1807
|
+
else
|
|
1808
|
+
{
|
|
1809
|
+
/* lit buffer is either wholly contained in first or second split, or not split at all*/
|
|
1810
|
+
size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
|
|
1811
|
+
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
|
|
1812
|
+
ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1813
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1814
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1815
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1816
|
+
#endif
|
|
1817
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1818
|
+
|
|
1819
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
|
1820
|
+
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
|
1821
|
+
op += oneSeqSize;
|
|
1822
|
+
}
|
|
1297
1823
|
}
|
|
1298
|
-
RETURN_ERROR_IF(
|
|
1824
|
+
RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
|
|
1299
1825
|
|
|
1300
1826
|
/* finish queue */
|
|
1301
1827
|
seqNb -= seqAdvance;
|
|
1302
1828
|
for ( ; seqNb<nbSeq ; seqNb++) {
|
|
1303
|
-
|
|
1829
|
+
seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
|
|
1830
|
+
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
|
|
1831
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
|
1832
|
+
if (leftoverLit) {
|
|
1833
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
|
1834
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
|
1835
|
+
sequence->litLength -= leftoverLit;
|
|
1836
|
+
op += leftoverLit;
|
|
1837
|
+
}
|
|
1838
|
+
litPtr = dctx->litExtraBuffer;
|
|
1839
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1840
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1841
|
+
{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1304
1842
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1305
|
-
|
|
1306
|
-
|
|
1843
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1844
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1307
1845
|
#endif
|
|
1308
|
-
|
|
1309
|
-
|
|
1846
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1847
|
+
op += oneSeqSize;
|
|
1848
|
+
}
|
|
1849
|
+
}
|
|
1850
|
+
else
|
|
1851
|
+
{
|
|
1852
|
+
size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
|
|
1853
|
+
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
|
|
1854
|
+
ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1855
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1856
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1857
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1858
|
+
#endif
|
|
1859
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1860
|
+
op += oneSeqSize;
|
|
1861
|
+
}
|
|
1310
1862
|
}
|
|
1311
1863
|
|
|
1312
1864
|
/* save reps for next block */
|
|
@@ -1314,25 +1866,34 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1314
1866
|
}
|
|
1315
1867
|
|
|
1316
1868
|
/* last literal segment */
|
|
1317
|
-
{
|
|
1869
|
+
if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
|
|
1870
|
+
size_t const lastLLSize = litBufferEnd - litPtr;
|
|
1871
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
|
|
1872
|
+
if (op != NULL) {
|
|
1873
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
|
1874
|
+
op += lastLLSize;
|
|
1875
|
+
}
|
|
1876
|
+
litPtr = dctx->litExtraBuffer;
|
|
1877
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1878
|
+
}
|
|
1879
|
+
{ size_t const lastLLSize = litBufferEnd - litPtr;
|
|
1318
1880
|
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1319
1881
|
if (op != NULL) {
|
|
1320
|
-
|
|
1882
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
|
1321
1883
|
op += lastLLSize;
|
|
1322
1884
|
}
|
|
1323
1885
|
}
|
|
1324
1886
|
|
|
1325
|
-
return op-ostart;
|
|
1887
|
+
return (size_t)(op - ostart);
|
|
1326
1888
|
}
|
|
1327
1889
|
|
|
1328
1890
|
static size_t
|
|
1329
1891
|
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
1330
1892
|
void* dst, size_t maxDstSize,
|
|
1331
1893
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1332
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1333
|
-
const int frame)
|
|
1894
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1334
1895
|
{
|
|
1335
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
|
1896
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1336
1897
|
}
|
|
1337
1898
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1338
1899
|
|
|
@@ -1341,27 +1902,34 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
|
1341
1902
|
#if DYNAMIC_BMI2
|
|
1342
1903
|
|
|
1343
1904
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1344
|
-
static
|
|
1905
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
1345
1906
|
DONT_VECTORIZE
|
|
1346
1907
|
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
|
1347
1908
|
void* dst, size_t maxDstSize,
|
|
1348
1909
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1349
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1350
|
-
const int frame)
|
|
1910
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1351
1911
|
{
|
|
1352
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
|
1912
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1913
|
+
}
|
|
1914
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
1915
|
+
DONT_VECTORIZE
|
|
1916
|
+
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
|
|
1917
|
+
void* dst, size_t maxDstSize,
|
|
1918
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1919
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1920
|
+
{
|
|
1921
|
+
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1353
1922
|
}
|
|
1354
1923
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1355
1924
|
|
|
1356
1925
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1357
|
-
static
|
|
1926
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
1358
1927
|
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
|
|
1359
1928
|
void* dst, size_t maxDstSize,
|
|
1360
1929
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1361
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1362
|
-
const int frame)
|
|
1930
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1363
1931
|
{
|
|
1364
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
|
1932
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1365
1933
|
}
|
|
1366
1934
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1367
1935
|
|
|
@@ -1371,23 +1939,34 @@ typedef size_t (*ZSTD_decompressSequences_t)(
|
|
|
1371
1939
|
ZSTD_DCtx* dctx,
|
|
1372
1940
|
void* dst, size_t maxDstSize,
|
|
1373
1941
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1374
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1375
|
-
const int frame);
|
|
1942
|
+
const ZSTD_longOffset_e isLongOffset);
|
|
1376
1943
|
|
|
1377
1944
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1378
1945
|
static size_t
|
|
1379
1946
|
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|
1380
1947
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1381
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1382
|
-
const int frame)
|
|
1948
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1383
1949
|
{
|
|
1384
1950
|
DEBUGLOG(5, "ZSTD_decompressSequences");
|
|
1385
1951
|
#if DYNAMIC_BMI2
|
|
1386
|
-
if (dctx
|
|
1387
|
-
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
|
1952
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
|
1953
|
+
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1388
1954
|
}
|
|
1389
1955
|
#endif
|
|
1390
|
-
|
|
1956
|
+
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1957
|
+
}
|
|
1958
|
+
static size_t
|
|
1959
|
+
ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|
1960
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1961
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1962
|
+
{
|
|
1963
|
+
DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
|
|
1964
|
+
#if DYNAMIC_BMI2
|
|
1965
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
|
1966
|
+
return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1967
|
+
}
|
|
1968
|
+
#endif
|
|
1969
|
+
return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1391
1970
|
}
|
|
1392
1971
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1393
1972
|
|
|
@@ -1402,69 +1981,114 @@ static size_t
|
|
|
1402
1981
|
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|
1403
1982
|
void* dst, size_t maxDstSize,
|
|
1404
1983
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1405
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1406
|
-
const int frame)
|
|
1984
|
+
const ZSTD_longOffset_e isLongOffset)
|
|
1407
1985
|
{
|
|
1408
1986
|
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
|
|
1409
1987
|
#if DYNAMIC_BMI2
|
|
1410
|
-
if (dctx
|
|
1411
|
-
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
|
1988
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
|
1989
|
+
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1412
1990
|
}
|
|
1413
1991
|
#endif
|
|
1414
|
-
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
|
1992
|
+
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1415
1993
|
}
|
|
1416
1994
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1417
1995
|
|
|
1418
1996
|
|
|
1997
|
+
/**
|
|
1998
|
+
* @returns The total size of the history referenceable by zstd, including
|
|
1999
|
+
* both the prefix and the extDict. At @p op any offset larger than this
|
|
2000
|
+
* is invalid.
|
|
2001
|
+
*/
|
|
2002
|
+
static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
|
|
2003
|
+
{
|
|
2004
|
+
return (size_t)(op - virtualStart);
|
|
2005
|
+
}
|
|
2006
|
+
|
|
2007
|
+
typedef struct {
|
|
2008
|
+
unsigned longOffsetShare;
|
|
2009
|
+
unsigned maxNbAdditionalBits;
|
|
2010
|
+
} ZSTD_OffsetInfo;
|
|
1419
2011
|
|
|
1420
|
-
|
|
1421
|
-
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
|
1422
|
-
/* ZSTD_getLongOffsetsShare() :
|
|
2012
|
+
/* ZSTD_getOffsetInfo() :
|
|
1423
2013
|
* condition : offTable must be valid
|
|
1424
2014
|
* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
|
|
1425
|
-
* compared to maximum possible of (1<<OffFSELog)
|
|
1426
|
-
|
|
1427
|
-
|
|
2015
|
+
* compared to maximum possible of (1<<OffFSELog),
|
|
2016
|
+
* as well as the maximum number of additional bits required.
|
|
2017
|
+
*/
|
|
2018
|
+
static ZSTD_OffsetInfo
|
|
2019
|
+
ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
|
|
1428
2020
|
{
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
2021
|
+
ZSTD_OffsetInfo info = {0, 0};
|
|
2022
|
+
/* If nbSeq == 0, then the offTable is uninitialized, but we have
|
|
2023
|
+
* no sequences, so both values should be 0.
|
|
2024
|
+
*/
|
|
2025
|
+
if (nbSeq != 0) {
|
|
2026
|
+
const void* ptr = offTable;
|
|
2027
|
+
U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
|
|
2028
|
+
const ZSTD_seqSymbol* table = offTable + 1;
|
|
2029
|
+
U32 const max = 1 << tableLog;
|
|
2030
|
+
U32 u;
|
|
2031
|
+
DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
|
|
2032
|
+
|
|
2033
|
+
assert(max <= (1 << OffFSELog)); /* max not too large */
|
|
2034
|
+
for (u=0; u<max; u++) {
|
|
2035
|
+
info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
|
|
2036
|
+
if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
|
|
2037
|
+
}
|
|
2038
|
+
|
|
2039
|
+
assert(tableLog <= OffFSELog);
|
|
2040
|
+
info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
|
|
1439
2041
|
}
|
|
1440
2042
|
|
|
1441
|
-
|
|
1442
|
-
|
|
2043
|
+
return info;
|
|
2044
|
+
}
|
|
1443
2045
|
|
|
1444
|
-
|
|
2046
|
+
/**
|
|
2047
|
+
* @returns The maximum offset we can decode in one read of our bitstream, without
|
|
2048
|
+
* reloading more bits in the middle of the offset bits read. Any offsets larger
|
|
2049
|
+
* than this must use the long offset decoder.
|
|
2050
|
+
*/
|
|
2051
|
+
static size_t ZSTD_maxShortOffset(void)
|
|
2052
|
+
{
|
|
2053
|
+
if (MEM_64bits()) {
|
|
2054
|
+
/* We can decode any offset without reloading bits.
|
|
2055
|
+
* This might change if the max window size grows.
|
|
2056
|
+
*/
|
|
2057
|
+
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
|
|
2058
|
+
return (size_t)-1;
|
|
2059
|
+
} else {
|
|
2060
|
+
/* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
|
|
2061
|
+
* This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
|
|
2062
|
+
* Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
|
|
2063
|
+
*/
|
|
2064
|
+
size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
|
|
2065
|
+
size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
|
|
2066
|
+
assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
|
|
2067
|
+
return maxOffset;
|
|
2068
|
+
}
|
|
1445
2069
|
}
|
|
1446
|
-
#endif
|
|
1447
2070
|
|
|
1448
2071
|
size_t
|
|
1449
2072
|
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1450
2073
|
void* dst, size_t dstCapacity,
|
|
1451
|
-
const void* src, size_t srcSize, const
|
|
2074
|
+
const void* src, size_t srcSize, const streaming_operation streaming)
|
|
1452
2075
|
{ /* blockType == blockCompressed */
|
|
1453
2076
|
const BYTE* ip = (const BYTE*)src;
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
*
|
|
1458
|
-
*
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
2077
|
+
DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
|
|
2078
|
+
|
|
2079
|
+
/* Note : the wording of the specification
|
|
2080
|
+
* allows a compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
|
|
2081
|
+
* This generally does not happen, as it makes little sense,
|
|
2082
|
+
* since an uncompressed block would feature the same size and have no decompression cost.
|
|
2083
|
+
* Also, note that the decoder from reference libzstd before v1.5.4
|
|
2084
|
+
* would consider this edge case as an error.
|
|
2085
|
+
* As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
|
|
2086
|
+
* for broader compatibility with the deployed ecosystem of zstd decoders */
|
|
2087
|
+
RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
|
|
1464
2088
|
|
|
1465
2089
|
/* Decode literals section */
|
|
1466
|
-
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
|
1467
|
-
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock :
|
|
2090
|
+
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
|
|
2091
|
+
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
|
|
1468
2092
|
if (ZSTD_isError(litCSize)) return litCSize;
|
|
1469
2093
|
ip += litCSize;
|
|
1470
2094
|
srcSize -= litCSize;
|
|
@@ -1472,6 +2096,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1472
2096
|
|
|
1473
2097
|
/* Build Decoding Tables */
|
|
1474
2098
|
{
|
|
2099
|
+
/* Compute the maximum block size, which must also work when !frame and fParams are unset.
|
|
2100
|
+
* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
|
|
2101
|
+
*/
|
|
2102
|
+
size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
|
|
2103
|
+
size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
|
|
2104
|
+
/* isLongOffset must be true if there are long offsets.
|
|
2105
|
+
* Offsets are long if they are larger than ZSTD_maxShortOffset().
|
|
2106
|
+
* We don't expect that to be the case in 64-bit mode.
|
|
2107
|
+
*
|
|
2108
|
+
* We check here to see if our history is large enough to allow long offsets.
|
|
2109
|
+
* If it isn't, then we can't possibly have (valid) long offsets. If the offset
|
|
2110
|
+
* is invalid, then it is okay to read it incorrectly.
|
|
2111
|
+
*
|
|
2112
|
+
* If isLongOffset is true, then we will later check our decoding table to see
|
|
2113
|
+
* if it is even possible to generate long offsets.
|
|
2114
|
+
*/
|
|
2115
|
+
ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
|
|
1475
2116
|
/* These macros control at build-time which decompressor implementation
|
|
1476
2117
|
* we use. If neither is defined, we do some inspection and dispatch at
|
|
1477
2118
|
* runtime.
|
|
@@ -1479,6 +2120,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1479
2120
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
|
1480
2121
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
|
1481
2122
|
int usePrefetchDecoder = dctx->ddictIsCold;
|
|
2123
|
+
#else
|
|
2124
|
+
/* Set to 1 to avoid computing offset info if we don't need to.
|
|
2125
|
+
* Otherwise this value is ignored.
|
|
2126
|
+
*/
|
|
2127
|
+
int usePrefetchDecoder = 1;
|
|
1482
2128
|
#endif
|
|
1483
2129
|
int nbSeq;
|
|
1484
2130
|
size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
|
|
@@ -1486,40 +2132,58 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1486
2132
|
ip += seqHSize;
|
|
1487
2133
|
srcSize -= seqHSize;
|
|
1488
2134
|
|
|
1489
|
-
RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
|
2135
|
+
RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
|
2136
|
+
RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
|
|
2137
|
+
"invalid dst");
|
|
1490
2138
|
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
2139
|
+
/* If we could potentially have long offsets, or we might want to use the prefetch decoder,
|
|
2140
|
+
* compute information about the share of long offsets, and the maximum nbAdditionalBits.
|
|
2141
|
+
* NOTE: could probably use a larger nbSeq limit
|
|
2142
|
+
*/
|
|
2143
|
+
if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
|
|
2144
|
+
ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
|
|
2145
|
+
if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
|
|
2146
|
+
/* If isLongOffset, but the maximum number of additional bits that we see in our table is small
|
|
2147
|
+
* enough, then we know it is impossible to have too long an offset in this block, so we can
|
|
2148
|
+
* use the regular offset decoder.
|
|
2149
|
+
*/
|
|
2150
|
+
isLongOffset = ZSTD_lo_isRegularOffset;
|
|
2151
|
+
}
|
|
2152
|
+
if (!usePrefetchDecoder) {
|
|
2153
|
+
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
|
|
2154
|
+
usePrefetchDecoder = (info.longOffsetShare >= minShare);
|
|
2155
|
+
}
|
|
1499
2156
|
}
|
|
1500
|
-
#endif
|
|
1501
2157
|
|
|
1502
2158
|
dctx->ddictIsCold = 0;
|
|
1503
2159
|
|
|
1504
2160
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
|
1505
2161
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
|
1506
|
-
if (usePrefetchDecoder)
|
|
2162
|
+
if (usePrefetchDecoder) {
|
|
2163
|
+
#else
|
|
2164
|
+
(void)usePrefetchDecoder;
|
|
2165
|
+
{
|
|
1507
2166
|
#endif
|
|
1508
2167
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1509
|
-
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset
|
|
2168
|
+
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
|
1510
2169
|
#endif
|
|
2170
|
+
}
|
|
1511
2171
|
|
|
1512
2172
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1513
2173
|
/* else */
|
|
1514
|
-
|
|
2174
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
2175
|
+
return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
|
2176
|
+
else
|
|
2177
|
+
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
|
1515
2178
|
#endif
|
|
1516
2179
|
}
|
|
1517
2180
|
}
|
|
1518
2181
|
|
|
1519
2182
|
|
|
1520
|
-
|
|
2183
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
|
2184
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
|
|
1521
2185
|
{
|
|
1522
|
-
if (dst != dctx->previousDstEnd) { /* not contiguous */
|
|
2186
|
+
if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
|
|
1523
2187
|
dctx->dictEnd = dctx->previousDstEnd;
|
|
1524
2188
|
dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
|
|
1525
2189
|
dctx->prefixStart = dst;
|
|
@@ -1528,13 +2192,24 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
|
|
|
1528
2192
|
}
|
|
1529
2193
|
|
|
1530
2194
|
|
|
1531
|
-
size_t
|
|
1532
|
-
|
|
1533
|
-
|
|
2195
|
+
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
|
|
2196
|
+
void* dst, size_t dstCapacity,
|
|
2197
|
+
const void* src, size_t srcSize)
|
|
1534
2198
|
{
|
|
1535
2199
|
size_t dSize;
|
|
1536
|
-
|
|
1537
|
-
|
|
2200
|
+
dctx->isFrameDecompression = 0;
|
|
2201
|
+
ZSTD_checkContinuity(dctx, dst, dstCapacity);
|
|
2202
|
+
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
|
|
2203
|
+
FORWARD_IF_ERROR(dSize, "");
|
|
1538
2204
|
dctx->previousDstEnd = (char*)dst + dSize;
|
|
1539
2205
|
return dSize;
|
|
1540
2206
|
}
|
|
2207
|
+
|
|
2208
|
+
|
|
2209
|
+
/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
|
|
2210
|
+
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
|
|
2211
|
+
void* dst, size_t dstCapacity,
|
|
2212
|
+
const void* src, size_t srcSize)
|
|
2213
|
+
{
|
|
2214
|
+
return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
|
|
2215
|
+
}
|