zstd-ruby 1.4.4.0 → 1.5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +1 -0
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +241 -173
- data/ext/zstdruby/libzstd/README.md +76 -18
- data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
- data/ext/zstdruby/libzstd/common/compiler.h +196 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +51 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
- data/ext/zstdruby/libzstd/common/huf.h +60 -54
- data/ext/zstdruby/libzstd/common/mem.h +87 -98
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/threading.c +10 -8
- data/ext/zstdruby/libzstd/common/threading.h +4 -3
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
- data/ext/zstdruby/libzstd/zstd.h +760 -234
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +20 -9
- data/.travis.yml +0 -14
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -14,15 +14,15 @@
|
|
|
14
14
|
/*-*******************************************************
|
|
15
15
|
* Dependencies
|
|
16
16
|
*********************************************************/
|
|
17
|
-
#include
|
|
18
|
-
#include "compiler.h" /* prefetch */
|
|
19
|
-
#include "cpu.h" /* bmi2 */
|
|
20
|
-
#include "mem.h" /* low level memory routines */
|
|
17
|
+
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
|
18
|
+
#include "../common/compiler.h" /* prefetch */
|
|
19
|
+
#include "../common/cpu.h" /* bmi2 */
|
|
20
|
+
#include "../common/mem.h" /* low level memory routines */
|
|
21
21
|
#define FSE_STATIC_LINKING_ONLY
|
|
22
|
-
#include "fse.h"
|
|
22
|
+
#include "../common/fse.h"
|
|
23
23
|
#define HUF_STATIC_LINKING_ONLY
|
|
24
|
-
#include "huf.h"
|
|
25
|
-
#include "zstd_internal.h"
|
|
24
|
+
#include "../common/huf.h"
|
|
25
|
+
#include "../common/zstd_internal.h"
|
|
26
26
|
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
|
|
27
27
|
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
|
|
28
28
|
#include "zstd_decompress_block.h"
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
/*_*******************************************************
|
|
45
45
|
* Memory operations
|
|
46
46
|
**********************************************************/
|
|
47
|
-
static void ZSTD_copy4(void* dst, const void* src) {
|
|
47
|
+
static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
|
|
48
48
|
|
|
49
49
|
|
|
50
50
|
/*-*************************************************************
|
|
@@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
|
|
|
56
56
|
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
57
57
|
blockProperties_t* bpPtr)
|
|
58
58
|
{
|
|
59
|
-
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
|
|
59
|
+
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
|
|
60
60
|
|
|
61
61
|
{ U32 const cBlockHeader = MEM_readLE24(src);
|
|
62
62
|
U32 const cSize = cBlockHeader >> 3;
|
|
@@ -64,23 +64,64 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
|
64
64
|
bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
|
|
65
65
|
bpPtr->origSize = cSize; /* only useful for RLE */
|
|
66
66
|
if (bpPtr->blockType == bt_rle) return 1;
|
|
67
|
-
RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
|
|
67
|
+
RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
|
|
68
68
|
return cSize;
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
71
|
|
|
72
|
+
/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
|
|
73
|
+
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
|
|
74
|
+
const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
|
|
75
|
+
{
|
|
76
|
+
if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
|
|
77
|
+
{
|
|
78
|
+
/* room for litbuffer to fit without read faulting */
|
|
79
|
+
dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
|
|
80
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
|
81
|
+
dctx->litBufferLocation = ZSTD_in_dst;
|
|
82
|
+
}
|
|
83
|
+
else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
|
|
84
|
+
{
|
|
85
|
+
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
|
|
86
|
+
if (splitImmediately) {
|
|
87
|
+
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
|
|
88
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
|
89
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
|
|
90
|
+
}
|
|
91
|
+
else {
|
|
92
|
+
/* initially this will be stored entirely in dst during huffman decoding, it will be partially shifted to litExtraBuffer after */
|
|
93
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
|
|
94
|
+
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
|
|
95
|
+
}
|
|
96
|
+
dctx->litBufferLocation = ZSTD_split;
|
|
97
|
+
}
|
|
98
|
+
else
|
|
99
|
+
{
|
|
100
|
+
/* fits entirely within litExtraBuffer, so no split is necessary */
|
|
101
|
+
dctx->litBuffer = dctx->litExtraBuffer;
|
|
102
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
|
103
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
72
106
|
|
|
73
107
|
/* Hidden declaration for fullbench */
|
|
74
108
|
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
75
|
-
const void* src, size_t srcSize
|
|
109
|
+
const void* src, size_t srcSize,
|
|
110
|
+
void* dst, size_t dstCapacity, const streaming_operation streaming);
|
|
76
111
|
/*! ZSTD_decodeLiteralsBlock() :
|
|
112
|
+
* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
|
|
113
|
+
* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
|
|
114
|
+
* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
|
|
115
|
+
* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
|
|
116
|
+
*
|
|
77
117
|
* @return : nb of bytes read from src (< srcSize )
|
|
78
118
|
* note : symbol not declared but exposed for fullbench */
|
|
79
119
|
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
80
|
-
const void* src, size_t srcSize
|
|
120
|
+
const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
|
|
121
|
+
void* dst, size_t dstCapacity, const streaming_operation streaming)
|
|
81
122
|
{
|
|
82
123
|
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
|
|
83
|
-
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
|
|
124
|
+
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
|
|
84
125
|
|
|
85
126
|
{ const BYTE* const istart = (const BYTE*) src;
|
|
86
127
|
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
|
|
@@ -89,8 +130,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
89
130
|
{
|
|
90
131
|
case set_repeat:
|
|
91
132
|
DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
|
|
92
|
-
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
|
|
93
|
-
|
|
133
|
+
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
|
|
134
|
+
ZSTD_FALLTHROUGH;
|
|
94
135
|
|
|
95
136
|
case set_compressed:
|
|
96
137
|
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
|
|
@@ -99,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
99
140
|
U32 const lhlCode = (istart[0] >> 2) & 3;
|
|
100
141
|
U32 const lhc = MEM_readLE32(istart);
|
|
101
142
|
size_t hufSuccess;
|
|
143
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
|
102
144
|
switch(lhlCode)
|
|
103
145
|
{
|
|
104
146
|
case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
|
|
@@ -121,8 +163,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
121
163
|
litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
|
|
122
164
|
break;
|
|
123
165
|
}
|
|
124
|
-
RETURN_ERROR_IF(litSize >
|
|
125
|
-
RETURN_ERROR_IF(
|
|
166
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
|
167
|
+
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
|
168
|
+
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
|
|
169
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
|
|
170
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
|
|
126
171
|
|
|
127
172
|
/* prefetch huffman table if cold */
|
|
128
173
|
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
|
@@ -133,11 +178,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
133
178
|
if (singleStream) {
|
|
134
179
|
hufSuccess = HUF_decompress1X_usingDTable_bmi2(
|
|
135
180
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
|
136
|
-
dctx->HUFptr, dctx
|
|
181
|
+
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
|
|
137
182
|
} else {
|
|
138
183
|
hufSuccess = HUF_decompress4X_usingDTable_bmi2(
|
|
139
184
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
|
140
|
-
dctx->HUFptr, dctx
|
|
185
|
+
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
|
|
141
186
|
}
|
|
142
187
|
} else {
|
|
143
188
|
if (singleStream) {
|
|
@@ -150,29 +195,36 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
150
195
|
hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
|
|
151
196
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
|
152
197
|
istart+lhSize, litCSize, dctx->workspace,
|
|
153
|
-
sizeof(dctx->workspace), dctx
|
|
198
|
+
sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
|
|
154
199
|
#endif
|
|
155
200
|
} else {
|
|
156
201
|
hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
|
|
157
202
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
|
158
203
|
istart+lhSize, litCSize, dctx->workspace,
|
|
159
|
-
sizeof(dctx->workspace), dctx
|
|
204
|
+
sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
|
|
160
205
|
}
|
|
161
206
|
}
|
|
207
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
208
|
+
{
|
|
209
|
+
ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
|
210
|
+
ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
|
211
|
+
dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
|
212
|
+
dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
|
|
213
|
+
}
|
|
162
214
|
|
|
163
|
-
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
|
|
215
|
+
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
|
|
164
216
|
|
|
165
217
|
dctx->litPtr = dctx->litBuffer;
|
|
166
218
|
dctx->litSize = litSize;
|
|
167
219
|
dctx->litEntropy = 1;
|
|
168
220
|
if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
|
|
169
|
-
memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
|
170
221
|
return litCSize + lhSize;
|
|
171
222
|
}
|
|
172
223
|
|
|
173
224
|
case set_basic:
|
|
174
225
|
{ size_t litSize, lhSize;
|
|
175
226
|
U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
|
227
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
|
176
228
|
switch(lhlCode)
|
|
177
229
|
{
|
|
178
230
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
|
@@ -189,23 +241,36 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
189
241
|
break;
|
|
190
242
|
}
|
|
191
243
|
|
|
244
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
|
245
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
|
246
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
|
192
247
|
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
|
193
|
-
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
|
|
194
|
-
|
|
248
|
+
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
|
|
249
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
250
|
+
{
|
|
251
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
|
252
|
+
ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
|
253
|
+
}
|
|
254
|
+
else
|
|
255
|
+
{
|
|
256
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
|
|
257
|
+
}
|
|
195
258
|
dctx->litPtr = dctx->litBuffer;
|
|
196
259
|
dctx->litSize = litSize;
|
|
197
|
-
memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
|
198
260
|
return lhSize+litSize;
|
|
199
261
|
}
|
|
200
262
|
/* direct reference into compressed stream */
|
|
201
263
|
dctx->litPtr = istart+lhSize;
|
|
202
264
|
dctx->litSize = litSize;
|
|
265
|
+
dctx->litBufferEnd = dctx->litPtr + litSize;
|
|
266
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
203
267
|
return lhSize+litSize;
|
|
204
268
|
}
|
|
205
269
|
|
|
206
270
|
case set_rle:
|
|
207
271
|
{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
|
208
272
|
size_t litSize, lhSize;
|
|
273
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
|
209
274
|
switch(lhlCode)
|
|
210
275
|
{
|
|
211
276
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
|
@@ -222,8 +287,19 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
222
287
|
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
|
223
288
|
break;
|
|
224
289
|
}
|
|
225
|
-
RETURN_ERROR_IF(litSize >
|
|
226
|
-
|
|
290
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
|
291
|
+
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
|
292
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
|
293
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
|
294
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
295
|
+
{
|
|
296
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
|
|
297
|
+
ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
|
|
298
|
+
}
|
|
299
|
+
else
|
|
300
|
+
{
|
|
301
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
|
|
302
|
+
}
|
|
227
303
|
dctx->litPtr = dctx->litBuffer;
|
|
228
304
|
dctx->litSize = litSize;
|
|
229
305
|
return lhSize+1;
|
|
@@ -236,7 +312,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
236
312
|
|
|
237
313
|
/* Default FSE distribution tables.
|
|
238
314
|
* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
|
|
239
|
-
* https://github.com/facebook/zstd/blob/
|
|
315
|
+
* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
|
|
240
316
|
* They were generated programmatically with following method :
|
|
241
317
|
* - start from default distributions, present in /lib/common/zstd_internal.h
|
|
242
318
|
* - generate tables normally, using ZSTD_buildFSETable()
|
|
@@ -343,7 +419,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
|
|
|
343
419
|
}; /* ML_defaultDTable */
|
|
344
420
|
|
|
345
421
|
|
|
346
|
-
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue,
|
|
422
|
+
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
|
|
347
423
|
{
|
|
348
424
|
void* ptr = dt;
|
|
349
425
|
ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
|
|
@@ -355,7 +431,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
|
355
431
|
cell->nbBits = 0;
|
|
356
432
|
cell->nextState = 0;
|
|
357
433
|
assert(nbAddBits < 255);
|
|
358
|
-
cell->nbAdditionalBits =
|
|
434
|
+
cell->nbAdditionalBits = nbAddBits;
|
|
359
435
|
cell->baseValue = baseValue;
|
|
360
436
|
}
|
|
361
437
|
|
|
@@ -364,23 +440,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
|
364
440
|
* generate FSE decoding table for one symbol (ll, ml or off)
|
|
365
441
|
* cannot fail if input is valid =>
|
|
366
442
|
* all inputs are presumed validated at this stage */
|
|
367
|
-
|
|
368
|
-
|
|
443
|
+
FORCE_INLINE_TEMPLATE
|
|
444
|
+
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
369
445
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
370
|
-
const U32* baseValue, const
|
|
371
|
-
unsigned tableLog)
|
|
446
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
447
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
372
448
|
{
|
|
373
449
|
ZSTD_seqSymbol* const tableDecode = dt+1;
|
|
374
|
-
U16 symbolNext[MaxSeq+1];
|
|
375
|
-
|
|
376
450
|
U32 const maxSV1 = maxSymbolValue + 1;
|
|
377
451
|
U32 const tableSize = 1 << tableLog;
|
|
378
|
-
|
|
452
|
+
|
|
453
|
+
U16* symbolNext = (U16*)wksp;
|
|
454
|
+
BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
|
|
455
|
+
U32 highThreshold = tableSize - 1;
|
|
456
|
+
|
|
379
457
|
|
|
380
458
|
/* Sanity Checks */
|
|
381
459
|
assert(maxSymbolValue <= MaxSeq);
|
|
382
460
|
assert(tableLog <= MaxFSELog);
|
|
383
|
-
|
|
461
|
+
assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
|
|
462
|
+
(void)wkspSize;
|
|
384
463
|
/* Init, lay down lowprob symbols */
|
|
385
464
|
{ ZSTD_seqSymbol_header DTableH;
|
|
386
465
|
DTableH.tableLog = tableLog;
|
|
@@ -396,16 +475,69 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
|
396
475
|
assert(normalizedCounter[s]>=0);
|
|
397
476
|
symbolNext[s] = (U16)normalizedCounter[s];
|
|
398
477
|
} } }
|
|
399
|
-
|
|
478
|
+
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
|
|
400
479
|
}
|
|
401
480
|
|
|
402
481
|
/* Spread symbols */
|
|
403
|
-
|
|
482
|
+
assert(tableSize <= 512);
|
|
483
|
+
/* Specialized symbol spreading for the case when there are
|
|
484
|
+
* no low probability (-1 count) symbols. When compressing
|
|
485
|
+
* small blocks we avoid low probability symbols to hit this
|
|
486
|
+
* case, since header decoding speed matters more.
|
|
487
|
+
*/
|
|
488
|
+
if (highThreshold == tableSize - 1) {
|
|
489
|
+
size_t const tableMask = tableSize-1;
|
|
490
|
+
size_t const step = FSE_TABLESTEP(tableSize);
|
|
491
|
+
/* First lay down the symbols in order.
|
|
492
|
+
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
|
|
493
|
+
* misses since small blocks generally have small table logs, so nearly
|
|
494
|
+
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
|
|
495
|
+
* our buffer to handle the over-write.
|
|
496
|
+
*/
|
|
497
|
+
{
|
|
498
|
+
U64 const add = 0x0101010101010101ull;
|
|
499
|
+
size_t pos = 0;
|
|
500
|
+
U64 sv = 0;
|
|
501
|
+
U32 s;
|
|
502
|
+
for (s=0; s<maxSV1; ++s, sv += add) {
|
|
503
|
+
int i;
|
|
504
|
+
int const n = normalizedCounter[s];
|
|
505
|
+
MEM_write64(spread + pos, sv);
|
|
506
|
+
for (i = 8; i < n; i += 8) {
|
|
507
|
+
MEM_write64(spread + pos + i, sv);
|
|
508
|
+
}
|
|
509
|
+
pos += n;
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
/* Now we spread those positions across the table.
|
|
513
|
+
* The benefit of doing it in two stages is that we avoid the
|
|
514
|
+
* variable size inner loop, which caused lots of branch misses.
|
|
515
|
+
* Now we can run through all the positions without any branch misses.
|
|
516
|
+
* We unroll the loop twice, since that is what empirically worked best.
|
|
517
|
+
*/
|
|
518
|
+
{
|
|
519
|
+
size_t position = 0;
|
|
520
|
+
size_t s;
|
|
521
|
+
size_t const unroll = 2;
|
|
522
|
+
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
|
523
|
+
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
|
524
|
+
size_t u;
|
|
525
|
+
for (u = 0; u < unroll; ++u) {
|
|
526
|
+
size_t const uPosition = (position + (u * step)) & tableMask;
|
|
527
|
+
tableDecode[uPosition].baseValue = spread[s + u];
|
|
528
|
+
}
|
|
529
|
+
position = (position + (unroll * step)) & tableMask;
|
|
530
|
+
}
|
|
531
|
+
assert(position == 0);
|
|
532
|
+
}
|
|
533
|
+
} else {
|
|
534
|
+
U32 const tableMask = tableSize-1;
|
|
404
535
|
U32 const step = FSE_TABLESTEP(tableSize);
|
|
405
536
|
U32 s, position = 0;
|
|
406
537
|
for (s=0; s<maxSV1; s++) {
|
|
407
538
|
int i;
|
|
408
|
-
|
|
539
|
+
int const n = normalizedCounter[s];
|
|
540
|
+
for (i=0; i<n; i++) {
|
|
409
541
|
tableDecode[position].baseValue = s;
|
|
410
542
|
position = (position + step) & tableMask;
|
|
411
543
|
while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
|
|
@@ -414,16 +546,56 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
|
414
546
|
}
|
|
415
547
|
|
|
416
548
|
/* Build Decoding table */
|
|
417
|
-
{
|
|
549
|
+
{
|
|
550
|
+
U32 u;
|
|
418
551
|
for (u=0; u<tableSize; u++) {
|
|
419
552
|
U32 const symbol = tableDecode[u].baseValue;
|
|
420
553
|
U32 const nextState = symbolNext[symbol]++;
|
|
421
554
|
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
|
|
422
555
|
tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
|
423
556
|
assert(nbAdditionalBits[symbol] < 255);
|
|
424
|
-
tableDecode[u].nbAdditionalBits =
|
|
557
|
+
tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
|
|
425
558
|
tableDecode[u].baseValue = baseValue[symbol];
|
|
426
|
-
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
/* Avoids the FORCE_INLINE of the _body() function. */
|
|
564
|
+
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
|
565
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
566
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
567
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
568
|
+
{
|
|
569
|
+
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
|
570
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
#if DYNAMIC_BMI2
|
|
574
|
+
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
|
575
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
576
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
577
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
578
|
+
{
|
|
579
|
+
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
|
580
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
|
581
|
+
}
|
|
582
|
+
#endif
|
|
583
|
+
|
|
584
|
+
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
585
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
586
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
587
|
+
unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
|
|
588
|
+
{
|
|
589
|
+
#if DYNAMIC_BMI2
|
|
590
|
+
if (bmi2) {
|
|
591
|
+
ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
|
|
592
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
|
593
|
+
return;
|
|
594
|
+
}
|
|
595
|
+
#endif
|
|
596
|
+
(void)bmi2;
|
|
597
|
+
ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
|
|
598
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
|
427
599
|
}
|
|
428
600
|
|
|
429
601
|
|
|
@@ -433,18 +605,19 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
|
433
605
|
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
|
|
434
606
|
symbolEncodingType_e type, unsigned max, U32 maxLog,
|
|
435
607
|
const void* src, size_t srcSize,
|
|
436
|
-
const U32* baseValue, const
|
|
608
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
|
437
609
|
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
|
438
|
-
int ddictIsCold, int nbSeq
|
|
610
|
+
int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
|
|
611
|
+
int bmi2)
|
|
439
612
|
{
|
|
440
613
|
switch(type)
|
|
441
614
|
{
|
|
442
615
|
case set_rle :
|
|
443
|
-
RETURN_ERROR_IF(!srcSize, srcSize_wrong);
|
|
444
|
-
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
|
|
616
|
+
RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
|
|
617
|
+
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
|
|
445
618
|
{ U32 const symbol = *(const BYTE*)src;
|
|
446
619
|
U32 const baseline = baseValue[symbol];
|
|
447
|
-
|
|
620
|
+
U8 const nbBits = nbAdditionalBits[symbol];
|
|
448
621
|
ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
|
|
449
622
|
}
|
|
450
623
|
*DTablePtr = DTableSpace;
|
|
@@ -453,7 +626,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
453
626
|
*DTablePtr = defaultTable;
|
|
454
627
|
return 0;
|
|
455
628
|
case set_repeat:
|
|
456
|
-
RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
|
|
629
|
+
RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
|
|
457
630
|
/* prefetch FSE table if used */
|
|
458
631
|
if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
|
|
459
632
|
const void* const pStart = *DTablePtr;
|
|
@@ -465,9 +638,9 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
465
638
|
{ unsigned tableLog;
|
|
466
639
|
S16 norm[MaxSeq+1];
|
|
467
640
|
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
|
|
468
|
-
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
|
|
469
|
-
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
|
|
470
|
-
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
|
|
641
|
+
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
|
|
642
|
+
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
|
|
643
|
+
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
|
|
471
644
|
*DTablePtr = DTableSpace;
|
|
472
645
|
return headerSize;
|
|
473
646
|
}
|
|
@@ -480,35 +653,36 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
480
653
|
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
481
654
|
const void* src, size_t srcSize)
|
|
482
655
|
{
|
|
483
|
-
const BYTE* const istart = (const BYTE*
|
|
656
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
484
657
|
const BYTE* const iend = istart + srcSize;
|
|
485
658
|
const BYTE* ip = istart;
|
|
486
659
|
int nbSeq;
|
|
487
660
|
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
|
|
488
661
|
|
|
489
662
|
/* check */
|
|
490
|
-
RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
|
|
663
|
+
RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
|
|
491
664
|
|
|
492
665
|
/* SeqHead */
|
|
493
666
|
nbSeq = *ip++;
|
|
494
667
|
if (!nbSeq) {
|
|
495
668
|
*nbSeqPtr=0;
|
|
496
|
-
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
|
|
669
|
+
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
|
|
497
670
|
return 1;
|
|
498
671
|
}
|
|
499
672
|
if (nbSeq > 0x7F) {
|
|
500
673
|
if (nbSeq == 0xFF) {
|
|
501
|
-
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
|
|
502
|
-
nbSeq = MEM_readLE16(ip) + LONGNBSEQ
|
|
674
|
+
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
|
|
675
|
+
nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
|
|
676
|
+
ip+=2;
|
|
503
677
|
} else {
|
|
504
|
-
RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
|
|
678
|
+
RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
|
|
505
679
|
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
|
506
680
|
}
|
|
507
681
|
}
|
|
508
682
|
*nbSeqPtr = nbSeq;
|
|
509
683
|
|
|
510
684
|
/* FSE table descriptors */
|
|
511
|
-
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
|
|
685
|
+
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
|
|
512
686
|
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
|
|
513
687
|
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
|
|
514
688
|
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
|
|
@@ -520,8 +694,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
520
694
|
ip, iend-ip,
|
|
521
695
|
LL_base, LL_bits,
|
|
522
696
|
LL_defaultDTable, dctx->fseEntropy,
|
|
523
|
-
dctx->ddictIsCold, nbSeq
|
|
524
|
-
|
|
697
|
+
dctx->ddictIsCold, nbSeq,
|
|
698
|
+
dctx->workspace, sizeof(dctx->workspace),
|
|
699
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
|
700
|
+
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
525
701
|
ip += llhSize;
|
|
526
702
|
}
|
|
527
703
|
|
|
@@ -530,8 +706,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
530
706
|
ip, iend-ip,
|
|
531
707
|
OF_base, OF_bits,
|
|
532
708
|
OF_defaultDTable, dctx->fseEntropy,
|
|
533
|
-
dctx->ddictIsCold, nbSeq
|
|
534
|
-
|
|
709
|
+
dctx->ddictIsCold, nbSeq,
|
|
710
|
+
dctx->workspace, sizeof(dctx->workspace),
|
|
711
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
|
712
|
+
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
535
713
|
ip += ofhSize;
|
|
536
714
|
}
|
|
537
715
|
|
|
@@ -540,8 +718,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
540
718
|
ip, iend-ip,
|
|
541
719
|
ML_base, ML_bits,
|
|
542
720
|
ML_defaultDTable, dctx->fseEntropy,
|
|
543
|
-
dctx->ddictIsCold, nbSeq
|
|
544
|
-
|
|
721
|
+
dctx->ddictIsCold, nbSeq,
|
|
722
|
+
dctx->workspace, sizeof(dctx->workspace),
|
|
723
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
|
724
|
+
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
545
725
|
ip += mlhSize;
|
|
546
726
|
}
|
|
547
727
|
}
|
|
@@ -554,7 +734,6 @@ typedef struct {
|
|
|
554
734
|
size_t litLength;
|
|
555
735
|
size_t matchLength;
|
|
556
736
|
size_t offset;
|
|
557
|
-
const BYTE* match;
|
|
558
737
|
} seq_t;
|
|
559
738
|
|
|
560
739
|
typedef struct {
|
|
@@ -568,9 +747,6 @@ typedef struct {
|
|
|
568
747
|
ZSTD_fseState stateOffb;
|
|
569
748
|
ZSTD_fseState stateML;
|
|
570
749
|
size_t prevOffset[ZSTD_REP_NUM];
|
|
571
|
-
const BYTE* prefixStart;
|
|
572
|
-
const BYTE* dictEnd;
|
|
573
|
-
size_t pos;
|
|
574
750
|
} seqState_t;
|
|
575
751
|
|
|
576
752
|
/*! ZSTD_overlapCopy8() :
|
|
@@ -580,7 +756,7 @@ typedef struct {
|
|
|
580
756
|
* Precondition: *ip <= *op
|
|
581
757
|
* Postcondition: *op - *ip >= 8
|
|
582
758
|
*/
|
|
583
|
-
|
|
759
|
+
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
|
584
760
|
assert(*ip <= *op);
|
|
585
761
|
if (offset < 8) {
|
|
586
762
|
/* close range match, overlap */
|
|
@@ -613,7 +789,7 @@ static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
|
|
613
789
|
* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
|
|
614
790
|
* The src buffer must be before the dst buffer.
|
|
615
791
|
*/
|
|
616
|
-
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
|
792
|
+
static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
|
617
793
|
ptrdiff_t const diff = op - ip;
|
|
618
794
|
BYTE* const oend = op + length;
|
|
619
795
|
|
|
@@ -629,6 +805,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
|
629
805
|
/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
|
|
630
806
|
assert(length >= 8);
|
|
631
807
|
ZSTD_overlapCopy8(&op, &ip, diff);
|
|
808
|
+
length -= 8;
|
|
632
809
|
assert(op - ip >= 8);
|
|
633
810
|
assert(op <= oend);
|
|
634
811
|
}
|
|
@@ -643,12 +820,35 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
|
643
820
|
assert(oend > oend_w);
|
|
644
821
|
ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
|
|
645
822
|
ip += oend_w - op;
|
|
646
|
-
op
|
|
823
|
+
op += oend_w - op;
|
|
647
824
|
}
|
|
648
825
|
/* Handle the leftovers. */
|
|
649
826
|
while (op < oend) *op++ = *ip++;
|
|
650
827
|
}
|
|
651
828
|
|
|
829
|
+
/* ZSTD_safecopyDstBeforeSrc():
|
|
830
|
+
* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
|
|
831
|
+
* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
|
|
832
|
+
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
|
|
833
|
+
ptrdiff_t const diff = op - ip;
|
|
834
|
+
BYTE* const oend = op + length;
|
|
835
|
+
|
|
836
|
+
if (length < 8 || diff > -8) {
|
|
837
|
+
/* Handle short lengths, close overlaps, and dst not before src. */
|
|
838
|
+
while (op < oend) *op++ = *ip++;
|
|
839
|
+
return;
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
|
|
843
|
+
ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
|
|
844
|
+
ip += oend - WILDCOPY_OVERLENGTH - op;
|
|
845
|
+
op += oend - WILDCOPY_OVERLENGTH - op;
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
/* Handle the leftovers. */
|
|
849
|
+
while (op < oend) *op++ = *ip++;
|
|
850
|
+
}
|
|
851
|
+
|
|
652
852
|
/* ZSTD_execSequenceEnd():
|
|
653
853
|
* This version handles cases that are near the end of the output buffer. It requires
|
|
654
854
|
* more careful checks to make sure there is no overflow. By separating out these hard
|
|
@@ -659,21 +859,21 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
|
659
859
|
*/
|
|
660
860
|
FORCE_NOINLINE
|
|
661
861
|
size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
862
|
+
BYTE* const oend, seq_t sequence,
|
|
863
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
864
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
665
865
|
{
|
|
666
866
|
BYTE* const oLitEnd = op + sequence.litLength;
|
|
667
867
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
668
|
-
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
669
868
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
670
869
|
const BYTE* match = oLitEnd - sequence.offset;
|
|
671
870
|
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
|
672
871
|
|
|
673
|
-
/* bounds checks */
|
|
674
|
-
|
|
675
|
-
RETURN_ERROR_IF(
|
|
676
|
-
|
|
872
|
+
/* bounds checks : careful of address space overflow in 32-bit mode */
|
|
873
|
+
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
|
|
874
|
+
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
|
|
875
|
+
assert(op < op + sequenceLength);
|
|
876
|
+
assert(oLitEnd < op + sequenceLength);
|
|
677
877
|
|
|
678
878
|
/* copy literals */
|
|
679
879
|
ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
|
|
@@ -683,42 +883,102 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
|
683
883
|
/* copy Match */
|
|
684
884
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
685
885
|
/* offset beyond prefix */
|
|
686
|
-
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
|
|
687
|
-
match = dictEnd - (prefixStart-match);
|
|
886
|
+
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
|
887
|
+
match = dictEnd - (prefixStart - match);
|
|
688
888
|
if (match + sequence.matchLength <= dictEnd) {
|
|
689
|
-
|
|
889
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
690
890
|
return sequenceLength;
|
|
691
891
|
}
|
|
692
892
|
/* span extDict & currentPrefixSegment */
|
|
693
893
|
{ size_t const length1 = dictEnd - match;
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
894
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
895
|
+
op = oLitEnd + length1;
|
|
896
|
+
sequence.matchLength -= length1;
|
|
897
|
+
match = prefixStart;
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
|
901
|
+
return sequenceLength;
|
|
902
|
+
}
|
|
903
|
+
|
|
904
|
+
/* ZSTD_execSequenceEndSplitLitBuffer():
|
|
905
|
+
* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
|
|
906
|
+
*/
|
|
907
|
+
FORCE_NOINLINE
|
|
908
|
+
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
|
|
909
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
|
910
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
911
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
912
|
+
{
|
|
913
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
|
914
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
915
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
916
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
|
917
|
+
|
|
918
|
+
|
|
919
|
+
/* bounds checks : careful of address space overflow in 32-bit mode */
|
|
920
|
+
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
|
|
921
|
+
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
|
|
922
|
+
assert(op < op + sequenceLength);
|
|
923
|
+
assert(oLitEnd < op + sequenceLength);
|
|
924
|
+
|
|
925
|
+
/* copy literals */
|
|
926
|
+
RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
|
|
927
|
+
ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
|
|
928
|
+
op = oLitEnd;
|
|
929
|
+
*litPtr = iLitEnd;
|
|
930
|
+
|
|
931
|
+
/* copy Match */
|
|
932
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
933
|
+
/* offset beyond prefix */
|
|
934
|
+
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
|
935
|
+
match = dictEnd - (prefixStart - match);
|
|
936
|
+
if (match + sequence.matchLength <= dictEnd) {
|
|
937
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
938
|
+
return sequenceLength;
|
|
939
|
+
}
|
|
940
|
+
/* span extDict & currentPrefixSegment */
|
|
941
|
+
{ size_t const length1 = dictEnd - match;
|
|
942
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
943
|
+
op = oLitEnd + length1;
|
|
944
|
+
sequence.matchLength -= length1;
|
|
945
|
+
match = prefixStart;
|
|
946
|
+
}
|
|
947
|
+
}
|
|
699
948
|
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
|
700
949
|
return sequenceLength;
|
|
701
950
|
}
|
|
702
951
|
|
|
703
952
|
HINT_INLINE
|
|
704
953
|
size_t ZSTD_execSequence(BYTE* op,
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
954
|
+
BYTE* const oend, seq_t sequence,
|
|
955
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
956
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
708
957
|
{
|
|
709
958
|
BYTE* const oLitEnd = op + sequence.litLength;
|
|
710
959
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
711
960
|
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
712
|
-
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
|
961
|
+
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
|
|
713
962
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
714
963
|
const BYTE* match = oLitEnd - sequence.offset;
|
|
715
964
|
|
|
716
|
-
|
|
717
|
-
assert(
|
|
718
|
-
|
|
965
|
+
assert(op != NULL /* Precondition */);
|
|
966
|
+
assert(oend_w < oend /* No underflow */);
|
|
967
|
+
/* Handle edge cases in a slow path:
|
|
968
|
+
* - Read beyond end of literals
|
|
969
|
+
* - Match end is within WILDCOPY_OVERLENGTH of oend
|
|
970
|
+
* - 32-bit mode and the match length overflows
|
|
971
|
+
*/
|
|
972
|
+
if (UNLIKELY(
|
|
973
|
+
iLitEnd > litLimit ||
|
|
974
|
+
oMatchEnd > oend_w ||
|
|
975
|
+
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
|
719
976
|
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
|
720
977
|
|
|
721
978
|
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
|
979
|
+
assert(op <= oLitEnd /* No overflow */);
|
|
980
|
+
assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
|
|
981
|
+
assert(oMatchEnd <= oend /* No underflow */);
|
|
722
982
|
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
|
|
723
983
|
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
|
|
724
984
|
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
|
|
@@ -729,7 +989,99 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
729
989
|
*/
|
|
730
990
|
assert(WILDCOPY_OVERLENGTH >= 16);
|
|
731
991
|
ZSTD_copy16(op, (*litPtr));
|
|
732
|
-
if (sequence.litLength > 16) {
|
|
992
|
+
if (UNLIKELY(sequence.litLength > 16)) {
|
|
993
|
+
ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
|
|
994
|
+
}
|
|
995
|
+
op = oLitEnd;
|
|
996
|
+
*litPtr = iLitEnd; /* update for next sequence */
|
|
997
|
+
|
|
998
|
+
/* Copy Match */
|
|
999
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
1000
|
+
/* offset beyond prefix -> go into extDict */
|
|
1001
|
+
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
|
1002
|
+
match = dictEnd + (match - prefixStart);
|
|
1003
|
+
if (match + sequence.matchLength <= dictEnd) {
|
|
1004
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
1005
|
+
return sequenceLength;
|
|
1006
|
+
}
|
|
1007
|
+
/* span extDict & currentPrefixSegment */
|
|
1008
|
+
{ size_t const length1 = dictEnd - match;
|
|
1009
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
1010
|
+
op = oLitEnd + length1;
|
|
1011
|
+
sequence.matchLength -= length1;
|
|
1012
|
+
match = prefixStart;
|
|
1013
|
+
}
|
|
1014
|
+
}
|
|
1015
|
+
/* Match within prefix of 1 or more bytes */
|
|
1016
|
+
assert(op <= oMatchEnd);
|
|
1017
|
+
assert(oMatchEnd <= oend_w);
|
|
1018
|
+
assert(match >= prefixStart);
|
|
1019
|
+
assert(sequence.matchLength >= 1);
|
|
1020
|
+
|
|
1021
|
+
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
|
1022
|
+
* without overlap checking.
|
|
1023
|
+
*/
|
|
1024
|
+
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
|
1025
|
+
/* We bet on a full wildcopy for matches, since we expect matches to be
|
|
1026
|
+
* longer than literals (in general). In silesia, ~10% of matches are longer
|
|
1027
|
+
* than 16 bytes.
|
|
1028
|
+
*/
|
|
1029
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
|
|
1030
|
+
return sequenceLength;
|
|
1031
|
+
}
|
|
1032
|
+
assert(sequence.offset < WILDCOPY_VECLEN);
|
|
1033
|
+
|
|
1034
|
+
/* Copy 8 bytes and spread the offset to be >= 8. */
|
|
1035
|
+
ZSTD_overlapCopy8(&op, &match, sequence.offset);
|
|
1036
|
+
|
|
1037
|
+
/* If the match length is > 8 bytes, then continue with the wildcopy. */
|
|
1038
|
+
if (sequence.matchLength > 8) {
|
|
1039
|
+
assert(op < oMatchEnd);
|
|
1040
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
|
|
1041
|
+
}
|
|
1042
|
+
return sequenceLength;
|
|
1043
|
+
}
|
|
1044
|
+
|
|
1045
|
+
HINT_INLINE
|
|
1046
|
+
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
|
|
1047
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
|
1048
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
1049
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
1050
|
+
{
|
|
1051
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
|
1052
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
1053
|
+
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
1054
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
1055
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
|
1056
|
+
|
|
1057
|
+
assert(op != NULL /* Precondition */);
|
|
1058
|
+
assert(oend_w < oend /* No underflow */);
|
|
1059
|
+
/* Handle edge cases in a slow path:
|
|
1060
|
+
* - Read beyond end of literals
|
|
1061
|
+
* - Match end is within WILDCOPY_OVERLENGTH of oend
|
|
1062
|
+
* - 32-bit mode and the match length overflows
|
|
1063
|
+
*/
|
|
1064
|
+
if (UNLIKELY(
|
|
1065
|
+
iLitEnd > litLimit ||
|
|
1066
|
+
oMatchEnd > oend_w ||
|
|
1067
|
+
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
|
1068
|
+
return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
|
1069
|
+
|
|
1070
|
+
/* Assumptions (everything else goes into ZSTD_execSequenceEndSplitLitBuffer()) */
|
|
1071
|
+
assert(op <= oLitEnd /* No overflow */);
|
|
1072
|
+
assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
|
|
1073
|
+
assert(oMatchEnd <= oend /* No underflow */);
|
|
1074
|
+
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
|
|
1075
|
+
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
|
|
1076
|
+
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
|
|
1077
|
+
|
|
1078
|
+
/* Copy Literals:
|
|
1079
|
+
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
|
|
1080
|
+
* We likely don't need the full 32-byte wildcopy.
|
|
1081
|
+
*/
|
|
1082
|
+
assert(WILDCOPY_OVERLENGTH >= 16);
|
|
1083
|
+
ZSTD_copy16(op, (*litPtr));
|
|
1084
|
+
if (UNLIKELY(sequence.litLength > 16)) {
|
|
733
1085
|
ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
|
|
734
1086
|
}
|
|
735
1087
|
op = oLitEnd;
|
|
@@ -738,15 +1090,15 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
738
1090
|
/* Copy Match */
|
|
739
1091
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
740
1092
|
/* offset beyond prefix -> go into extDict */
|
|
741
|
-
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
|
|
1093
|
+
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
|
742
1094
|
match = dictEnd + (match - prefixStart);
|
|
743
1095
|
if (match + sequence.matchLength <= dictEnd) {
|
|
744
|
-
|
|
1096
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
745
1097
|
return sequenceLength;
|
|
746
1098
|
}
|
|
747
1099
|
/* span extDict & currentPrefixSegment */
|
|
748
1100
|
{ size_t const length1 = dictEnd - match;
|
|
749
|
-
|
|
1101
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
750
1102
|
op = oLitEnd + length1;
|
|
751
1103
|
sequence.matchLength -= length1;
|
|
752
1104
|
match = prefixStart;
|
|
@@ -760,7 +1112,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
760
1112
|
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
|
761
1113
|
* without overlap checking.
|
|
762
1114
|
*/
|
|
763
|
-
if (sequence.offset >= WILDCOPY_VECLEN) {
|
|
1115
|
+
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
|
764
1116
|
/* We bet on a full wildcopy for matches, since we expect matches to be
|
|
765
1117
|
* longer than literals (in general). In silesia, ~10% of matches are longer
|
|
766
1118
|
* than 16 bytes.
|
|
@@ -781,6 +1133,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
781
1133
|
return sequenceLength;
|
|
782
1134
|
}
|
|
783
1135
|
|
|
1136
|
+
|
|
784
1137
|
static void
|
|
785
1138
|
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
|
|
786
1139
|
{
|
|
@@ -794,12 +1147,10 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS
|
|
|
794
1147
|
}
|
|
795
1148
|
|
|
796
1149
|
FORCE_INLINE_TEMPLATE void
|
|
797
|
-
|
|
1150
|
+
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
|
|
798
1151
|
{
|
|
799
|
-
ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
|
|
800
|
-
U32 const nbBits = DInfo.nbBits;
|
|
801
1152
|
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
|
802
|
-
DStatePtr->state =
|
|
1153
|
+
DStatePtr->state = nextState + lowBits;
|
|
803
1154
|
}
|
|
804
1155
|
|
|
805
1156
|
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
|
@@ -814,102 +1165,178 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
|
|
814
1165
|
|
|
815
1166
|
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
|
|
816
1167
|
|
|
817
|
-
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
818
1168
|
FORCE_INLINE_TEMPLATE seq_t
|
|
819
1169
|
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
|
820
1170
|
{
|
|
821
1171
|
seq_t seq;
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
U32 const
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
} else {
|
|
845
|
-
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
|
846
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
|
847
|
-
}
|
|
848
|
-
}
|
|
1172
|
+
const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
|
|
1173
|
+
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
|
|
1174
|
+
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
|
|
1175
|
+
seq.matchLength = mlDInfo->baseValue;
|
|
1176
|
+
seq.litLength = llDInfo->baseValue;
|
|
1177
|
+
{ U32 const ofBase = ofDInfo->baseValue;
|
|
1178
|
+
BYTE const llBits = llDInfo->nbAdditionalBits;
|
|
1179
|
+
BYTE const mlBits = mlDInfo->nbAdditionalBits;
|
|
1180
|
+
BYTE const ofBits = ofDInfo->nbAdditionalBits;
|
|
1181
|
+
BYTE const totalBits = llBits+mlBits+ofBits;
|
|
1182
|
+
|
|
1183
|
+
U16 const llNext = llDInfo->nextState;
|
|
1184
|
+
U16 const mlNext = mlDInfo->nextState;
|
|
1185
|
+
U16 const ofNext = ofDInfo->nextState;
|
|
1186
|
+
U32 const llnbBits = llDInfo->nbBits;
|
|
1187
|
+
U32 const mlnbBits = mlDInfo->nbBits;
|
|
1188
|
+
U32 const ofnbBits = ofDInfo->nbBits;
|
|
1189
|
+
/*
|
|
1190
|
+
* As gcc has better branch and block analyzers, sometimes it is only
|
|
1191
|
+
* valuable to mark likeliness for clang, it gives around 3-4% of
|
|
1192
|
+
* performance.
|
|
1193
|
+
*/
|
|
849
1194
|
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
1195
|
+
/* sequence */
|
|
1196
|
+
{ size_t offset;
|
|
1197
|
+
#if defined(__clang__)
|
|
1198
|
+
if (LIKELY(ofBits > 1)) {
|
|
1199
|
+
#else
|
|
1200
|
+
if (ofBits > 1) {
|
|
1201
|
+
#endif
|
|
1202
|
+
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
|
1203
|
+
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
|
1204
|
+
assert(ofBits <= MaxOff);
|
|
1205
|
+
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
|
|
1206
|
+
U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
|
|
1207
|
+
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
|
1208
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1209
|
+
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
|
1210
|
+
assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
|
|
1211
|
+
} else {
|
|
1212
|
+
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
|
1213
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
|
1214
|
+
}
|
|
1215
|
+
seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
856
1216
|
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
857
|
-
seqState->prevOffset[0] = offset
|
|
858
|
-
} else {
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
1217
|
+
seqState->prevOffset[0] = offset;
|
|
1218
|
+
} else {
|
|
1219
|
+
U32 const ll0 = (llDInfo->baseValue == 0);
|
|
1220
|
+
if (LIKELY((ofBits == 0))) {
|
|
1221
|
+
offset = seqState->prevOffset[ll0];
|
|
1222
|
+
seqState->prevOffset[1] = seqState->prevOffset[!ll0];
|
|
1223
|
+
seqState->prevOffset[0] = offset;
|
|
1224
|
+
} else {
|
|
1225
|
+
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
|
|
1226
|
+
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
1227
|
+
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
|
1228
|
+
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
1229
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
1230
|
+
seqState->prevOffset[0] = offset = temp;
|
|
1231
|
+
} } }
|
|
1232
|
+
seq.offset = offset;
|
|
865
1233
|
}
|
|
866
|
-
seq.offset = offset;
|
|
867
|
-
}
|
|
868
1234
|
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
1235
|
+
#if defined(__clang__)
|
|
1236
|
+
if (UNLIKELY(mlBits > 0))
|
|
1237
|
+
#else
|
|
1238
|
+
if (mlBits > 0)
|
|
1239
|
+
#endif
|
|
1240
|
+
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
|
1241
|
+
|
|
1242
|
+
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
|
1243
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1244
|
+
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
|
1245
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1246
|
+
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
|
1247
|
+
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
|
1248
|
+
|
|
1249
|
+
#if defined(__clang__)
|
|
1250
|
+
if (UNLIKELY(llBits > 0))
|
|
1251
|
+
#else
|
|
1252
|
+
if (llBits > 0)
|
|
1253
|
+
#endif
|
|
1254
|
+
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
|
1255
|
+
|
|
1256
|
+
if (MEM_32bits())
|
|
1257
|
+
BIT_reloadDStream(&seqState->DStream);
|
|
1258
|
+
|
|
1259
|
+
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
|
1260
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
|
1261
|
+
|
|
1262
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
|
|
1263
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
|
|
1264
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
1265
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
|
|
1266
|
+
}
|
|
891
1267
|
|
|
892
1268
|
return seq;
|
|
893
1269
|
}
|
|
894
1270
|
|
|
1271
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
|
1272
|
+
MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
|
|
1273
|
+
{
|
|
1274
|
+
size_t const windowSize = dctx->fParams.windowSize;
|
|
1275
|
+
/* No dictionary used. */
|
|
1276
|
+
if (dctx->dictContentEndForFuzzing == NULL) return 0;
|
|
1277
|
+
/* Dictionary is our prefix. */
|
|
1278
|
+
if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
|
|
1279
|
+
/* Dictionary is not our ext-dict. */
|
|
1280
|
+
if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
|
|
1281
|
+
/* Dictionary is not within our window size. */
|
|
1282
|
+
if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
|
|
1283
|
+
/* Dictionary is active. */
|
|
1284
|
+
return 1;
|
|
1285
|
+
}
|
|
1286
|
+
|
|
1287
|
+
MEM_STATIC void ZSTD_assertValidSequence(
|
|
1288
|
+
ZSTD_DCtx const* dctx,
|
|
1289
|
+
BYTE const* op, BYTE const* oend,
|
|
1290
|
+
seq_t const seq,
|
|
1291
|
+
BYTE const* prefixStart, BYTE const* virtualStart)
|
|
1292
|
+
{
|
|
1293
|
+
#if DEBUGLEVEL >= 1
|
|
1294
|
+
size_t const windowSize = dctx->fParams.windowSize;
|
|
1295
|
+
size_t const sequenceSize = seq.litLength + seq.matchLength;
|
|
1296
|
+
BYTE const* const oLitEnd = op + seq.litLength;
|
|
1297
|
+
DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
|
|
1298
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
|
1299
|
+
assert(op <= oend);
|
|
1300
|
+
assert((size_t)(oend - op) >= sequenceSize);
|
|
1301
|
+
assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
|
|
1302
|
+
if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
|
|
1303
|
+
size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
|
|
1304
|
+
/* Offset must be within the dictionary. */
|
|
1305
|
+
assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
|
|
1306
|
+
assert(seq.offset <= windowSize + dictSize);
|
|
1307
|
+
} else {
|
|
1308
|
+
/* Offset must be within our window. */
|
|
1309
|
+
assert(seq.offset <= windowSize);
|
|
1310
|
+
}
|
|
1311
|
+
#else
|
|
1312
|
+
(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
|
|
1313
|
+
#endif
|
|
1314
|
+
}
|
|
1315
|
+
#endif
|
|
1316
|
+
|
|
1317
|
+
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1318
|
+
|
|
1319
|
+
|
|
895
1320
|
FORCE_INLINE_TEMPLATE size_t
|
|
896
1321
|
DONT_VECTORIZE
|
|
897
|
-
|
|
1322
|
+
ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
|
|
898
1323
|
void* dst, size_t maxDstSize,
|
|
899
1324
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
900
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1325
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1326
|
+
const int frame)
|
|
901
1327
|
{
|
|
902
1328
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
903
1329
|
const BYTE* const iend = ip + seqSize;
|
|
904
|
-
BYTE* const ostart = (BYTE*
|
|
1330
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
905
1331
|
BYTE* const oend = ostart + maxDstSize;
|
|
906
1332
|
BYTE* op = ostart;
|
|
907
1333
|
const BYTE* litPtr = dctx->litPtr;
|
|
908
|
-
const BYTE*
|
|
1334
|
+
const BYTE* litBufferEnd = dctx->litBufferEnd;
|
|
909
1335
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
|
910
1336
|
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
|
|
911
1337
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
|
912
|
-
DEBUGLOG(5, "
|
|
1338
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
|
|
1339
|
+
(void)frame;
|
|
913
1340
|
|
|
914
1341
|
/* Regen sequences */
|
|
915
1342
|
if (nbSeq) {
|
|
@@ -918,38 +1345,279 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
|
918
1345
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
919
1346
|
RETURN_ERROR_IF(
|
|
920
1347
|
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
|
921
|
-
corruption_detected);
|
|
1348
|
+
corruption_detected, "");
|
|
922
1349
|
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
|
923
1350
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
|
924
1351
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
|
1352
|
+
assert(dst != NULL);
|
|
925
1353
|
|
|
926
1354
|
ZSTD_STATIC_ASSERT(
|
|
927
1355
|
BIT_DStream_unfinished < BIT_DStream_completed &&
|
|
928
1356
|
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
|
929
1357
|
BIT_DStream_completed < BIT_DStream_overflow);
|
|
930
1358
|
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
1359
|
+
/* decompress without overrunning litPtr begins */
|
|
1360
|
+
{
|
|
1361
|
+
seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1362
|
+
/* Align the decompression loop to 32 + 16 bytes.
|
|
1363
|
+
*
|
|
1364
|
+
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
|
|
1365
|
+
* speed swings based on the alignment of the decompression loop. This
|
|
1366
|
+
* performance swing is caused by parts of the decompression loop falling
|
|
1367
|
+
* out of the DSB. The entire decompression loop should fit in the DSB,
|
|
1368
|
+
* when it can't we get much worse performance. You can measure if you've
|
|
1369
|
+
* hit the good case or the bad case with this perf command for some
|
|
1370
|
+
* compressed file test.zst:
|
|
1371
|
+
*
|
|
1372
|
+
* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
|
|
1373
|
+
* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
|
|
1374
|
+
*
|
|
1375
|
+
* If you see most cycles served out of the MITE you've hit the bad case.
|
|
1376
|
+
* If you see most cycles served out of the DSB you've hit the good case.
|
|
1377
|
+
* If it is pretty even then you may be in an okay case.
|
|
1378
|
+
*
|
|
1379
|
+
* This issue has been reproduced on the following CPUs:
|
|
1380
|
+
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
|
1381
|
+
* Use Instruments->Counters to get DSB/MITE cycles.
|
|
1382
|
+
* I never got performance swings, but I was able to
|
|
1383
|
+
* go from the good case of mostly DSB to half of the
|
|
1384
|
+
* cycles served from MITE.
|
|
1385
|
+
* - Coffeelake: Intel i9-9900k
|
|
1386
|
+
* - Coffeelake: Intel i7-9700k
|
|
1387
|
+
*
|
|
1388
|
+
* I haven't been able to reproduce the instability or DSB misses on any
|
|
1389
|
+
* of the following CPUs:
|
|
1390
|
+
* - Haswell
|
|
1391
|
+
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
|
|
1392
|
+
* - Skylake
|
|
1393
|
+
*
|
|
1394
|
+
* Alignment is done for each of the three major decompression loops:
|
|
1395
|
+
* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
|
|
1396
|
+
* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
|
|
1397
|
+
* - ZSTD_decompressSequences_body
|
|
1398
|
+
* Alignment choices are made to minimize large swings on bad cases and influence on performance
|
|
1399
|
+
* from changes external to this code, rather than to overoptimize on the current commit.
|
|
1400
|
+
*
|
|
1401
|
+
* If you are seeing performance instability, this script can help test.
|
|
1402
|
+
* It tests on 4 commits in zstd where I saw performance change.
|
|
1403
|
+
*
|
|
1404
|
+
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
|
1405
|
+
*/
|
|
1406
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1407
|
+
__asm__(".p2align 6");
|
|
1408
|
+
# if __GNUC__ >= 7
|
|
1409
|
+
/* good for gcc-7, gcc-9, and gcc-11 */
|
|
1410
|
+
__asm__("nop");
|
|
1411
|
+
__asm__(".p2align 5");
|
|
1412
|
+
__asm__("nop");
|
|
1413
|
+
__asm__(".p2align 4");
|
|
1414
|
+
# if __GNUC__ == 8 || __GNUC__ == 10
|
|
1415
|
+
/* good for gcc-8 and gcc-10 */
|
|
1416
|
+
__asm__("nop");
|
|
1417
|
+
__asm__(".p2align 3");
|
|
1418
|
+
# endif
|
|
1419
|
+
# endif
|
|
1420
|
+
#endif
|
|
1421
|
+
|
|
1422
|
+
/* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
|
|
1423
|
+
for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
|
|
1424
|
+
size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
|
1425
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1426
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1427
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1428
|
+
#endif
|
|
1429
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1430
|
+
return oneSeqSize;
|
|
935
1431
|
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
936
|
-
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
937
1432
|
op += oneSeqSize;
|
|
938
|
-
|
|
1433
|
+
if (UNLIKELY(!--nbSeq))
|
|
1434
|
+
break;
|
|
1435
|
+
BIT_reloadDStream(&(seqState.DStream));
|
|
1436
|
+
sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1437
|
+
}
|
|
1438
|
+
|
|
1439
|
+
/* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
|
|
1440
|
+
if (nbSeq > 0) {
|
|
1441
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
|
1442
|
+
if (leftoverLit)
|
|
1443
|
+
{
|
|
1444
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
|
1445
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
|
1446
|
+
sequence.litLength -= leftoverLit;
|
|
1447
|
+
op += leftoverLit;
|
|
1448
|
+
}
|
|
1449
|
+
litPtr = dctx->litExtraBuffer;
|
|
1450
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1451
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1452
|
+
{
|
|
1453
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
|
1454
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1455
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1456
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1457
|
+
#endif
|
|
1458
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1459
|
+
return oneSeqSize;
|
|
1460
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1461
|
+
op += oneSeqSize;
|
|
1462
|
+
if (--nbSeq)
|
|
1463
|
+
BIT_reloadDStream(&(seqState.DStream));
|
|
1464
|
+
}
|
|
1465
|
+
}
|
|
1466
|
+
}
|
|
1467
|
+
|
|
1468
|
+
if (nbSeq > 0) /* there is remaining lit from extra buffer */
|
|
1469
|
+
{
|
|
1470
|
+
|
|
1471
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1472
|
+
__asm__(".p2align 6");
|
|
1473
|
+
__asm__("nop");
|
|
1474
|
+
# if __GNUC__ != 7
|
|
1475
|
+
/* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
|
|
1476
|
+
__asm__(".p2align 4");
|
|
1477
|
+
__asm__("nop");
|
|
1478
|
+
__asm__(".p2align 3");
|
|
1479
|
+
# elif __GNUC__ >= 11
|
|
1480
|
+
__asm__(".p2align 3");
|
|
1481
|
+
# else
|
|
1482
|
+
__asm__(".p2align 5");
|
|
1483
|
+
__asm__("nop");
|
|
1484
|
+
__asm__(".p2align 3");
|
|
1485
|
+
# endif
|
|
1486
|
+
#endif
|
|
1487
|
+
|
|
1488
|
+
for (; ; ) {
|
|
1489
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1490
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
|
1491
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1492
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1493
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1494
|
+
#endif
|
|
1495
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1496
|
+
return oneSeqSize;
|
|
1497
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1498
|
+
op += oneSeqSize;
|
|
1499
|
+
if (UNLIKELY(!--nbSeq))
|
|
1500
|
+
break;
|
|
1501
|
+
BIT_reloadDStream(&(seqState.DStream));
|
|
1502
|
+
}
|
|
1503
|
+
}
|
|
1504
|
+
|
|
1505
|
+
/* check if reached exact end */
|
|
1506
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
|
|
1507
|
+
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
|
|
1508
|
+
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
|
|
1509
|
+
/* save reps for next block */
|
|
1510
|
+
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
/* last literal segment */
|
|
1514
|
+
if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
|
|
1515
|
+
{
|
|
1516
|
+
size_t const lastLLSize = litBufferEnd - litPtr;
|
|
1517
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
|
|
1518
|
+
if (op != NULL) {
|
|
1519
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
|
1520
|
+
op += lastLLSize;
|
|
1521
|
+
}
|
|
1522
|
+
litPtr = dctx->litExtraBuffer;
|
|
1523
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1524
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1525
|
+
}
|
|
1526
|
+
{ size_t const lastLLSize = litBufferEnd - litPtr;
|
|
1527
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1528
|
+
if (op != NULL) {
|
|
1529
|
+
ZSTD_memcpy(op, litPtr, lastLLSize);
|
|
1530
|
+
op += lastLLSize;
|
|
1531
|
+
}
|
|
1532
|
+
}
|
|
1533
|
+
|
|
1534
|
+
return op-ostart;
|
|
1535
|
+
}
|
|
1536
|
+
|
|
1537
|
+
FORCE_INLINE_TEMPLATE size_t
|
|
1538
|
+
DONT_VECTORIZE
|
|
1539
|
+
ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
|
|
1540
|
+
void* dst, size_t maxDstSize,
|
|
1541
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1542
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1543
|
+
const int frame)
|
|
1544
|
+
{
|
|
1545
|
+
const BYTE* ip = (const BYTE*)seqStart;
|
|
1546
|
+
const BYTE* const iend = ip + seqSize;
|
|
1547
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
1548
|
+
BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
|
|
1549
|
+
BYTE* op = ostart;
|
|
1550
|
+
const BYTE* litPtr = dctx->litPtr;
|
|
1551
|
+
const BYTE* const litEnd = litPtr + dctx->litSize;
|
|
1552
|
+
const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
|
|
1553
|
+
const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
|
|
1554
|
+
const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
|
|
1555
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_body");
|
|
1556
|
+
(void)frame;
|
|
1557
|
+
|
|
1558
|
+
/* Regen sequences */
|
|
1559
|
+
if (nbSeq) {
|
|
1560
|
+
seqState_t seqState;
|
|
1561
|
+
dctx->fseEntropy = 1;
|
|
1562
|
+
{ U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
1563
|
+
RETURN_ERROR_IF(
|
|
1564
|
+
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
|
|
1565
|
+
corruption_detected, "");
|
|
1566
|
+
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
|
1567
|
+
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
|
1568
|
+
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
|
1569
|
+
assert(dst != NULL);
|
|
1570
|
+
|
|
1571
|
+
ZSTD_STATIC_ASSERT(
|
|
1572
|
+
BIT_DStream_unfinished < BIT_DStream_completed &&
|
|
1573
|
+
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
|
1574
|
+
BIT_DStream_completed < BIT_DStream_overflow);
|
|
1575
|
+
|
|
1576
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1577
|
+
__asm__(".p2align 6");
|
|
1578
|
+
__asm__("nop");
|
|
1579
|
+
# if __GNUC__ >= 7
|
|
1580
|
+
__asm__(".p2align 5");
|
|
1581
|
+
__asm__("nop");
|
|
1582
|
+
__asm__(".p2align 3");
|
|
1583
|
+
# else
|
|
1584
|
+
__asm__(".p2align 4");
|
|
1585
|
+
__asm__("nop");
|
|
1586
|
+
__asm__(".p2align 3");
|
|
1587
|
+
# endif
|
|
1588
|
+
#endif
|
|
1589
|
+
|
|
1590
|
+
for ( ; ; ) {
|
|
1591
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1592
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
|
1593
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1594
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1595
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1596
|
+
#endif
|
|
1597
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
|
1598
|
+
return oneSeqSize;
|
|
1599
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1600
|
+
op += oneSeqSize;
|
|
1601
|
+
if (UNLIKELY(!--nbSeq))
|
|
1602
|
+
break;
|
|
1603
|
+
BIT_reloadDStream(&(seqState.DStream));
|
|
1604
|
+
}
|
|
939
1605
|
|
|
940
1606
|
/* check if reached exact end */
|
|
941
1607
|
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
|
|
942
|
-
RETURN_ERROR_IF(nbSeq, corruption_detected);
|
|
943
|
-
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
|
|
1608
|
+
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
|
|
1609
|
+
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
|
|
944
1610
|
/* save reps for next block */
|
|
945
1611
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
|
946
1612
|
}
|
|
947
1613
|
|
|
948
1614
|
/* last literal segment */
|
|
949
1615
|
{ size_t const lastLLSize = litEnd - litPtr;
|
|
950
|
-
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
|
|
951
|
-
|
|
952
|
-
|
|
1616
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1617
|
+
if (op != NULL) {
|
|
1618
|
+
ZSTD_memcpy(op, litPtr, lastLLSize);
|
|
1619
|
+
op += lastLLSize;
|
|
1620
|
+
}
|
|
953
1621
|
}
|
|
954
1622
|
|
|
955
1623
|
return op-ostart;
|
|
@@ -959,157 +1627,180 @@ static size_t
|
|
|
959
1627
|
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
|
|
960
1628
|
void* dst, size_t maxDstSize,
|
|
961
1629
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
962
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1630
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1631
|
+
const int frame)
|
|
963
1632
|
{
|
|
964
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1633
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
965
1634
|
}
|
|
966
|
-
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
967
|
-
|
|
968
1635
|
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
1636
|
+
static size_t
|
|
1637
|
+
ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
|
|
1638
|
+
void* dst, size_t maxDstSize,
|
|
1639
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1640
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1641
|
+
const int frame)
|
|
973
1642
|
{
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
|
|
978
|
-
U32 const totalBits = llBits+mlBits+ofBits;
|
|
979
|
-
U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
|
|
980
|
-
U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
|
|
981
|
-
U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
|
|
982
|
-
|
|
983
|
-
/* sequence */
|
|
984
|
-
{ size_t offset;
|
|
985
|
-
if (!ofBits)
|
|
986
|
-
offset = 0;
|
|
987
|
-
else {
|
|
988
|
-
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
|
989
|
-
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
|
990
|
-
assert(ofBits <= MaxOff);
|
|
991
|
-
if (MEM_32bits() && longOffsets) {
|
|
992
|
-
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
|
|
993
|
-
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
|
994
|
-
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
|
|
995
|
-
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
|
996
|
-
} else {
|
|
997
|
-
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
|
998
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
|
999
|
-
}
|
|
1000
|
-
}
|
|
1643
|
+
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1644
|
+
}
|
|
1645
|
+
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1001
1646
|
|
|
1002
|
-
|
|
1003
|
-
offset += (llBase==0);
|
|
1004
|
-
if (offset) {
|
|
1005
|
-
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
1006
|
-
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
|
1007
|
-
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
1008
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
1009
|
-
seqState->prevOffset[0] = offset = temp;
|
|
1010
|
-
} else {
|
|
1011
|
-
offset = seqState->prevOffset[0];
|
|
1012
|
-
}
|
|
1013
|
-
} else {
|
|
1014
|
-
seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
1015
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
1016
|
-
seqState->prevOffset[0] = offset;
|
|
1017
|
-
}
|
|
1018
|
-
seq.offset = offset;
|
|
1019
|
-
}
|
|
1647
|
+
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1020
1648
|
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
if (MEM_32bits())
|
|
1031
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
1032
|
-
|
|
1033
|
-
{ size_t const pos = seqState->pos + seq.litLength;
|
|
1034
|
-
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
|
1035
|
-
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
|
1036
|
-
* No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
|
|
1037
|
-
seqState->pos = pos + seq.matchLength;
|
|
1649
|
+
FORCE_INLINE_TEMPLATE size_t
|
|
1650
|
+
ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
|
|
1651
|
+
const BYTE* const prefixStart, const BYTE* const dictEnd)
|
|
1652
|
+
{
|
|
1653
|
+
prefetchPos += sequence.litLength;
|
|
1654
|
+
{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
|
|
1655
|
+
const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
|
1656
|
+
* No consequence though : memory address is only used for prefetching, not for dereferencing */
|
|
1657
|
+
PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
|
1038
1658
|
}
|
|
1039
|
-
|
|
1040
|
-
/* ANS state update */
|
|
1041
|
-
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
|
1042
|
-
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
|
1043
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
1044
|
-
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
|
1045
|
-
|
|
1046
|
-
return seq;
|
|
1659
|
+
return prefetchPos + sequence.matchLength;
|
|
1047
1660
|
}
|
|
1048
1661
|
|
|
1662
|
+
/* This decoding function employs prefetching
|
|
1663
|
+
* to reduce latency impact of cache misses.
|
|
1664
|
+
* It's generally employed when block contains a significant portion of long-distance matches
|
|
1665
|
+
* or when coupled with a "cold" dictionary */
|
|
1049
1666
|
FORCE_INLINE_TEMPLATE size_t
|
|
1050
1667
|
ZSTD_decompressSequencesLong_body(
|
|
1051
1668
|
ZSTD_DCtx* dctx,
|
|
1052
1669
|
void* dst, size_t maxDstSize,
|
|
1053
1670
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1054
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1671
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1672
|
+
const int frame)
|
|
1055
1673
|
{
|
|
1056
1674
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
1057
1675
|
const BYTE* const iend = ip + seqSize;
|
|
1058
|
-
BYTE* const ostart = (BYTE*
|
|
1059
|
-
BYTE* const oend = ostart + maxDstSize;
|
|
1676
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
1677
|
+
BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
|
|
1060
1678
|
BYTE* op = ostart;
|
|
1061
1679
|
const BYTE* litPtr = dctx->litPtr;
|
|
1062
|
-
const BYTE*
|
|
1680
|
+
const BYTE* litBufferEnd = dctx->litBufferEnd;
|
|
1063
1681
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
|
1064
1682
|
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
|
|
1065
1683
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
|
1684
|
+
(void)frame;
|
|
1066
1685
|
|
|
1067
1686
|
/* Regen sequences */
|
|
1068
1687
|
if (nbSeq) {
|
|
1069
|
-
#define STORED_SEQS
|
|
1688
|
+
#define STORED_SEQS 8
|
|
1070
1689
|
#define STORED_SEQS_MASK (STORED_SEQS-1)
|
|
1071
|
-
#define ADVANCED_SEQS
|
|
1690
|
+
#define ADVANCED_SEQS STORED_SEQS
|
|
1072
1691
|
seq_t sequences[STORED_SEQS];
|
|
1073
1692
|
int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
|
|
1074
1693
|
seqState_t seqState;
|
|
1075
1694
|
int seqNb;
|
|
1695
|
+
size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
|
|
1696
|
+
|
|
1076
1697
|
dctx->fseEntropy = 1;
|
|
1077
1698
|
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
1078
|
-
|
|
1079
|
-
seqState.pos = (size_t)(op-prefixStart);
|
|
1080
|
-
seqState.dictEnd = dictEnd;
|
|
1699
|
+
assert(dst != NULL);
|
|
1081
1700
|
assert(iend >= ip);
|
|
1082
1701
|
RETURN_ERROR_IF(
|
|
1083
1702
|
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
|
1084
|
-
corruption_detected);
|
|
1703
|
+
corruption_detected, "");
|
|
1085
1704
|
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
|
1086
1705
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
|
1087
1706
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
|
1088
1707
|
|
|
1089
1708
|
/* prepare in advance */
|
|
1090
1709
|
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
|
|
1091
|
-
|
|
1092
|
-
|
|
1710
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1711
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
|
1712
|
+
sequences[seqNb] = sequence;
|
|
1093
1713
|
}
|
|
1094
|
-
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
|
|
1095
|
-
|
|
1096
|
-
/*
|
|
1097
|
-
for (
|
|
1098
|
-
seq_t
|
|
1099
|
-
size_t
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1714
|
+
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
|
|
1715
|
+
|
|
1716
|
+
/* decompress without stomping litBuffer */
|
|
1717
|
+
for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
|
|
1718
|
+
seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
|
1719
|
+
size_t oneSeqSize;
|
|
1720
|
+
|
|
1721
|
+
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
|
|
1722
|
+
{
|
|
1723
|
+
/* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
|
|
1724
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
|
1725
|
+
if (leftoverLit)
|
|
1726
|
+
{
|
|
1727
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
|
1728
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
|
1729
|
+
sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
|
|
1730
|
+
op += leftoverLit;
|
|
1731
|
+
}
|
|
1732
|
+
litPtr = dctx->litExtraBuffer;
|
|
1733
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1734
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1735
|
+
oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1736
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1737
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1738
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1739
|
+
#endif
|
|
1740
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1741
|
+
|
|
1742
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
|
1743
|
+
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
|
1744
|
+
op += oneSeqSize;
|
|
1745
|
+
}
|
|
1746
|
+
else
|
|
1747
|
+
{
|
|
1748
|
+
/* lit buffer is either wholly contained in first or second split, or not split at all*/
|
|
1749
|
+
oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
|
|
1750
|
+
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
|
|
1751
|
+
ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1752
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1753
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1754
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1755
|
+
#endif
|
|
1756
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1757
|
+
|
|
1758
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
|
1759
|
+
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
|
1760
|
+
op += oneSeqSize;
|
|
1761
|
+
}
|
|
1104
1762
|
}
|
|
1105
|
-
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
|
|
1763
|
+
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
|
|
1106
1764
|
|
|
1107
1765
|
/* finish queue */
|
|
1108
1766
|
seqNb -= seqAdvance;
|
|
1109
1767
|
for ( ; seqNb<nbSeq ; seqNb++) {
|
|
1110
|
-
|
|
1111
|
-
if (
|
|
1112
|
-
|
|
1768
|
+
seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
|
|
1769
|
+
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
|
|
1770
|
+
{
|
|
1771
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
|
1772
|
+
if (leftoverLit)
|
|
1773
|
+
{
|
|
1774
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
|
1775
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
|
1776
|
+
sequence->litLength -= leftoverLit;
|
|
1777
|
+
op += leftoverLit;
|
|
1778
|
+
}
|
|
1779
|
+
litPtr = dctx->litExtraBuffer;
|
|
1780
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1781
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
|
1782
|
+
{
|
|
1783
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1784
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1785
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1786
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1787
|
+
#endif
|
|
1788
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1789
|
+
op += oneSeqSize;
|
|
1790
|
+
}
|
|
1791
|
+
}
|
|
1792
|
+
else
|
|
1793
|
+
{
|
|
1794
|
+
size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
|
|
1795
|
+
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
|
|
1796
|
+
ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
|
1797
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1798
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1799
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1800
|
+
#endif
|
|
1801
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1802
|
+
op += oneSeqSize;
|
|
1803
|
+
}
|
|
1113
1804
|
}
|
|
1114
1805
|
|
|
1115
1806
|
/* save reps for next block */
|
|
@@ -1117,10 +1808,23 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1117
1808
|
}
|
|
1118
1809
|
|
|
1119
1810
|
/* last literal segment */
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
op
|
|
1811
|
+
if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
|
|
1812
|
+
{
|
|
1813
|
+
size_t const lastLLSize = litBufferEnd - litPtr;
|
|
1814
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
|
|
1815
|
+
if (op != NULL) {
|
|
1816
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
|
1817
|
+
op += lastLLSize;
|
|
1818
|
+
}
|
|
1819
|
+
litPtr = dctx->litExtraBuffer;
|
|
1820
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
|
1821
|
+
}
|
|
1822
|
+
{ size_t const lastLLSize = litBufferEnd - litPtr;
|
|
1823
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1824
|
+
if (op != NULL) {
|
|
1825
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
|
1826
|
+
op += lastLLSize;
|
|
1827
|
+
}
|
|
1124
1828
|
}
|
|
1125
1829
|
|
|
1126
1830
|
return op-ostart;
|
|
@@ -1130,9 +1834,10 @@ static size_t
|
|
|
1130
1834
|
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
1131
1835
|
void* dst, size_t maxDstSize,
|
|
1132
1836
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1133
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1837
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1838
|
+
const int frame)
|
|
1134
1839
|
{
|
|
1135
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1840
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1136
1841
|
}
|
|
1137
1842
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1138
1843
|
|
|
@@ -1141,25 +1846,37 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
|
1141
1846
|
#if DYNAMIC_BMI2
|
|
1142
1847
|
|
|
1143
1848
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1144
|
-
static
|
|
1849
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
1145
1850
|
DONT_VECTORIZE
|
|
1146
1851
|
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
|
1147
1852
|
void* dst, size_t maxDstSize,
|
|
1148
1853
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1149
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1854
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1855
|
+
const int frame)
|
|
1150
1856
|
{
|
|
1151
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1857
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1858
|
+
}
|
|
1859
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
1860
|
+
DONT_VECTORIZE
|
|
1861
|
+
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
|
|
1862
|
+
void* dst, size_t maxDstSize,
|
|
1863
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1864
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1865
|
+
const int frame)
|
|
1866
|
+
{
|
|
1867
|
+
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1152
1868
|
}
|
|
1153
1869
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1154
1870
|
|
|
1155
1871
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1156
|
-
static
|
|
1872
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
1157
1873
|
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
|
|
1158
1874
|
void* dst, size_t maxDstSize,
|
|
1159
1875
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1160
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1876
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1877
|
+
const int frame)
|
|
1161
1878
|
{
|
|
1162
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1879
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1163
1880
|
}
|
|
1164
1881
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1165
1882
|
|
|
@@ -1169,21 +1886,37 @@ typedef size_t (*ZSTD_decompressSequences_t)(
|
|
|
1169
1886
|
ZSTD_DCtx* dctx,
|
|
1170
1887
|
void* dst, size_t maxDstSize,
|
|
1171
1888
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1172
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1889
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1890
|
+
const int frame);
|
|
1173
1891
|
|
|
1174
1892
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1175
1893
|
static size_t
|
|
1176
1894
|
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|
1177
1895
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1178
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1896
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1897
|
+
const int frame)
|
|
1179
1898
|
{
|
|
1180
1899
|
DEBUGLOG(5, "ZSTD_decompressSequences");
|
|
1181
1900
|
#if DYNAMIC_BMI2
|
|
1182
|
-
if (dctx
|
|
1183
|
-
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1901
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
|
1902
|
+
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1184
1903
|
}
|
|
1185
1904
|
#endif
|
|
1186
|
-
|
|
1905
|
+
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1906
|
+
}
|
|
1907
|
+
static size_t
|
|
1908
|
+
ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|
1909
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1910
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1911
|
+
const int frame)
|
|
1912
|
+
{
|
|
1913
|
+
DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
|
|
1914
|
+
#if DYNAMIC_BMI2
|
|
1915
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
|
1916
|
+
return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1917
|
+
}
|
|
1918
|
+
#endif
|
|
1919
|
+
return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1187
1920
|
}
|
|
1188
1921
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1189
1922
|
|
|
@@ -1198,15 +1931,16 @@ static size_t
|
|
|
1198
1931
|
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|
1199
1932
|
void* dst, size_t maxDstSize,
|
|
1200
1933
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1201
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1934
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1935
|
+
const int frame)
|
|
1202
1936
|
{
|
|
1203
1937
|
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
|
|
1204
1938
|
#if DYNAMIC_BMI2
|
|
1205
|
-
if (dctx
|
|
1206
|
-
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1939
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
|
1940
|
+
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1207
1941
|
}
|
|
1208
1942
|
#endif
|
|
1209
|
-
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1943
|
+
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1210
1944
|
}
|
|
1211
1945
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1212
1946
|
|
|
@@ -1240,11 +1974,10 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
|
|
|
1240
1974
|
}
|
|
1241
1975
|
#endif
|
|
1242
1976
|
|
|
1243
|
-
|
|
1244
1977
|
size_t
|
|
1245
1978
|
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1246
1979
|
void* dst, size_t dstCapacity,
|
|
1247
|
-
const void* src, size_t srcSize, const int frame)
|
|
1980
|
+
const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
|
|
1248
1981
|
{ /* blockType == blockCompressed */
|
|
1249
1982
|
const BYTE* ip = (const BYTE*)src;
|
|
1250
1983
|
/* isLongOffset must be true if there are long offsets.
|
|
@@ -1256,10 +1989,10 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1256
1989
|
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
|
|
1257
1990
|
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
|
1258
1991
|
|
|
1259
|
-
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);
|
|
1992
|
+
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
|
|
1260
1993
|
|
|
1261
1994
|
/* Decode literals section */
|
|
1262
|
-
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
|
1995
|
+
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
|
|
1263
1996
|
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
|
|
1264
1997
|
if (ZSTD_isError(litCSize)) return litCSize;
|
|
1265
1998
|
ip += litCSize;
|
|
@@ -1282,6 +2015,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1282
2015
|
ip += seqHSize;
|
|
1283
2016
|
srcSize -= seqHSize;
|
|
1284
2017
|
|
|
2018
|
+
RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
|
2019
|
+
|
|
1285
2020
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
|
1286
2021
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
|
1287
2022
|
if ( !usePrefetchDecoder
|
|
@@ -1300,24 +2035,38 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1300
2035
|
if (usePrefetchDecoder)
|
|
1301
2036
|
#endif
|
|
1302
2037
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1303
|
-
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
|
2038
|
+
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
|
1304
2039
|
#endif
|
|
1305
2040
|
|
|
1306
2041
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1307
2042
|
/* else */
|
|
1308
|
-
|
|
2043
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
|
2044
|
+
return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
|
2045
|
+
else
|
|
2046
|
+
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
|
1309
2047
|
#endif
|
|
1310
2048
|
}
|
|
1311
2049
|
}
|
|
1312
2050
|
|
|
1313
2051
|
|
|
2052
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
|
|
2053
|
+
{
|
|
2054
|
+
if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
|
|
2055
|
+
dctx->dictEnd = dctx->previousDstEnd;
|
|
2056
|
+
dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
|
|
2057
|
+
dctx->prefixStart = dst;
|
|
2058
|
+
dctx->previousDstEnd = dst;
|
|
2059
|
+
}
|
|
2060
|
+
}
|
|
2061
|
+
|
|
2062
|
+
|
|
1314
2063
|
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
|
|
1315
2064
|
void* dst, size_t dstCapacity,
|
|
1316
2065
|
const void* src, size_t srcSize)
|
|
1317
2066
|
{
|
|
1318
2067
|
size_t dSize;
|
|
1319
|
-
ZSTD_checkContinuity(dctx, dst);
|
|
1320
|
-
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
|
|
2068
|
+
ZSTD_checkContinuity(dctx, dst, dstCapacity);
|
|
2069
|
+
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
|
|
1321
2070
|
dctx->previousDstEnd = (char*)dst + dSize;
|
|
1322
2071
|
return dSize;
|
|
1323
2072
|
}
|