zstd-ruby 1.4.4.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
- data/ext/zstdruby/libzstd/common/compiler.h +219 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
- data/ext/zstdruby/libzstd/common/error_private.c +11 -2
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +47 -116
- data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
- data/ext/zstdruby/libzstd/common/huf.h +112 -197
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +11 -5
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +78 -22
- data/ext/zstdruby/libzstd/common/threading.h +9 -13
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
- data/ext/zstdruby/libzstd/zstd.h +1277 -306
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +24 -39
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -289
- data/ext/zstdruby/libzstd/README.md +0 -159
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
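The added extension sources (streaming_compress.c, streaming_decompress.c, skippable_frame.c) appear, judging by their names, to build on libzstd's streaming interface rather than the one-shot simple API. As a point of reference only, a minimal sketch of that underlying C interface is shown below (plain libzstd calls; the gem's Ruby-level classes are not part of this diff and are not shown):

/* Minimal libzstd streaming-compression sketch. A real caller would loop on
 * ZSTD_compressStream2() until it returns 0; one call suffices for this tiny input. */
#include <stdio.h>
#include <string.h>
#include <zstd.h>

int main(void)
{
    const char* input = "hello hello hello hello";
    char compressed[256];
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);

    ZSTD_inBuffer  in  = { input, strlen(input), 0 };
    ZSTD_outBuffer out = { compressed, sizeof(compressed), 0 };
    /* ZSTD_e_end flushes remaining data and writes the frame epilogue */
    size_t const remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
    if (ZSTD_isError(remaining)) { fprintf(stderr, "%s\n", ZSTD_getErrorName(remaining)); return 1; }

    printf("%zu bytes -> %zu bytes\n", in.size, out.pos);
    ZSTD_freeCCtx(cctx);
    return 0;
}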
data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c)
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,18 +14,18 @@
 /*-*******************************************************
 *  Dependencies
 *********************************************************/
-#include <string.h>      /* memcpy, memmove, memset */
-#include "compiler.h"    /* prefetch */
-#include "cpu.h"         /* bmi2 */
-#include "mem.h"         /* low level memory routines */
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/compiler.h"    /* prefetch */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
 #define FSE_STATIC_LINKING_ONLY
-#include "fse.h"
-#define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
-#include "zstd_internal.h"
+#include "../common/fse.h"
+#include "../common/huf.h"
+#include "../common/zstd_internal.h"
 #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
 #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
 #include "zstd_decompress_block.h"
+#include "../common/bits.h"  /* ZSTD_highbit32 */
 
 /*_*******************************************************
 *  Macros
@@ -44,7 +44,7 @@
 /*_*******************************************************
 *   Memory operations
 **********************************************************/
-static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
 
 
 /*-*************************************************************
@@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                           blockProperties_t* bpPtr)
 {
-    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
 
     {   U32 const cBlockHeader = MEM_readLE24(src);
         U32 const cSize = cBlockHeader >> 3;
@@ -64,23 +64,64 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
         bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
         bpPtr->origSize = cSize;   /* only useful for RLE */
         if (bpPtr->blockType == bt_rle) return 1;
-        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
+        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
         return cSize;
     }
 }
 
+/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
+static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
+    const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
+{
+    if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
+    {
+        /* room for litbuffer to fit without read faulting */
+        dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
+        dctx->litBufferEnd = dctx->litBuffer + litSize;
+        dctx->litBufferLocation = ZSTD_in_dst;
+    }
+    else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
+    {
+        /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
+        if (splitImmediately) {
+            /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
+            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
+            dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
+        }
+        else {
+            /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
+            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
+            dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
+        }
+        dctx->litBufferLocation = ZSTD_split;
+    }
+    else
+    {
+        /* fits entirely within litExtraBuffer, so no split is necessary */
+        dctx->litBuffer = dctx->litExtraBuffer;
+        dctx->litBufferEnd = dctx->litBuffer + litSize;
+        dctx->litBufferLocation = ZSTD_not_in_dst;
+    }
+}
 
 /* Hidden declaration for fullbench */
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                          const void* src, size_t srcSize);
+                          const void* src, size_t srcSize,
+                          void* dst, size_t dstCapacity, const streaming_operation streaming);
 /*! ZSTD_decodeLiteralsBlock() :
+ * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
+ * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
+ * block will be output.  Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
+ * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
+ *
  * @return : nb of bytes read from src (< srcSize )
  *  note : symbol not declared but exposed for fullbench */
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+                          const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
+                          void* dst, size_t dstCapacity, const streaming_operation streaming)
 {
     DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
-    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
+    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
 
     {   const BYTE* const istart = (const BYTE*) src;
         symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
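The new ZSTD_allocateLiteralsBuffer above chooses between three placements for decoded literals: inside dst past the current block, split between the end of dst and litExtraBuffer, or entirely inside litExtraBuffer. The toy, self-contained decision function below illustrates that ordering; the constants are stand-ins chosen to make the branches easy to trigger, not libzstd's real values:

/* Standalone sketch of the three-way placement decision (not libzstd code). */
#include <stdio.h>
#include <stddef.h>

#define BLOCKSIZE_MAX   (1u << 17)   /* stand-in for ZSTD_BLOCKSIZE_MAX */
#define OVERLENGTH      32u          /* stand-in for WILDCOPY_OVERLENGTH */
#define LIT_EXTRA_SIZE  (1u << 16)   /* stand-in for ZSTD_LITBUFFEREXTRASIZE */

typedef enum { lit_in_dst, lit_split, lit_not_in_dst } lit_location;

/* Mirrors the decision order described above: prefer the spare region past the
 * block inside dst, then a dst/extra-buffer split, otherwise the extra buffer alone. */
static lit_location choose_location(size_t dstCapacity, size_t litSize, int streaming)
{
    if (!streaming && dstCapacity > BLOCKSIZE_MAX + OVERLENGTH + litSize + OVERLENGTH)
        return lit_in_dst;
    if (litSize > LIT_EXTRA_SIZE)
        return lit_split;
    return lit_not_in_dst;
}

int main(void)
{
    printf("%d\n", choose_location(4u << 20, 100000, 0)); /* huge dst             -> lit_in_dst     */
    printf("%d\n", choose_location(150000, 100000, 1));   /* streaming, many lits -> lit_split      */
    printf("%d\n", choose_location(150000, 1000, 1));     /* few literals         -> lit_not_in_dst */
    return 0;
}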
@@ -89,16 +130,20 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         {
         case set_repeat:
             DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
-            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
-            /* fall-through */
+            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
+            ZSTD_FALLTHROUGH;
 
         case set_compressed:
-            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
+            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
             {   size_t lhSize, litSize, litCSize;
                 U32 singleStream=0;
                 U32 const lhlCode = (istart[0] >> 2) & 3;
                 U32 const lhc = MEM_readLE32(istart);
                 size_t hufSuccess;
+                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
+                int const flags = 0
+                    | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
+                    | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
                 switch(lhlCode)
                 {
                 case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
@@ -121,8 +166,15 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                     break;
                 }
-                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
-                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
+                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                if (!singleStream)
+                    RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
+                        "Not enough literals (%zu) for the 4-streams mode (min %u)",
+                        litSize, MIN_LITERALS_FOR_4_STREAMS);
+                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
+                RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
+                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
 
                 /* prefetch huffman table if cold */
                 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
@@ -131,13 +183,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 
                 if (litEncType==set_repeat) {
                     if (singleStream) {
-                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
+                        hufSuccess = HUF_decompress1X_usingDTable(
                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
-                            dctx->HUFptr, dctx->bmi2);
+                            dctx->HUFptr, flags);
                     } else {
-                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
+                        assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
+                        hufSuccess = HUF_decompress4X_usingDTable(
                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
-                            dctx->HUFptr, dctx->bmi2);
+                            dctx->HUFptr, flags);
                     }
                 } else {
                     if (singleStream) {
@@ -145,34 +198,41 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                         hufSuccess = HUF_decompress1X_DCtx_wksp(
                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
                             istart+lhSize, litCSize, dctx->workspace,
-                            sizeof(dctx->workspace));
+                            sizeof(dctx->workspace), flags);
 #else
-                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
+                        hufSuccess = HUF_decompress1X1_DCtx_wksp(
                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
                             istart+lhSize, litCSize, dctx->workspace,
-                            sizeof(dctx->workspace), dctx->bmi2);
+                            sizeof(dctx->workspace), flags);
 #endif
                     } else {
-                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
+                        hufSuccess = HUF_decompress4X_hufOnly_wksp(
                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
                             istart+lhSize, litCSize, dctx->workspace,
-                            sizeof(dctx->workspace), dctx->bmi2);
+                            sizeof(dctx->workspace), flags);
                     }
                 }
+                if (dctx->litBufferLocation == ZSTD_split)
+                {
+                    ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
+                    ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
+                    dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
+                    dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
+                }
 
-                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
+                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
 
                 dctx->litPtr = dctx->litBuffer;
                 dctx->litSize = litSize;
                 dctx->litEntropy = 1;
                 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
-                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                 return litCSize + lhSize;
             }
 
         case set_basic:
             {   size_t litSize, lhSize;
                 U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                 switch(lhlCode)
                 {
                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
@@ -185,27 +245,41 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     break;
                 case 3:
                     lhSize = 3;
+                    RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
                     litSize = MEM_readLE24(istart) >> 4;
                     break;
                 }
 
+                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
+                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
-                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
-                    memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
+                    if (dctx->litBufferLocation == ZSTD_split)
+                    {
+                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
+                        ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
+                    }
+                    else
+                    {
+                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
+                    }
                     dctx->litPtr = dctx->litBuffer;
                     dctx->litSize = litSize;
-                    memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                     return lhSize+litSize;
                 }
                 /* direct reference into compressed stream */
                 dctx->litPtr = istart+lhSize;
                 dctx->litSize = litSize;
+                dctx->litBufferEnd = dctx->litPtr + litSize;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
                 return lhSize+litSize;
             }
 
         case set_rle:
             {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
                 size_t litSize, lhSize;
+                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                 switch(lhlCode)
                 {
                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
@@ -214,16 +288,28 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     break;
                 case 1:
                     lhSize = 2;
+                    RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
                     litSize = MEM_readLE16(istart) >> 4;
                     break;
                 case 3:
                     lhSize = 3;
+                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
                     litSize = MEM_readLE24(istart) >> 4;
-                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                     break;
                 }
-                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
-                memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
+                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
+                if (dctx->litBufferLocation == ZSTD_split)
+                {
+                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
+                    ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
+                }
+                else
+                {
+                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
+                }
                 dctx->litPtr = dctx->litBuffer;
                 dctx->litSize = litSize;
                 return lhSize+1;
@@ -236,7 +322,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 
 /* Default FSE distribution tables.
  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
- * https://github.com/facebook/zstd/blob/
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
  * They were generated programmatically with following method :
  * - start from default distributions, present in /lib/common/zstd_internal.h
  * - generate tables normally, using ZSTD_buildFSETable()
@@ -343,7 +429,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
 };   /* ML_defaultDTable */
 
 
-static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
+static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
 {
     void* ptr = dt;
     ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
@@ -355,7 +441,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
     cell->nbBits = 0;
     cell->nextState = 0;
     assert(nbAddBits < 255);
-    cell->nbAdditionalBits = (BYTE)nbAddBits;
+    cell->nbAdditionalBits = nbAddBits;
     cell->baseValue = baseValue;
 }
 
@@ -364,23 +450,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
  * generate FSE decoding table for one symbol (ll, ml or off)
  * cannot fail if input is valid =>
  * all inputs are presumed validated at this stage */
-void
-ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+FORCE_INLINE_TEMPLATE
+void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
             const short* normalizedCounter, unsigned maxSymbolValue,
-            const U32* baseValue, const U32* nbAdditionalBits,
-            unsigned tableLog)
+            const U32* baseValue, const U8* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
 {
     ZSTD_seqSymbol* const tableDecode = dt+1;
-    U16 symbolNext[MaxSeq+1];
-
     U32 const maxSV1 = maxSymbolValue + 1;
     U32 const tableSize = 1 << tableLog;
-    U32 highThreshold = tableSize-1;
+
+    U16* symbolNext = (U16*)wksp;
+    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
+    U32 highThreshold = tableSize - 1;
+
 
     /* Sanity Checks */
     assert(maxSymbolValue <= MaxSeq);
     assert(tableLog <= MaxFSELog);
-
+    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
+    (void)wkspSize;
     /* Init, lay down lowprob symbols */
     {   ZSTD_seqSymbol_header DTableH;
         DTableH.tableLog = tableLog;
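The hunk above replaces the on-stack symbolNext array with pointers carved out of a caller-supplied workspace (symbolNext first, the byte "spread" area right after it). A toy version of that carving pattern, with illustrative sizes rather than libzstd's real ones, is sketched below:

/* Sketch of splitting one scratch buffer into two typed regions (not libzstd code). */
#include <stdint.h>
#include <stdio.h>

#define MAX_SEQ   52
#define SPREAD_SZ (512 + 8)   /* table positions plus 8 bytes of overwrite slack */

static void use_workspace(void* wksp, size_t wkspSize)
{
    uint16_t* symbolNext = (uint16_t*)wksp;                       /* first MAX_SEQ+1 entries */
    uint8_t*  spread     = (uint8_t*)(symbolNext + MAX_SEQ + 1);  /* bytes right after them */
    if (wkspSize < sizeof(uint16_t) * (MAX_SEQ + 1) + SPREAD_SZ) { printf("workspace too small\n"); return; }
    symbolNext[0] = 1;
    spread[0] = 2;
    printf("%u %u\n", symbolNext[0], spread[0]);
}

int main(void)
{
    /* a uint64_t array keeps the workspace suitably aligned for uint16_t access */
    uint64_t wksp[((MAX_SEQ + 1) * 2 + SPREAD_SZ + 7) / 8];
    use_workspace(wksp, sizeof(wksp));
    return 0;
}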
@@ -396,34 +485,128 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
             assert(normalizedCounter[s]>=0);
             symbolNext[s] = (U16)normalizedCounter[s];
         }   }   }
-        memcpy(dt, &DTableH, sizeof(DTableH));
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
     }
 
     /* Spread symbols */
-    {   U32 const tableMask = tableSize-1;
+    assert(tableSize <= 512);
+    /* Specialized symbol spreading for the case when there are
+     * no low probability (-1 count) symbols. When compressing
+     * small blocks we avoid low probability symbols to hit this
+     * case, since header decoding speed matters more.
+     */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                assert(n>=0);
+                pos += (size_t)n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what empirically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].baseValue = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
         U32 const step = FSE_TABLESTEP(tableSize);
         U32 s, position = 0;
         for (s=0; s<maxSV1; s++) {
             int i;
-            for (i=0; i<normalizedCounter[s]; i++) {
+            int const n = normalizedCounter[s];
+            for (i=0; i<n; i++) {
                 tableDecode[position].baseValue = s;
                 position = (position + step) & tableMask;
-                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+                while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask;   /* lowprob area */
         }   }
         assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
     }
 
     /* Build Decoding table */
-    {   U32 u;
+    {
+        U32 u;
         for (u=0; u<tableSize; u++) {
             U32 const symbol = tableDecode[u].baseValue;
             U32 const nextState = symbolNext[symbol]++;
-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
             tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
             assert(nbAdditionalBits[symbol] < 255);
-            tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
+            tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
             tableDecode[u].baseValue = baseValue[symbol];
-    }   }
+        }
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U8* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+#if DYNAMIC_BMI2
+BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U8* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+#endif
+
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U8* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
+                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+        return;
+    }
+#endif
+    (void)bmi2;
+    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
 }
 
 
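The comments added in the hunk above describe a two-stage spread: first lay each symbol's occurrences down contiguously, then scatter those positions across the table with a fixed stride, so the inner loop has a constant trip count. The compact standalone illustration below uses toy counts and is not libzstd code, though the stride formula matches FSE_TABLESTEP:

/* Two-stage symbol spreading in miniature. */
#include <stdio.h>

#define TABLE_LOG  5
#define TABLE_SIZE (1u << TABLE_LOG)
/* same stride formula FSE uses: (size/2) + (size/8) + 3 */
#define TABLE_STEP ((TABLE_SIZE >> 1) + (TABLE_SIZE >> 3) + 3)

int main(void)
{
    /* toy normalized counts for 4 symbols summing to TABLE_SIZE */
    const int counts[4] = { 16, 8, 6, 2 };
    unsigned char spread[TABLE_SIZE];
    unsigned char table[TABLE_SIZE];

    /* stage 1: contiguous run of each symbol */
    unsigned pos = 0;
    for (unsigned s = 0; s < 4; ++s)
        for (int i = 0; i < counts[s]; ++i)
            spread[pos++] = (unsigned char)s;

    /* stage 2: scatter with a constant stride, masking to stay inside the table */
    unsigned position = 0;
    for (unsigned u = 0; u < TABLE_SIZE; ++u) {
        table[position] = spread[u];
        position = (position + TABLE_STEP) & (TABLE_SIZE - 1);
    }

    for (unsigned u = 0; u < TABLE_SIZE; ++u) printf("%u ", (unsigned)table[u]);
    printf("\n");
    return 0;
}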
@@ -433,18 +616,19 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
                                  symbolEncodingType_e type, unsigned max, U32 maxLog,
                                  const void* src, size_t srcSize,
-                                 const U32* baseValue, const U32* nbAdditionalBits,
+                                 const U32* baseValue, const U8* nbAdditionalBits,
                                  const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
-                                 int ddictIsCold, int nbSeq)
+                                 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
+                                 int bmi2)
 {
     switch(type)
     {
     case set_rle :
-        RETURN_ERROR_IF(!srcSize, srcSize_wrong);
-        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
+        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
+        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
         {   U32 const symbol = *(const BYTE*)src;
             U32 const baseline = baseValue[symbol];
-            U32 const nbBits = nbAdditionalBits[symbol];
+            U8 const nbBits = nbAdditionalBits[symbol];
             ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
         }
         *DTablePtr = DTableSpace;
@@ -453,7 +637,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
         *DTablePtr = defaultTable;
         return 0;
     case set_repeat:
-        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
+        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
         /* prefetch FSE table if used */
         if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
             const void* const pStart = *DTablePtr;
@@ -465,9 +649,9 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
     {   unsigned tableLog;
         S16 norm[MaxSeq+1];
         size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
-        RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
-        RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
-        ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
+        RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
+        RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
+        ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
         *DTablePtr = DTableSpace;
         return headerSize;
     }
@@ -480,35 +664,36 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                              const void* src, size_t srcSize)
 {
-    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const istart = (const BYTE*)src;
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip = istart;
     int nbSeq;
     DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
 
     /* check */
-    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
 
     /* SeqHead */
     nbSeq = *ip++;
     if (!nbSeq) {
         *nbSeqPtr=0;
-        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
+        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
         return 1;
     }
     if (nbSeq > 0x7F) {
         if (nbSeq == 0xFF) {
-            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
-            nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
+            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
+            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
+            ip+=2;
         } else {
-            RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
+            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
             nbSeq = ((nbSeq-0x80)<<8) + *ip++;
         }
     }
     *nbSeqPtr = nbSeq;
 
     /* FSE table descriptors */
-    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
+    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -520,8 +705,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       LL_base, LL_bits,
                                                       LL_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
-            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      ZSTD_DCtx_get_bmi2(dctx));
+            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += llhSize;
         }
 
@@ -530,8 +717,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       OF_base, OF_bits,
                                                       OF_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
-            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      ZSTD_DCtx_get_bmi2(dctx));
+            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += ofhSize;
         }
 
@@ -540,8 +729,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       ML_base, ML_bits,
                                                       ML_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
-            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      ZSTD_DCtx_get_bmi2(dctx));
+            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += mlhSize;
         }
     }
@@ -554,7 +745,6 @@ typedef struct {
     size_t litLength;
     size_t matchLength;
     size_t offset;
-    const BYTE* match;
 } seq_t;
 
 typedef struct {
@@ -568,9 +758,6 @@ typedef struct {
     ZSTD_fseState stateOffb;
     ZSTD_fseState stateML;
     size_t prevOffset[ZSTD_REP_NUM];
-    const BYTE* prefixStart;
-    const BYTE* dictEnd;
-    size_t pos;
 } seqState_t;
 
 /*! ZSTD_overlapCopy8() :
@@ -580,7 +767,7 @@ typedef struct {
  * Precondition: *ip <= *op
  * Postcondition: *op - *op >= 8
  */
-static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
     assert(*ip <= *op);
     if (offset < 8) {
         /* close range match, overlap */
@@ -613,7 +800,7 @@ static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
  *   - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
  *     The src buffer must be before the dst buffer.
  */
-static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
     ptrdiff_t const diff = op - ip;
     BYTE* const oend = op + length;
 
@@ -629,6 +816,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
         /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
         assert(length >= 8);
         ZSTD_overlapCopy8(&op, &ip, diff);
+        length -= 8;
         assert(op - ip >= 8);
         assert(op <= oend);
     }
@@ -643,12 +831,35 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
         assert(oend > oend_w);
         ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
         ip += oend_w - op;
-        op = oend_w;
+        op += oend_w - op;
     }
     /* Handle the leftovers. */
     while (op < oend) *op++ = *ip++;
 }
 
+/* ZSTD_safecopyDstBeforeSrc():
+ * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
+ * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
+static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;
+
+    if (length < 8 || diff > -8) {
+        /* Handle short lengths, close overlaps, and dst not before src. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+
+    if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
+        ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
+        ip += oend - WILDCOPY_OVERLENGTH - op;
+        op += oend - WILDCOPY_OVERLENGTH - op;
+    }
+
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
 /* ZSTD_execSequenceEnd():
  * This version handles cases that are near the end of the output buffer. It requires
  * more careful checks to make sure there is no overflow. By separating out these hard
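The new ZSTD_safecopyDstBeforeSrc above relies on the fact that a plain forward copy is already safe when the destination starts before an overlapping source: every byte is read before the write cursor reaches it. The toy program below demonstrates that property (it is not libzstd code):

/* Forward copy with dst before an overlapping src. */
#include <stdio.h>
#include <string.h>

static void forward_copy(char* dst, const char* src, size_t n)
{
    for (size_t i = 0; i < n; ++i) dst[i] = src[i];   /* dst < src: each read stays ahead of the writes */
}

int main(void)
{
    char buf[16];
    memcpy(buf, "????abcdefgh", 12);
    forward_copy(buf, buf + 4, 8);        /* overlapping copy, dst 4 bytes before src */
    buf[12] = '\0';
    printf("%s\n", buf);                  /* prints "abcdefghefgh" */
    return 0;
}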
@@ -659,21 +870,21 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
  */
 FORCE_NOINLINE
 size_t ZSTD_execSequenceEnd(BYTE* op,
-                            BYTE* const oend, seq_t sequence,
-                            const BYTE** litPtr, const BYTE* const litLimit,
-                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+    BYTE* const oend, seq_t sequence,
+    const BYTE** litPtr, const BYTE* const litLimit,
+    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
     BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
 
-    /* bounds checks */
-    assert(oLitEnd < oMatchEnd);
-    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
+    /* bounds checks : careful of address space overflow in 32-bit mode */
+    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+    assert(op < op + sequenceLength);
+    assert(oLitEnd < op + sequenceLength);
 
     /* copy literals */
     ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
@@ -683,42 +894,199 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
     /* copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
-        match = dictEnd - (prefixStart-match);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+        match = dictEnd - (prefixStart - match);
         if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
         }
         /* span extDict & currentPrefixSegment */
         {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
-            op = oLitEnd + length1;
-            sequence.matchLength -= length1;
-            match = prefixStart;
-    }   }
+            ZSTD_memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;
+        }
+    }
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
+    return sequenceLength;
+}
+
+/* ZSTD_execSequenceEndSplitLitBuffer():
+ * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
+ */
+FORCE_NOINLINE
+size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
+    BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
+    const BYTE** litPtr, const BYTE* const litLimit,
+    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+
+    /* bounds checks : careful of address space overflow in 32-bit mode */
+    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+    assert(op < op + sequenceLength);
+    assert(oLitEnd < op + sequenceLength);
+
+    /* copy literals */
+    RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
+    ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
+    op = oLitEnd;
+    *litPtr = iLitEnd;
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix */
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+        match = dictEnd - (prefixStart - match);
+        if (match + sequence.matchLength <= dictEnd) {
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            ZSTD_memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;
+        }
+    }
     ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
     return sequenceLength;
 }
 
 HINT_INLINE
 size_t ZSTD_execSequence(BYTE* op,
-                         BYTE* const oend, seq_t sequence,
-                         const BYTE** litPtr, const BYTE* const litLimit,
-                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+    BYTE* const oend, seq_t sequence,
+    const BYTE** litPtr, const BYTE* const litLimit,
+    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
 
-    /* Errors and uncommon cases handled here. */
-    assert(oLitEnd < oMatchEnd);
-    if (UNLIKELY(iLitEnd > litLimit || oMatchEnd > oend_w))
+    assert(op != NULL /* Precondition */);
+    assert(oend_w < oend /* No underflow */);
+
+#if defined(__aarch64__)
+    /* prefetch sequence starting from match that will be used for copy later */
+    PREFETCH_L1(match);
+#endif
+    /* Handle edge cases in a slow path:
+     *   - Read beyond end of literals
+     *   - Match end is within WILDCOPY_OVERLIMIT of oend
+     *   - 32-bit mode and the match length overflows
+     */
+    if (UNLIKELY(
+            iLitEnd > litLimit ||
+            oMatchEnd > oend_w ||
+            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
         return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
 
     /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(op <= oLitEnd /* No overflow */);
+    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+    assert(oMatchEnd <= oend /* No underflow */);
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (UNLIKELY(sequence.litLength > 16)) {
+        ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
+    }
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* Copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix -> go into extDict */
+        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
+        match = dictEnd + (match - prefixStart);
+        if (match + sequence.matchLength <= dictEnd) {
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            ZSTD_memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;
+        }
+    }
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);
+
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
+    }
+    assert(sequence.offset < WILDCOPY_VECLEN);
+
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);
+
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
+    }
+    return sequenceLength;
+}
+
+HINT_INLINE
+size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
+    BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
+    const BYTE** litPtr, const BYTE* const litLimit,
+    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    assert(op != NULL /* Precondition */);
+    assert(oend_w < oend /* No underflow */);
+    /* Handle edge cases in a slow path:
+     *   - Read beyond end of literals
+     *   - Match end is within WILDCOPY_OVERLIMIT of oend
+     *   - 32-bit mode and the match length overflows
+     */
+    if (UNLIKELY(
+            iLitEnd > litLimit ||
+            oMatchEnd > oend_w ||
+            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+        return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(op <= oLitEnd /* No overflow */);
+    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+    assert(oMatchEnd <= oend /* No underflow */);
     assert(iLitEnd <= litLimit /* Literal length is in bounds */);
     assert(oLitEnd <= oend_w /* Can wildcopy literals */);
     assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
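The "wildcopy" calls used throughout ZSTD_execSequence copy in fixed-size chunks and are allowed to overshoot the requested length, because the caller guarantees WILDCOPY_OVERLENGTH spare bytes past the destination end. The toy version below captures that idea; the chunk size and margin are illustrative and this is not libzstd's implementation:

/* Chunked copy that may overshoot into a caller-guaranteed margin. */
#include <string.h>
#include <stdio.h>

#define CHUNK 16
/* dst must have at least ((len + CHUNK - 1) / CHUNK) * CHUNK writable bytes */
static void wildcopy(char* dst, const char* src, size_t len)
{
    size_t copied = 0;
    do {
        memcpy(dst + copied, src + copied, CHUNK);  /* may write past len, never past the margin */
        copied += CHUNK;
    } while (copied < len);
}

int main(void)
{
    char src[64] = "the quick brown fox jumps over the lazy dog.....";
    char dst[64] = {0};                    /* the extra room acts as the overlength margin */
    wildcopy(dst, src, 20);                /* copies 32 bytes to cover the requested 20 */
    printf("%.20s\n", dst);
    return 0;
}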
@@ -729,7 +1097,7 @@ size_t ZSTD_execSequence(BYTE* op,
      */
     assert(WILDCOPY_OVERLENGTH >= 16);
     ZSTD_copy16(op, (*litPtr));
-    if (sequence.litLength > 16) {
+    if (UNLIKELY(sequence.litLength > 16)) {
         ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
     }
     op = oLitEnd;
@@ -738,15 +1106,15 @@ size_t ZSTD_execSequence(BYTE* op,
     /* Copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix -> go into extDict */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
+        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
         match = dictEnd + (match - prefixStart);
         if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
         }
         /* span extDict & currentPrefixSegment */
         {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
+            ZSTD_memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = prefixStart;
@@ -760,7 +1128,7 @@ size_t ZSTD_execSequence(BYTE* op,
     /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
      * without overlap checking.
      */
-    if (sequence.offset >= WILDCOPY_VECLEN) {
+    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
         /* We bet on a full wildcopy for matches, since we expect matches to be
          * longer than literals (in general). In silesia, ~10% of matches are longer
          * than 16 bytes.
@@ -781,6 +1149,7 @@ size_t ZSTD_execSequence(BYTE* op,
     return sequenceLength;
 }
 
+
 static void
 ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
 {
@@ -794,16 +1163,14 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS
 }
 
 FORCE_INLINE_TEMPLATE void
-
+ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
 {
-    ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
-    U32 const nbBits = DInfo.nbBits;
     size_t const lowBits = BIT_readBits(bitD, nbBits);
-    DStatePtr->state =
+    DStatePtr->state = nextState + lowBits;
 }
 
 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
- * offset bits. But we can only read at most
+ * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
  * bits before reloading. This value is the maximum number of bytes we read
  * after reloading when we are decoding long offsets.
  */
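The hunk above changes the per-sequence state update so the caller hands ZSTD_updateFseStateWithDInfo() the nextState and nbBits fields it has already loaded, instead of having the update re-read the table row. Below is a minimal standalone sketch of that pattern with a toy bit reader; every name and the toy table are hypothetical and only mirror the shape of the zstd code, they are not the library's API.

```c
#include <stdint.h>
#include <stdio.h>

/* Toy LSB-first bit reader: just enough to illustrate the pattern. */
typedef struct { uint64_t bits; unsigned nbBits; } ToyBitReader;

static unsigned toy_read_bits(ToyBitReader* br, unsigned n)
{
    unsigned const v = (unsigned)(br->bits & ((1u << n) - 1));
    br->bits >>= n;
    br->nbBits -= n;
    return v;
}

/* One decode-table row, mirroring the fields the diff reads from the
 * symbol table: a base state to jump to, and how many fresh bits to add. */
typedef struct { uint16_t nextState; uint8_t nbBits; } ToySymbolRow;

typedef struct { uint32_t state; const ToySymbolRow* table; } ToyFseState;

/* Old style: the update re-reads the table row from the current state. */
static void toy_updateState(ToyFseState* s, ToyBitReader* br)
{
    ToySymbolRow const row = s->table[s->state];
    s->state = row.nextState + toy_read_bits(br, row.nbBits);
}

/* New style: the caller already loaded the row once while decoding the
 * sequence, so it simply hands over the two fields. */
static void toy_updateStateWithRow(ToyFseState* s, ToyBitReader* br,
                                   uint16_t nextState, uint8_t nbBits)
{
    s->state = nextState + toy_read_bits(br, nbBits);
}

int main(void)
{
    ToySymbolRow const table[4] = { {0,2}, {0,2}, {2,1}, {2,1} };
    ToyBitReader br1 = { 0x2D, 8 };
    ToyBitReader br2 = { 0x2D, 8 };
    ToyFseState a = { 1, table }, b = { 1, table };

    toy_updateState(&a, &br1);                     /* reads table[1] itself */
    {   ToySymbolRow const row = b.table[b.state]; /* row loaded by caller  */
        toy_updateStateWithRow(&b, &br2, row.nextState, row.nbBits);
    }
    printf("old=%u new=%u\n", a.state, b.state);   /* both end in the same state */
    return 0;
}
```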
@@ -814,102 +1181,191 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
 
 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
 
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
 FORCE_INLINE_TEMPLATE seq_t
 ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 {
     seq_t seq;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    /*
+     * ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be
+     * loaded in one operation and extracted its fields by simply shifting or
+     * bit-extracting on aarch64.
+     * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
+     * operations that cause performance drop. This can be avoided by using this
+     * ZSTD_memcpy hack.
+     */
+#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
+    ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
+    ZSTD_seqSymbol* const llDInfo = &llDInfoS;
+    ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
+    ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
+    ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
+    ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
+    ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
+#else
+    const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
+    const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
+    const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
+#endif
+    seq.matchLength = mlDInfo->baseValue;
+    seq.litLength = llDInfo->baseValue;
+    {   U32 const ofBase = ofDInfo->baseValue;
+        BYTE const llBits = llDInfo->nbAdditionalBits;
+        BYTE const mlBits = mlDInfo->nbAdditionalBits;
+        BYTE const ofBits = ofDInfo->nbAdditionalBits;
+        BYTE const totalBits = llBits+mlBits+ofBits;
+
+        U16 const llNext = llDInfo->nextState;
+        U16 const mlNext = mlDInfo->nextState;
+        U16 const ofNext = ofDInfo->nextState;
+        U32 const llnbBits = llDInfo->nbBits;
+        U32 const mlnbBits = mlDInfo->nbBits;
+        U32 const ofnbBits = ofDInfo->nbBits;
+
+        assert(llBits <= MaxLLBits);
+        assert(mlBits <= MaxMLBits);
+        assert(ofBits <= MaxOff);
+        /*
+         * As gcc has better branch and block analyzers, sometimes it is only
+         * valuable to mark likeliness for clang, it gives around 3-4% of
+         * performance.
+         */
 
-
-
-        if (
-
-
-
+        /* sequence */
+        {   size_t offset;
+            if (ofBits > 1) {
+                ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
+                ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
+                ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
+                ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
+                if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+                    /* Always read extra bits, this keeps the logic simple,
+                     * avoids branches, and avoids accidentally reading 0 bits.
+                     */
+                    U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
+                    offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+                    BIT_reloadDStream(&seqState->DStream);
+                    offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+                } else {
+                    offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+                    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+                }
+                seqState->prevOffset[2] = seqState->prevOffset[1];
                 seqState->prevOffset[1] = seqState->prevOffset[0];
-                seqState->prevOffset[0] = offset
-            } else {
-
-
-
-
-
-
+                seqState->prevOffset[0] = offset;
+            } else {
+                U32 const ll0 = (llDInfo->baseValue == 0);
+                if (LIKELY((ofBits == 0))) {
+                    offset = seqState->prevOffset[ll0];
+                    seqState->prevOffset[1] = seqState->prevOffset[!ll0];
+                    seqState->prevOffset[0] = offset;
+                } else {
+                    offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+                    {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                        if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                        seqState->prevOffset[1] = seqState->prevOffset[0];
+                        seqState->prevOffset[0] = offset = temp;
+            }   }   }
+            seq.offset = offset;
         }
-            seq.offset = offset;
-        }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if (mlBits > 0)
+            seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
+        if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+            BIT_reloadDStream(&seqState->DStream);
+        if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+            BIT_reloadDStream(&seqState->DStream);
+        /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+        ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+
+        if (llBits > 0)
+            seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
+        if (MEM_32bits())
+            BIT_reloadDStream(&seqState->DStream);
+
+        DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+                    (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
+        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
+        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
+    }
 
     return seq;
 }
 
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+{
+    size_t const windowSize = dctx->fParams.windowSize;
+    /* No dictionary used. */
+    if (dctx->dictContentEndForFuzzing == NULL) return 0;
+    /* Dictionary is our prefix. */
+    if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
+    /* Dictionary is not our ext-dict. */
+    if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
+    /* Dictionary is not within our window size. */
+    if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
+    /* Dictionary is active. */
+    return 1;
+}
+
+MEM_STATIC void ZSTD_assertValidSequence(
+        ZSTD_DCtx const* dctx,
+        BYTE const* op, BYTE const* oend,
+        seq_t const seq,
+        BYTE const* prefixStart, BYTE const* virtualStart)
+{
+#if DEBUGLEVEL >= 1
+    size_t const windowSize = dctx->fParams.windowSize;
+    size_t const sequenceSize = seq.litLength + seq.matchLength;
+    BYTE const* const oLitEnd = op + seq.litLength;
+    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+    assert(op <= oend);
+    assert((size_t)(oend - op) >= sequenceSize);
+    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
+    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+        /* Offset must be within the dictionary. */
+        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+        assert(seq.offset <= windowSize + dictSize);
+    } else {
+        /* Offset must be within our window. */
+        assert(seq.offset <= windowSize);
+    }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+
+
 FORCE_INLINE_TEMPLATE size_t
 DONT_VECTORIZE
-
+ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
                                void* dst, size_t maxDstSize,
                          const void* seqStart, size_t seqSize, int nbSeq,
-                         const ZSTD_longOffset_e isLongOffset
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE*
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
-    const BYTE*
+    const BYTE* litBufferEnd = dctx->litBufferEnd;
     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
     const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
-    DEBUGLOG(5, "
+    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
+    (void)frame;
 
     /* Regen sequences */
     if (nbSeq) {
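In the new ZSTD_decodeSequence() above, when the offset symbol carries at most one extra bit the offset is resolved from the three-entry repeat-offset history, and the literal-length-is-zero flag shifts which entry is meant. A small self-contained sketch of just that bookkeeping, detached from the bitstream; the helper name and the hard-coded starting history are illustrative only and not part of zstd's API.

```c
#include <stddef.h>
#include <stdio.h>

/* Repeat-offset resolution in the spirit of the decoder's short-offset branch:
 * 'code' is the small value obtained from the offset symbol (0..3 here),
 * 'll0' is 1 when the sequence has no literals, which shifts the meaning of
 * the repeat codes by one. */
static size_t resolve_repeat_offset(size_t prevOffset[3], size_t code, int ll0)
{
    size_t offset;
    if (code == 0) {                     /* plain repeat: pick slot 0 or 1 */
        offset = prevOffset[ll0];
        prevOffset[1] = prevOffset[!ll0];
        prevOffset[0] = offset;
    } else {
        size_t const idx = code + ll0;   /* shifted repeat code */
        size_t tmp = (idx == 3) ? prevOffset[0] - 1 : prevOffset[idx];
        tmp += !tmp;                     /* 0 is invalid: force it to 1 */
        if (idx != 1) prevOffset[2] = prevOffset[1];
        prevOffset[1] = prevOffset[0];
        prevOffset[0] = offset = tmp;
    }
    return offset;
}

int main(void)
{
    size_t reps[3] = { 8, 4, 1 };        /* arbitrary starting history */
    printf("%zu\n", resolve_repeat_offset(reps, 0, 0));  /* 8: repeat rep[0]            */
    printf("%zu\n", resolve_repeat_offset(reps, 1, 0));  /* 4: former rep[1] moves up   */
    printf("%zu\n", resolve_repeat_offset(reps, 0, 1));  /* 8: ll0 shifts to rep[1]     */
    return 0;
}
```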
@@ -918,38 +1374,279 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         RETURN_ERROR_IF(
             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
-            corruption_detected);
+            corruption_detected, "");
         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
 
         ZSTD_STATIC_ASSERT(
                 BIT_DStream_unfinished < BIT_DStream_completed &&
                 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
                 BIT_DStream_completed < BIT_DStream_overflow);
 
-
-
-
-
+        /* decompress without overrunning litPtr begins */
+        {
+            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            /* Align the decompression loop to 32 + 16 bytes.
+             *
+             * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+             * speed swings based on the alignment of the decompression loop. This
+             * performance swing is caused by parts of the decompression loop falling
+             * out of the DSB. The entire decompression loop should fit in the DSB,
+             * when it can't we get much worse performance. You can measure if you've
+             * hit the good case or the bad case with this perf command for some
+             * compressed file test.zst:
+             *
+             *     perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+             *               -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+             *
+             * If you see most cycles served out of the MITE you've hit the bad case.
+             * If you see most cycles served out of the DSB you've hit the good case.
+             * If it is pretty even then you may be in an okay case.
+             *
+             * This issue has been reproduced on the following CPUs:
+             * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+             *             Use Instruments->Counters to get DSB/MITE cycles.
+             *             I never got performance swings, but I was able to
+             *             go from the good case of mostly DSB to half of the
+             *             cycles served from MITE.
+             * - Coffeelake: Intel i9-9900k
+             * - Coffeelake: Intel i7-9700k
+             *
+             * I haven't been able to reproduce the instability or DSB misses on any
+             * of the following CPUS:
+             * - Haswell
+             * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+             * - Skylake
+             *
+             * Alignment is done for each of the three major decompression loops:
+             *   - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
+             *   - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
+             *   - ZSTD_decompressSequences_body
+             * Alignment choices are made to minimize large swings on bad cases and influence on performance
+             * from changes external to this code, rather than to overoptimize on the current commit.
+             *
+             * If you are seeing performance stability this script can help test.
+             * It tests on 4 commits in zstd where I saw performance change.
+             *
+             * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+             */
+#if defined(__GNUC__) && defined(__x86_64__)
+            __asm__(".p2align 6");
+#  if __GNUC__ >= 7
+            /* good for gcc-7, gcc-9, and gcc-11 */
+            __asm__("nop");
+            __asm__(".p2align 5");
+            __asm__("nop");
+            __asm__(".p2align 4");
+#    if __GNUC__ == 8 || __GNUC__ == 10
+            /* good for gcc-8 and gcc-10 */
+            __asm__("nop");
+            __asm__(".p2align 3");
+#    endif
+#  endif
+#endif
+
+            /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
+            for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
+                size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                    return oneSeqSize;
                 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                 op += oneSeqSize;
-
+                if (UNLIKELY(!--nbSeq))
+                    break;
+                BIT_reloadDStream(&(seqState.DStream));
+                sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            }
+
+            /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
+            if (nbSeq > 0) {
+                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+                if (leftoverLit)
+                {
+                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                    sequence.litLength -= leftoverLit;
+                    op += leftoverLit;
+                }
+                litPtr = dctx->litExtraBuffer;
+                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
+                {
+                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                    assert(!ZSTD_isError(oneSeqSize));
+                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+                    if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                        return oneSeqSize;
+                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                    op += oneSeqSize;
+                    if (--nbSeq)
+                        BIT_reloadDStream(&(seqState.DStream));
+                }
+            }
+        }
+
+        if (nbSeq > 0) /* there is remaining lit from extra buffer */
+        {
+
+#if defined(__GNUC__) && defined(__x86_64__)
+            __asm__(".p2align 6");
+            __asm__("nop");
+#  if __GNUC__ != 7
+            /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
+            __asm__(".p2align 4");
+            __asm__("nop");
+            __asm__(".p2align 3");
+#  elif __GNUC__ >= 11
+            __asm__(".p2align 3");
+#  else
+            __asm__(".p2align 5");
+            __asm__("nop");
+            __asm__(".p2align 3");
+#  endif
+#endif
+
+            for (; ; ) {
+                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                    return oneSeqSize;
+                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                op += oneSeqSize;
+                if (UNLIKELY(!--nbSeq))
+                    break;
+                BIT_reloadDStream(&(seqState.DStream));
+            }
+        }
+
+        /* check if reached exact end */
+        DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
+    {
+        size_t const lastLLSize = litBufferEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+        litPtr = dctx->litExtraBuffer;
+        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+        dctx->litBufferLocation = ZSTD_not_in_dst;
+    }
+    {   size_t const lastLLSize = litBufferEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
+    void* dst, size_t maxDstSize,
+    const void* seqStart, size_t seqSize, int nbSeq,
+    const ZSTD_longOffset_e isLongOffset,
+    const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
+    const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
+    DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
+
+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__GNUC__) && defined(__x86_64__)
+        __asm__(".p2align 6");
+        __asm__("nop");
+#  if __GNUC__ >= 7
+        __asm__(".p2align 5");
+        __asm__("nop");
+        __asm__(".p2align 3");
+#  else
+        __asm__(".p2align 4");
+        __asm__("nop");
+        __asm__(".p2align 3");
+#  endif
+#endif
+
+        for ( ; ; ) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                return oneSeqSize;
+            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            op += oneSeqSize;
+            if (UNLIKELY(!--nbSeq))
+                break;
+            BIT_reloadDStream(&(seqState.DStream));
+        }
 
         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        RETURN_ERROR_IF(nbSeq, corruption_detected);
-        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
         /* save reps for next block */
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
     }
 
     /* last literal segment */
     {   size_t const lastLLSize = litEnd - litPtr;
-        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
-
-
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
     }
 
     return op-ostart;
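The split-literal-buffer loop above serves literal copies from the buffer that overlaps dst until a sequence would read past it, then flushes the leftover literals and repoints litPtr at litExtraBuffer. A stripped-down sketch of just that hand-over follows; the buffer names, sizes and contents are made up for illustration and do not reflect zstd's internal layout.

```c
#include <stdio.h>
#include <string.h>

/* Two literal stores, mimicking a buffer that overlaps the output and a
 * separate extra buffer: requests are served from the first until one would
 * cross its end, then the remainder is flushed and reads continue from the
 * second. */
typedef struct {
    const char* ptr;        /* current read position                    */
    const char* end;        /* end of the buffer currently in use       */
    const char* extra;      /* second buffer                            */
    size_t      extraSize;
    int         inFirst;    /* still reading the split/in-dst portion?  */
} SplitLits;

static size_t copy_literals(SplitLits* s, char* dst, size_t len)
{
    size_t written = 0;
    if (s->inFirst && s->ptr + len > s->end) {
        /* flush whatever is left of the first buffer, then switch over */
        size_t const leftover = (size_t)(s->end - s->ptr);
        memcpy(dst, s->ptr, leftover);
        written += leftover;
        len     -= leftover;
        s->ptr = s->extra;
        s->end = s->extra + s->extraSize;
        s->inFirst = 0;
    }
    memcpy(dst + written, s->ptr, len);
    s->ptr += len;
    return written + len;
}

int main(void)
{
    const char first[] = "abcde";
    char out[32] = {0};
    SplitLits lits = { first, first + 5, "FGHIJ", 5, 1 };
    size_t n = 0;
    n += copy_literals(&lits, out + n, 3);   /* "abc" from the first buffer    */
    n += copy_literals(&lits, out + n, 4);   /* "de" + "FG": crosses the split */
    n += copy_literals(&lits, out + n, 2);   /* "HI" from the extra buffer     */
    printf("%s (%zu bytes)\n", out, n);      /* prints abcdeFGHI (9 bytes)     */
    return 0;
}
```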
@@ -959,157 +1656,180 @@ static size_t
 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
 {
-    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
-#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
-
-
-
-
-
+static size_t
+ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
+                                               void* dst, size_t maxDstSize,
+                                         const void* seqStart, size_t seqSize, int nbSeq,
+                                         const ZSTD_longOffset_e isLongOffset,
+                                         const int frame)
 {
-
-
-
-    U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
-    U32 const totalBits = llBits+mlBits+ofBits;
-    U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
-    U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
-    U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
-
-    /* sequence */
-    {   size_t offset;
-        if (!ofBits)
-            offset = 0;
-        else {
-            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
-            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
-            assert(ofBits <= MaxOff);
-            if (MEM_32bits() && longOffsets) {
-                U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
-                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
-                if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
-                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
-            } else {
-                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
-            }
-        }
+    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
-
-        offset += (llBase==0);
-        if (offset) {
-            size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-            temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
-            if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
-            seqState->prevOffset[1] = seqState->prevOffset[0];
-            seqState->prevOffset[0] = offset = temp;
-        } else {
-            offset = seqState->prevOffset[0];
-        }
-    } else {
-        seqState->prevOffset[2] = seqState->prevOffset[1];
-        seqState->prevOffset[1] = seqState->prevOffset[0];
-        seqState->prevOffset[0] = offset;
-    }
-    seq.offset = offset;
-    }
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
 
-
-
-
-
-
-
-
-
-
-    if (MEM_32bits())
-        BIT_reloadDStream(&seqState->DStream);
-
-    {   size_t const pos = seqState->pos + seq.litLength;
-        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
-        seq.match = matchBase + pos - seq.offset;   /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-                                                     * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
-        seqState->pos = pos + seq.matchLength;
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                   const BYTE* const prefixStart, const BYTE* const dictEnd)
+{
+    prefetchPos += sequence.litLength;
+    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+        const BYTE* const match = matchBase + prefetchPos - sequence.offset;   /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                                                * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
     }
-
-    /* ANS state update */
-    ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
-    ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
-    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
-    ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
-
-    return seq;
+    return prefetchPos + sequence.matchLength;
 }
 
+/* This decoding function employs prefetching
+ * to reduce latency impact of cache misses.
+ * It's generally employed when block contains a significant portion of long-distance matches
+ * or when coupled with a "cold" dictionary */
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_decompressSequencesLong_body(
                                ZSTD_DCtx* dctx,
                                void* dst, size_t maxDstSize,
                          const void* seqStart, size_t seqSize, int nbSeq,
-                         const ZSTD_longOffset_e isLongOffset
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE*
-    BYTE* const oend = ostart + maxDstSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
-    const BYTE*
+    const BYTE* litBufferEnd = dctx->litBufferEnd;
     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    (void)frame;
 
     /* Regen sequences */
     if (nbSeq) {
-#define STORED_SEQS
+#define STORED_SEQS 8
 #define STORED_SEQS_MASK (STORED_SEQS-1)
-#define ADVANCED_SEQS
+#define ADVANCED_SEQS STORED_SEQS
         seq_t sequences[STORED_SEQS];
         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
         seqState_t seqState;
         int seqNb;
+        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
         dctx->fseEntropy = 1;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-
-        seqState.pos = (size_t)(op-prefixStart);
-        seqState.dictEnd = dictEnd;
+        assert(dst != NULL);
         assert(iend >= ip);
         RETURN_ERROR_IF(
             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
-            corruption_detected);
+            corruption_detected, "");
         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
 
         /* prepare in advance */
         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
-
-
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb] = sequence;
         }
-        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
-
-        /*
-        for (
-            seq_t
-            size_t
-
-
-
-
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
+
+        /* decompress without stomping litBuffer */
+        for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
+            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            size_t oneSeqSize;
+
+            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
+            {
+                /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
+                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+                if (leftoverLit)
+                {
+                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                    sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
+                    op += leftoverLit;
+                }
+                litPtr = dctx->litExtraBuffer;
+                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
+                oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+
+                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+                sequences[seqNb & STORED_SEQS_MASK] = sequence;
+                op += oneSeqSize;
+            }
+            else
+            {
+                /* lit buffer is either wholly contained in first or second split, or not split at all*/
+                oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
+                    ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+
+                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+                sequences[seqNb & STORED_SEQS_MASK] = sequence;
+                op += oneSeqSize;
+            }
         }
-        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
 
         /* finish queue */
         seqNb -= seqAdvance;
         for ( ; seqNb<nbSeq ; seqNb++) {
-
-            if (
-
+            seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
+            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
+            {
+                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+                if (leftoverLit)
+                {
+                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                    sequence->litLength -= leftoverLit;
+                    op += leftoverLit;
+                }
+                litPtr = dctx->litExtraBuffer;
+                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
+                {
+                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                    assert(!ZSTD_isError(oneSeqSize));
+                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+                    if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+                    op += oneSeqSize;
+                }
+            }
+            else
+            {
+                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
+                    ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+                op += oneSeqSize;
+            }
         }
 
         /* save reps for next block */
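ZSTD_prefetchMatch() above only computes the address a future match will be read from and prefetches it; it never dereferences the pointer, so a bogus offset from corrupted input cannot fault at this stage. Below is a hedged sketch of the same idea using GCC/Clang's __builtin_prefetch; the names and the fixed window are illustrative and not the library's API.

```c
#include <stddef.h>
#include <string.h>
#include <stdio.h>

typedef struct { size_t litLength, matchLength, offset; } MiniSeq;

/* Advance the running output position by one sequence and prefetch the cache
 * lines the match copy will later touch. The address is only prefetched,
 * never loaded, so an out-of-range offset is harmless here. */
static size_t prefetch_match(size_t pos, MiniSeq seq,
                             const char* prefixStart, const char* dictEnd)
{
    pos += seq.litLength;
    {   const char* const base  = (seq.offset > pos) ? dictEnd : prefixStart;
        const char* const match = base + pos - seq.offset;
#if defined(__GNUC__) || defined(__clang__)
        __builtin_prefetch(match,      0 /* read */, 3 /* high locality */);
        __builtin_prefetch(match + 64, 0, 3);
#else
        (void)match;
#endif
    }
    return pos + seq.matchLength;
}

int main(void)
{
    char window[256];
    MiniSeq const seqs[2] = { {10, 20, 8}, {0, 30, 25} };
    size_t pos = 100;                       /* pretend 100 bytes already decoded */
    int i;
    memset(window, 'x', sizeof(window));
    for (i = 0; i < 2; i++)
        pos = prefetch_match(pos, seqs[i], window, window + sizeof(window));
    printf("decoded position after 2 sequences: %zu\n", pos);   /* prints 160 */
    return 0;
}
```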
@@ -1117,10 +1837,23 @@ ZSTD_decompressSequencesLong_body(
     }
 
     /* last literal segment */
-
-
-
-    op
+    if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */
+    {
+        size_t const lastLLSize = litBufferEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+        litPtr = dctx->litExtraBuffer;
+        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+    }
+    {   size_t const lastLLSize = litBufferEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
     }
 
     return op-ostart;
@@ -1130,9 +1863,10 @@ static size_t
 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
 {
-    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
 
@@ -1141,25 +1875,37 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
 #if DYNAMIC_BMI2
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
-static
+static BMI2_TARGET_ATTRIBUTE size_t
 DONT_VECTORIZE
 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
 {
-    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+static BMI2_TARGET_ATTRIBUTE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-static
+static BMI2_TARGET_ATTRIBUTE size_t
 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
 {
-    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
 
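The BMI2 wrappers above rely on building the same function body a second time under a BMI2 target attribute and choosing between the two at runtime. A minimal sketch of that dispatch shape follows, assuming a GCC/Clang x86-64 toolchain; the helper and all names are made up for illustration and do not correspond to zstd functions.

```c
#include <stdio.h>

/* Portable fallback: extract 'len' bits of 'v' starting at bit 'start'. */
static unsigned long long extract_bits_portable(unsigned long long v, unsigned start, unsigned len)
{
    return (v >> start) & ((len >= 64) ? ~0ULL : ((1ULL << len) - 1));
}

#if defined(__GNUC__) && defined(__x86_64__)
/* Same body, but the compiler is allowed to use BMI2 instructions for it. */
__attribute__((target("bmi2")))
static unsigned long long extract_bits_bmi2(unsigned long long v, unsigned start, unsigned len)
{
    return (v >> start) & ((len >= 64) ? ~0ULL : ((1ULL << len) - 1));
}
#endif

/* Runtime dispatcher: pick the BMI2 build only when the CPU supports it. */
static unsigned long long extract_bits(unsigned long long v, unsigned start, unsigned len)
{
#if defined(__GNUC__) && defined(__x86_64__)
    if (__builtin_cpu_supports("bmi2"))
        return extract_bits_bmi2(v, start, len);
#endif
    return extract_bits_portable(v, start, len);
}

int main(void)
{
    printf("%llu\n", extract_bits(0xABCDULL, 4, 8));   /* prints 188 (0xBC) */
    return 0;
}
```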
@@ -1169,21 +1915,37 @@ typedef size_t (*ZSTD_decompressSequences_t)(
                             ZSTD_DCtx* dctx,
                             void* dst, size_t maxDstSize,
                             const void* seqStart, size_t seqSize, int nbSeq,
-                            const ZSTD_longOffset_e isLongOffset
+                            const ZSTD_longOffset_e isLongOffset,
+                            const int frame);
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
 static size_t
 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
                    const void* seqStart, size_t seqSize, int nbSeq,
-                   const ZSTD_longOffset_e isLongOffset
+                   const ZSTD_longOffset_e isLongOffset,
+                   const int frame)
 {
     DEBUGLOG(5, "ZSTD_decompressSequences");
 #if DYNAMIC_BMI2
-    if (dctx
-        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    if (ZSTD_DCtx_get_bmi2(dctx)) {
+        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+static size_t
+ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                   const void* seqStart, size_t seqSize, int nbSeq,
+                   const ZSTD_longOffset_e isLongOffset,
+                   const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
+#if DYNAMIC_BMI2
+    if (ZSTD_DCtx_get_bmi2(dctx)) {
+        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
     }
 #endif
-
+    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
@@ -1198,69 +1960,115 @@ static size_t
|
|
|
1198
1960
|
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|
1199
1961
|
void* dst, size_t maxDstSize,
|
|
1200
1962
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1201
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1963
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1964
|
+
const int frame)
|
|
1202
1965
|
{
|
|
1203
1966
|
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
|
|
1204
1967
|
#if DYNAMIC_BMI2
|
|
1205
|
-
if (dctx
|
|
1206
|
-
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1968
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
|
1969
|
+
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1207
1970
|
}
|
|
1208
1971
|
#endif
|
|
1209
|
-
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1972
|
+
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1210
1973
|
}
|
|
1211
1974
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1212
1975
|
|
|
1213
1976
|
|
|
1977
|
+
/**
|
|
1978
|
+
* @returns The total size of the history referenceable by zstd, including
|
|
1979
|
+
* both the prefix and the extDict. At @p op any offset larger than this
|
|
1980
|
+
* is invalid.
|
|
1981
|
+
*/
|
|
1982
|
+
static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
|
|
1983
|
+
{
|
|
1984
|
+
return (size_t)(op - virtualStart);
|
|
1985
|
+
}
|
|
1214
1986
|
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1987
|
+
typedef struct {
|
|
1988
|
+
unsigned longOffsetShare;
|
|
1989
|
+
unsigned maxNbAdditionalBits;
|
|
1990
|
+
} ZSTD_OffsetInfo;
|
|
1991
|
+
|
|
1992
|
+
/* ZSTD_getOffsetInfo() :
|
|
1218
1993
|
* condition : offTable must be valid
|
|
1219
1994
|
* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
|
|
1220
|
-
* compared to maximum possible of (1<<OffFSELog)
|
|
1221
|
-
|
|
1222
|
-
|
|
1995
|
+
* compared to maximum possible of (1<<OffFSELog),
|
|
1996
|
+
* as well as the maximum number additional bits required.
|
|
1997
|
+
*/
|
|
1998
|
+
static ZSTD_OffsetInfo
|
|
1999
|
+
ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
|
|
1223
2000
|
{
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
2001
|
+
ZSTD_OffsetInfo info = {0, 0};
|
|
2002
|
+
/* If nbSeq == 0, then the offTable is uninitialized, but we have
|
|
2003
|
+
* no sequences, so both values should be 0.
|
|
2004
|
+
*/
|
|
2005
|
+
if (nbSeq != 0) {
|
|
2006
|
+
const void* ptr = offTable;
|
|
2007
|
+
U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
|
|
2008
|
+
const ZSTD_seqSymbol* table = offTable + 1;
|
|
2009
|
+
U32 const max = 1 << tableLog;
|
|
2010
|
+
U32 u;
|
|
2011
|
+
DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
|
|
2012
|
+
|
|
2013
|
+
assert(max <= (1 << OffFSELog)); /* max not too large */
|
|
2014
|
+
for (u=0; u<max; u++) {
|
|
2015
|
+
info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
|
|
2016
|
+
if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
|
|
2017
|
+
}
|
|
1235
2018
|
|
|
1236
|
-
|
|
1237
|
-
|
|
2019
|
+
assert(tableLog <= OffFSELog);
|
|
2020
|
+
info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
|
|
2021
|
+
}
|
|
1238
2022
|
|
|
1239
|
-
return
|
|
2023
|
+
return info;
|
|
1240
2024
|
}
|
|
1241
|
-
#endif
|
|
1242
2025
|
|
|
2026
|
+
/**
|
|
2027
|
+
* @returns The maximum offset we can decode in one read of our bitstream, without
|
|
2028
|
+
* reloading more bits in the middle of the offset bits read. Any offsets larger
|
|
2029
|
+
* than this must use the long offset decoder.
|
|
2030
|
+
*/
|
|
2031
|
+
static size_t ZSTD_maxShortOffset(void)
|
|
2032
|
+
{
|
|
2033
|
+
if (MEM_64bits()) {
|
|
2034
|
+
/* We can decode any offset without reloading bits.
|
|
2035
|
+
* This might change if the max window size grows.
|
|
2036
|
+
*/
|
|
2037
|
+
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
|
|
2038
|
+
return (size_t)-1;
|
|
2039
|
+
} else {
|
|
2040
|
+
/* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
|
|
2041
|
+
* This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
|
|
2042
|
+
* Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
|
|
2043
|
+
*/
|
|
2044
|
+
size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
|
|
2045
|
+
size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
|
|
2046
|
+
assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
|
|
2047
|
+
return maxOffset;
|
|
2048
|
+
}
|
|
2049
|
+
}
|
|
1243
2050
|
|
|
1244
2051
|
size_t
|
|
1245
2052
|
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1246
2053
|
void* dst, size_t dstCapacity,
|
|
1247
|
-
const void* src, size_t srcSize, const int frame)
|
|
2054
|
+
const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
|
|
1248
2055
|
{ /* blockType == blockCompressed */
|
|
1249
2056
|
const BYTE* ip = (const BYTE*)src;
|
|
1250
|
-
/* isLongOffset must be true if there are long offsets.
|
|
1251
|
-
* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
|
|
1252
|
-
* We don't expect that to be the case in 64-bit mode.
|
|
1253
|
-
* In block mode, window size is not known, so we have to be conservative.
|
|
1254
|
-
* (note: but it could be evaluated from current-lowLimit)
|
|
1255
|
-
*/
|
|
1256
|
-
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
|
|
1257
2057
|
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
|
1258
2058
|
|
|
1259
|
-
|
|
2059
|
+
/* Note : the wording of the specification
|
|
2060
|
+
* allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
|
|
2061
|
+
* This generally does not happen, as it makes little sense,
|
|
2062
|
+
* since an uncompressed block would feature same size and have no decompression cost.
|
|
2063
|
+
* Also, note that decoder from reference libzstd before < v1.5.4
|
|
2064
|
+
* would consider this edge case as an error.
|
|
2065
|
+
* As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
|
|
2066
|
+
* for broader compatibility with the deployed ecosystem of zstd decoders */
|
|
2067
|
+
RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
|
|
1260
2068
|
|
|
1261
2069
|
/* Decode literals section */
|
|
1262
|
-
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
|
1263
|
-
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock :
|
|
2070
|
+
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
|
|
2071
|
+
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
|
|
1264
2072
|
if (ZSTD_isError(litCSize)) return litCSize;
|
|
1265
2073
|
ip += litCSize;
|
|
1266
2074
|
srcSize -= litCSize;
|
|
@@ -1268,6 +2076,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1268
2076
|
|
|
1269
2077
|
/* Build Decoding Tables */
|
|
1270
2078
|
{
|
|
2079
|
+
/* Compute the maximum block size, which must also work when !frame and fParams are unset.
|
|
2080
|
+
* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
|
|
2081
|
+
*/
|
|
2082
|
+
size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
|
|
2083
|
+
size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
|
|
2084
|
+
/* isLongOffset must be true if there are long offsets.
|
|
2085
|
+
* Offsets are long if they are larger than ZSTD_maxShortOffset().
|
|
2086
|
+
* We don't expect that to be the case in 64-bit mode.
|
|
2087
|
+
*
|
|
2088
|
+
* We check here to see if our history is large enough to allow long offsets.
|
|
2089
|
+
* If it isn't, then we can't possible have (valid) long offsets. If the offset
|
|
2090
|
+
* is invalid, then it is okay to read it incorrectly.
|
|
2091
|
+
*
|
|
2092
|
+
* If isLongOffsets is true, then we will later check our decoding table to see
|
|
2093
|
+
* if it is even possible to generate long offsets.
|
|
2094
|
+
*/
|
|
2095
|
+
ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
|
|
1271
2096
|
/* These macros control at build-time which decompressor implementation
|
|
1272
2097
|
* we use. If neither is defined, we do some inspection and dispatch at
|
|
1273
2098
|
* runtime.
|
|
@@ -1275,6 +2100,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
         int usePrefetchDecoder = dctx->ddictIsCold;
+#else
+        /* Set to 1 to avoid computing offset info if we don't need to.
+         * Otherwise this value is ignored.
+         */
+        int usePrefetchDecoder = 1;
 #endif
         int nbSeq;
         size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
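The two ZSTD_FORCE_DECOMPRESS_SEQUENCES_* macros pin the sequence decoder at build time; when neither is defined, the runtime heuristic starts from whether the attached dictionary is cold and refines the choice per block. A compile-time summary of what the guards imply, as a sketch rather than library code:

/* Sketch of the build-time matrix implied by the guards above. */
#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT)
    /* only the regular (non-prefetching) decoder is compiled; usePrefetchDecoder is ignored */
#elif defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
    /* only the prefetching long-offset decoder is compiled */
#else
    /* both are compiled; a per-block runtime heuristic selects between them */
#endif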
@@ -1282,42 +2112,81 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
         ip += seqHSize;
         srcSize -= seqHSize;
 
-
-
-
-
-
-
-
-
+        RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+        RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
+                        "invalid dst");
+
+        /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
+         * compute information about the share of long offsets, and the maximum nbAdditionalBits.
+         * NOTE: could probably use a larger nbSeq limit
+         */
+        if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
+            ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
+            if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
+                /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
+                 * enough, then we know it is impossible to have too long an offset in this block, so we can
+                 * use the regular offset decoder.
+                 */
+                isLongOffset = ZSTD_lo_isRegularOffset;
+            }
+            if (!usePrefetchDecoder) {
+                U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
+                usePrefetchDecoder = (info.longOffsetShare >= minShare);
+            }
         }
-#endif
 
         dctx->ddictIsCold = 0;
 
 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
-        if (usePrefetchDecoder)
+        if (usePrefetchDecoder) {
+#else
+        (void)usePrefetchDecoder;
+        {
 #endif
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
 #endif
+        }
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
         /* else */
-
+        if (dctx->litBufferLocation == ZSTD_split)
+            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+        else
+            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
 #endif
     }
 }
 
 
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
2163
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
|
|
2164
|
+
{
|
|
2165
|
+
if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
|
|
2166
|
+
dctx->dictEnd = dctx->previousDstEnd;
|
|
2167
|
+
dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
|
|
2168
|
+
dctx->prefixStart = dst;
|
|
2169
|
+
dctx->previousDstEnd = dst;
|
|
2170
|
+
}
|
|
2171
|
+
}
|
|
2172
|
+
|
|
2173
|
+
|
|
2174
|
+
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
|
|
2175
|
+
void* dst, size_t dstCapacity,
|
|
2176
|
+
const void* src, size_t srcSize)
|
|
1317
2177
|
{
|
|
1318
2178
|
size_t dSize;
|
|
1319
|
-
ZSTD_checkContinuity(dctx, dst);
|
|
1320
|
-
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
|
|
2179
|
+
ZSTD_checkContinuity(dctx, dst, dstCapacity);
|
|
2180
|
+
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
|
|
1321
2181
|
dctx->previousDstEnd = (char*)dst + dSize;
|
|
1322
2182
|
return dSize;
|
|
1323
2183
|
}
|
|
2184
|
+
|
|
2185
|
+
|
|
2186
|
+
/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
|
|
2187
|
+
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
|
|
2188
|
+
void* dst, size_t dstCapacity,
|
|
2189
|
+
const void* src, size_t srcSize)
|
|
2190
|
+
{
|
|
2191
|
+
return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
|
|
2192
|
+
}
|