zstdlib 0.8.0-x86-mingw32 → 0.9.0-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +10 -0
- data/README.md +7 -1
- data/Rakefile +38 -8
- data/ext/{zstdlib → zstdlib_c}/extconf.rb +10 -5
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-3.0/zstdlib.c +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/adler32.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/compress.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzclose.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzguts.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzlib.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzread.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzwrite.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/infback.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffixed.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/uncompr.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zconf.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zlib.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +24 -9
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/compiler.h +89 -43
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/debug.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/entropy_common.c +11 -5
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.h +79 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +2 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +1 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +24 -22
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +18 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +11 -6
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +2 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +0 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_deps.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_internal.h +95 -92
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_trace.h +12 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +63 -27
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/huf_compress.c +537 -104
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress.c +307 -373
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +174 -83
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +4 -3
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +3 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +15 -14
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.c +4 -3
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +41 -27
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +295 -120
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +309 -130
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_lazy.c +482 -562
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_lazy.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +9 -7
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +1 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm_geartab.h +4 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +249 -148
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +76 -38
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.h +4 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/huf_decompress.c +727 -189
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +85 -22
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.c +744 -220
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +8 -2
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +34 -3
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/zdict.h +4 -4
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +179 -136
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/zstd_errors.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +7 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +0 -0
- data/ext/zstdlib_c/zstd.mk +15 -0
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- data/lib/3.1/zstdlib_c.so +0 -0
- data/lib/zstdlib.rb +2 -2
- metadata +125 -121
- data/ext/zstdlib/zstd-1.5.0/lib/common/xxhash.c +0 -824
- data/ext/zstdlib/zstd-1.5.0/lib/common/xxhash.h +0 -285
- data/ext/zstdlib/zstd.mk +0 -14
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
@@ -69,15 +69,56 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
69
69
|
}
|
70
70
|
}
|
71
71
|
|
72
|
+
/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
|
73
|
+
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
|
74
|
+
const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
|
75
|
+
{
|
76
|
+
if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
|
77
|
+
{
|
78
|
+
/* room for litbuffer to fit without read faulting */
|
79
|
+
dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
|
80
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
81
|
+
dctx->litBufferLocation = ZSTD_in_dst;
|
82
|
+
}
|
83
|
+
else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
|
84
|
+
{
|
85
|
+
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
|
86
|
+
if (splitImmediately) {
|
87
|
+
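/* split right away: the first litSize - ZSTD_LITBUFFEREXTRASIZE bytes go near the end of dst (ending WILDCOPY_OVERLENGTH before dst + expectedWriteSize); the caller stores the last ZSTD_LITBUFFEREXTRASIZE bytes in litExtraBuffer */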
|
88
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
89
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
|
90
|
+
}
|
91
|
+
else {
|
92
|
+
/* initially this will be stored entirely in dst during huffman decoding; it will be partially shifted to litExtraBuffer afterwards */
|
93
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
|
94
|
+
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
|
95
|
+
}
|
96
|
+
dctx->litBufferLocation = ZSTD_split;
|
97
|
+
}
|
98
|
+
else
|
99
|
+
{
|
100
|
+
/* fits entirely within litExtraBuffer, so no split is necessary */
|
101
|
+
dctx->litBuffer = dctx->litExtraBuffer;
|
102
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
103
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
104
|
+
}
|
105
|
+
}
|
72
106
|
|
73
107
|
/* Hidden declaration for fullbench */
|
74
108
|
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
75
|
-
const void* src, size_t srcSize
|
109
|
+
const void* src, size_t srcSize,
|
110
|
+
void* dst, size_t dstCapacity, const streaming_operation streaming);
|
76
111
|
/*! ZSTD_decodeLiteralsBlock() :
|
112
|
+
* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
|
113
|
+
* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
|
114
|
+
* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
|
115
|
+
* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
|
116
|
+
*
|
77
117
|
* @return : nb of bytes read from src (< srcSize )
|
78
118
|
* note : symbol not declared but exposed for fullbench */
|
79
119
|
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
80
|
-
const void* src, size_t srcSize
|
120
|
+
const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
|
121
|
+
void* dst, size_t dstCapacity, const streaming_operation streaming)
|
81
122
|
{
|
82
123
|
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
|
83
124
|
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
|
@@ -90,7 +131,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
90
131
|
case set_repeat:
|
91
132
|
DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
|
92
133
|
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
|
93
|
-
|
134
|
+
ZSTD_FALLTHROUGH;
|
94
135
|
|
95
136
|
case set_compressed:
|
96
137
|
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
|
@@ -99,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
99
140
|
U32 const lhlCode = (istart[0] >> 2) & 3;
|
100
141
|
U32 const lhc = MEM_readLE32(istart);
|
101
142
|
size_t hufSuccess;
|
143
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
102
144
|
switch(lhlCode)
|
103
145
|
{
|
104
146
|
case 0: case 1: default: /* note : default is impossible, since lhlCode is in [0..3] */
|
@@ -121,8 +163,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
121
163
|
litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
|
122
164
|
break;
|
123
165
|
}
|
166
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
124
167
|
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
125
168
|
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
|
169
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
|
170
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
|
126
171
|
|
127
172
|
/* prefetch huffman table if cold */
|
128
173
|
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
@@ -133,11 +178,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
133
178
|
if (singleStream) {
|
134
179
|
hufSuccess = HUF_decompress1X_usingDTable_bmi2(
|
135
180
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
136
|
-
dctx->HUFptr, dctx
|
181
|
+
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
|
137
182
|
} else {
|
138
183
|
hufSuccess = HUF_decompress4X_usingDTable_bmi2(
|
139
184
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
140
|
-
dctx->HUFptr, dctx
|
185
|
+
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
|
141
186
|
}
|
142
187
|
} else {
|
143
188
|
if (singleStream) {
|
@@ -150,15 +195,22 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
150
195
|
hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
|
151
196
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
152
197
|
istart+lhSize, litCSize, dctx->workspace,
|
153
|
-
sizeof(dctx->workspace), dctx
|
198
|
+
sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
|
154
199
|
#endif
|
155
200
|
} else {
|
156
201
|
hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
|
157
202
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
158
203
|
istart+lhSize, litCSize, dctx->workspace,
|
159
|
-
sizeof(dctx->workspace), dctx
|
204
|
+
sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
|
160
205
|
}
|
161
206
|
}
|
207
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
208
|
+
{
|
209
|
+
ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
210
|
+
ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
211
|
+
dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
212
|
+
dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
|
213
|
+
}
|
162
214
|
|
163
215
|
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
|
164
216
|
|
@@ -166,13 +218,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
166
218
|
dctx->litSize = litSize;
|
167
219
|
dctx->litEntropy = 1;
|
168
220
|
if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
|
169
|
-
ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
170
221
|
return litCSize + lhSize;
|
171
222
|
}
|
172
223
|
|
173
224
|
case set_basic:
|
174
225
|
{ size_t litSize, lhSize;
|
175
226
|
U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
227
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
176
228
|
switch(lhlCode)
|
177
229
|
{
|
178
230
|
case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
|
@@ -189,23 +241,36 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
189
241
|
break;
|
190
242
|
}
|
191
243
|
|
244
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
245
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
246
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
192
247
|
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
193
248
|
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
|
194
|
-
|
249
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
250
|
+
{
|
251
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
252
|
+
ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
253
|
+
}
|
254
|
+
else
|
255
|
+
{
|
256
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
|
257
|
+
}
|
195
258
|
dctx->litPtr = dctx->litBuffer;
|
196
259
|
dctx->litSize = litSize;
|
197
|
-
ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
198
260
|
return lhSize+litSize;
|
199
261
|
}
|
200
262
|
/* direct reference into compressed stream */
|
201
263
|
dctx->litPtr = istart+lhSize;
|
202
264
|
dctx->litSize = litSize;
|
265
|
+
dctx->litBufferEnd = dctx->litPtr + litSize;
|
266
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
203
267
|
return lhSize+litSize;
|
204
268
|
}
|
205
269
|
|
206
270
|
case set_rle:
|
207
271
|
{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
208
272
|
size_t litSize, lhSize;
|
273
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
209
274
|
switch(lhlCode)
|
210
275
|
{
|
211
276
|
case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
|
@@ -222,8 +287,19 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
222
287
|
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
223
288
|
break;
|
224
289
|
}
|
290
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
225
291
|
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
226
|
-
|
292
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
293
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
294
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
295
|
+
{
|
296
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
|
297
|
+
ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
|
298
|
+
}
|
299
|
+
else
|
300
|
+
{
|
301
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
|
302
|
+
}
|
227
303
|
dctx->litPtr = dctx->litBuffer;
|
228
304
|
dctx->litSize = litSize;
|
229
305
|
return lhSize+1;
|
@@ -343,7 +419,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
|
|
343
419
|
}; /* ML_defaultDTable */
|
344
420
|
|
345
421
|
|
346
|
-
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue,
|
422
|
+
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
|
347
423
|
{
|
348
424
|
void* ptr = dt;
|
349
425
|
ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
|
@@ -355,7 +431,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
355
431
|
cell->nbBits = 0;
|
356
432
|
cell->nextState = 0;
|
357
433
|
assert(nbAddBits < 255);
|
358
|
-
cell->nbAdditionalBits =
|
434
|
+
cell->nbAdditionalBits = nbAddBits;
|
359
435
|
cell->baseValue = baseValue;
|
360
436
|
}
|
361
437
|
|
@@ -367,7 +443,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
367
443
|
FORCE_INLINE_TEMPLATE
|
368
444
|
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
369
445
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
370
|
-
const U32* baseValue, const
|
446
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
371
447
|
unsigned tableLog, void* wksp, size_t wkspSize)
|
372
448
|
{
|
373
449
|
ZSTD_seqSymbol* const tableDecode = dt+1;
|
@@ -478,7 +554,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
478
554
|
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
|
479
555
|
tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
480
556
|
assert(nbAdditionalBits[symbol] < 255);
|
481
|
-
tableDecode[u].nbAdditionalBits =
|
557
|
+
tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
|
482
558
|
tableDecode[u].baseValue = baseValue[symbol];
|
483
559
|
}
|
484
560
|
}
|
@@ -487,7 +563,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
487
563
|
/* Avoids the FORCE_INLINE of the _body() function. */
|
488
564
|
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
489
565
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
490
|
-
const U32* baseValue, const
|
566
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
491
567
|
unsigned tableLog, void* wksp, size_t wkspSize)
|
492
568
|
{
|
493
569
|
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
@@ -495,9 +571,9 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
|
495
571
|
}
|
496
572
|
|
497
573
|
#if DYNAMIC_BMI2
|
498
|
-
|
574
|
+
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
499
575
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
500
|
-
const U32* baseValue, const
|
576
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
501
577
|
unsigned tableLog, void* wksp, size_t wkspSize)
|
502
578
|
{
|
503
579
|
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
@@ -507,7 +583,7 @@ TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol
|
|
507
583
|
|
508
584
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
509
585
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
510
|
-
const U32* baseValue, const
|
586
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
511
587
|
unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
|
512
588
|
{
|
513
589
|
#if DYNAMIC_BMI2
|
@@ -529,7 +605,7 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
529
605
|
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
|
530
606
|
symbolEncodingType_e type, unsigned max, U32 maxLog,
|
531
607
|
const void* src, size_t srcSize,
|
532
|
-
const U32* baseValue, const
|
608
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
533
609
|
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
534
610
|
int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
|
535
611
|
int bmi2)
|
@@ -541,7 +617,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
541
617
|
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
|
542
618
|
{ U32 const symbol = *(const BYTE*)src;
|
543
619
|
U32 const baseline = baseValue[symbol];
|
544
|
-
|
620
|
+
U8 const nbBits = nbAdditionalBits[symbol];
|
545
621
|
ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
|
546
622
|
}
|
547
623
|
*DTablePtr = DTableSpace;
|
@@ -620,7 +696,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
620
696
|
LL_defaultDTable, dctx->fseEntropy,
|
621
697
|
dctx->ddictIsCold, nbSeq,
|
622
698
|
dctx->workspace, sizeof(dctx->workspace),
|
623
|
-
dctx
|
699
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
624
700
|
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
625
701
|
ip += llhSize;
|
626
702
|
}
|
@@ -632,7 +708,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
632
708
|
OF_defaultDTable, dctx->fseEntropy,
|
633
709
|
dctx->ddictIsCold, nbSeq,
|
634
710
|
dctx->workspace, sizeof(dctx->workspace),
|
635
|
-
dctx
|
711
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
636
712
|
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
637
713
|
ip += ofhSize;
|
638
714
|
}
|
@@ -644,7 +720,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
644
720
|
ML_defaultDTable, dctx->fseEntropy,
|
645
721
|
dctx->ddictIsCold, nbSeq,
|
646
722
|
dctx->workspace, sizeof(dctx->workspace),
|
647
|
-
dctx
|
723
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
648
724
|
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
649
725
|
ip += mlhSize;
|
650
726
|
}
|
@@ -713,7 +789,7 @@ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
|
713
789
|
* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
|
714
790
|
* The src buffer must be before the dst buffer.
|
715
791
|
*/
|
716
|
-
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
792
|
+
static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
717
793
|
ptrdiff_t const diff = op - ip;
|
718
794
|
BYTE* const oend = op + length;
|
719
795
|
|
@@ -729,6 +805,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
729
805
|
/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
|
730
806
|
assert(length >= 8);
|
731
807
|
ZSTD_overlapCopy8(&op, &ip, diff);
|
808
|
+
length -= 8;
|
732
809
|
assert(op - ip >= 8);
|
733
810
|
assert(op <= oend);
|
734
811
|
}
|
@@ -743,12 +820,35 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
743
820
|
assert(oend > oend_w);
|
744
821
|
ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
|
745
822
|
ip += oend_w - op;
|
746
|
-
op
|
823
|
+
op += oend_w - op;
|
747
824
|
}
|
748
825
|
/* Handle the leftovers. */
|
749
826
|
while (op < oend) *op++ = *ip++;
|
750
827
|
}
|
751
828
|
|
829
|
+
/* ZSTD_safecopyDstBeforeSrc():
|
830
|
+
* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
|
831
|
+
* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
|
832
|
+
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
|
833
|
+
ptrdiff_t const diff = op - ip;
|
834
|
+
BYTE* const oend = op + length;
|
835
|
+
|
836
|
+
if (length < 8 || diff > -8) {
|
837
|
+
/* Handle short lengths, close overlaps, and dst not before src. */
|
838
|
+
while (op < oend) *op++ = *ip++;
|
839
|
+
return;
|
840
|
+
}
|
841
|
+
|
842
|
+
if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
|
843
|
+
ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
|
844
|
+
ip += oend - WILDCOPY_OVERLENGTH - op;
|
845
|
+
op += oend - WILDCOPY_OVERLENGTH - op;
|
846
|
+
}
|
847
|
+
|
848
|
+
/* Handle the leftovers. */
|
849
|
+
while (op < oend) *op++ = *ip++;
|
850
|
+
}
|
851
|
+
|
752
852
|
/* ZSTD_execSequenceEnd():
|
753
853
|
* This version handles cases that are near the end of the output buffer. It requires
|
754
854
|
* more careful checks to make sure there is no overflow. By separating out these hard
|
@@ -759,9 +859,9 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
759
859
|
*/
|
760
860
|
FORCE_NOINLINE
|
761
861
|
size_t ZSTD_execSequenceEnd(BYTE* op,
|
762
|
-
|
763
|
-
|
764
|
-
|
862
|
+
BYTE* const oend, seq_t sequence,
|
863
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
864
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
765
865
|
{
|
766
866
|
BYTE* const oLitEnd = op + sequence.litLength;
|
767
867
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
@@ -784,27 +884,76 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
784
884
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
785
885
|
/* offset beyond prefix */
|
786
886
|
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
787
|
-
match = dictEnd - (prefixStart-match);
|
887
|
+
match = dictEnd - (prefixStart - match);
|
788
888
|
if (match + sequence.matchLength <= dictEnd) {
|
789
889
|
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
790
890
|
return sequenceLength;
|
791
891
|
}
|
792
892
|
/* span extDict & currentPrefixSegment */
|
793
893
|
{ size_t const length1 = dictEnd - match;
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
894
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
895
|
+
op = oLitEnd + length1;
|
896
|
+
sequence.matchLength -= length1;
|
897
|
+
match = prefixStart;
|
898
|
+
}
|
899
|
+
}
|
900
|
+
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
901
|
+
return sequenceLength;
|
902
|
+
}
|
903
|
+
|
904
|
+
/* ZSTD_execSequenceEndSplitLitBuffer():
|
905
|
+
* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
|
906
|
+
*/
|
907
|
+
FORCE_NOINLINE
|
908
|
+
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
|
909
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
910
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
911
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
912
|
+
{
|
913
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
914
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
915
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
916
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
917
|
+
|
918
|
+
|
919
|
+
/* bounds checks : careful of address space overflow in 32-bit mode */
|
920
|
+
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
|
921
|
+
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
|
922
|
+
assert(op < op + sequenceLength);
|
923
|
+
assert(oLitEnd < op + sequenceLength);
|
924
|
+
|
925
|
+
/* copy literals */
|
926
|
+
RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
|
927
|
+
ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
|
928
|
+
op = oLitEnd;
|
929
|
+
*litPtr = iLitEnd;
|
930
|
+
|
931
|
+
/* copy Match */
|
932
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
933
|
+
/* offset beyond prefix */
|
934
|
+
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
935
|
+
match = dictEnd - (prefixStart - match);
|
936
|
+
if (match + sequence.matchLength <= dictEnd) {
|
937
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
938
|
+
return sequenceLength;
|
939
|
+
}
|
940
|
+
/* span extDict & currentPrefixSegment */
|
941
|
+
{ size_t const length1 = dictEnd - match;
|
942
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
943
|
+
op = oLitEnd + length1;
|
944
|
+
sequence.matchLength -= length1;
|
945
|
+
match = prefixStart;
|
946
|
+
}
|
947
|
+
}
|
799
948
|
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
800
949
|
return sequenceLength;
|
801
950
|
}
|
802
951
|
|
803
952
|
HINT_INLINE
|
804
953
|
size_t ZSTD_execSequence(BYTE* op,
|
805
|
-
|
806
|
-
|
807
|
-
|
954
|
+
BYTE* const oend, seq_t sequence,
|
955
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
956
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
808
957
|
{
|
809
958
|
BYTE* const oLitEnd = op + sequence.litLength;
|
810
959
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
@@ -813,6 +962,98 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
813
962
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
814
963
|
const BYTE* match = oLitEnd - sequence.offset;
|
815
964
|
|
965
|
+
assert(op != NULL /* Precondition */);
|
966
|
+
assert(oend_w < oend /* No underflow */);
|
967
|
+
/* Handle edge cases in a slow path:
|
968
|
+
* - Read beyond end of literals
|
969
|
+
* - Match end is within WILDCOPY_OVERLIMIT of oend
|
970
|
+
* - 32-bit mode and the match length overflows
|
971
|
+
*/
|
972
|
+
if (UNLIKELY(
|
973
|
+
iLitEnd > litLimit ||
|
974
|
+
oMatchEnd > oend_w ||
|
975
|
+
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
976
|
+
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
977
|
+
|
978
|
+
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
979
|
+
assert(op <= oLitEnd /* No overflow */);
|
980
|
+
assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
|
981
|
+
assert(oMatchEnd <= oend /* No underflow */);
|
982
|
+
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
|
983
|
+
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
|
984
|
+
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
|
985
|
+
|
986
|
+
/* Copy Literals:
|
987
|
+
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
|
988
|
+
* We likely don't need the full 32-byte wildcopy.
|
989
|
+
*/
|
990
|
+
assert(WILDCOPY_OVERLENGTH >= 16);
|
991
|
+
ZSTD_copy16(op, (*litPtr));
|
992
|
+
if (UNLIKELY(sequence.litLength > 16)) {
|
993
|
+
ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
|
994
|
+
}
|
995
|
+
op = oLitEnd;
|
996
|
+
*litPtr = iLitEnd; /* update for next sequence */
|
997
|
+
|
998
|
+
/* Copy Match */
|
999
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
1000
|
+
/* offset beyond prefix -> go into extDict */
|
1001
|
+
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
1002
|
+
match = dictEnd + (match - prefixStart);
|
1003
|
+
if (match + sequence.matchLength <= dictEnd) {
|
1004
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
1005
|
+
return sequenceLength;
|
1006
|
+
}
|
1007
|
+
/* span extDict & currentPrefixSegment */
|
1008
|
+
{ size_t const length1 = dictEnd - match;
|
1009
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
1010
|
+
op = oLitEnd + length1;
|
1011
|
+
sequence.matchLength -= length1;
|
1012
|
+
match = prefixStart;
|
1013
|
+
}
|
1014
|
+
}
|
1015
|
+
/* Match within prefix of 1 or more bytes */
|
1016
|
+
assert(op <= oMatchEnd);
|
1017
|
+
assert(oMatchEnd <= oend_w);
|
1018
|
+
assert(match >= prefixStart);
|
1019
|
+
assert(sequence.matchLength >= 1);
|
1020
|
+
|
1021
|
+
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
1022
|
+
* without overlap checking.
|
1023
|
+
*/
|
1024
|
+
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
1025
|
+
/* We bet on a full wildcopy for matches, since we expect matches to be
|
1026
|
+
* longer than literals (in general). In silesia, ~10% of matches are longer
|
1027
|
+
* than 16 bytes.
|
1028
|
+
*/
|
1029
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
|
1030
|
+
return sequenceLength;
|
1031
|
+
}
|
1032
|
+
assert(sequence.offset < WILDCOPY_VECLEN);
|
1033
|
+
|
1034
|
+
/* Copy 8 bytes and spread the offset to be >= 8. */
|
1035
|
+
ZSTD_overlapCopy8(&op, &match, sequence.offset);
|
1036
|
+
|
1037
|
+
/* If the match length is > 8 bytes, then continue with the wildcopy. */
|
1038
|
+
if (sequence.matchLength > 8) {
|
1039
|
+
assert(op < oMatchEnd);
|
1040
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
|
1041
|
+
}
|
1042
|
+
return sequenceLength;
|
1043
|
+
}
|
1044
|
+
|
1045
|
+
HINT_INLINE
|
1046
|
+
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
|
1047
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
1048
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
1049
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
1050
|
+
{
|
1051
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
1052
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
1053
|
+
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
1054
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
1055
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
1056
|
+
|
816
1057
|
assert(op != NULL /* Precondition */);
|
817
1058
|
assert(oend_w < oend /* No underflow */);
|
818
1059
|
/* Handle edge cases in a slow path:
|
@@ -824,7 +1065,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
824
1065
|
iLitEnd > litLimit ||
|
825
1066
|
oMatchEnd > oend_w ||
|
826
1067
|
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
827
|
-
return
|
1068
|
+
return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
828
1069
|
|
829
1070
|
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
830
1071
|
assert(op <= oLitEnd /* No overflow */);
|
@@ -892,6 +1133,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
892
1133
|
return sequenceLength;
|
893
1134
|
}
|
894
1135
|
|
1136
|
+
|
895
1137
|
static void
|
896
1138
|
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
|
897
1139
|
{
|
@@ -905,20 +1147,10 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS
|
|
905
1147
|
}
|
906
1148
|
|
907
1149
|
FORCE_INLINE_TEMPLATE void
|
908
|
-
|
909
|
-
{
|
910
|
-
ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
|
911
|
-
U32 const nbBits = DInfo.nbBits;
|
912
|
-
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
913
|
-
DStatePtr->state = DInfo.nextState + lowBits;
|
914
|
-
}
|
915
|
-
|
916
|
-
FORCE_INLINE_TEMPLATE void
|
917
|
-
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
|
1150
|
+
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
|
918
1151
|
{
|
919
|
-
U32 const nbBits = DInfo.nbBits;
|
920
1152
|
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
921
|
-
DStatePtr->state =
|
1153
|
+
DStatePtr->state = nextState + lowBits;
|
922
1154
|
}
|
923
1155
|
|
924
1156
|
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
@@ -937,102 +1169,100 @@ FORCE_INLINE_TEMPLATE seq_t
|
|
937
1169
|
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
938
1170
|
{
|
939
1171
|
seq_t seq;
|
940
|
-
ZSTD_seqSymbol const llDInfo = seqState->stateLL.table
|
941
|
-
ZSTD_seqSymbol const mlDInfo = seqState->stateML.table
|
942
|
-
ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table
|
943
|
-
|
944
|
-
|
945
|
-
U32 const ofBase = ofDInfo
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
if (
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
1172
|
+
const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
|
1173
|
+
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
|
1174
|
+
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
|
1175
|
+
seq.matchLength = mlDInfo->baseValue;
|
1176
|
+
seq.litLength = llDInfo->baseValue;
|
1177
|
+
{ U32 const ofBase = ofDInfo->baseValue;
|
1178
|
+
BYTE const llBits = llDInfo->nbAdditionalBits;
|
1179
|
+
BYTE const mlBits = mlDInfo->nbAdditionalBits;
|
1180
|
+
BYTE const ofBits = ofDInfo->nbAdditionalBits;
|
1181
|
+
BYTE const totalBits = llBits+mlBits+ofBits;
|
1182
|
+
|
1183
|
+
U16 const llNext = llDInfo->nextState;
|
1184
|
+
U16 const mlNext = mlDInfo->nextState;
|
1185
|
+
U16 const ofNext = ofDInfo->nextState;
|
1186
|
+
U32 const llnbBits = llDInfo->nbBits;
|
1187
|
+
U32 const mlnbBits = mlDInfo->nbBits;
|
1188
|
+
U32 const ofnbBits = ofDInfo->nbBits;
|
1189
|
+
/*
|
1190
|
+
* As gcc has better branch and block analyzers, sometimes it is only
|
1191
|
+
* valuable to mark likeliness for clang, it gives around 3-4% of
|
1192
|
+
* performance.
|
1193
|
+
*/
|
1194
|
+
|
1195
|
+
/* sequence */
|
1196
|
+
{ size_t offset;
|
1197
|
+
#if defined(__clang__)
|
1198
|
+
if (LIKELY(ofBits > 1)) {
|
1199
|
+
#else
|
1200
|
+
if (ofBits > 1) {
|
1201
|
+
#endif
|
1202
|
+
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
1203
|
+
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
1204
|
+
assert(ofBits <= MaxOff);
|
1205
|
+
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
|
1206
|
+
U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
|
1207
|
+
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
1208
|
+
BIT_reloadDStream(&seqState->DStream);
|
1209
|
+
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
1210
|
+
assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
|
1211
|
+
} else {
|
1212
|
+
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
1213
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
979
1214
|
}
|
1215
|
+
seqState->prevOffset[2] = seqState->prevOffset[1];
|
1216
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
1217
|
+
seqState->prevOffset[0] = offset;
|
980
1218
|
} else {
|
981
|
-
|
982
|
-
|
983
|
-
|
984
|
-
|
985
|
-
seqState->prevOffset[
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
996
|
-
BIT_reloadDStream(&seqState->DStream);
|
997
|
-
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
998
|
-
BIT_reloadDStream(&seqState->DStream);
|
999
|
-
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
1000
|
-
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
1001
|
-
|
1002
|
-
seq.litLength = llBase;
|
1003
|
-
if (llBits > 0)
|
1004
|
-
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
1005
|
-
|
1006
|
-
if (MEM_32bits())
|
1007
|
-
BIT_reloadDStream(&seqState->DStream);
|
1008
|
-
|
1009
|
-
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
1010
|
-
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
1011
|
-
|
1012
|
-
/* ANS state update
|
1013
|
-
* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
|
1014
|
-
* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
|
1015
|
-
* Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
|
1016
|
-
* better option, so it is the default for other compilers. But, if you
|
1017
|
-
* measure that it is worse, please put up a pull request.
|
1018
|
-
*/
|
1019
|
-
{
|
1020
|
-
#if defined(__GNUC__) && !defined(__clang__)
|
1021
|
-
const int kUseUpdateFseState = 1;
|
1022
|
-
#else
|
1023
|
-
const int kUseUpdateFseState = 0;
|
1024
|
-
#endif
|
1025
|
-
if (kUseUpdateFseState) {
|
1026
|
-
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
1027
|
-
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
1028
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
1029
|
-
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
1030
|
-
} else {
|
1031
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
|
1032
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
|
1033
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
1034
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
|
1219
|
+
U32 const ll0 = (llDInfo->baseValue == 0);
|
1220
|
+
if (LIKELY((ofBits == 0))) {
|
1221
|
+
offset = seqState->prevOffset[ll0];
|
1222
|
+
seqState->prevOffset[1] = seqState->prevOffset[!ll0];
|
1223
|
+
seqState->prevOffset[0] = offset;
|
1224
|
+
} else {
|
1225
|
+
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
|
1226
|
+
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
1227
|
+
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
1228
|
+
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
1229
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
1230
|
+
seqState->prevOffset[0] = offset = temp;
|
1231
|
+
} } }
|
1232
|
+
seq.offset = offset;
|
1035
1233
|
}
|
1234
|
+
|
1235
|
+
#if defined(__clang__)
|
1236
|
+
if (UNLIKELY(mlBits > 0))
|
1237
|
+
#else
|
1238
|
+
if (mlBits > 0)
|
1239
|
+
#endif
|
1240
|
+
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
1241
|
+
|
1242
|
+
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
1243
|
+
BIT_reloadDStream(&seqState->DStream);
|
1244
|
+
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
1245
|
+
BIT_reloadDStream(&seqState->DStream);
|
1246
|
+
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
1247
|
+
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
1248
|
+
|
1249
|
+
#if defined(__clang__)
|
1250
|
+
if (UNLIKELY(llBits > 0))
|
1251
|
+
#else
|
1252
|
+
if (llBits > 0)
|
1253
|
+
#endif
|
1254
|
+
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
1255
|
+
|
1256
|
+
if (MEM_32bits())
|
1257
|
+
BIT_reloadDStream(&seqState->DStream);
|
1258
|
+
|
1259
|
+
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
1260
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
1261
|
+
|
1262
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
|
1263
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
|
1264
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
1265
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
|
1036
1266
|
}
|
1037
1267
|
|
1038
1268
|
return seq;
|
@@ -1085,9 +1315,11 @@ MEM_STATIC void ZSTD_assertValidSequence(
|
|
1085
1315
|
#endif
|
1086
1316
|
|
1087
1317
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
1318
|
+
|
1319
|
+
|
1088
1320
|
FORCE_INLINE_TEMPLATE size_t
|
1089
1321
|
DONT_VECTORIZE
|
1090
|
-
|
1322
|
+
ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
|
1091
1323
|
void* dst, size_t maxDstSize,
|
1092
1324
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1093
1325
|
const ZSTD_longOffset_e isLongOffset,
|
@@ -1099,11 +1331,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
1099
1331
|
BYTE* const oend = ostart + maxDstSize;
|
1100
1332
|
BYTE* op = ostart;
|
1101
1333
|
const BYTE* litPtr = dctx->litPtr;
|
1102
|
-
const BYTE*
|
1334
|
+
const BYTE* litBufferEnd = dctx->litBufferEnd;
|
1103
1335
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
1104
1336
|
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
|
1105
1337
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
1106
|
-
DEBUGLOG(5, "
|
1338
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
|
1107
1339
|
(void)frame;
|
1108
1340
|
|
1109
1341
|
/* Regen sequences */
|
@@ -1124,55 +1356,237 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
1124
1356
|
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
1125
1357
|
BIT_DStream_completed < BIT_DStream_overflow);
|
1126
1358
|
|
1359
|
+
/* decompress without overrunning litPtr begins */
|
1360
|
+
{
|
1361
|
+
seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
1362
|
+
/* Align the decompression loop to 32 + 16 bytes.
|
1363
|
+
*
|
1364
|
+
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
|
1365
|
+
* speed swings based on the alignment of the decompression loop. This
|
1366
|
+
* performance swing is caused by parts of the decompression loop falling
|
1367
|
+
* out of the DSB. The entire decompression loop should fit in the DSB,
|
1368
|
+
* when it can't we get much worse performance. You can measure if you've
|
1369
|
+
* hit the good case or the bad case with this perf command for some
|
1370
|
+
* compressed file test.zst:
|
1371
|
+
*
|
1372
|
+
* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
|
1373
|
+
* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
|
1374
|
+
*
|
1375
|
+
* If you see most cycles served out of the MITE you've hit the bad case.
|
1376
|
+
* If you see most cycles served out of the DSB you've hit the good case.
|
1377
|
+
* If it is pretty even then you may be in an okay case.
|
1378
|
+
*
|
1379
|
+
* This issue has been reproduced on the following CPUs:
|
1380
|
+
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
1381
|
+
* Use Instruments->Counters to get DSB/MITE cycles.
|
1382
|
+
* I never got performance swings, but I was able to
|
1383
|
+
* go from the good case of mostly DSB to half of the
|
1384
|
+
* cycles served from MITE.
|
1385
|
+
* - Coffeelake: Intel i9-9900k
|
1386
|
+
* - Coffeelake: Intel i7-9700k
|
1387
|
+
*
|
1388
|
+
* I haven't been able to reproduce the instability or DSB misses on any
|
1389
|
+
* of the following CPUs:
|
1390
|
+
* - Haswell
|
1391
|
+
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
|
1392
|
+
* - Skylake
|
1393
|
+
*
|
1394
|
+
* Alignment is done for each of the three major decompression loops:
|
1395
|
+
* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
|
1396
|
+
* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
|
1397
|
+
* - ZSTD_decompressSequences_body
|
1398
|
+
* Alignment choices are made to minimize large swings on bad cases and influence on performance
|
1399
|
+
* from changes external to this code, rather than to overoptimize on the current commit.
|
1400
|
+
*
|
1401
|
+
* If you are seeing performance instability this script can help test.
|
1402
|
+
* It tests on 4 commits in zstd where I saw performance change.
|
1403
|
+
*
|
1404
|
+
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
1405
|
+
*/
|
1127
1406
|
#if defined(__GNUC__) && defined(__x86_64__)
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
|
1139
|
-
|
1140
|
-
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1407
|
+
__asm__(".p2align 6");
|
1408
|
+
# if __GNUC__ >= 7
|
1409
|
+
/* good for gcc-7, gcc-9, and gcc-11 */
|
1410
|
+
__asm__("nop");
|
1411
|
+
__asm__(".p2align 5");
|
1412
|
+
__asm__("nop");
|
1413
|
+
__asm__(".p2align 4");
|
1414
|
+
# if __GNUC__ == 8 || __GNUC__ == 10
|
1415
|
+
/* good for gcc-8 and gcc-10 */
|
1416
|
+
__asm__("nop");
|
1417
|
+
__asm__(".p2align 3");
|
1418
|
+
# endif
|
1419
|
+
# endif
|
1420
|
+
#endif
|
1421
|
+
|
1422
|
+
/* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
|
1423
|
+
for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
|
1424
|
+
size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
1425
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1426
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1427
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
1428
|
+
#endif
|
1429
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
1430
|
+
return oneSeqSize;
|
1431
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
1432
|
+
op += oneSeqSize;
|
1433
|
+
if (UNLIKELY(!--nbSeq))
|
1434
|
+
break;
|
1435
|
+
BIT_reloadDStream(&(seqState.DStream));
|
1436
|
+
sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
1437
|
+
}
|
1438
|
+
|
1439
|
+
/* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
|
1440
|
+
if (nbSeq > 0) {
|
1441
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
1442
|
+
if (leftoverLit)
|
1443
|
+
{
|
1444
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
1445
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
1446
|
+
sequence.litLength -= leftoverLit;
|
1447
|
+
op += leftoverLit;
|
1448
|
+
}
|
1449
|
+
litPtr = dctx->litExtraBuffer;
|
1450
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
1451
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
1452
|
+
{
|
1453
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
1454
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1455
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1456
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
1457
|
+
#endif
|
1458
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
1459
|
+
return oneSeqSize;
|
1460
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
1461
|
+
op += oneSeqSize;
|
1462
|
+
if (--nbSeq)
|
1463
|
+
BIT_reloadDStream(&(seqState.DStream));
|
1464
|
+
}
|
1465
|
+
}
|
1466
|
+
}
|
1467
|
+
|
1468
|
+
if (nbSeq > 0) /* there is remaining lit from extra buffer */
|
1469
|
+
{
|
1470
|
+
|
1471
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
1472
|
+
__asm__(".p2align 6");
|
1473
|
+
__asm__("nop");
|
1474
|
+
# if __GNUC__ != 7
|
1475
|
+
/* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
|
1476
|
+
__asm__(".p2align 4");
|
1477
|
+
__asm__("nop");
|
1478
|
+
__asm__(".p2align 3");
|
1479
|
+
# elif __GNUC__ >= 11
|
1480
|
+
__asm__(".p2align 3");
|
1481
|
+
# else
|
1482
|
+
__asm__(".p2align 5");
|
1483
|
+
__asm__("nop");
|
1484
|
+
__asm__(".p2align 3");
|
1485
|
+
# endif
|
1486
|
+
#endif
|
1487
|
+
|
1488
|
+
for (; ; ) {
|
1489
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
1490
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
1491
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1492
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1493
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
1494
|
+
#endif
|
1495
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
1496
|
+
return oneSeqSize;
|
1497
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
1498
|
+
op += oneSeqSize;
|
1499
|
+
if (UNLIKELY(!--nbSeq))
|
1500
|
+
break;
|
1501
|
+
BIT_reloadDStream(&(seqState.DStream));
|
1502
|
+
}
|
1503
|
+
}
|
1504
|
+
|
1505
|
+
/* check if reached exact end */
|
1506
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
|
1507
|
+
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
|
1508
|
+
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
|
1509
|
+
/* save reps for next block */
|
1510
|
+
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
1511
|
+
}
|
1512
|
+
|
1513
|
+
/* last literal segment */
|
1514
|
+
if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
|
1515
|
+
{
|
1516
|
+
size_t const lastLLSize = litBufferEnd - litPtr;
|
1517
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
|
1518
|
+
if (op != NULL) {
|
1519
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
1520
|
+
op += lastLLSize;
|
1521
|
+
}
|
1522
|
+
litPtr = dctx->litExtraBuffer;
|
1523
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
1524
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
1525
|
+
}
|
1526
|
+
{ size_t const lastLLSize = litBufferEnd - litPtr;
|
1527
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
1528
|
+
if (op != NULL) {
|
1529
|
+
ZSTD_memcpy(op, litPtr, lastLLSize);
|
1530
|
+
op += lastLLSize;
|
1531
|
+
}
|
1532
|
+
}
|
1533
|
+
|
1534
|
+
return op-ostart;
|
1535
|
+
}
|
1536
|
+
|
1537
|
+
FORCE_INLINE_TEMPLATE size_t
|
1538
|
+
DONT_VECTORIZE
|
1539
|
+
ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
|
1540
|
+
void* dst, size_t maxDstSize,
|
1541
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
1542
|
+
const ZSTD_longOffset_e isLongOffset,
|
1543
|
+
const int frame)
|
1544
|
+
{
|
1545
|
+
const BYTE* ip = (const BYTE*)seqStart;
|
1546
|
+
const BYTE* const iend = ip + seqSize;
|
1547
|
+
BYTE* const ostart = (BYTE*)dst;
|
1548
|
+
BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
|
1549
|
+
BYTE* op = ostart;
|
1550
|
+
const BYTE* litPtr = dctx->litPtr;
|
1551
|
+
const BYTE* const litEnd = litPtr + dctx->litSize;
|
1552
|
+
const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
|
1553
|
+
const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
|
1554
|
+
const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
|
1555
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_body");
|
1556
|
+
(void)frame;
|
1557
|
+
|
1558
|
+
/* Regen sequences */
|
1559
|
+
if (nbSeq) {
|
1560
|
+
seqState_t seqState;
|
1561
|
+
dctx->fseEntropy = 1;
|
1562
|
+
{ U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
1563
|
+
RETURN_ERROR_IF(
|
1564
|
+
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
|
1565
|
+
corruption_detected, "");
|
1566
|
+
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
1567
|
+
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
1568
|
+
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
1569
|
+
assert(dst != NULL);
|
1570
|
+
|
1571
|
+
ZSTD_STATIC_ASSERT(
|
1572
|
+
BIT_DStream_unfinished < BIT_DStream_completed &&
|
1573
|
+
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
1574
|
+
BIT_DStream_completed < BIT_DStream_overflow);
|
1575
|
+
|
1576
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
1577
|
+
__asm__(".p2align 6");
|
1578
|
+
__asm__("nop");
|
1579
|
+
# if __GNUC__ >= 7
|
1580
|
+
__asm__(".p2align 5");
|
1581
|
+
__asm__("nop");
|
1582
|
+
__asm__(".p2align 3");
|
1172
1583
|
# else
|
1173
|
-
|
1584
|
+
__asm__(".p2align 4");
|
1585
|
+
__asm__("nop");
|
1586
|
+
__asm__(".p2align 3");
|
1174
1587
|
# endif
|
1175
1588
|
#endif
|
1589
|
+
|
1176
1590
|
for ( ; ; ) {
|
1177
1591
|
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
1178
1592
|
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
@@ -1218,6 +1632,16 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
|
|
1218
1632
|
{
|
1219
1633
|
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1220
1634
|
}
|
1635
|
+
|
1636
|
+
static size_t
|
1637
|
+
ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
|
1638
|
+
void* dst, size_t maxDstSize,
|
1639
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
1640
|
+
const ZSTD_longOffset_e isLongOffset,
|
1641
|
+
const int frame)
|
1642
|
+
{
|
1643
|
+
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1644
|
+
}
|
1221
1645
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
1222
1646
|
|
1223
1647
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
@@ -1250,10 +1674,10 @@ ZSTD_decompressSequencesLong_body(
|
|
1250
1674
|
const BYTE* ip = (const BYTE*)seqStart;
|
1251
1675
|
const BYTE* const iend = ip + seqSize;
|
1252
1676
|
BYTE* const ostart = (BYTE*)dst;
|
1253
|
-
BYTE* const oend = ostart + maxDstSize;
|
1677
|
+
BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
|
1254
1678
|
BYTE* op = ostart;
|
1255
1679
|
const BYTE* litPtr = dctx->litPtr;
|
1256
|
-
const BYTE*
|
1680
|
+
const BYTE* litBufferEnd = dctx->litBufferEnd;
|
1257
1681
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
1258
1682
|
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
|
1259
1683
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
@@ -1289,32 +1713,94 @@ ZSTD_decompressSequencesLong_body(
|
|
1289
1713
|
}
|
1290
1714
|
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
|
1291
1715
|
|
1292
|
-
/*
|
1293
|
-
for (
|
1294
|
-
seq_t
|
1295
|
-
size_t
|
1716
|
+
/* decompress without stomping litBuffer */
|
1717
|
+
for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
|
1718
|
+
seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
1719
|
+
size_t oneSeqSize;
|
1720
|
+
|
1721
|
+
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
|
1722
|
+
{
|
1723
|
+
/* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
|
1724
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
1725
|
+
if (leftoverLit)
|
1726
|
+
{
|
1727
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
1728
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
1729
|
+
sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
|
1730
|
+
op += leftoverLit;
|
1731
|
+
}
|
1732
|
+
litPtr = dctx->litExtraBuffer;
|
1733
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
1734
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
1735
|
+
oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
1296
1736
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1297
|
-
|
1298
|
-
|
1737
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1738
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
1299
1739
|
#endif
|
1300
|
-
|
1740
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
1301
1741
|
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1742
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
1743
|
+
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
1744
|
+
op += oneSeqSize;
|
1745
|
+
}
|
1746
|
+
else
|
1747
|
+
{
|
1748
|
+
/* lit buffer is either wholly contained in first or second split, or not split at all*/
|
1749
|
+
oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
|
1750
|
+
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
|
1751
|
+
ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
1752
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1753
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1754
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
1755
|
+
#endif
|
1756
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
1757
|
+
|
1758
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
1759
|
+
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
1760
|
+
op += oneSeqSize;
|
1761
|
+
}
|
1305
1762
|
}
|
1306
1763
|
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
|
1307
1764
|
|
1308
1765
|
/* finish queue */
|
1309
1766
|
seqNb -= seqAdvance;
|
1310
1767
|
for ( ; seqNb<nbSeq ; seqNb++) {
|
1311
|
-
|
1768
|
+
seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
|
1769
|
+
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
|
1770
|
+
{
|
1771
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
1772
|
+
if (leftoverLit)
|
1773
|
+
{
|
1774
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
1775
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
1776
|
+
sequence->litLength -= leftoverLit;
|
1777
|
+
op += leftoverLit;
|
1778
|
+
}
|
1779
|
+
litPtr = dctx->litExtraBuffer;
|
1780
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
1781
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
1782
|
+
{
|
1783
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
1312
1784
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1313
|
-
|
1314
|
-
|
1785
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1786
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
1315
1787
|
#endif
|
1316
|
-
|
1317
|
-
|
1788
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
1789
|
+
op += oneSeqSize;
|
1790
|
+
}
|
1791
|
+
}
|
1792
|
+
else
|
1793
|
+
{
|
1794
|
+
size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
|
1795
|
+
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
|
1796
|
+
ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
1797
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1798
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1799
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
1800
|
+
#endif
|
1801
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
1802
|
+
op += oneSeqSize;
|
1803
|
+
}
|
1318
1804
|
}
|
1319
1805
|
|
1320
1806
|
/* save reps for next block */
|
@@ -1322,10 +1808,21 @@ ZSTD_decompressSequencesLong_body(
|
|
1322
1808
|
}
|
1323
1809
|
|
1324
1810
|
/* last literal segment */
|
1325
|
-
|
1811
|
+
if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
|
1812
|
+
{
|
1813
|
+
size_t const lastLLSize = litBufferEnd - litPtr;
|
1814
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
|
1815
|
+
if (op != NULL) {
|
1816
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
1817
|
+
op += lastLLSize;
|
1818
|
+
}
|
1819
|
+
litPtr = dctx->litExtraBuffer;
|
1820
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
1821
|
+
}
|
1822
|
+
{ size_t const lastLLSize = litBufferEnd - litPtr;
|
1326
1823
|
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
1327
1824
|
if (op != NULL) {
|
1328
|
-
|
1825
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
1329
1826
|
op += lastLLSize;
|
1330
1827
|
}
|
1331
1828
|
}
|
@@ -1349,7 +1846,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
1349
1846
|
#if DYNAMIC_BMI2
|
1350
1847
|
|
1351
1848
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
1352
|
-
static
|
1849
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
1353
1850
|
DONT_VECTORIZE
|
1354
1851
|
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
1355
1852
|
void* dst, size_t maxDstSize,
|
@@ -1359,10 +1856,20 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
|
1359
1856
|
{
|
1360
1857
|
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1361
1858
|
}
|
1859
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
1860
|
+
DONT_VECTORIZE
|
1861
|
+
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
|
1862
|
+
void* dst, size_t maxDstSize,
|
1863
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
1864
|
+
const ZSTD_longOffset_e isLongOffset,
|
1865
|
+
const int frame)
|
1866
|
+
{
|
1867
|
+
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1868
|
+
}
|
1362
1869
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
1363
1870
|
|
1364
1871
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
1365
|
-
static
|
1872
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
1366
1873
|
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
|
1367
1874
|
void* dst, size_t maxDstSize,
|
1368
1875
|
const void* seqStart, size_t seqSize, int nbSeq,
|
@@ -1391,11 +1898,25 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|
1391
1898
|
{
|
1392
1899
|
DEBUGLOG(5, "ZSTD_decompressSequences");
|
1393
1900
|
#if DYNAMIC_BMI2
|
1394
|
-
if (dctx
|
1901
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
1395
1902
|
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1396
1903
|
}
|
1397
1904
|
#endif
|
1398
|
-
|
1905
|
+
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1906
|
+
}
|
1907
|
+
static size_t
|
1908
|
+
ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
1909
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
1910
|
+
const ZSTD_longOffset_e isLongOffset,
|
1911
|
+
const int frame)
|
1912
|
+
{
|
1913
|
+
DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
|
1914
|
+
#if DYNAMIC_BMI2
|
1915
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
1916
|
+
return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1917
|
+
}
|
1918
|
+
#endif
|
1919
|
+
return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1399
1920
|
}
|
1400
1921
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
1401
1922
|
|
@@ -1415,7 +1936,7 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|
1415
1936
|
{
|
1416
1937
|
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
|
1417
1938
|
#if DYNAMIC_BMI2
|
1418
|
-
if (dctx
|
1939
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
1419
1940
|
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1420
1941
|
}
|
1421
1942
|
#endif
|
@@ -1456,7 +1977,7 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
|
|
1456
1977
|
size_t
|
1457
1978
|
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
1458
1979
|
void* dst, size_t dstCapacity,
|
1459
|
-
const void* src, size_t srcSize, const int frame)
|
1980
|
+
const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
|
1460
1981
|
{ /* blockType == blockCompressed */
|
1461
1982
|
const BYTE* ip = (const BYTE*)src;
|
1462
1983
|
/* isLongOffset must be true if there are long offsets.
|
@@ -1471,7 +1992,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1471
1992
|
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
|
1472
1993
|
|
1473
1994
|
/* Decode literals section */
|
1474
|
-
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
1995
|
+
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
|
1475
1996
|
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
|
1476
1997
|
if (ZSTD_isError(litCSize)) return litCSize;
|
1477
1998
|
ip += litCSize;
|
@@ -1519,7 +2040,10 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1519
2040
|
|
1520
2041
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
1521
2042
|
/* else */
|
1522
|
-
|
2043
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
2044
|
+
return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
2045
|
+
else
|
2046
|
+
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
1523
2047
|
#endif
|
1524
2048
|
}
|
1525
2049
|
}
|
@@ -1542,7 +2066,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
|
|
1542
2066
|
{
|
1543
2067
|
size_t dSize;
|
1544
2068
|
ZSTD_checkContinuity(dctx, dst, dstCapacity);
|
1545
|
-
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
|
2069
|
+
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
|
1546
2070
|
dctx->previousDstEnd = (char*)dst + dSize;
|
1547
2071
|
return dSize;
|
1548
2072
|
}
|