zstdlib 0.7.0-x86-mingw32 → 0.10.0-x86-mingw32
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +20 -0
- data/README.md +7 -1
- data/Rakefile +38 -8
- data/ext/{zstdlib → zstdlib_c}/extconf.rb +11 -6
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
- data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/adler32.c +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/compress.c +0 -0
- data/ext/zstdlib_c/zlib-1.2.12/crc32.c +1116 -0
- data/ext/zstdlib_c/zlib-1.2.12/crc32.h +9446 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/deflate.c +78 -30
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/deflate.h +12 -15
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzclose.c +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzguts.h +3 -2
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzlib.c +5 -3
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzread.c +5 -7
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzwrite.c +25 -13
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/infback.c +2 -1
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffast.c +14 -14
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffast.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffixed.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inflate.c +39 -8
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inflate.h +3 -2
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inftrees.c +3 -3
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inftrees.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/trees.c +27 -48
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/trees.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/uncompr.c +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zconf.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zlib.h +123 -100
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zutil.c +2 -2
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zutil.h +12 -9
- data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +46 -22
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/compiler.h +335 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +1 -3
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/debug.c +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +12 -19
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/entropy_common.c +368 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +2 -1
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/error_private.h +159 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +41 -12
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +139 -22
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +47 -23
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +87 -98
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +34 -23
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +4 -4
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +6 -5
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +0 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +10 -10
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_deps.h +111 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_internal.h +191 -145
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_trace.h +163 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +89 -46
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +27 -29
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +2 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/huf_compress.c +1370 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress.c +2917 -868
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +458 -125
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +12 -11
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +4 -2
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +41 -18
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.c +26 -298
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +234 -83
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +313 -138
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +329 -150
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +1 -1
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.c +2104 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.h +125 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +321 -216
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +9 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_ldm_geartab.h +106 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +412 -166
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +169 -453
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstdmt_compress.h +113 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/huf_decompress.c +1044 -403
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +9 -9
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +2 -2
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +450 -105
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.c +913 -273
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +14 -5
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +59 -12
- data/ext/zstdlib_c/zstd-1.5.2/lib/zdict.h +452 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +699 -214
- data/ext/{zstdlib/zstd-1.4.5/lib/common → zstdlib_c/zstd-1.5.2/lib}/zstd_errors.h +2 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +133 -44
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/ext/zstdlib_c/zstd.mk +15 -0
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- data/lib/3.1/zstdlib_c.so +0 -0
- data/lib/zstdlib.rb +2 -2
- metadata +125 -116
- data/ext/zstdlib/zlib-1.2.11/crc32.c +0 -442
- data/ext/zstdlib/zlib-1.2.11/crc32.h +0 -441
- data/ext/zstdlib/zstd-1.4.5/lib/common/compiler.h +0 -175
- data/ext/zstdlib/zstd-1.4.5/lib/common/entropy_common.c +0 -216
- data/ext/zstdlib/zstd-1.4.5/lib/common/error_private.h +0 -80
- data/ext/zstdlib/zstd-1.4.5/lib/common/xxhash.c +0 -864
- data/ext/zstdlib/zstd-1.4.5/lib/common/xxhash.h +0 -285
- data/ext/zstdlib/zstd-1.4.5/lib/compress/huf_compress.c +0 -798
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.c +0 -1138
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.h +0 -67
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstdmt_compress.h +0 -192
- data/ext/zstdlib/zstd.mk +0 -14
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
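A recurring pattern in the zstd_decompress_block.c diff below is zstd 1.5.x's DYNAMIC_BMI2 dispatch: ZSTD_buildFSETable is rewritten as a force-inlined _body() template, stamped out once as a portable _default() copy and once as a _bmi2() copy carrying BMI2_TARGET_ATTRIBUTE (lib/common/portability_macros.h, new in this release per the manifest above), with a runtime flag from the decompression context choosing between them. Below is a minimal standalone sketch of that idiom — all names here (MY_DYNAMIC_BMI2, work_*) are hypothetical, not code from this gem or from zstd:

#include <stdio.h>

/* Sketch of the DYNAMIC_BMI2 dispatch idiom; macros/names are stand-ins. */
#if defined(__GNUC__) && defined(__x86_64__)
#  define MY_DYNAMIC_BMI2 1
#  define MY_BMI2_TARGET __attribute__((target("bmi2")))
#else
#  define MY_DYNAMIC_BMI2 0
#  define MY_BMI2_TARGET
#endif

/* The shared "body": inlined into each compiled variant below. */
static inline unsigned work_body(unsigned x) { return x * 2654435761u; }

/* Portable copy: also keeps the body from being force-inlined at every call site. */
static unsigned work_default(unsigned x) { return work_body(x); }

#if MY_DYNAMIC_BMI2
/* Same body, compiled with BMI2 instructions enabled. */
MY_BMI2_TARGET static unsigned work_bmi2(unsigned x) { return work_body(x); }
#endif

/* Runtime dispatcher: in the diff, zstd derives this flag from cpuid and
 * caches it in the context (what ZSTD_DCtx_get_bmi2(dctx) returns). */
static unsigned work(unsigned x, int bmi2) {
#if MY_DYNAMIC_BMI2
    if (bmi2) return work_bmi2(x);
#endif
    (void)bmi2;
    return work_default(x);
}

int main(void) {
    printf("%u\n", work(42, 0));  /* flag hardcoded; real code would probe CPUID */
    return 0;
}

On toolchains without the target attribute the sketch, like zstd itself, falls back to the portable copy at compile time.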
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -14,7 +14,7 @@
|
|
14
14
|
/*-*******************************************************
|
15
15
|
* Dependencies
|
16
16
|
*********************************************************/
|
17
|
-
#include
|
17
|
+
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
18
18
|
#include "../common/compiler.h" /* prefetch */
|
19
19
|
#include "../common/cpu.h" /* bmi2 */
|
20
20
|
#include "../common/mem.h" /* low level memory routines */
|
@@ -44,7 +44,7 @@
|
|
44
44
|
/*_*******************************************************
|
45
45
|
* Memory operations
|
46
46
|
**********************************************************/
|
47
|
-
static void ZSTD_copy4(void* dst, const void* src) {
|
47
|
+
static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
|
48
48
|
|
49
49
|
|
50
50
|
/*-*************************************************************
|
@@ -69,15 +69,56 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
69
69
|
}
|
70
70
|
}
|
71
71
|
|
72
|
+
/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
|
73
|
+
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
|
74
|
+
const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
|
75
|
+
{
|
76
|
+
if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
|
77
|
+
{
|
78
|
+
/* room for litbuffer to fit without read faulting */
|
79
|
+
dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
|
80
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
81
|
+
dctx->litBufferLocation = ZSTD_in_dst;
|
82
|
+
}
|
83
|
+
else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
|
84
|
+
{
|
85
|
+
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
|
86
|
+
if (splitImmediately) {
|
87
|
+
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
|
88
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
89
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
|
90
|
+
}
|
91
|
+
else {
|
92
|
+
/* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
|
93
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
|
94
|
+
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
|
95
|
+
}
|
96
|
+
dctx->litBufferLocation = ZSTD_split;
|
97
|
+
}
|
98
|
+
else
|
99
|
+
{
|
100
|
+
/* fits entirely within litExtraBuffer, so no split is necessary */
|
101
|
+
dctx->litBuffer = dctx->litExtraBuffer;
|
102
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
103
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
104
|
+
}
|
105
|
+
}
|
72
106
|
|
73
107
|
/* Hidden declaration for fullbench */
|
74
108
|
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
75
|
-
const void* src, size_t srcSize
|
109
|
+
const void* src, size_t srcSize,
|
110
|
+
void* dst, size_t dstCapacity, const streaming_operation streaming);
|
76
111
|
/*! ZSTD_decodeLiteralsBlock() :
|
112
|
+
* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
|
113
|
+
* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
|
114
|
+
* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
|
115
|
+
* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
|
116
|
+
*
|
77
117
|
* @return : nb of bytes read from src (< srcSize )
|
78
118
|
* note : symbol not declared but exposed for fullbench */
|
79
119
|
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
80
|
-
const void* src, size_t srcSize
|
120
|
+
const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
|
121
|
+
void* dst, size_t dstCapacity, const streaming_operation streaming)
|
81
122
|
{
|
82
123
|
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
|
83
124
|
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
|
@@ -90,7 +131,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
90
131
|
case set_repeat:
|
91
132
|
DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
|
92
133
|
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
|
93
|
-
|
134
|
+
ZSTD_FALLTHROUGH;
|
94
135
|
|
95
136
|
case set_compressed:
|
96
137
|
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
|
@@ -99,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
99
140
|
U32 const lhlCode = (istart[0] >> 2) & 3;
|
100
141
|
U32 const lhc = MEM_readLE32(istart);
|
101
142
|
size_t hufSuccess;
|
143
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
102
144
|
switch(lhlCode)
|
103
145
|
{
|
104
146
|
case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
|
@@ -121,8 +163,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
121
163
|
litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
|
122
164
|
break;
|
123
165
|
}
|
166
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
124
167
|
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
125
168
|
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
|
169
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
|
170
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
|
126
171
|
|
127
172
|
/* prefetch huffman table if cold */
|
128
173
|
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
@@ -133,11 +178,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
133
178
|
if (singleStream) {
|
134
179
|
hufSuccess = HUF_decompress1X_usingDTable_bmi2(
|
135
180
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
136
|
-
dctx->HUFptr, dctx
|
181
|
+
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
|
137
182
|
} else {
|
138
183
|
hufSuccess = HUF_decompress4X_usingDTable_bmi2(
|
139
184
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
140
|
-
dctx->HUFptr, dctx
|
185
|
+
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
|
141
186
|
}
|
142
187
|
} else {
|
143
188
|
if (singleStream) {
|
@@ -150,15 +195,22 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
150
195
|
hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
|
151
196
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
152
197
|
istart+lhSize, litCSize, dctx->workspace,
|
153
|
-
sizeof(dctx->workspace), dctx
|
198
|
+
sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
|
154
199
|
#endif
|
155
200
|
} else {
|
156
201
|
hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
|
157
202
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
158
203
|
istart+lhSize, litCSize, dctx->workspace,
|
159
|
-
sizeof(dctx->workspace), dctx
|
204
|
+
sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
|
160
205
|
}
|
161
206
|
}
|
207
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
208
|
+
{
|
209
|
+
ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
210
|
+
ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
211
|
+
dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
212
|
+
dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
|
213
|
+
}
|
162
214
|
|
163
215
|
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
|
164
216
|
|
@@ -166,13 +218,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
166
218
|
dctx->litSize = litSize;
|
167
219
|
dctx->litEntropy = 1;
|
168
220
|
if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
|
169
|
-
memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
170
221
|
return litCSize + lhSize;
|
171
222
|
}
|
172
223
|
|
173
224
|
case set_basic:
|
174
225
|
{ size_t litSize, lhSize;
|
175
226
|
U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
227
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
176
228
|
switch(lhlCode)
|
177
229
|
{
|
178
230
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
@@ -189,23 +241,36 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
189
241
|
break;
|
190
242
|
}
|
191
243
|
|
244
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
245
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
246
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
192
247
|
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
193
248
|
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
|
194
|
-
|
249
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
250
|
+
{
|
251
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
252
|
+
ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
253
|
+
}
|
254
|
+
else
|
255
|
+
{
|
256
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
|
257
|
+
}
|
195
258
|
dctx->litPtr = dctx->litBuffer;
|
196
259
|
dctx->litSize = litSize;
|
197
|
-
memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
198
260
|
return lhSize+litSize;
|
199
261
|
}
|
200
262
|
/* direct reference into compressed stream */
|
201
263
|
dctx->litPtr = istart+lhSize;
|
202
264
|
dctx->litSize = litSize;
|
265
|
+
dctx->litBufferEnd = dctx->litPtr + litSize;
|
266
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
203
267
|
return lhSize+litSize;
|
204
268
|
}
|
205
269
|
|
206
270
|
case set_rle:
|
207
271
|
{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
208
272
|
size_t litSize, lhSize;
|
273
|
+
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
|
209
274
|
switch(lhlCode)
|
210
275
|
{
|
211
276
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
@@ -222,8 +287,19 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
222
287
|
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
223
288
|
break;
|
224
289
|
}
|
290
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
225
291
|
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
226
|
-
|
292
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
293
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
294
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
295
|
+
{
|
296
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
|
297
|
+
ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
|
298
|
+
}
|
299
|
+
else
|
300
|
+
{
|
301
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
|
302
|
+
}
|
227
303
|
dctx->litPtr = dctx->litBuffer;
|
228
304
|
dctx->litSize = litSize;
|
229
305
|
return lhSize+1;
|
@@ -236,7 +312,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
236
312
|
|
237
313
|
/* Default FSE distribution tables.
|
238
314
|
* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
|
239
|
-
* https://github.com/facebook/zstd/blob/
|
315
|
+
* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
|
240
316
|
* They were generated programmatically with following method :
|
241
317
|
* - start from default distributions, present in /lib/common/zstd_internal.h
|
242
318
|
* - generate tables normally, using ZSTD_buildFSETable()
|
@@ -343,7 +419,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
|
|
343
419
|
}; /* ML_defaultDTable */
|
344
420
|
|
345
421
|
|
346
|
-
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue,
|
422
|
+
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
|
347
423
|
{
|
348
424
|
void* ptr = dt;
|
349
425
|
ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
|
@@ -355,7 +431,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
355
431
|
cell->nbBits = 0;
|
356
432
|
cell->nextState = 0;
|
357
433
|
assert(nbAddBits < 255);
|
358
|
-
cell->nbAdditionalBits =
|
434
|
+
cell->nbAdditionalBits = nbAddBits;
|
359
435
|
cell->baseValue = baseValue;
|
360
436
|
}
|
361
437
|
|
@@ -364,23 +440,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
364
440
|
* generate FSE decoding table for one symbol (ll, ml or off)
|
365
441
|
* cannot fail if input is valid =>
|
366
442
|
* all inputs are presumed validated at this stage */
|
367
|
-
|
368
|
-
|
443
|
+
FORCE_INLINE_TEMPLATE
|
444
|
+
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
369
445
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
370
|
-
const U32* baseValue, const
|
371
|
-
unsigned tableLog)
|
446
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
447
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
372
448
|
{
|
373
449
|
ZSTD_seqSymbol* const tableDecode = dt+1;
|
374
|
-
U16 symbolNext[MaxSeq+1];
|
375
|
-
|
376
450
|
U32 const maxSV1 = maxSymbolValue + 1;
|
377
451
|
U32 const tableSize = 1 << tableLog;
|
378
|
-
|
452
|
+
|
453
|
+
U16* symbolNext = (U16*)wksp;
|
454
|
+
BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
|
455
|
+
U32 highThreshold = tableSize - 1;
|
456
|
+
|
379
457
|
|
380
458
|
/* Sanity Checks */
|
381
459
|
assert(maxSymbolValue <= MaxSeq);
|
382
460
|
assert(tableLog <= MaxFSELog);
|
383
|
-
|
461
|
+
assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
|
462
|
+
(void)wkspSize;
|
384
463
|
/* Init, lay down lowprob symbols */
|
385
464
|
{ ZSTD_seqSymbol_header DTableH;
|
386
465
|
DTableH.tableLog = tableLog;
|
@@ -396,16 +475,69 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
396
475
|
assert(normalizedCounter[s]>=0);
|
397
476
|
symbolNext[s] = (U16)normalizedCounter[s];
|
398
477
|
} } }
|
399
|
-
|
478
|
+
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
|
400
479
|
}
|
401
480
|
|
402
481
|
/* Spread symbols */
|
403
|
-
|
482
|
+
assert(tableSize <= 512);
|
483
|
+
/* Specialized symbol spreading for the case when there are
|
484
|
+
* no low probability (-1 count) symbols. When compressing
|
485
|
+
* small blocks we avoid low probability symbols to hit this
|
486
|
+
* case, since header decoding speed matters more.
|
487
|
+
*/
|
488
|
+
if (highThreshold == tableSize - 1) {
|
489
|
+
size_t const tableMask = tableSize-1;
|
490
|
+
size_t const step = FSE_TABLESTEP(tableSize);
|
491
|
+
/* First lay down the symbols in order.
|
492
|
+
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
|
493
|
+
* misses since small blocks generally have small table logs, so nearly
|
494
|
+
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
|
495
|
+
* our buffer to handle the over-write.
|
496
|
+
*/
|
497
|
+
{
|
498
|
+
U64 const add = 0x0101010101010101ull;
|
499
|
+
size_t pos = 0;
|
500
|
+
U64 sv = 0;
|
501
|
+
U32 s;
|
502
|
+
for (s=0; s<maxSV1; ++s, sv += add) {
|
503
|
+
int i;
|
504
|
+
int const n = normalizedCounter[s];
|
505
|
+
MEM_write64(spread + pos, sv);
|
506
|
+
for (i = 8; i < n; i += 8) {
|
507
|
+
MEM_write64(spread + pos + i, sv);
|
508
|
+
}
|
509
|
+
pos += n;
|
510
|
+
}
|
511
|
+
}
|
512
|
+
/* Now we spread those positions across the table.
|
513
|
+
* The benefit of doing it in two stages is that we avoid the the
|
514
|
+
* variable size inner loop, which caused lots of branch misses.
|
515
|
+
* Now we can run through all the positions without any branch misses.
|
516
|
+
* We unroll the loop twice, since that is what emperically worked best.
|
517
|
+
*/
|
518
|
+
{
|
519
|
+
size_t position = 0;
|
520
|
+
size_t s;
|
521
|
+
size_t const unroll = 2;
|
522
|
+
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
523
|
+
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
524
|
+
size_t u;
|
525
|
+
for (u = 0; u < unroll; ++u) {
|
526
|
+
size_t const uPosition = (position + (u * step)) & tableMask;
|
527
|
+
tableDecode[uPosition].baseValue = spread[s + u];
|
528
|
+
}
|
529
|
+
position = (position + (unroll * step)) & tableMask;
|
530
|
+
}
|
531
|
+
assert(position == 0);
|
532
|
+
}
|
533
|
+
} else {
|
534
|
+
U32 const tableMask = tableSize-1;
|
404
535
|
U32 const step = FSE_TABLESTEP(tableSize);
|
405
536
|
U32 s, position = 0;
|
406
537
|
for (s=0; s<maxSV1; s++) {
|
407
538
|
int i;
|
408
|
-
|
539
|
+
int const n = normalizedCounter[s];
|
540
|
+
for (i=0; i<n; i++) {
|
409
541
|
tableDecode[position].baseValue = s;
|
410
542
|
position = (position + step) & tableMask;
|
411
543
|
while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
|
@@ -414,16 +546,56 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
414
546
|
}
|
415
547
|
|
416
548
|
/* Build Decoding table */
|
417
|
-
{
|
549
|
+
{
|
550
|
+
U32 u;
|
418
551
|
for (u=0; u<tableSize; u++) {
|
419
552
|
U32 const symbol = tableDecode[u].baseValue;
|
420
553
|
U32 const nextState = symbolNext[symbol]++;
|
421
554
|
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
|
422
555
|
tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
423
556
|
assert(nbAdditionalBits[symbol] < 255);
|
424
|
-
tableDecode[u].nbAdditionalBits =
|
557
|
+
tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
|
425
558
|
tableDecode[u].baseValue = baseValue[symbol];
|
426
|
-
|
559
|
+
}
|
560
|
+
}
|
561
|
+
}
|
562
|
+
|
563
|
+
/* Avoids the FORCE_INLINE of the _body() function. */
|
564
|
+
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
565
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
566
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
567
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
568
|
+
{
|
569
|
+
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
570
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
571
|
+
}
|
572
|
+
|
573
|
+
#if DYNAMIC_BMI2
|
574
|
+
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
575
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
576
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
577
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
578
|
+
{
|
579
|
+
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
580
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
581
|
+
}
|
582
|
+
#endif
|
583
|
+
|
584
|
+
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
585
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
586
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
587
|
+
unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
|
588
|
+
{
|
589
|
+
#if DYNAMIC_BMI2
|
590
|
+
if (bmi2) {
|
591
|
+
ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
|
592
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
593
|
+
return;
|
594
|
+
}
|
595
|
+
#endif
|
596
|
+
(void)bmi2;
|
597
|
+
ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
|
598
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
427
599
|
}
|
428
600
|
|
429
601
|
|
@@ -433,9 +605,10 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
433
605
|
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
|
434
606
|
symbolEncodingType_e type, unsigned max, U32 maxLog,
|
435
607
|
const void* src, size_t srcSize,
|
436
|
-
const U32* baseValue, const
|
608
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
437
609
|
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
438
|
-
int ddictIsCold, int nbSeq
|
610
|
+
int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
|
611
|
+
int bmi2)
|
439
612
|
{
|
440
613
|
switch(type)
|
441
614
|
{
|
@@ -444,7 +617,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
444
617
|
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
|
445
618
|
{ U32 const symbol = *(const BYTE*)src;
|
446
619
|
U32 const baseline = baseValue[symbol];
|
447
|
-
|
620
|
+
U8 const nbBits = nbAdditionalBits[symbol];
|
448
621
|
ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
|
449
622
|
}
|
450
623
|
*DTablePtr = DTableSpace;
|
@@ -467,7 +640,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
467
640
|
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
|
468
641
|
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
|
469
642
|
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
|
470
|
-
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
|
643
|
+
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
|
471
644
|
*DTablePtr = DTableSpace;
|
472
645
|
return headerSize;
|
473
646
|
}
|
@@ -480,7 +653,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
480
653
|
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
481
654
|
const void* src, size_t srcSize)
|
482
655
|
{
|
483
|
-
const BYTE* const istart = (const BYTE*
|
656
|
+
const BYTE* const istart = (const BYTE*)src;
|
484
657
|
const BYTE* const iend = istart + srcSize;
|
485
658
|
const BYTE* ip = istart;
|
486
659
|
int nbSeq;
|
@@ -499,7 +672,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
499
672
|
if (nbSeq > 0x7F) {
|
500
673
|
if (nbSeq == 0xFF) {
|
501
674
|
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
|
502
|
-
nbSeq = MEM_readLE16(ip) + LONGNBSEQ
|
675
|
+
nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
|
676
|
+
ip+=2;
|
503
677
|
} else {
|
504
678
|
RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
|
505
679
|
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
@@ -520,7 +694,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
520
694
|
ip, iend-ip,
|
521
695
|
LL_base, LL_bits,
|
522
696
|
LL_defaultDTable, dctx->fseEntropy,
|
523
|
-
dctx->ddictIsCold, nbSeq
|
697
|
+
dctx->ddictIsCold, nbSeq,
|
698
|
+
dctx->workspace, sizeof(dctx->workspace),
|
699
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
524
700
|
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
525
701
|
ip += llhSize;
|
526
702
|
}
|
@@ -530,7 +706,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
530
706
|
ip, iend-ip,
|
531
707
|
OF_base, OF_bits,
|
532
708
|
OF_defaultDTable, dctx->fseEntropy,
|
533
|
-
dctx->ddictIsCold, nbSeq
|
709
|
+
dctx->ddictIsCold, nbSeq,
|
710
|
+
dctx->workspace, sizeof(dctx->workspace),
|
711
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
534
712
|
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
535
713
|
ip += ofhSize;
|
536
714
|
}
|
@@ -540,7 +718,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
540
718
|
ip, iend-ip,
|
541
719
|
ML_base, ML_bits,
|
542
720
|
ML_defaultDTable, dctx->fseEntropy,
|
543
|
-
dctx->ddictIsCold, nbSeq
|
721
|
+
dctx->ddictIsCold, nbSeq,
|
722
|
+
dctx->workspace, sizeof(dctx->workspace),
|
723
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
544
724
|
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
545
725
|
ip += mlhSize;
|
546
726
|
}
|
@@ -554,7 +734,6 @@ typedef struct {
|
|
554
734
|
size_t litLength;
|
555
735
|
size_t matchLength;
|
556
736
|
size_t offset;
|
557
|
-
const BYTE* match;
|
558
737
|
} seq_t;
|
559
738
|
|
560
739
|
typedef struct {
|
@@ -568,9 +747,6 @@ typedef struct {
|
|
568
747
|
ZSTD_fseState stateOffb;
|
569
748
|
ZSTD_fseState stateML;
|
570
749
|
size_t prevOffset[ZSTD_REP_NUM];
|
571
|
-
const BYTE* prefixStart;
|
572
|
-
const BYTE* dictEnd;
|
573
|
-
size_t pos;
|
574
750
|
} seqState_t;
|
575
751
|
|
576
752
|
/*! ZSTD_overlapCopy8() :
|
@@ -613,7 +789,7 @@ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
|
613
789
|
* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
|
614
790
|
* The src buffer must be before the dst buffer.
|
615
791
|
*/
|
616
|
-
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
792
|
+
static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
617
793
|
ptrdiff_t const diff = op - ip;
|
618
794
|
BYTE* const oend = op + length;
|
619
795
|
|
@@ -629,6 +805,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
629
805
|
/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
|
630
806
|
assert(length >= 8);
|
631
807
|
ZSTD_overlapCopy8(&op, &ip, diff);
|
808
|
+
length -= 8;
|
632
809
|
assert(op - ip >= 8);
|
633
810
|
assert(op <= oend);
|
634
811
|
}
|
@@ -643,8 +820,31 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
643
820
|
assert(oend > oend_w);
|
644
821
|
ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
|
645
822
|
ip += oend_w - op;
|
646
|
-
op
|
823
|
+
op += oend_w - op;
|
824
|
+
}
|
825
|
+
/* Handle the leftovers. */
|
826
|
+
while (op < oend) *op++ = *ip++;
|
827
|
+
}
|
828
|
+
|
829
|
+
/* ZSTD_safecopyDstBeforeSrc():
|
830
|
+
* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
|
831
|
+
* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
|
832
|
+
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
|
833
|
+
ptrdiff_t const diff = op - ip;
|
834
|
+
BYTE* const oend = op + length;
|
835
|
+
|
836
|
+
if (length < 8 || diff > -8) {
|
837
|
+
/* Handle short lengths, close overlaps, and dst not before src. */
|
838
|
+
while (op < oend) *op++ = *ip++;
|
839
|
+
return;
|
840
|
+
}
|
841
|
+
|
842
|
+
if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
|
843
|
+
ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
|
844
|
+
ip += oend - WILDCOPY_OVERLENGTH - op;
|
845
|
+
op += oend - WILDCOPY_OVERLENGTH - op;
|
647
846
|
}
|
847
|
+
|
648
848
|
/* Handle the leftovers. */
|
649
849
|
while (op < oend) *op++ = *ip++;
|
650
850
|
}
|
@@ -659,9 +859,9 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
659
859
|
*/
|
660
860
|
FORCE_NOINLINE
|
661
861
|
size_t ZSTD_execSequenceEnd(BYTE* op,
|
662
|
-
|
663
|
-
|
664
|
-
|
862
|
+
BYTE* const oend, seq_t sequence,
|
863
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
864
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
665
865
|
{
|
666
866
|
BYTE* const oLitEnd = op + sequence.litLength;
|
667
867
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
@@ -684,27 +884,76 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
684
884
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
685
885
|
/* offset beyond prefix */
|
686
886
|
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
687
|
-
match = dictEnd - (prefixStart-match);
|
887
|
+
match = dictEnd - (prefixStart - match);
|
688
888
|
if (match + sequence.matchLength <= dictEnd) {
|
689
|
-
|
889
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
690
890
|
return sequenceLength;
|
691
891
|
}
|
692
892
|
/* span extDict & currentPrefixSegment */
|
693
893
|
{ size_t const length1 = dictEnd - match;
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
894
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
895
|
+
op = oLitEnd + length1;
|
896
|
+
sequence.matchLength -= length1;
|
897
|
+
match = prefixStart;
|
898
|
+
}
|
899
|
+
}
|
900
|
+
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
901
|
+
return sequenceLength;
|
902
|
+
}
|
903
|
+
|
904
|
+
/* ZSTD_execSequenceEndSplitLitBuffer():
|
905
|
+
* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
|
906
|
+
*/
|
907
|
+
FORCE_NOINLINE
|
908
|
+
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
|
909
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
910
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
911
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
912
|
+
{
|
913
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
914
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
915
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
916
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
917
|
+
|
918
|
+
|
919
|
+
/* bounds checks : careful of address space overflow in 32-bit mode */
|
920
|
+
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
|
921
|
+
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
|
922
|
+
assert(op < op + sequenceLength);
|
923
|
+
assert(oLitEnd < op + sequenceLength);
|
924
|
+
|
925
|
+
/* copy literals */
|
926
|
+
RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
|
927
|
+
ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
|
928
|
+
op = oLitEnd;
|
929
|
+
*litPtr = iLitEnd;
|
930
|
+
|
931
|
+
/* copy Match */
|
932
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
933
|
+
/* offset beyond prefix */
|
934
|
+
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
935
|
+
match = dictEnd - (prefixStart - match);
|
936
|
+
if (match + sequence.matchLength <= dictEnd) {
|
937
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
938
|
+
return sequenceLength;
|
939
|
+
}
|
940
|
+
/* span extDict & currentPrefixSegment */
|
941
|
+
{ size_t const length1 = dictEnd - match;
|
942
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
943
|
+
op = oLitEnd + length1;
|
944
|
+
sequence.matchLength -= length1;
|
945
|
+
match = prefixStart;
|
946
|
+
}
|
947
|
+
}
|
699
948
|
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
700
949
|
return sequenceLength;
|
701
950
|
}
|
702
951
|
|
703
952
|
HINT_INLINE
|
704
953
|
size_t ZSTD_execSequence(BYTE* op,
|
705
|
-
|
706
|
-
|
707
|
-
|
954
|
+
BYTE* const oend, seq_t sequence,
|
955
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
956
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
708
957
|
{
|
709
958
|
BYTE* const oLitEnd = op + sequence.litLength;
|
710
959
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
@@ -713,6 +962,98 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
713
962
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
714
963
|
const BYTE* match = oLitEnd - sequence.offset;
|
715
964
|
|
965
|
+
assert(op != NULL /* Precondition */);
|
966
|
+
assert(oend_w < oend /* No underflow */);
|
967
|
+
/* Handle edge cases in a slow path:
|
968
|
+
* - Read beyond end of literals
|
969
|
+
* - Match end is within WILDCOPY_OVERLIMIT of oend
|
970
|
+
* - 32-bit mode and the match length overflows
|
971
|
+
*/
|
972
|
+
if (UNLIKELY(
|
973
|
+
iLitEnd > litLimit ||
|
974
|
+
oMatchEnd > oend_w ||
|
975
|
+
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
976
|
+
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
977
|
+
|
978
|
+
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
979
|
+
assert(op <= oLitEnd /* No overflow */);
|
980
|
+
assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
|
981
|
+
assert(oMatchEnd <= oend /* No underflow */);
|
982
|
+
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
|
983
|
+
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
|
984
|
+
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
|
985
|
+
|
986
|
+
/* Copy Literals:
|
987
|
+
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
|
988
|
+
* We likely don't need the full 32-byte wildcopy.
|
989
|
+
*/
|
990
|
+
assert(WILDCOPY_OVERLENGTH >= 16);
|
991
|
+
ZSTD_copy16(op, (*litPtr));
|
992
|
+
if (UNLIKELY(sequence.litLength > 16)) {
|
993
|
+
ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
|
994
|
+
}
|
995
|
+
op = oLitEnd;
|
996
|
+
*litPtr = iLitEnd; /* update for next sequence */
|
997
|
+
|
998
|
+
/* Copy Match */
|
999
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
1000
|
+
/* offset beyond prefix -> go into extDict */
|
1001
|
+
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
1002
|
+
match = dictEnd + (match - prefixStart);
|
1003
|
+
if (match + sequence.matchLength <= dictEnd) {
|
1004
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
1005
|
+
return sequenceLength;
|
1006
|
+
}
|
1007
|
+
/* span extDict & currentPrefixSegment */
|
1008
|
+
{ size_t const length1 = dictEnd - match;
|
1009
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
1010
|
+
op = oLitEnd + length1;
|
1011
|
+
sequence.matchLength -= length1;
|
1012
|
+
match = prefixStart;
|
1013
|
+
}
|
1014
|
+
}
|
1015
|
+
/* Match within prefix of 1 or more bytes */
|
1016
|
+
assert(op <= oMatchEnd);
|
1017
|
+
assert(oMatchEnd <= oend_w);
|
1018
|
+
assert(match >= prefixStart);
|
1019
|
+
assert(sequence.matchLength >= 1);
|
1020
|
+
|
1021
|
+
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
1022
|
+
* without overlap checking.
|
1023
|
+
*/
|
1024
|
+
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
1025
|
+
/* We bet on a full wildcopy for matches, since we expect matches to be
|
1026
|
+
* longer than literals (in general). In silesia, ~10% of matches are longer
|
1027
|
+
* than 16 bytes.
|
1028
|
+
*/
|
1029
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
|
1030
|
+
return sequenceLength;
|
1031
|
+
}
|
1032
|
+
assert(sequence.offset < WILDCOPY_VECLEN);
|
1033
|
+
|
1034
|
+
/* Copy 8 bytes and spread the offset to be >= 8. */
|
1035
|
+
ZSTD_overlapCopy8(&op, &match, sequence.offset);
|
1036
|
+
|
1037
|
+
/* If the match length is > 8 bytes, then continue with the wildcopy. */
|
1038
|
+
if (sequence.matchLength > 8) {
|
1039
|
+
assert(op < oMatchEnd);
|
1040
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
|
1041
|
+
}
|
1042
|
+
return sequenceLength;
|
1043
|
+
}
|
1044
|
+
|
1045
|
+
HINT_INLINE
|
1046
|
+
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
|
1047
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
1048
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
1049
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
1050
|
+
{
|
1051
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
1052
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
1053
|
+
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
1054
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
1055
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
1056
|
+
|
716
1057
|
assert(op != NULL /* Precondition */);
|
717
1058
|
assert(oend_w < oend /* No underflow */);
|
718
1059
|
/* Handle edge cases in a slow path:
|
@@ -724,7 +1065,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
724
1065
|
iLitEnd > litLimit ||
|
725
1066
|
oMatchEnd > oend_w ||
|
726
1067
|
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
727
|
-
return
|
1068
|
+
return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
728
1069
|
|
729
1070
|
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
730
1071
|
assert(op <= oLitEnd /* No overflow */);
|
@@ -752,12 +1093,12 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
752
1093
|
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
753
1094
|
match = dictEnd + (match - prefixStart);
|
754
1095
|
if (match + sequence.matchLength <= dictEnd) {
|
755
|
-
|
1096
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
756
1097
|
return sequenceLength;
|
757
1098
|
}
|
758
1099
|
/* span extDict & currentPrefixSegment */
|
759
1100
|
{ size_t const length1 = dictEnd - match;
|
760
|
-
|
1101
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
761
1102
|
op = oLitEnd + length1;
|
762
1103
|
sequence.matchLength -= length1;
|
763
1104
|
match = prefixStart;
|
@@ -792,6 +1133,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
792
1133
|
return sequenceLength;
|
793
1134
|
}
|
794
1135
|
|
1136
|
+
|
795
1137
|
static void
|
796
1138
|
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
|
797
1139
|
{
|
@@ -805,20 +1147,10 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS
|
|
805
1147
|
}
|
806
1148
|
|
807
1149
|
FORCE_INLINE_TEMPLATE void
|
808
|
-
|
1150
|
+
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
|
809
1151
|
{
|
810
|
-
ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
|
811
|
-
U32 const nbBits = DInfo.nbBits;
|
812
1152
|
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
813
|
-
DStatePtr->state =
|
814
|
-
}
|
815
|
-
|
816
|
-
FORCE_INLINE_TEMPLATE void
|
817
|
-
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
|
818
|
-
{
|
819
|
-
U32 const nbBits = DInfo.nbBits;
|
820
|
-
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
821
|
-
DStatePtr->state = DInfo.nextState + lowBits;
|
1153
|
+
DStatePtr->state = nextState + lowBits;
|
822
1154
|
}
|
823
1155
|
|
824
1156
|
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
@@ -832,123 +1164,112 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD
|
|
832
1164
|
: 0)
|
833
1165
|
|
834
1166
|
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
|
835
|
-
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
|
836
1167
|
|
837
1168
|
FORCE_INLINE_TEMPLATE seq_t
|
838
|
-
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets
|
1169
|
+
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
839
1170
|
{
|
840
1171
|
seq_t seq;
|
841
|
-
ZSTD_seqSymbol const llDInfo = seqState->stateLL.table
|
842
|
-
ZSTD_seqSymbol const mlDInfo = seqState->stateML.table
|
843
|
-
ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table
|
844
|
-
|
845
|
-
|
846
|
-
U32 const ofBase = ofDInfo
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
if (
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
1172
|
+
const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
|
1173
|
+
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
|
1174
|
+
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
|
1175
|
+
seq.matchLength = mlDInfo->baseValue;
|
1176
|
+
seq.litLength = llDInfo->baseValue;
|
1177
|
+
{ U32 const ofBase = ofDInfo->baseValue;
|
1178
|
+
BYTE const llBits = llDInfo->nbAdditionalBits;
|
1179
|
+
BYTE const mlBits = mlDInfo->nbAdditionalBits;
|
1180
|
+
BYTE const ofBits = ofDInfo->nbAdditionalBits;
|
1181
|
+
BYTE const totalBits = llBits+mlBits+ofBits;
|
1182
|
+
|
1183
|
+
U16 const llNext = llDInfo->nextState;
|
1184
|
+
U16 const mlNext = mlDInfo->nextState;
|
1185
|
+
U16 const ofNext = ofDInfo->nextState;
|
1186
|
+
U32 const llnbBits = llDInfo->nbBits;
|
1187
|
+
U32 const mlnbBits = mlDInfo->nbBits;
|
1188
|
+
U32 const ofnbBits = ofDInfo->nbBits;
|
1189
|
+
/*
|
1190
|
+
* As gcc has better branch and block analyzers, sometimes it is only
|
1191
|
+
* valuable to mark likelyness for clang, it gives around 3-4% of
|
1192
|
+
* performance.
|
1193
|
+
*/
|
1194
|
+
|
1195
|
+
/* sequence */
|
1196
|
+
{ size_t offset;
|
1197
|
+
#if defined(__clang__)
|
1198
|
+
if (LIKELY(ofBits > 1)) {
|
1199
|
+
#else
|
1200
|
+
if (ofBits > 1) {
|
1201
|
+
#endif
|
1202
|
+
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
1203
|
+
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
1204
|
+
assert(ofBits <= MaxOff);
|
1205
|
+
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
|
1206
|
+
U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
|
1207
|
+
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
1208
|
+
BIT_reloadDStream(&seqState->DStream);
|
1209
|
+
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
1210
|
+
assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
|
1211
|
+
} else {
|
1212
|
+
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
1213
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
880
1214
|
}
|
1215
|
+
seqState->prevOffset[2] = seqState->prevOffset[1];
|
1216
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
1217
|
+
seqState->prevOffset[0] = offset;
|
881
1218
|
} else {
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
seqState->prevOffset[
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
897
|
-
BIT_reloadDStream(&seqState->DStream);
|
898
|
-
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
899
|
-
BIT_reloadDStream(&seqState->DStream);
|
900
|
-
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
901
|
-
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
902
|
-
|
903
|
-
seq.litLength = llBase;
|
904
|
-
if (llBits > 0)
|
905
|
-
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
906
|
-
|
907
|
-
if (MEM_32bits())
|
908
|
-
BIT_reloadDStream(&seqState->DStream);
|
909
|
-
|
910
|
-
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
911
|
-
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
912
|
-
|
913
|
-
if (prefetch == ZSTD_p_prefetch) {
|
914
|
-
size_t const pos = seqState->pos + seq.litLength;
|
915
|
-
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
916
|
-
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
917
|
-
* No consequence though : no memory access will occur, offset is only used for prefetching */
|
918
|
-
seqState->pos = pos + seq.matchLength;
|
919
|
-
}
|
920
|
-
|
921
|
-
/* ANS state update
|
922
|
-
* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
|
923
|
-
* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
|
924
|
-
* Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
|
925
|
-
* better option, so it is the default for other compilers. But, if you
|
926
|
-
* measure that it is worse, please put up a pull request.
|
927
|
-
*/
|
928
|
-
{
|
929
|
-
#if defined(__GNUC__) && !defined(__clang__)
|
930
|
-
const int kUseUpdateFseState = 1;
|
931
|
-
#else
|
932
|
-
const int kUseUpdateFseState = 0;
|
933
|
-
#endif
|
934
|
-
if (kUseUpdateFseState) {
|
935
|
-
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
936
|
-
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
937
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
938
|
-
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
939
|
-
} else {
|
940
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
|
941
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
|
942
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
943
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
|
+                U32 const ll0 = (llDInfo->baseValue == 0);
+                if (LIKELY((ofBits == 0))) {
+                    offset = seqState->prevOffset[ll0];
+                    seqState->prevOffset[1] = seqState->prevOffset[!ll0];
+                    seqState->prevOffset[0] = offset;
+                } else {
+                    offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+                    {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                        if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                        seqState->prevOffset[1] = seqState->prevOffset[0];
+                        seqState->prevOffset[0] = offset = temp;
+            }   }   }
+            seq.offset = offset;
         }
+
+#if defined(__clang__)
+        if (UNLIKELY(mlBits > 0))
+#else
+        if (mlBits > 0)
+#endif
+            seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
+        if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+            BIT_reloadDStream(&seqState->DStream);
+        if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+            BIT_reloadDStream(&seqState->DStream);
+        /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+        ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+
+#if defined(__clang__)
+        if (UNLIKELY(llBits > 0))
+#else
+        if (llBits > 0)
+#endif
+            seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
+        if (MEM_32bits())
+            BIT_reloadDStream(&seqState->DStream);
+
+        DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+                    (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
+        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
+        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
     }

     return seq;
 }
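
The offset handling added above (new lines 1219-1231) is zstd's repeat-offset (repcode) rule: the three most recently used offsets are kept in prevOffset[0..2], small offset values select from that history (shifted by one slot when the sequence carries no literals, i.e. when ll0 is set, since repeating the last offset with no literals in between would be pointless), and prevOffset[0] - 1 stands in for the overflow slot, with a guard forcing a corrupt zero offset to 1. A standalone sketch of the same update rule, with simplified inputs and hypothetical names (rep, update_rep_offsets; not zstd's API):

    #include <stddef.h>

    /* Repeat-offset update in the spirit of RFC 8878: `value` is the decoded
     * offset value (1..3 pick from history, >3 encodes a fresh offset of
     * value-3) and `ll0` is 1 when the sequence carries no literals. */
    static size_t update_rep_offsets(size_t rep[3], size_t value, int ll0)
    {
        size_t offset;
        if (value > 3) {                          /* fresh offset: push history */
            offset = value - 3;
            rep[2] = rep[1];
            rep[1] = rep[0];
            rep[0] = offset;
        } else {
            size_t const idx = value - 1 + (size_t)ll0;   /* ll0 shifts the index */
            if (idx == 0) {
                offset = rep[0];                  /* most recent: order unchanged */
            } else {
                offset = (idx < 3) ? rep[idx] : rep[0] - 1;
                if (offset == 0) offset = 1;      /* 0 is invalid: corrupted input */
                if (idx != 1) rep[2] = rep[1];    /* idx 1 only swaps rep[0]/rep[1] */
                rep[1] = rep[0];
                rep[0] = offset;
            }
        }
        return offset;
    }

The inlined version above fuses this with the FSE bit reads (ofBase plus one bit via BIT_readBitsFast), so the common ofBits == 0 repeat path stays branch-cheap.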

 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-
+MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
 {
     size_t const windowSize = dctx->fParams.windowSize;
     /* No dictionary used. */
@@ -969,6 +1290,7 @@ MEM_STATIC void ZSTD_assertValidSequence(
                 seq_t const seq,
                 BYTE const* prefixStart, BYTE const* virtualStart)
 {
+#if DEBUGLEVEL >= 1
     size_t const windowSize = dctx->fParams.windowSize;
     size_t const sequenceSize = seq.litLength + seq.matchLength;
     BYTE const* const oLitEnd = op + seq.litLength;
@@ -986,13 +1308,18 @@ MEM_STATIC void ZSTD_assertValidSequence(
         /* Offset must be within our window. */
         assert(seq.offset <= windowSize);
     }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
 }
 #endif

 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+
+
 FORCE_INLINE_TEMPLATE size_t
 DONT_VECTORIZE
-
+ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
                                void* dst, size_t maxDstSize,
                          const void* seqStart, size_t seqSize, int nbSeq,
                                const ZSTD_longOffset_e isLongOffset,
@@ -1000,21 +1327,20 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE*
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
-    const BYTE*
+    const BYTE* litBufferEnd = dctx->litBufferEnd;
     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
     const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
-    DEBUGLOG(5, "
+    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
     (void)frame;

     /* Regen sequences */
     if (nbSeq) {
         seqState_t seqState;
-        size_t error = 0;
         dctx->fseEntropy = 1;
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         RETURN_ERROR_IF(
@@ -1030,70 +1356,255 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
             BIT_DStream_endOfBuffer < BIT_DStream_completed &&
             BIT_DStream_completed < BIT_DStream_overflow);

+        /* decompress without overrunning litPtr begins */
+        {
+            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            /* Align the decompression loop to 32 + 16 bytes.
+             *
+             * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+             * speed swings based on the alignment of the decompression loop. This
+             * performance swing is caused by parts of the decompression loop falling
+             * out of the DSB. The entire decompression loop should fit in the DSB,
+             * when it can't we get much worse performance. You can measure if you've
+             * hit the good case or the bad case with this perf command for some
+             * compressed file test.zst:
+             *
+             *     perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+             *               -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+             *
+             * If you see most cycles served out of the MITE you've hit the bad case.
+             * If you see most cycles served out of the DSB you've hit the good case.
+             * If it is pretty even then you may be in an okay case.
+             *
+             * This issue has been reproduced on the following CPUs:
+             * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+             *             Use Instruments->Counters to get DSB/MITE cycles.
+             *             I never got performance swings, but I was able to
+             *             go from the good case of mostly DSB to half of the
+             *             cycles served from MITE.
+             * - Coffeelake: Intel i9-9900k
+             * - Coffeelake: Intel i7-9700k
+             *
+             * I haven't been able to reproduce the instability or DSB misses on any
+             * of the following CPUS:
+             * - Haswell
+             * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+             * - Skylake
+             *
+             * Alignment is done for each of the three major decompression loops:
+             *   - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
+             *   - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
+             *   - ZSTD_decompressSequences_body
+             * Alignment choices are made to minimize large swings on bad cases and influence on performance
+             * from changes external to this code, rather than to overoptimize on the current commit.
+             *
+             * If you are seeing performance stability this script can help test.
+             * It tests on 4 commits in zstd where I saw performance change.
+             *
+             * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+             */
 #if defined(__GNUC__) && defined(__x86_64__)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            __asm__(".p2align 6");
+# if __GNUC__ >= 7
+            /* good for gcc-7, gcc-9, and gcc-11 */
+            __asm__("nop");
+            __asm__(".p2align 5");
+            __asm__("nop");
+            __asm__(".p2align 4");
+# if __GNUC__ == 8 || __GNUC__ == 10
+            /* good for gcc-8 and gcc-10 */
+            __asm__("nop");
+            __asm__(".p2align 3");
+# endif
+# endif
+#endif
+
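The `.p2align`/`nop` sequence added above (new lines 1407-1420) pads the entry of the hot decode loop so it starts at a compiler-version-specific offset within a 64-byte line, the goal being to keep the whole loop inside the µop cache (DSB) rather than the legacy decoder (MITE), as the preceding comment explains. The idiom reads as "align to 64 bytes, then nudge the entry point forward a few bytes"; a condensed, purely hypothetical macro form (not something zstdlib defines):

    /* Illustrative only: align the next instruction to a 64-byte boundary,
     * then emit a nop and a weaker alignment so the loop entry lands a few
     * bytes past the line start. GCC-compatible compilers on x86-64 only. */
    #if defined(__GNUC__) && defined(__x86_64__)
    #  define ALIGN_DECODE_LOOP() do { \
           __asm__(".p2align 6");      \
           __asm__("nop");             \
           __asm__(".p2align 4");      \
       } while (0)
    #else
    #  define ALIGN_DECODE_LOOP() do { } while (0)
    #endif

placed immediately before the `for` statement it is meant to align.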
+            /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
+            for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
+                size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                    return oneSeqSize;
+                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                op += oneSeqSize;
+                if (UNLIKELY(!--nbSeq))
+                    break;
+                BIT_reloadDStream(&(seqState.DStream));
+                sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            }
+
+            /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
+            if (nbSeq > 0) {
+                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+                if (leftoverLit)
+                {
+                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                    sequence.litLength -= leftoverLit;
+                    op += leftoverLit;
+                }
+                litPtr = dctx->litExtraBuffer;
+                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
+                {
+                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                    assert(!ZSTD_isError(oneSeqSize));
+                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+                    if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                        return oneSeqSize;
+                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                    op += oneSeqSize;
+                    if (--nbSeq)
+                        BIT_reloadDStream(&(seqState.DStream));
+                }
+            }
+        }
+
+        if (nbSeq > 0) /* there is remaining lit from extra buffer */
+        {
+
+#if defined(__GNUC__) && defined(__x86_64__)
+            __asm__(".p2align 6");
+            __asm__("nop");
+# if __GNUC__ != 7
+            /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
+            __asm__(".p2align 4");
+            __asm__("nop");
+            __asm__(".p2align 3");
+# elif __GNUC__ >= 11
+            __asm__(".p2align 3");
+# else
+            __asm__(".p2align 5");
+            __asm__("nop");
+            __asm__(".p2align 3");
+# endif
+#endif
+
+            for (; ; ) {
+                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                    return oneSeqSize;
+                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                op += oneSeqSize;
+                if (UNLIKELY(!--nbSeq))
+                    break;
+                BIT_reloadDStream(&(seqState.DStream));
+            }
+        }
+
+        /* check if reached exact end */
+        DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
+    {
+        size_t const lastLLSize = litBufferEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+        litPtr = dctx->litExtraBuffer;
+        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+        dctx->litBufferLocation = ZSTD_not_in_dst;
+    }
+    {   size_t const lastLLSize = litBufferEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
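
The function above is the main new piece of this drop: literals can now be decoded directly into dst up to a split point, with the tail routed through a small litExtraBuffer, and dctx->litBufferLocation records which region litPtr currently walks (the names ZSTD_split, ZSTD_in_dst, and ZSTD_not_in_dst all appear in this diff). A schematic of the state being tracked; the struct, helper, and enum ordering here are assumptions for illustration, not zstdlib's actual layout:

    /* Assumed shape of the literal-buffer cursor managed above. */
    typedef enum { ZSTD_not_in_dst, ZSTD_in_dst, ZSTD_split } ZSTD_litLocation_e;

    typedef struct {
        const unsigned char* litPtr;        /* next literal byte to consume */
        const unsigned char* litBufferEnd;  /* end of the current literal region */
        ZSTD_litLocation_e   location;      /* which buffer litPtr points into */
    } LitCursor;

    /* Once the dst-resident region is drained, continue from litExtraBuffer. */
    static void lit_transition_to_extra(LitCursor* c,
                                        const unsigned char* litExtraBuffer,
                                        size_t litExtraSize)
    {
        c->litPtr       = litExtraBuffer;
        c->litBufferEnd = litExtraBuffer + litExtraSize;
        c->location     = ZSTD_not_in_dst;
    }

Decoding literals in place avoids a large copy at the end of the block; the split exists, roughly, so that the final literals cannot collide with the output region the sequences are still writing.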
+
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
+    void* dst, size_t maxDstSize,
+    const void* seqStart, size_t seqSize, int nbSeq,
+    const ZSTD_longOffset_e isLongOffset,
+    const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
+    const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
+    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
+
+        ZSTD_STATIC_ASSERT(
+            BIT_DStream_unfinished < BIT_DStream_completed &&
+            BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+            BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__GNUC__) && defined(__x86_64__)
+        __asm__(".p2align 6");
+        __asm__("nop");
+# if __GNUC__ >= 7
+        __asm__(".p2align 5");
+        __asm__("nop");
+        __asm__(".p2align 3");
+# else
+        __asm__(".p2align 4");
+        __asm__("nop");
+        __asm__(".p2align 3");
+# endif
 #endif
+
         for ( ; ; ) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
 #endif
+            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                return oneSeqSize;
             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            op += oneSeqSize;
+            if (UNLIKELY(!--nbSeq))
+                break;
             BIT_reloadDStream(&(seqState.DStream));
-            /* gcc and clang both don't like early returns in this loop.
-             * gcc doesn't like early breaks either.
-             * Instead save an error and report it at the end.
-             * When there is an error, don't increment op, so we don't
-             * overwrite.
-             */
-            if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
-            else op += oneSeqSize;
-            if (UNLIKELY(!--nbSeq)) break;
         }

         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        if (ZSTD_isError(error)) return error;
         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
         /* save reps for next block */
@@ -1104,7 +1615,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
     {   size_t const lastLLSize = litEnd - litPtr;
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
-
+            ZSTD_memcpy(op, litPtr, lastLLSize);
             op += lastLLSize;
         }
     }
@@ -1121,9 +1632,37 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
 {
     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
+
+static size_t
+ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
+                                  void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+                                  const ZSTD_longOffset_e isLongOffset,
+                                  const int frame)
+{
+    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                   const BYTE* const prefixStart, const BYTE* const dictEnd)
+{
+    prefetchPos += sequence.litLength;
+    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+        const BYTE* const match = matchBase + prefetchPos - sequence.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                                               * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+    }
+    return prefetchPos + sequence.matchLength;
+}
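
ZSTD_prefetchMatch replaces the old seqState->pos/seq.match bookkeeping removed earlier in this diff: the caller threads a running prefetchPos through it, and each decoded sequence's match source is touched with PREFETCH_L1 well before it is copied. The long-offset decoder below uses it in a classic software pipeline: decode ADVANCED_SEQS sequences ahead, prefetch each match as soon as it is known, and execute a sequence only after its prefetch has had time to land. A self-contained skeleton of that prepare/steady-state/finish structure (decode_one and execute_one are stand-ins, not zstd API):

    #include <stdio.h>

    enum { RING = 8 };   /* mirrors STORED_SEQS */

    static int  decode_one(int i)  { return i; }          /* stand-in: decode + prefetch */
    static void execute_one(int s) { printf("%d ", s); }  /* stand-in: copy lits + match */

    static void pipeline(int nbSeq)
    {
        int ring[RING];
        int const advance = nbSeq < RING ? nbSeq : RING;
        int n;
        for (n = 0; n < advance; n++)                 /* prepare in advance */
            ring[n] = decode_one(n);
        for (; n < nbSeq; n++) {                      /* steady state */
            int const next = decode_one(n);           /* decode + prefetch seq n */
            execute_one(ring[(n - advance) % RING]);  /* execute seq n-advance */
            ring[n % RING] = next;
        }
        for (n = n - advance; n < nbSeq; n++)         /* finish queue */
            execute_one(ring[n % RING]);
    }

With RING equal to the advance distance, the ring slot being executed is exactly the one about to be overwritten, which is how sequences[seqNb & STORED_SEQS_MASK] is reused below.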
+
+/* This decoding function employs prefetching
+ * to reduce latency impact of cache misses.
+ * It's generally employed when block contains a significant portion of long-distance matches
+ * or when coupled with a "cold" dictionary */
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_decompressSequencesLong_body(
                                ZSTD_DCtx* dctx,
@@ -1134,11 +1673,11 @@ ZSTD_decompressSequencesLong_body(
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE*
-    BYTE* const oend = ostart + maxDstSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
-    const BYTE*
+    const BYTE* litBufferEnd = dctx->litBufferEnd;
     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
@@ -1146,18 +1685,17 @@ ZSTD_decompressSequencesLong_body(

     /* Regen sequences */
     if (nbSeq) {
-#define STORED_SEQS
+#define STORED_SEQS 8
 #define STORED_SEQS_MASK (STORED_SEQS-1)
-#define ADVANCED_SEQS
+#define ADVANCED_SEQS STORED_SEQS
         seq_t sequences[STORED_SEQS];
         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
         seqState_t seqState;
         int seqNb;
+        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
         dctx->fseEntropy = 1;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        seqState.prefixStart = prefixStart;
-        seqState.pos = (size_t)(op-prefixStart);
-        seqState.dictEnd = dictEnd;
         assert(dst != NULL);
         assert(iend >= ip);
         RETURN_ERROR_IF(
@@ -1169,36 +1707,100 @@ ZSTD_decompressSequencesLong_body(

         /* prepare in advance */
         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
-
-
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb] = sequence;
         }
         RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");

-        /*
-        for (
-            seq_t
-            size_t
+        /* decompress without stomping litBuffer */
+        for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
+            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            size_t oneSeqSize;
+
+            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
+            {
+                /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
+                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+                if (leftoverLit)
+                {
+                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                    sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
+                    op += leftoverLit;
+                }
+                litPtr = dctx->litExtraBuffer;
+                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
+                oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-
-
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
-
-
-
-
+                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+
+                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+                sequences[seqNb & STORED_SEQS_MASK] = sequence;
+                op += oneSeqSize;
+            }
+            else
+            {
+                /* lit buffer is either wholly contained in first or second split, or not split at all*/
+                oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
+                    ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+
+                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+                sequences[seqNb & STORED_SEQS_MASK] = sequence;
+                op += oneSeqSize;
+            }
         }
         RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");

         /* finish queue */
         seqNb -= seqAdvance;
         for ( ; seqNb<nbSeq ; seqNb++) {
-
+            seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
+            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
+            {
+                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+                if (leftoverLit)
+                {
+                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                    sequence->litLength -= leftoverLit;
+                    op += leftoverLit;
+                }
+                litPtr = dctx->litExtraBuffer;
+                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
+                {
+                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-
-
+                    assert(!ZSTD_isError(oneSeqSize));
+                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
-
-
+                    if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+                    op += oneSeqSize;
+                }
+            }
+            else
+            {
+                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
+                    ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+                op += oneSeqSize;
+            }
         }

         /* save reps for next block */
@@ -1206,10 +1808,21 @@ ZSTD_decompressSequencesLong_body(
     }

     /* last literal segment */
-
+    if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
+    {
+        size_t const lastLLSize = litBufferEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+        litPtr = dctx->litExtraBuffer;
+        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+    }
+    {   size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
        if (op != NULL) {
-
+            ZSTD_memmove(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }
@@ -1233,7 +1846,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
 #if DYNAMIC_BMI2

 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
-static
+static BMI2_TARGET_ATTRIBUTE size_t
 DONT_VECTORIZE
 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                  void* dst, size_t maxDstSize,
@@ -1243,10 +1856,20 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
 {
     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
+static BMI2_TARGET_ATTRIBUTE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
+                  void* dst, size_t maxDstSize,
+            const void* seqStart, size_t seqSize, int nbSeq,
+                  const ZSTD_longOffset_e isLongOffset,
+                  const int frame)
+{
+    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-static
+static BMI2_TARGET_ATTRIBUTE size_t
 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
                   void* dst, size_t maxDstSize,
             const void* seqStart, size_t seqSize, int nbSeq,
@@ -1275,11 +1898,25 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
 {
     DEBUGLOG(5, "ZSTD_decompressSequences");
 #if DYNAMIC_BMI2
-    if (dctx
+    if (ZSTD_DCtx_get_bmi2(dctx)) {
         return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
     }
 #endif
-
+    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+static size_t
+ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                                 const void* seqStart, size_t seqSize, int nbSeq,
+                                 const ZSTD_longOffset_e isLongOffset,
+                                 const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
+#if DYNAMIC_BMI2
+    if (ZSTD_DCtx_get_bmi2(dctx)) {
+        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
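
BMI2_TARGET_ATTRIBUTE (new in this zstd drop) lets a single translation unit carry both a baseline and a BMI2-targeted copy of each decompression body; ZSTD_DCtx_get_bmi2() then picks one per call based on the CPU detected at context creation. The general shape of that pattern, reduced to a toy with hypothetical names:

    /* Illustrative runtime dispatch on a function-level target attribute. */
    #if defined(__GNUC__) && defined(__x86_64__)
    #  define TOY_BMI2_ATTR __attribute__((target("bmi2")))
    #else
    #  define TOY_BMI2_ATTR
    #endif

    static size_t sum_default(const unsigned char* p, size_t n)
    {   size_t acc = 0, i;
        for (i = 0; i < n; i++) acc += p[i];
        return acc;
    }

    static TOY_BMI2_ATTR size_t sum_bmi2(const unsigned char* p, size_t n)
    {   /* same body; the compiler may now emit BMI2 instructions */
        size_t acc = 0, i;
        for (i = 0; i < n; i++) acc += p[i];
        return acc;
    }

    static size_t sum_dispatch(int cpuHasBmi2, const unsigned char* p, size_t n)
    {
        return cpuHasBmi2 ? sum_bmi2(p, n) : sum_default(p, n);
    }

zstd gets its two real copies by inlining the shared _body function (FORCE_INLINE_TEMPLATE) into both the _default and _bmi2 wrappers, so the decode logic is written once.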
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

@@ -1299,7 +1936,7 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
 {
     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
 #if DYNAMIC_BMI2
-    if (dctx
+    if (ZSTD_DCtx_get_bmi2(dctx)) {
         return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
     }
 #endif
@@ -1340,7 +1977,7 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
 size_t
 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                               void* dst, size_t dstCapacity,
-                        const void* src, size_t srcSize, const int frame)
+                        const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
 {   /* blockType == blockCompressed */
     const BYTE* ip = (const BYTE*)src;
     /* isLongOffset must be true if there are long offsets.
@@ -1355,7 +1992,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
     RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");

     /* Decode literals section */
-    { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+    { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
         DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
         if (ZSTD_isError(litCSize)) return litCSize;
         ip += litCSize;
@@ -1403,15 +2040,18 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,

 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
     /* else */
-
+    if (dctx->litBufferLocation == ZSTD_split)
+        return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+    else
+        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
 #endif
     }
 }


-void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
 {
-    if (dst != dctx->previousDstEnd) {   /* not contiguous */
+    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
         dctx->dictEnd = dctx->previousDstEnd;
         dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
         dctx->prefixStart = dst;
@@ -1425,8 +2065,8 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                             const void* src, size_t srcSize)
 {
     size_t dSize;
-    ZSTD_checkContinuity(dctx, dst);
-    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
     dctx->previousDstEnd = (char*)dst + dSize;
     return dSize;
 }
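
The tail of the diff threads two new parameters through the block API: ZSTD_checkContinuity now receives dstSize so a zero-sized destination cannot disturb window tracking, and ZSTD_decompressBlock_internal takes a streaming_operation flag (not_streaming here) so literals are decoded into dst only when the caller owns the whole buffer. Seen from outside, the public entry point is unchanged; a minimal caller sketch (the block-level API lives behind ZSTD_STATIC_LINKING_ONLY and is marked experimental in zstd's headers; error handling trimmed):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* Decompress one raw block previously produced by ZSTD_compressBlock. */
    static size_t decode_one_block(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
    {
        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
        size_t dSize = 0;
        if (dctx != NULL) {
            ZSTD_decompressBegin(dctx);   /* reset inter-block history */
            dSize = ZSTD_decompressBlock(dctx, dst, dstCapacity, src, srcSize);
            ZSTD_freeDCtx(dctx);
        }
        return dSize;   /* a size, or a ZSTD error code (check ZSTD_isError) */
    }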