zstd-ruby 1.4.4.0 → 1.5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +1 -0
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +241 -173
- data/ext/zstdruby/libzstd/README.md +76 -18
- data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
- data/ext/zstdruby/libzstd/common/compiler.h +196 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +51 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
- data/ext/zstdruby/libzstd/common/huf.h +60 -54
- data/ext/zstdruby/libzstd/common/mem.h +87 -98
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/threading.c +10 -8
- data/ext/zstdruby/libzstd/common/threading.h +4 -3
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
- data/ext/zstdruby/libzstd/zstd.h +760 -234
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +20 -9
- data/.travis.yml +0 -14
|
@@ -1,47 +1,34 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
notice, this list of conditions and the following disclaimer.
|
|
14
|
-
* Redistributions in binary form must reproduce the above
|
|
15
|
-
copyright notice, this list of conditions and the following disclaimer
|
|
16
|
-
in the documentation and/or other materials provided with the
|
|
17
|
-
distribution.
|
|
18
|
-
|
|
19
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
20
|
-
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
21
|
-
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
22
|
-
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
23
|
-
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
24
|
-
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
25
|
-
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
26
|
-
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
27
|
-
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
28
|
-
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
29
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
30
|
-
|
|
31
|
-
You can contact the author at :
|
|
32
|
-
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
2
|
+
* huff0 huffman decoder,
|
|
3
|
+
* part of Finite State Entropy library
|
|
4
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
5
|
+
*
|
|
6
|
+
* You can contact the author at :
|
|
7
|
+
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
8
|
+
*
|
|
9
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
10
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
11
|
+
* in the COPYING file in the root directory of this source tree).
|
|
12
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
33
13
|
****************************************************************** */
|
|
34
14
|
|
|
35
15
|
/* **************************************************************
|
|
36
16
|
* Dependencies
|
|
37
17
|
****************************************************************/
|
|
38
|
-
#include
|
|
39
|
-
#include "compiler.h"
|
|
40
|
-
#include "bitstream.h" /* BIT_* */
|
|
41
|
-
#include "fse.h" /* to compress headers */
|
|
18
|
+
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
|
|
19
|
+
#include "../common/compiler.h"
|
|
20
|
+
#include "../common/bitstream.h" /* BIT_* */
|
|
21
|
+
#include "../common/fse.h" /* to compress headers */
|
|
42
22
|
#define HUF_STATIC_LINKING_ONLY
|
|
43
|
-
#include "huf.h"
|
|
44
|
-
#include "error_private.h"
|
|
23
|
+
#include "../common/huf.h"
|
|
24
|
+
#include "../common/error_private.h"
|
|
25
|
+
#include "../common/zstd_internal.h"
|
|
26
|
+
|
|
27
|
+
/* **************************************************************
|
|
28
|
+
* Constants
|
|
29
|
+
****************************************************************/
|
|
30
|
+
|
|
31
|
+
#define HUF_DECODER_FAST_TABLELOG 11
|
|
45
32
|
|
|
46
33
|
/* **************************************************************
|
|
47
34
|
* Macros
|
|
@@ -56,14 +43,35 @@
|
|
|
56
43
|
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
|
|
57
44
|
#endif
|
|
58
45
|
|
|
46
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
|
|
47
|
+
# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
|
|
48
|
+
#else
|
|
49
|
+
# define HUF_ASM_X86_64_BMI2_ATTRS
|
|
50
|
+
#endif
|
|
51
|
+
|
|
52
|
+
#ifdef __cplusplus
|
|
53
|
+
# define HUF_EXTERN_C extern "C"
|
|
54
|
+
#else
|
|
55
|
+
# define HUF_EXTERN_C
|
|
56
|
+
#endif
|
|
57
|
+
#define HUF_ASM_DECL HUF_EXTERN_C
|
|
58
|
+
|
|
59
|
+
#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
|
|
60
|
+
# define HUF_NEED_BMI2_FUNCTION 1
|
|
61
|
+
#else
|
|
62
|
+
# define HUF_NEED_BMI2_FUNCTION 0
|
|
63
|
+
#endif
|
|
64
|
+
|
|
65
|
+
#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
|
|
66
|
+
# define HUF_NEED_DEFAULT_FUNCTION 1
|
|
67
|
+
#else
|
|
68
|
+
# define HUF_NEED_DEFAULT_FUNCTION 0
|
|
69
|
+
#endif
|
|
59
70
|
|
|
60
71
|
/* **************************************************************
|
|
61
72
|
* Error Management
|
|
62
73
|
****************************************************************/
|
|
63
74
|
#define HUF_isError ERR_isError
|
|
64
|
-
#ifndef CHECK_F
|
|
65
|
-
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
|
|
66
|
-
#endif
|
|
67
75
|
|
|
68
76
|
|
|
69
77
|
/* **************************************************************
|
|
@@ -88,7 +96,7 @@
|
|
|
88
96
|
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
|
89
97
|
} \
|
|
90
98
|
\
|
|
91
|
-
static
|
|
99
|
+
static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
|
|
92
100
|
void* dst, size_t dstSize, \
|
|
93
101
|
const void* cSrc, size_t cSrcSize, \
|
|
94
102
|
const HUF_DTable* DTable) \
|
|
@@ -126,82 +134,349 @@ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved;
|
|
|
126
134
|
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
|
127
135
|
{
|
|
128
136
|
DTableDesc dtd;
|
|
129
|
-
|
|
137
|
+
ZSTD_memcpy(&dtd, table, sizeof(dtd));
|
|
130
138
|
return dtd;
|
|
131
139
|
}
|
|
132
140
|
|
|
141
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
142
|
+
|
|
143
|
+
static size_t HUF_initDStream(BYTE const* ip) {
|
|
144
|
+
BYTE const lastByte = ip[7];
|
|
145
|
+
size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
|
|
146
|
+
size_t const value = MEM_readLEST(ip) | 1;
|
|
147
|
+
assert(bitsConsumed <= 8);
|
|
148
|
+
return value << bitsConsumed;
|
|
149
|
+
}
|
|
150
|
+
typedef struct {
|
|
151
|
+
BYTE const* ip[4];
|
|
152
|
+
BYTE* op[4];
|
|
153
|
+
U64 bits[4];
|
|
154
|
+
void const* dt;
|
|
155
|
+
BYTE const* ilimit;
|
|
156
|
+
BYTE* oend;
|
|
157
|
+
BYTE const* iend[4];
|
|
158
|
+
} HUF_DecompressAsmArgs;
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* Initializes args for the asm decoding loop.
|
|
162
|
+
* @returns 0 on success
|
|
163
|
+
* 1 if the fallback implementation should be used.
|
|
164
|
+
* Or an error code on failure.
|
|
165
|
+
*/
|
|
166
|
+
static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
|
|
167
|
+
{
|
|
168
|
+
void const* dt = DTable + 1;
|
|
169
|
+
U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
|
|
170
|
+
|
|
171
|
+
const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
|
|
172
|
+
|
|
173
|
+
BYTE* const oend = (BYTE*)dst + dstSize;
|
|
174
|
+
|
|
175
|
+
/* The following condition is false on x32 platform,
|
|
176
|
+
* so x32 takes the fallback decoder; HUF_asm is not compatible with this ABI anyway */
|
|
177
|
+
if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
|
|
178
|
+
|
|
179
|
+
/* strict minimum : jump table + 1 byte per stream */
|
|
180
|
+
if (srcSize < 10)
|
|
181
|
+
return ERROR(corruption_detected);
|
|
182
|
+
|
|
183
|
+
/* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
|
|
184
|
+
* If table log is not correct at this point, fall back to the old decoder.
|
|
185
|
+
* On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
|
|
186
|
+
*/
|
|
187
|
+
if (dtLog != HUF_DECODER_FAST_TABLELOG)
|
|
188
|
+
return 1;
|
|
189
|
+
|
|
190
|
+
/* Read the jump table. */
|
|
191
|
+
{
|
|
192
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
193
|
+
size_t const length1 = MEM_readLE16(istart);
|
|
194
|
+
size_t const length2 = MEM_readLE16(istart+2);
|
|
195
|
+
size_t const length3 = MEM_readLE16(istart+4);
|
|
196
|
+
size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
|
|
197
|
+
args->iend[0] = istart + 6; /* jumpTable */
|
|
198
|
+
args->iend[1] = args->iend[0] + length1;
|
|
199
|
+
args->iend[2] = args->iend[1] + length2;
|
|
200
|
+
args->iend[3] = args->iend[2] + length3;
|
|
201
|
+
|
|
202
|
+
/* HUF_initDStream() requires this, and this small of an input
|
|
203
|
+
* won't benefit from the ASM loop anyways.
|
|
204
|
+
* length1 must be >= 16 so that ip[0] >= ilimit before the loop
|
|
205
|
+
* starts.
|
|
206
|
+
*/
|
|
207
|
+
if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
|
|
208
|
+
return 1;
|
|
209
|
+
if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
|
|
210
|
+
}
|
|
211
|
+
/* ip[] contains the position that is currently loaded into bits[]. */
|
|
212
|
+
args->ip[0] = args->iend[1] - sizeof(U64);
|
|
213
|
+
args->ip[1] = args->iend[2] - sizeof(U64);
|
|
214
|
+
args->ip[2] = args->iend[3] - sizeof(U64);
|
|
215
|
+
args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
|
|
216
|
+
|
|
217
|
+
/* op[] contains the output pointers. */
|
|
218
|
+
args->op[0] = (BYTE*)dst;
|
|
219
|
+
args->op[1] = args->op[0] + (dstSize+3)/4;
|
|
220
|
+
args->op[2] = args->op[1] + (dstSize+3)/4;
|
|
221
|
+
args->op[3] = args->op[2] + (dstSize+3)/4;
|
|
222
|
+
|
|
223
|
+
/* No point in calling the ASM loop for tiny outputs. */
|
|
224
|
+
if (args->op[3] >= oend)
|
|
225
|
+
return 1;
|
|
226
|
+
|
|
227
|
+
/* bits[] is the bit container.
|
|
228
|
+
* It is read from the MSB down to the LSB.
|
|
229
|
+
* It is shifted left as it is read, and zeros are
|
|
230
|
+
* shifted in. After the lowest valid bit a 1 is
|
|
231
|
+
* set, so that CountTrailingZeros(bits[]) can be used
|
|
232
|
+
* to count how many bits we've consumed.
|
|
233
|
+
*/
|
|
234
|
+
args->bits[0] = HUF_initDStream(args->ip[0]);
|
|
235
|
+
args->bits[1] = HUF_initDStream(args->ip[1]);
|
|
236
|
+
args->bits[2] = HUF_initDStream(args->ip[2]);
|
|
237
|
+
args->bits[3] = HUF_initDStream(args->ip[3]);
|
|
238
|
+
|
|
239
|
+
/* If ip[] >= ilimit, it is guaranteed to be safe to
|
|
240
|
+
* reload bits[]. It may be beyond its section, but is
|
|
241
|
+
* guaranteed to be valid (>= istart).
|
|
242
|
+
*/
|
|
243
|
+
args->ilimit = ilimit;
|
|
244
|
+
|
|
245
|
+
args->oend = oend;
|
|
246
|
+
args->dt = dt;
|
|
247
|
+
|
|
248
|
+
return 0;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
|
|
252
|
+
{
|
|
253
|
+
/* Validate that we haven't written past the end of this stream's output segment. */
|
|
254
|
+
if (args->op[stream] > segmentEnd)
|
|
255
|
+
return ERROR(corruption_detected);
|
|
256
|
+
/* Validate that we haven't read beyond iend[].
|
|
257
|
+
* Note that ip[] may be < iend[] because the MSB is
|
|
258
|
+
* the next bit to read, and we may have consumed 100%
|
|
259
|
+
* of the stream, so down to iend[i] - 8 is valid.
|
|
260
|
+
*/
|
|
261
|
+
if (args->ip[stream] < args->iend[stream] - 8)
|
|
262
|
+
return ERROR(corruption_detected);
|
|
263
|
+
|
|
264
|
+
/* Construct the BIT_DStream_t. */
|
|
265
|
+
bit->bitContainer = MEM_readLE64(args->ip[stream]);
|
|
266
|
+
bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
|
|
267
|
+
bit->start = (const char*)args->iend[0];
|
|
268
|
+
bit->limitPtr = bit->start + sizeof(size_t);
|
|
269
|
+
bit->ptr = (const char*)args->ip[stream];
|
|
270
|
+
|
|
271
|
+
return 0;
|
|
272
|
+
}
|
|
273
|
+
#endif
|
|
274
|
+
|
|
133
275
|
|
|
134
276
|
#ifndef HUF_FORCE_DECOMPRESS_X2
|
|
135
277
|
|
|
136
278
|
/*-***************************/
|
|
137
279
|
/* single-symbol decoding */
|
|
138
280
|
/*-***************************/
|
|
139
|
-
typedef struct { BYTE
|
|
281
|
+
typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */
|
|
282
|
+
|
|
283
|
+
/**
|
|
284
|
+
* Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
|
|
285
|
+
* a time.
|
|
286
|
+
*/
|
|
287
|
+
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
|
|
288
|
+
U64 D4;
|
|
289
|
+
if (MEM_isLittleEndian()) {
|
|
290
|
+
D4 = (symbol << 8) + nbBits;
|
|
291
|
+
} else {
|
|
292
|
+
D4 = symbol + (nbBits << 8);
|
|
293
|
+
}
|
|
294
|
+
D4 *= 0x0001000100010001ULL;
|
|
295
|
+
return D4;
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
/**
|
|
299
|
+
* Increases the tableLog to targetTableLog and rescales the stats.
|
|
300
|
+
* If tableLog > targetTableLog this is a no-op.
|
|
301
|
+
* @returns New tableLog
|
|
302
|
+
*/
|
|
303
|
+
static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
|
|
304
|
+
{
|
|
305
|
+
if (tableLog > targetTableLog)
|
|
306
|
+
return tableLog;
|
|
307
|
+
if (tableLog < targetTableLog) {
|
|
308
|
+
U32 const scale = targetTableLog - tableLog;
|
|
309
|
+
U32 s;
|
|
310
|
+
/* Increase the weight for all non-zero probability symbols by scale. */
|
|
311
|
+
for (s = 0; s < nbSymbols; ++s) {
|
|
312
|
+
huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
|
|
313
|
+
}
|
|
314
|
+
/* Update rankVal to reflect the new weights.
|
|
315
|
+
* All weights except 0 get moved to weight + scale.
|
|
316
|
+
* Weights [1, scale] are empty.
|
|
317
|
+
*/
|
|
318
|
+
for (s = targetTableLog; s > scale; --s) {
|
|
319
|
+
rankVal[s] = rankVal[s - scale];
|
|
320
|
+
}
|
|
321
|
+
for (s = scale; s > 0; --s) {
|
|
322
|
+
rankVal[s] = 0;
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
return targetTableLog;
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
typedef struct {
|
|
329
|
+
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
|
330
|
+
U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
|
331
|
+
U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
|
332
|
+
BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
|
|
333
|
+
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
|
|
334
|
+
} HUF_ReadDTableX1_Workspace;
|
|
335
|
+
|
|
140
336
|
|
|
141
337
|
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
|
|
338
|
+
{
|
|
339
|
+
return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
|
|
142
343
|
{
|
|
143
344
|
U32 tableLog = 0;
|
|
144
345
|
U32 nbSymbols = 0;
|
|
145
346
|
size_t iSize;
|
|
146
347
|
void* const dtPtr = DTable + 1;
|
|
147
348
|
HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
|
|
349
|
+
HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
|
|
148
350
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
size_t spaceUsed32 = 0;
|
|
152
|
-
|
|
153
|
-
rankVal = (U32 *)workSpace + spaceUsed32;
|
|
154
|
-
spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
|
|
155
|
-
huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
|
156
|
-
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
|
157
|
-
|
|
158
|
-
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
|
351
|
+
DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
|
|
352
|
+
if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
|
|
159
353
|
|
|
160
354
|
DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
|
|
161
|
-
/*
|
|
355
|
+
/* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
|
|
162
356
|
|
|
163
|
-
iSize =
|
|
357
|
+
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
|
|
164
358
|
if (HUF_isError(iSize)) return iSize;
|
|
165
359
|
|
|
360
|
+
|
|
166
361
|
/* Table header */
|
|
167
362
|
{ DTableDesc dtd = HUF_getDTableDesc(DTable);
|
|
363
|
+
U32 const maxTableLog = dtd.maxTableLog + 1;
|
|
364
|
+
U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
|
|
365
|
+
tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
|
|
168
366
|
if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
|
|
169
367
|
dtd.tableType = 0;
|
|
170
368
|
dtd.tableLog = (BYTE)tableLog;
|
|
171
|
-
|
|
369
|
+
ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
|
|
172
370
|
}
|
|
173
371
|
|
|
174
|
-
/*
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
372
|
+
/* Compute symbols and rankStart given rankVal:
|
|
373
|
+
*
|
|
374
|
+
* rankVal already contains the number of values of each weight.
|
|
375
|
+
*
|
|
376
|
+
* symbols contains the symbols ordered by weight. First are the rankVal[0]
|
|
377
|
+
* weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
|
|
378
|
+
* symbols[0] is filled (but unused) to avoid a branch.
|
|
379
|
+
*
|
|
380
|
+
* rankStart contains the offset where each rank belongs in the DTable.
|
|
381
|
+
* rankStart[0] is not filled because there are no entries in the table for
|
|
382
|
+
* weight 0.
|
|
383
|
+
*/
|
|
384
|
+
{
|
|
385
|
+
int n;
|
|
386
|
+
int nextRankStart = 0;
|
|
387
|
+
int const unroll = 4;
|
|
388
|
+
int const nLimit = (int)nbSymbols - unroll + 1;
|
|
389
|
+
for (n=0; n<(int)tableLog+1; n++) {
|
|
390
|
+
U32 const curr = nextRankStart;
|
|
391
|
+
nextRankStart += wksp->rankVal[n];
|
|
392
|
+
wksp->rankStart[n] = curr;
|
|
393
|
+
}
|
|
394
|
+
for (n=0; n < nLimit; n += unroll) {
|
|
395
|
+
int u;
|
|
396
|
+
for (u=0; u < unroll; ++u) {
|
|
397
|
+
size_t const w = wksp->huffWeight[n+u];
|
|
398
|
+
wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
for (; n < (int)nbSymbols; ++n) {
|
|
402
|
+
size_t const w = wksp->huffWeight[n];
|
|
403
|
+
wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
|
|
404
|
+
}
|
|
405
|
+
}
|
|
194
406
|
|
|
407
|
+
/* fill DTable
|
|
408
|
+
* We fill all entries of each weight in order.
|
|
409
|
+
* That way length is a constant for each iteration of the outer loop.
|
|
410
|
+
* We can switch based on the length to a different inner loop which is
|
|
411
|
+
* optimized for that particular case.
|
|
412
|
+
*/
|
|
413
|
+
{
|
|
414
|
+
U32 w;
|
|
415
|
+
int symbol=wksp->rankVal[0];
|
|
416
|
+
int rankStart=0;
|
|
417
|
+
for (w=1; w<tableLog+1; ++w) {
|
|
418
|
+
int const symbolCount = wksp->rankVal[w];
|
|
419
|
+
int const length = (1 << w) >> 1;
|
|
420
|
+
int uStart = rankStart;
|
|
421
|
+
BYTE const nbBits = (BYTE)(tableLog + 1 - w);
|
|
422
|
+
int s;
|
|
423
|
+
int u;
|
|
424
|
+
switch (length) {
|
|
425
|
+
case 1:
|
|
426
|
+
for (s=0; s<symbolCount; ++s) {
|
|
427
|
+
HUF_DEltX1 D;
|
|
428
|
+
D.byte = wksp->symbols[symbol + s];
|
|
429
|
+
D.nbBits = nbBits;
|
|
430
|
+
dt[uStart] = D;
|
|
431
|
+
uStart += 1;
|
|
432
|
+
}
|
|
433
|
+
break;
|
|
434
|
+
case 2:
|
|
435
|
+
for (s=0; s<symbolCount; ++s) {
|
|
436
|
+
HUF_DEltX1 D;
|
|
437
|
+
D.byte = wksp->symbols[symbol + s];
|
|
438
|
+
D.nbBits = nbBits;
|
|
439
|
+
dt[uStart+0] = D;
|
|
440
|
+
dt[uStart+1] = D;
|
|
441
|
+
uStart += 2;
|
|
442
|
+
}
|
|
443
|
+
break;
|
|
444
|
+
case 4:
|
|
445
|
+
for (s=0; s<symbolCount; ++s) {
|
|
446
|
+
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
|
447
|
+
MEM_write64(dt + uStart, D4);
|
|
448
|
+
uStart += 4;
|
|
449
|
+
}
|
|
450
|
+
break;
|
|
451
|
+
case 8:
|
|
452
|
+
for (s=0; s<symbolCount; ++s) {
|
|
453
|
+
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
|
454
|
+
MEM_write64(dt + uStart, D4);
|
|
455
|
+
MEM_write64(dt + uStart + 4, D4);
|
|
456
|
+
uStart += 8;
|
|
457
|
+
}
|
|
458
|
+
break;
|
|
459
|
+
default:
|
|
460
|
+
for (s=0; s<symbolCount; ++s) {
|
|
461
|
+
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
|
462
|
+
for (u=0; u < length; u += 16) {
|
|
463
|
+
MEM_write64(dt + uStart + u + 0, D4);
|
|
464
|
+
MEM_write64(dt + uStart + u + 4, D4);
|
|
465
|
+
MEM_write64(dt + uStart + u + 8, D4);
|
|
466
|
+
MEM_write64(dt + uStart + u + 12, D4);
|
|
467
|
+
}
|
|
468
|
+
assert(u == length);
|
|
469
|
+
uStart += length;
|
|
470
|
+
}
|
|
471
|
+
break;
|
|
472
|
+
}
|
|
473
|
+
symbol += symbolCount;
|
|
474
|
+
rankStart += symbolCount * length;
|
|
475
|
+
}
|
|
476
|
+
}
|
|
195
477
|
return iSize;
|
|
196
478
|
}
|
|
197
479
|
|
|
198
|
-
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
199
|
-
{
|
|
200
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
201
|
-
return HUF_readDTableX1_wksp(DTable, src, srcSize,
|
|
202
|
-
workSpace, sizeof(workSpace));
|
|
203
|
-
}
|
|
204
|
-
|
|
205
480
|
FORCE_INLINE_TEMPLATE BYTE
|
|
206
481
|
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
|
|
207
482
|
{
|
|
@@ -228,11 +503,15 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
|
|
|
228
503
|
BYTE* const pStart = p;
|
|
229
504
|
|
|
230
505
|
/* up to 4 symbols at a time */
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
506
|
+
if ((pEnd - p) > 3) {
|
|
507
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
|
|
508
|
+
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
|
|
509
|
+
HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
|
|
510
|
+
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
|
|
511
|
+
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
|
512
|
+
}
|
|
513
|
+
} else {
|
|
514
|
+
BIT_reloadDStream(bitDPtr);
|
|
236
515
|
}
|
|
237
516
|
|
|
238
517
|
/* [0-3] symbols remaining */
|
|
@@ -282,6 +561,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
282
561
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
|
283
562
|
BYTE* const ostart = (BYTE*) dst;
|
|
284
563
|
BYTE* const oend = ostart + dstSize;
|
|
564
|
+
BYTE* const olimit = oend - 3;
|
|
285
565
|
const void* const dtPtr = DTable + 1;
|
|
286
566
|
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
|
|
287
567
|
|
|
@@ -306,39 +586,41 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
306
586
|
BYTE* op2 = opStart2;
|
|
307
587
|
BYTE* op3 = opStart3;
|
|
308
588
|
BYTE* op4 = opStart4;
|
|
309
|
-
U32 endSignal = BIT_DStream_unfinished;
|
|
310
589
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
311
590
|
U32 const dtLog = dtd.tableLog;
|
|
591
|
+
U32 endSignal = 1;
|
|
312
592
|
|
|
313
593
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
|
594
|
+
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
|
314
595
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
|
315
596
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
|
316
597
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
|
317
598
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
|
318
599
|
|
|
319
600
|
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
601
|
+
if ((size_t)(oend - op4) >= sizeof(size_t)) {
|
|
602
|
+
for ( ; (endSignal) & (op4 < olimit) ; ) {
|
|
603
|
+
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
|
604
|
+
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
|
605
|
+
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
|
606
|
+
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
|
|
607
|
+
HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
|
|
608
|
+
HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
|
|
609
|
+
HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
|
|
610
|
+
HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
|
|
611
|
+
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
|
612
|
+
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
|
613
|
+
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
|
614
|
+
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
|
|
615
|
+
HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
|
|
616
|
+
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
|
|
617
|
+
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
|
|
618
|
+
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
|
|
619
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
|
620
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
|
621
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
|
622
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
|
623
|
+
}
|
|
342
624
|
}
|
|
343
625
|
|
|
344
626
|
/* check corruption */
|
|
@@ -364,6 +646,79 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
364
646
|
}
|
|
365
647
|
}
|
|
366
648
|
|
|
649
|
+
#if HUF_NEED_BMI2_FUNCTION
|
|
650
|
+
static BMI2_TARGET_ATTRIBUTE
|
|
651
|
+
size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
|
|
652
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
653
|
+
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
654
|
+
}
|
|
655
|
+
#endif
|
|
656
|
+
|
|
657
|
+
#if HUF_NEED_DEFAULT_FUNCTION
|
|
658
|
+
static
|
|
659
|
+
size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
|
|
660
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
661
|
+
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
662
|
+
}
|
|
663
|
+
#endif
|
|
664
|
+
|
|
665
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
666
|
+
|
|
667
|
+
HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args);
|
|
668
|
+
|
|
669
|
+
static HUF_ASM_X86_64_BMI2_ATTRS
|
|
670
|
+
size_t
|
|
671
|
+
HUF_decompress4X1_usingDTable_internal_bmi2_asm(
|
|
672
|
+
void* dst, size_t dstSize,
|
|
673
|
+
const void* cSrc, size_t cSrcSize,
|
|
674
|
+
const HUF_DTable* DTable)
|
|
675
|
+
{
|
|
676
|
+
void const* dt = DTable + 1;
|
|
677
|
+
const BYTE* const iend = (const BYTE*)cSrc + 6;
|
|
678
|
+
BYTE* const oend = (BYTE*)dst + dstSize;
|
|
679
|
+
HUF_DecompressAsmArgs args;
|
|
680
|
+
{
|
|
681
|
+
size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
|
682
|
+
FORWARD_IF_ERROR(ret, "Failed to init asm args");
|
|
683
|
+
if (ret != 0)
|
|
684
|
+
return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
assert(args.ip[0] >= args.ilimit);
|
|
688
|
+
HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
|
|
689
|
+
|
|
690
|
+
/* Our loop guarantees that ip[] >= ilimit and that we haven't
|
|
691
|
+
* overwritten any op[].
|
|
692
|
+
*/
|
|
693
|
+
assert(args.ip[0] >= iend);
|
|
694
|
+
assert(args.ip[1] >= iend);
|
|
695
|
+
assert(args.ip[2] >= iend);
|
|
696
|
+
assert(args.ip[3] >= iend);
|
|
697
|
+
assert(args.op[3] <= oend);
|
|
698
|
+
(void)iend;
|
|
699
|
+
|
|
700
|
+
/* finish bit streams one by one. */
|
|
701
|
+
{
|
|
702
|
+
size_t const segmentSize = (dstSize+3) / 4;
|
|
703
|
+
BYTE* segmentEnd = (BYTE*)dst;
|
|
704
|
+
int i;
|
|
705
|
+
for (i = 0; i < 4; ++i) {
|
|
706
|
+
BIT_DStream_t bit;
|
|
707
|
+
if (segmentSize <= (size_t)(oend - segmentEnd))
|
|
708
|
+
segmentEnd += segmentSize;
|
|
709
|
+
else
|
|
710
|
+
segmentEnd = oend;
|
|
711
|
+
FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
|
|
712
|
+
/* Decompress and validate that we've produced exactly the expected length. */
|
|
713
|
+
args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
|
|
714
|
+
if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
/* decoded size */
|
|
719
|
+
return dstSize;
|
|
720
|
+
}
|
|
721
|
+
#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
|
|
367
722
|
|
|
368
723
|
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
|
|
369
724
|
const void *cSrc,
|
|
@@ -371,8 +726,28 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
|
|
|
371
726
|
const HUF_DTable *DTable);
|
|
372
727
|
|
|
373
728
|
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
|
|
374
|
-
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
|
|
375
729
|
|
|
730
|
+
static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
|
|
731
|
+
size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
|
|
732
|
+
{
|
|
733
|
+
#if DYNAMIC_BMI2
|
|
734
|
+
if (bmi2) {
|
|
735
|
+
# if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
736
|
+
return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
737
|
+
# else
|
|
738
|
+
return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
739
|
+
# endif
|
|
740
|
+
}
|
|
741
|
+
#else
|
|
742
|
+
(void)bmi2;
|
|
743
|
+
#endif
|
|
744
|
+
|
|
745
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
|
|
746
|
+
return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
747
|
+
#else
|
|
748
|
+
return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
749
|
+
#endif
|
|
750
|
+
}
|
|
376
751
|
|
|
377
752
|
|
|
378
753
|
size_t HUF_decompress1X1_usingDTable(
|
|
@@ -400,20 +775,6 @@ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
|
400
775
|
}
|
|
401
776
|
|
|
402
777
|
|
|
403
|
-
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
404
|
-
const void* cSrc, size_t cSrcSize)
|
|
405
|
-
{
|
|
406
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
407
|
-
return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
|
408
|
-
workSpace, sizeof(workSpace));
|
|
409
|
-
}
|
|
410
|
-
|
|
411
|
-
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
412
|
-
{
|
|
413
|
-
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
|
414
|
-
return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
|
415
|
-
}
|
|
416
|
-
|
|
417
778
|
size_t HUF_decompress4X1_usingDTable(
|
|
418
779
|
void* dst, size_t dstSize,
|
|
419
780
|
const void* cSrc, size_t cSrcSize,
|
|
@@ -430,8 +791,7 @@ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size
|
|
|
430
791
|
{
|
|
431
792
|
const BYTE* ip = (const BYTE*) cSrc;
|
|
432
793
|
|
|
433
|
-
size_t const hSize =
|
|
434
|
-
workSpace, wkspSize);
|
|
794
|
+
size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
|
435
795
|
if (HUF_isError(hSize)) return hSize;
|
|
436
796
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
|
437
797
|
ip += hSize; cSrcSize -= hSize;
|
|
@@ -447,18 +807,6 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
|
447
807
|
}
|
|
448
808
|
|
|
449
809
|
|
|
450
|
-
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
451
|
-
{
|
|
452
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
453
|
-
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
454
|
-
workSpace, sizeof(workSpace));
|
|
455
|
-
}
|
|
456
|
-
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
457
|
-
{
|
|
458
|
-
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
|
459
|
-
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
460
|
-
}
|
|
461
|
-
|
|
462
810
|
#endif /* HUF_FORCE_DECOMPRESS_X2 */
|
|
463
811
|
|
|
464
812
|
|
|
@@ -469,209 +817,329 @@ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
|
469
817
|
/* *************************/
|
|
470
818
|
|
|
471
819
|
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
|
|
472
|
-
typedef struct { BYTE symbol;
|
|
820
|
+
typedef struct { BYTE symbol; } sortedSymbol_t;
|
|
473
821
|
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
|
|
474
822
|
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
|
|
475
823
|
|
|
824
|
+
/**
|
|
825
|
+
* Constructs a HUF_DEltX2 in a U32.
|
|
826
|
+
*/
|
|
827
|
+
static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
|
|
828
|
+
{
|
|
829
|
+
U32 seq;
|
|
830
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
|
|
831
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
|
|
832
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
|
|
833
|
+
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
|
|
834
|
+
if (MEM_isLittleEndian()) {
|
|
835
|
+
seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
|
|
836
|
+
return seq + (nbBits << 16) + ((U32)level << 24);
|
|
837
|
+
} else {
|
|
838
|
+
seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
|
|
839
|
+
return (seq << 16) + (nbBits << 8) + (U32)level;
|
|
840
|
+
}
|
|
841
|
+
}
|
|
476
842
|
|
|
477
|
-
|
|
478
|
-
*
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
|
|
482
|
-
U32 nbBitsBaseline, U16 baseSeq)
|
|
843
|
+
/**
|
|
844
|
+
* Constructs a HUF_DEltX2.
|
|
845
|
+
*/
|
|
846
|
+
static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
|
|
483
847
|
{
|
|
484
848
|
HUF_DEltX2 DElt;
|
|
485
|
-
U32
|
|
849
|
+
U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
|
|
850
|
+
DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
|
|
851
|
+
ZSTD_memcpy(&DElt, &val, sizeof(val));
|
|
852
|
+
return DElt;
|
|
853
|
+
}
|
|
486
854
|
|
|
487
|
-
|
|
488
|
-
|
|
855
|
+
/**
|
|
856
|
+
* Constructs 2 HUF_DEltX2s and packs them into a U64.
|
|
857
|
+
*/
|
|
858
|
+
static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
|
|
859
|
+
{
|
|
860
|
+
U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
|
|
861
|
+
return (U64)DElt + ((U64)DElt << 32);
|
|
862
|
+
}
|
|
489
863
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
864
|
+
/**
|
|
865
|
+
* Fills the DTable rank with all the symbols from [begin, end) that are each
|
|
866
|
+
* nbBits long.
|
|
867
|
+
*
|
|
868
|
+
* @param DTableRank The start of the rank in the DTable.
|
|
869
|
+
* @param begin The first symbol to fill (inclusive).
|
|
870
|
+
* @param end The last symbol to fill (exclusive).
|
|
871
|
+
* @param nbBits Each symbol is nbBits long.
|
|
872
|
+
* @param tableLog The table log.
|
|
873
|
+
* @param baseSeq If level == 1 { 0 } else { the first level symbol }
|
|
874
|
+
* @param level The level in the table. Must be 1 or 2.
|
|
875
|
+
*/
|
|
876
|
+
static void HUF_fillDTableX2ForWeight(
|
|
877
|
+
HUF_DEltX2* DTableRank,
|
|
878
|
+
sortedSymbol_t const* begin, sortedSymbol_t const* end,
|
|
879
|
+
U32 nbBits, U32 tableLog,
|
|
880
|
+
U16 baseSeq, int const level)
|
|
881
|
+
{
|
|
882
|
+
U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
|
|
883
|
+
const sortedSymbol_t* ptr;
|
|
884
|
+
assert(level >= 1 && level <= 2);
|
|
885
|
+
switch (length) {
|
|
886
|
+
case 1:
|
|
887
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
888
|
+
HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
|
|
889
|
+
*DTableRank++ = DElt;
|
|
890
|
+
}
|
|
891
|
+
break;
|
|
892
|
+
case 2:
|
|
893
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
894
|
+
HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
|
|
895
|
+
DTableRank[0] = DElt;
|
|
896
|
+
DTableRank[1] = DElt;
|
|
897
|
+
DTableRank += 2;
|
|
898
|
+
}
|
|
899
|
+
break;
|
|
900
|
+
case 4:
|
|
901
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
902
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
|
903
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
|
904
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
|
905
|
+
DTableRank += 4;
|
|
906
|
+
}
|
|
907
|
+
break;
|
|
908
|
+
case 8:
|
|
909
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
910
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
|
911
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
|
912
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
|
913
|
+
ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
|
|
914
|
+
ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
|
|
915
|
+
DTableRank += 8;
|
|
916
|
+
}
|
|
917
|
+
break;
|
|
918
|
+
default:
|
|
919
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
920
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
|
921
|
+
HUF_DEltX2* const DTableRankEnd = DTableRank + length;
|
|
922
|
+
for (; DTableRank != DTableRankEnd; DTableRank += 8) {
|
|
923
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
|
924
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
|
925
|
+
ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
|
|
926
|
+
ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
|
|
927
|
+
}
|
|
928
|
+
}
|
|
929
|
+
break;
|
|
498
930
|
}
|
|
931
|
+
}
|
|
499
932
|
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
933
|
+
/* HUF_fillDTableX2Level2() :
|
|
934
|
+
* `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
|
|
935
|
+
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
|
|
936
|
+
const U32* rankVal, const int minWeight, const int maxWeight1,
|
|
937
|
+
const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
|
|
938
|
+
U32 nbBitsBaseline, U16 baseSeq)
|
|
939
|
+
{
|
|
940
|
+
/* Fill skipped values (all positions up to rankVal[minWeight]).
|
|
941
|
+
* These are positions only get a single symbol because the combined weight
|
|
942
|
+
* is too large.
|
|
943
|
+
*/
|
|
944
|
+
if (minWeight>1) {
|
|
945
|
+
U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
|
|
946
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
|
|
947
|
+
int const skipSize = rankVal[minWeight];
|
|
948
|
+
assert(length > 1);
|
|
949
|
+
assert((U32)skipSize < length);
|
|
950
|
+
switch (length) {
|
|
951
|
+
case 2:
|
|
952
|
+
assert(skipSize == 1);
|
|
953
|
+
ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
|
|
954
|
+
break;
|
|
955
|
+
case 4:
|
|
956
|
+
assert(skipSize <= 4);
|
|
957
|
+
ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
|
|
958
|
+
ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
|
|
959
|
+
break;
|
|
960
|
+
default:
|
|
961
|
+
{
|
|
962
|
+
int i;
|
|
963
|
+
for (i = 0; i < skipSize; i += 8) {
|
|
964
|
+
ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
|
|
965
|
+
ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
|
|
966
|
+
ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
|
|
967
|
+
ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
|
|
968
|
+
}
|
|
969
|
+
}
|
|
970
|
+
}
|
|
971
|
+
}
|
|
514
972
|
|
|
515
|
-
|
|
516
|
-
|
|
973
|
+
/* Fill each of the second level symbols by weight. */
|
|
974
|
+
{
|
|
975
|
+
int w;
|
|
976
|
+
for (w = minWeight; w < maxWeight1; ++w) {
|
|
977
|
+
int const begin = rankStart[w];
|
|
978
|
+
int const end = rankStart[w+1];
|
|
979
|
+
U32 const nbBits = nbBitsBaseline - w;
|
|
980
|
+
U32 const totalBits = nbBits + consumedBits;
|
|
981
|
+
HUF_fillDTableX2ForWeight(
|
|
982
|
+
DTable + rankVal[w],
|
|
983
|
+
sortedSymbols + begin, sortedSymbols + end,
|
|
984
|
+
totalBits, targetLog,
|
|
985
|
+
baseSeq, /* level */ 2);
|
|
986
|
+
}
|
|
987
|
+
}
|
|
517
988
|
}
|
|
518
989
|
|
|
519
|
-
|
|
520
990
|
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
|
|
521
|
-
const sortedSymbol_t* sortedList,
|
|
991
|
+
const sortedSymbol_t* sortedList,
|
|
522
992
|
const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
|
|
523
993
|
const U32 nbBitsBaseline)
|
|
524
994
|
{
|
|
525
|
-
U32 rankVal
|
|
995
|
+
U32* const rankVal = rankValOrigin[0];
|
|
526
996
|
const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
|
|
527
997
|
const U32 minBits = nbBitsBaseline - maxWeight;
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
const
|
|
535
|
-
const
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
U32 sortedRank;
|
|
998
|
+
int w;
|
|
999
|
+
int const wEnd = (int)maxWeight + 1;
|
|
1000
|
+
|
|
1001
|
+
/* Fill DTable in order of weight. */
|
|
1002
|
+
for (w = 1; w < wEnd; ++w) {
|
|
1003
|
+
int const begin = (int)rankStart[w];
|
|
1004
|
+
int const end = (int)rankStart[w+1];
|
|
1005
|
+
U32 const nbBits = nbBitsBaseline - w;
|
|
1006
|
+
|
|
1007
|
+
if (targetLog-nbBits >= minBits) {
|
|
1008
|
+
/* Enough room for a second symbol. */
|
|
1009
|
+
int start = rankVal[w];
|
|
1010
|
+
U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
|
|
542
1011
|
int minWeight = nbBits + scaleLog;
|
|
1012
|
+
int s;
|
|
543
1013
|
if (minWeight < 1) minWeight = 1;
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
1014
|
+
/* Fill the DTable for every symbol of weight w.
|
|
1015
|
+
* These symbols get at least 1 second symbol.
|
|
1016
|
+
*/
|
|
1017
|
+
for (s = begin; s != end; ++s) {
|
|
1018
|
+
HUF_fillDTableX2Level2(
|
|
1019
|
+
DTable + start, targetLog, nbBits,
|
|
1020
|
+
rankValOrigin[nbBits], minWeight, wEnd,
|
|
1021
|
+
sortedList, rankStart,
|
|
1022
|
+
nbBitsBaseline, sortedList[s].symbol);
|
|
1023
|
+
start += length;
|
|
1024
|
+
}
|
|
549
1025
|
} else {
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
} }
|
|
558
|
-
rankVal[weight] += length;
|
|
1026
|
+
/* Only a single symbol. */
|
|
1027
|
+
HUF_fillDTableX2ForWeight(
|
|
1028
|
+
DTable + rankVal[w],
|
|
1029
|
+
sortedList + begin, sortedList + end,
|
|
1030
|
+
nbBits, targetLog,
|
|
1031
|
+
/* baseSeq */ 0, /* level */ 1);
|
|
1032
|
+
}
|
|
559
1033
|
}
|
|
560
1034
|
}
|
|
561
1035
|
|
|
1036
|
+
typedef struct {
|
|
1037
|
+
rankValCol_t rankVal[HUF_TABLELOG_MAX];
|
|
1038
|
+
U32 rankStats[HUF_TABLELOG_MAX + 1];
|
|
1039
|
+
U32 rankStart0[HUF_TABLELOG_MAX + 3];
|
|
1040
|
+
sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
|
|
1041
|
+
BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
|
|
1042
|
+
U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
|
1043
|
+
} HUF_ReadDTableX2_Workspace;
|
|
1044
|
+
|
|
562
1045
|
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
|
|
563
1046
|
const void* src, size_t srcSize,
|
|
564
1047
|
void* workSpace, size_t wkspSize)
|
|
565
1048
|
{
|
|
566
|
-
|
|
1049
|
+
return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
|
1050
|
+
}
|
|
1051
|
+
|
|
1052
|
+
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
|
|
1053
|
+
const void* src, size_t srcSize,
|
|
1054
|
+
void* workSpace, size_t wkspSize, int bmi2)
|
|
1055
|
+
{
|
|
1056
|
+
U32 tableLog, maxW, nbSymbols;
|
|
567
1057
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
|
568
|
-
U32
|
|
1058
|
+
U32 maxTableLog = dtd.maxTableLog;
|
|
569
1059
|
size_t iSize;
|
|
570
1060
|
void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
|
|
571
1061
|
HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
|
|
572
1062
|
U32 *rankStart;
|
|
573
1063
|
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
|
|
582
|
-
spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
|
|
583
|
-
rankStats = (U32 *)workSpace + spaceUsed32;
|
|
584
|
-
spaceUsed32 += HUF_TABLELOG_MAX + 1;
|
|
585
|
-
rankStart0 = (U32 *)workSpace + spaceUsed32;
|
|
586
|
-
spaceUsed32 += HUF_TABLELOG_MAX + 2;
|
|
587
|
-
sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
|
|
588
|
-
spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
|
|
589
|
-
weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
|
590
|
-
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
|
591
|
-
|
|
592
|
-
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
|
593
|
-
|
|
594
|
-
rankStart = rankStart0 + 1;
|
|
595
|
-
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
|
|
1064
|
+
HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
|
|
1065
|
+
|
|
1066
|
+
if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
|
|
1067
|
+
|
|
1068
|
+
rankStart = wksp->rankStart0 + 1;
|
|
1069
|
+
ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
|
|
1070
|
+
ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
|
|
596
1071
|
|
|
597
1072
|
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
|
|
598
1073
|
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
|
599
|
-
/*
|
|
1074
|
+
/* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
|
|
600
1075
|
|
|
601
|
-
iSize =
|
|
1076
|
+
iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
|
|
602
1077
|
if (HUF_isError(iSize)) return iSize;
|
|
603
1078
|
|
|
604
1079
|
/* check result */
|
|
605
1080
|
if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
|
|
1081
|
+
if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
|
|
606
1082
|
|
|
607
1083
|
/* find maxWeight */
|
|
608
|
-
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
|
|
1084
|
+
for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
|
|
609
1085
|
|
|
610
1086
|
/* Get start index of each weight */
|
|
611
1087
|
{ U32 w, nextRankStart = 0;
|
|
612
1088
|
for (w=1; w<maxW+1; w++) {
|
|
613
|
-
U32
|
|
614
|
-
nextRankStart += rankStats[w];
|
|
615
|
-
rankStart[w] =
|
|
1089
|
+
U32 curr = nextRankStart;
|
|
1090
|
+
nextRankStart += wksp->rankStats[w];
|
|
1091
|
+
rankStart[w] = curr;
|
|
616
1092
|
}
|
|
617
1093
|
rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
|
|
618
|
-
|
|
1094
|
+
rankStart[maxW+1] = nextRankStart;
|
|
619
1095
|
}
|
|
620
1096
|
|
|
621
1097
|
/* sort symbols by weight */
|
|
622
1098
|
{ U32 s;
|
|
623
1099
|
for (s=0; s<nbSymbols; s++) {
|
|
624
|
-
U32 const w = weightList[s];
|
|
1100
|
+
U32 const w = wksp->weightList[s];
|
|
625
1101
|
U32 const r = rankStart[w]++;
|
|
626
|
-
sortedSymbol[r].symbol = (BYTE)s;
|
|
627
|
-
sortedSymbol[r].weight = (BYTE)w;
|
|
1102
|
+
wksp->sortedSymbol[r].symbol = (BYTE)s;
|
|
628
1103
|
}
|
|
629
1104
|
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
|
|
630
1105
|
}
|
|
631
1106
|
|
|
632
1107
|
/* Build rankVal */
|
|
633
|
-
{ U32* const rankVal0 = rankVal[0];
|
|
1108
|
+
{ U32* const rankVal0 = wksp->rankVal[0];
|
|
634
1109
|
{ int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
|
|
635
1110
|
U32 nextRankVal = 0;
|
|
636
1111
|
U32 w;
|
|
637
1112
|
for (w=1; w<maxW+1; w++) {
|
|
638
|
-
U32
|
|
639
|
-
nextRankVal += rankStats[w] << (w+rescale);
|
|
640
|
-
rankVal0[w] =
|
|
1113
|
+
U32 curr = nextRankVal;
|
|
1114
|
+
nextRankVal += wksp->rankStats[w] << (w+rescale);
|
|
1115
|
+
rankVal0[w] = curr;
|
|
641
1116
|
} }
|
|
642
1117
|
{ U32 const minBits = tableLog+1 - maxW;
|
|
643
1118
|
U32 consumed;
|
|
644
1119
|
for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
|
|
645
|
-
U32* const rankValPtr = rankVal[consumed];
|
|
1120
|
+
U32* const rankValPtr = wksp->rankVal[consumed];
|
|
646
1121
|
U32 w;
|
|
647
1122
|
for (w = 1; w < maxW+1; w++) {
|
|
648
1123
|
rankValPtr[w] = rankVal0[w] >> consumed;
|
|
649
1124
|
} } } }
|
|
650
1125
|
|
|
651
1126
|
HUF_fillDTableX2(dt, maxTableLog,
|
|
652
|
-
sortedSymbol,
|
|
653
|
-
rankStart0, rankVal, maxW,
|
|
1127
|
+
wksp->sortedSymbol,
|
|
1128
|
+
wksp->rankStart0, wksp->rankVal, maxW,
|
|
654
1129
|
tableLog+1);
|
|
655
1130
|
|
|
656
1131
|
dtd.tableLog = (BYTE)maxTableLog;
|
|
657
1132
|
dtd.tableType = 1;
|
|
658
|
-
|
|
1133
|
+
ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
|
|
659
1134
|
return iSize;
|
|
660
1135
|
}
|
|
661
1136
|
|
|
662
|
-
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
663
|
-
{
|
|
664
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
665
|
-
return HUF_readDTableX2_wksp(DTable, src, srcSize,
|
|
666
|
-
workSpace, sizeof(workSpace));
|
|
667
|
-
}
|
|
668
|
-
|
|
669
1137
|
|
|
670
1138
|
FORCE_INLINE_TEMPLATE U32
|
|
671
1139
|
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
|
672
1140
|
{
|
|
673
1141
|
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
|
674
|
-
|
|
1142
|
+
ZSTD_memcpy(op, &dt[val].sequence, 2);
|
|
675
1143
|
BIT_skipBits(DStream, dt[val].nbBits);
|
|
676
1144
|
return dt[val].length;
|
|
677
1145
|
}
|
|
@@ -680,15 +1148,17 @@ FORCE_INLINE_TEMPLATE U32
|
|
|
680
1148
|
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
|
681
1149
|
{
|
|
682
1150
|
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
|
683
|
-
|
|
684
|
-
if (dt[val].length==1)
|
|
685
|
-
|
|
1151
|
+
ZSTD_memcpy(op, &dt[val].sequence, 1);
|
|
1152
|
+
if (dt[val].length==1) {
|
|
1153
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
|
1154
|
+
} else {
|
|
686
1155
|
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
|
687
1156
|
BIT_skipBits(DStream, dt[val].nbBits);
|
|
688
1157
|
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
|
689
1158
|
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
|
690
1159
|
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
|
691
|
-
|
|
1160
|
+
}
|
|
1161
|
+
}
|
|
692
1162
|
return 1;
|
|
693
1163
|
}
|
|
694
1164
|
|
|
@@ -710,19 +1180,37 @@ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
|
|
710
1180
|
BYTE* const pStart = p;
|
|
711
1181
|
|
|
712
1182
|
/* up to 8 symbols at a time */
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
1183
|
+
if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
|
|
1184
|
+
if (dtLog <= 11 && MEM_64bits()) {
|
|
1185
|
+
/* up to 10 symbols at a time */
|
|
1186
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
|
|
1187
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1188
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1189
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1190
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1191
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1192
|
+
}
|
|
1193
|
+
} else {
|
|
1194
|
+
/* up to 8 symbols at a time */
|
|
1195
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
|
1196
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
|
1197
|
+
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
|
|
1198
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
|
1199
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1200
|
+
}
|
|
1201
|
+
}
|
|
1202
|
+
} else {
|
|
1203
|
+
BIT_reloadDStream(bitDPtr);
|
|
718
1204
|
}
|
|
719
1205
|
|
|
720
1206
|
/* closer to end : up to 2 symbols at a time */
|
|
721
|
-
|
|
722
|
-
|
|
1207
|
+
if ((size_t)(pEnd - p) >= 2) {
|
|
1208
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
|
1209
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
723
1210
|
|
|
724
|
-
|
|
725
|
-
|
|
1211
|
+
while (p <= pEnd-2)
|
|
1212
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
|
1213
|
+
}
|
|
726
1214
|
|
|
727
1215
|
if (p < pEnd)
|
|
728
1216
|
p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
|
|
@@ -756,8 +1244,6 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
|
756
1244
|
/* decoded size */
|
|
757
1245
|
return dstSize;
|
|
758
1246
|
}
|
|
759
|
-
|
|
760
|
-
|
|
761
1247
|
FORCE_INLINE_TEMPLATE size_t
|
|
762
1248
|
HUF_decompress4X2_usingDTable_internal_body(
|
|
763
1249
|
void* dst, size_t dstSize,
|
|
@@ -769,6 +1255,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
769
1255
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
|
770
1256
|
BYTE* const ostart = (BYTE*) dst;
|
|
771
1257
|
BYTE* const oend = ostart + dstSize;
|
|
1258
|
+
BYTE* const olimit = oend - (sizeof(size_t)-1);
|
|
772
1259
|
const void* const dtPtr = DTable+1;
|
|
773
1260
|
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
|
|
774
1261
|
|
|
@@ -793,37 +1280,65 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
793
1280
|
BYTE* op2 = opStart2;
|
|
794
1281
|
BYTE* op3 = opStart3;
|
|
795
1282
|
BYTE* op4 = opStart4;
|
|
796
|
-
U32 endSignal;
|
|
1283
|
+
U32 endSignal = 1;
|
|
797
1284
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
798
1285
|
U32 const dtLog = dtd.tableLog;
|
|
799
1286
|
|
|
800
1287
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
|
1288
|
+
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
|
801
1289
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
|
802
1290
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
|
803
1291
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
|
804
1292
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
|
805
1293
|
|
|
806
1294
|
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
1295
|
+
if ((size_t)(oend - op4) >= sizeof(size_t)) {
|
|
1296
|
+
for ( ; (endSignal) & (op4 < olimit); ) {
|
|
1297
|
+
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
|
1298
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1299
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
|
1300
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1301
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
|
1302
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1303
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
|
1304
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1305
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
|
1306
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
|
1307
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
|
1308
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1309
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
|
1310
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1311
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
|
1312
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1313
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
|
1314
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1315
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
|
1316
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
|
1317
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
|
1318
|
+
#else
|
|
1319
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1320
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1321
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1322
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1323
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
|
1324
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
|
1325
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
|
1326
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
|
1327
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1328
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1329
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1330
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1331
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
|
1332
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
|
1333
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
|
1334
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
|
1335
|
+
endSignal = (U32)LIKELY((U32)
|
|
1336
|
+
(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
|
|
1337
|
+
& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
|
|
1338
|
+
& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
|
|
1339
|
+
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
|
|
1340
|
+
#endif
|
|
1341
|
+
}
|
|
827
1342
|
}
|
|
828
1343
|
|
|
829
1344
|
/* check corruption */
|
|
@@ -847,8 +1362,99 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
847
1362
|
}
|
|
848
1363
|
}
|
|
849
1364
|
|
|
1365
|
+
#if HUF_NEED_BMI2_FUNCTION
|
|
1366
|
+
static BMI2_TARGET_ATTRIBUTE
|
|
1367
|
+
size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
|
|
1368
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
1369
|
+
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1370
|
+
}
|
|
1371
|
+
#endif
|
|
1372
|
+
|
|
1373
|
+
#if HUF_NEED_DEFAULT_FUNCTION
|
|
1374
|
+
static
|
|
1375
|
+
size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
|
|
1376
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
1377
|
+
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1378
|
+
}
|
|
1379
|
+
#endif
|
|
1380
|
+
|
|
1381
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
1382
|
+
|
|
1383
|
+
HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args);
|
|
1384
|
+
|
|
1385
|
+
static HUF_ASM_X86_64_BMI2_ATTRS size_t
|
|
1386
|
+
HUF_decompress4X2_usingDTable_internal_bmi2_asm(
|
|
1387
|
+
void* dst, size_t dstSize,
|
|
1388
|
+
const void* cSrc, size_t cSrcSize,
|
|
1389
|
+
const HUF_DTable* DTable) {
|
|
1390
|
+
void const* dt = DTable + 1;
|
|
1391
|
+
const BYTE* const iend = (const BYTE*)cSrc + 6;
|
|
1392
|
+
BYTE* const oend = (BYTE*)dst + dstSize;
|
|
1393
|
+
HUF_DecompressAsmArgs args;
|
|
1394
|
+
{
|
|
1395
|
+
size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1396
|
+
FORWARD_IF_ERROR(ret, "Failed to init asm args");
|
|
1397
|
+
if (ret != 0)
|
|
1398
|
+
return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1399
|
+
}
|
|
1400
|
+
|
|
1401
|
+
assert(args.ip[0] >= args.ilimit);
|
|
1402
|
+
HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
|
|
1403
|
+
|
|
1404
|
+
/* note : op4 already verified within main loop */
|
|
1405
|
+
assert(args.ip[0] >= iend);
|
|
1406
|
+
assert(args.ip[1] >= iend);
|
|
1407
|
+
assert(args.ip[2] >= iend);
|
|
1408
|
+
assert(args.ip[3] >= iend);
|
|
1409
|
+
assert(args.op[3] <= oend);
|
|
1410
|
+
(void)iend;
|
|
1411
|
+
|
|
1412
|
+
/* finish bitStreams one by one */
|
|
1413
|
+
{
|
|
1414
|
+
size_t const segmentSize = (dstSize+3) / 4;
|
|
1415
|
+
BYTE* segmentEnd = (BYTE*)dst;
|
|
1416
|
+
int i;
|
|
1417
|
+
for (i = 0; i < 4; ++i) {
|
|
1418
|
+
BIT_DStream_t bit;
|
|
1419
|
+
if (segmentSize <= (size_t)(oend - segmentEnd))
|
|
1420
|
+
segmentEnd += segmentSize;
|
|
1421
|
+
else
|
|
1422
|
+
segmentEnd = oend;
|
|
1423
|
+
FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
|
|
1424
|
+
args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
|
|
1425
|
+
if (args.op[i] != segmentEnd)
|
|
1426
|
+
return ERROR(corruption_detected);
|
|
1427
|
+
}
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
/* decoded size */
|
|
1431
|
+
return dstSize;
|
|
1432
|
+
}
|
|
1433
|
+
#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
|
|
1434
|
+
|
|
1435
|
+
static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
|
|
1436
|
+
size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
|
|
1437
|
+
{
|
|
1438
|
+
#if DYNAMIC_BMI2
|
|
1439
|
+
if (bmi2) {
|
|
1440
|
+
# if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
1441
|
+
return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1442
|
+
# else
|
|
1443
|
+
return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1444
|
+
# endif
|
|
1445
|
+
}
|
|
1446
|
+
#else
|
|
1447
|
+
(void)bmi2;
|
|
1448
|
+
#endif
|
|
1449
|
+
|
|
1450
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
|
|
1451
|
+
return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1452
|
+
#else
|
|
1453
|
+
return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1454
|
+
#endif
|
|
1455
|
+
}
|
|
1456
|
+
|
|
850
1457
|
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
|
|
851
|
-
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
|
|
852
1458
|
|
|
853
1459
|
size_t HUF_decompress1X2_usingDTable(
|
|
854
1460
|
void* dst, size_t dstSize,
|
|
@@ -876,20 +1482,6 @@ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
|
876
1482
|
}
|
|
877
1483
|
|
|
878
1484
|
|
|
879
|
-
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
880
|
-
const void* cSrc, size_t cSrcSize)
|
|
881
|
-
{
|
|
882
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
883
|
-
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
|
884
|
-
workSpace, sizeof(workSpace));
|
|
885
|
-
}
|
|
886
|
-
|
|
887
|
-
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
888
|
-
{
|
|
889
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
|
890
|
-
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
891
|
-
}
|
|
892
|
-
|
|
893
1485
|
size_t HUF_decompress4X2_usingDTable(
|
|
894
1486
|
void* dst, size_t dstSize,
|
|
895
1487
|
const void* cSrc, size_t cSrcSize,
|
|
@@ -923,20 +1515,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
|
923
1515
|
}
|
|
924
1516
|
|
|
925
1517
|
|
|
926
|
-
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
927
|
-
const void* cSrc, size_t cSrcSize)
|
|
928
|
-
{
|
|
929
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
930
|
-
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
931
|
-
workSpace, sizeof(workSpace));
|
|
932
|
-
}
|
|
933
|
-
|
|
934
|
-
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
935
|
-
{
|
|
936
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
|
937
|
-
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
938
|
-
}
|
|
939
|
-
|
|
940
1518
|
#endif /* HUF_FORCE_DECOMPRESS_X1 */
|
|
941
1519
|
|
|
942
1520
|
|
|
@@ -985,25 +1563,25 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
|
|
985
1563
|
|
|
986
1564
|
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
|
987
1565
|
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
|
|
988
|
-
static const algo_time_t algoTime[16 /* Quantization */][
|
|
1566
|
+
static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
|
|
989
1567
|
{
|
|
990
1568
|
/* single, double */
|
|
991
|
-
{{0,0}, {1,1}
|
|
992
|
-
{{0,0}, {1,1}
|
|
993
|
-
{{
|
|
994
|
-
{{
|
|
995
|
-
{{
|
|
996
|
-
{{
|
|
997
|
-
{{
|
|
998
|
-
{{
|
|
999
|
-
{{
|
|
1000
|
-
{{
|
|
1001
|
-
{{
|
|
1002
|
-
{{
|
|
1003
|
-
{{
|
|
1004
|
-
{{
|
|
1005
|
-
{{
|
|
1006
|
-
{{
|
|
1569
|
+
{{0,0}, {1,1}}, /* Q==0 : impossible */
|
|
1570
|
+
{{0,0}, {1,1}}, /* Q==1 : impossible */
|
|
1571
|
+
{{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */
|
|
1572
|
+
{{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */
|
|
1573
|
+
{{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */
|
|
1574
|
+
{{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */
|
|
1575
|
+
{{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */
|
|
1576
|
+
{{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */
|
|
1577
|
+
{{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */
|
|
1578
|
+
{{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */
|
|
1579
|
+
{{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */
|
|
1580
|
+
{{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */
|
|
1581
|
+
{{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */
|
|
1582
|
+
{{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */
|
|
1583
|
+
{{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */
|
|
1584
|
+
{{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */
|
|
1007
1585
|
};
|
|
1008
1586
|
#endif
|
|
1009
1587
|
|
|
@@ -1030,74 +1608,13 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
|
|
1030
1608
|
U32 const D256 = (U32)(dstSize >> 8);
|
|
1031
1609
|
U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
|
|
1032
1610
|
U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
|
|
1033
|
-
DTime1 += DTime1 >>
|
|
1611
|
+
DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
|
|
1034
1612
|
return DTime1 < DTime0;
|
|
1035
1613
|
}
|
|
1036
1614
|
#endif
|
|
1037
1615
|
}
|
|
1038
1616
|
|
|
1039
1617
|
|
|
1040
|
-
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
|
1041
|
-
|
|
1042
|
-
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1043
|
-
{
|
|
1044
|
-
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1045
|
-
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
|
|
1046
|
-
#endif
|
|
1047
|
-
|
|
1048
|
-
/* validation checks */
|
|
1049
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1050
|
-
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
|
1051
|
-
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1052
|
-
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1053
|
-
|
|
1054
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1055
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1056
|
-
(void)algoNb;
|
|
1057
|
-
assert(algoNb == 0);
|
|
1058
|
-
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
|
|
1059
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1060
|
-
(void)algoNb;
|
|
1061
|
-
assert(algoNb == 1);
|
|
1062
|
-
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
|
|
1063
|
-
#else
|
|
1064
|
-
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
|
1065
|
-
#endif
|
|
1066
|
-
}
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1070
|
-
{
|
|
1071
|
-
/* validation checks */
|
|
1072
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1073
|
-
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
|
1074
|
-
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1075
|
-
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1076
|
-
|
|
1077
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1078
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1079
|
-
(void)algoNb;
|
|
1080
|
-
assert(algoNb == 0);
|
|
1081
|
-
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
|
1082
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1083
|
-
(void)algoNb;
|
|
1084
|
-
assert(algoNb == 1);
|
|
1085
|
-
return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
|
1086
|
-
#else
|
|
1087
|
-
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
|
|
1088
|
-
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
|
|
1089
|
-
#endif
|
|
1090
|
-
}
|
|
1091
|
-
}
|
|
1092
|
-
|
|
1093
|
-
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1094
|
-
{
|
|
1095
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1096
|
-
return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1097
|
-
workSpace, sizeof(workSpace));
|
|
1098
|
-
}
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
1618
|
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
|
|
1102
1619
|
size_t dstSize, const void* cSrc,
|
|
1103
1620
|
size_t cSrcSize, void* workSpace,
|
|
@@ -1131,8 +1648,8 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
|
1131
1648
|
/* validation checks */
|
|
1132
1649
|
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1133
1650
|
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
|
1134
|
-
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1135
|
-
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1651
|
+
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1652
|
+
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1136
1653
|
|
|
1137
1654
|
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1138
1655
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
@@ -1154,14 +1671,6 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
|
1154
1671
|
}
|
|
1155
1672
|
}
|
|
1156
1673
|
|
|
1157
|
-
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1158
|
-
const void* cSrc, size_t cSrcSize)
|
|
1159
|
-
{
|
|
1160
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1161
|
-
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1162
|
-
workSpace, sizeof(workSpace));
|
|
1163
|
-
}
|
|
1164
|
-
|
|
1165
1674
|
|
|
1166
1675
|
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
|
|
1167
1676
|
{
|
|
@@ -1185,7 +1694,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
|
|
|
1185
1694
|
{
|
|
1186
1695
|
const BYTE* ip = (const BYTE*) cSrc;
|
|
1187
1696
|
|
|
1188
|
-
size_t const hSize =
|
|
1697
|
+
size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
|
1189
1698
|
if (HUF_isError(hSize)) return hSize;
|
|
1190
1699
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
|
1191
1700
|
ip += hSize; cSrcSize -= hSize;
|
|
@@ -1232,3 +1741,149 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
|
|
|
1232
1741
|
#endif
|
|
1233
1742
|
}
|
|
1234
1743
|
}
|
|
1744
|
+
|
|
1745
|
+
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
|
1746
|
+
#ifndef HUF_FORCE_DECOMPRESS_X2
|
|
1747
|
+
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
1748
|
+
{
|
|
1749
|
+
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1750
|
+
return HUF_readDTableX1_wksp(DTable, src, srcSize,
|
|
1751
|
+
workSpace, sizeof(workSpace));
|
|
1752
|
+
}
|
|
1753
|
+
|
|
1754
|
+
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
1755
|
+
const void* cSrc, size_t cSrcSize)
|
|
1756
|
+
{
|
|
1757
|
+
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1758
|
+
return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
|
1759
|
+
workSpace, sizeof(workSpace));
|
|
1760
|
+
}
|
|
1761
|
+
|
|
1762
|
+
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1763
|
+
{
|
|
1764
|
+
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
|
1765
|
+
return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
|
1766
|
+
}
|
|
1767
|
+
#endif
|
|
1768
|
+
|
|
1769
|
+
#ifndef HUF_FORCE_DECOMPRESS_X1
|
|
1770
|
+
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
1771
|
+
{
|
|
1772
|
+
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1773
|
+
return HUF_readDTableX2_wksp(DTable, src, srcSize,
|
|
1774
|
+
workSpace, sizeof(workSpace));
|
|
1775
|
+
}
|
|
1776
|
+
|
|
1777
|
+
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
1778
|
+
const void* cSrc, size_t cSrcSize)
|
|
1779
|
+
{
|
|
1780
|
+
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1781
|
+
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
|
1782
|
+
workSpace, sizeof(workSpace));
|
|
1783
|
+
}
|
|
1784
|
+
|
|
1785
|
+
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1786
|
+
{
|
|
1787
|
+
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
|
1788
|
+
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
1789
|
+
}
|
|
1790
|
+
#endif
|
|
1791
|
+
|
|
1792
|
+
#ifndef HUF_FORCE_DECOMPRESS_X2
|
|
1793
|
+
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1794
|
+
{
|
|
1795
|
+
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1796
|
+
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1797
|
+
workSpace, sizeof(workSpace));
|
|
1798
|
+
}
|
|
1799
|
+
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1800
|
+
{
|
|
1801
|
+
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
|
1802
|
+
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
1803
|
+
}
|
|
1804
|
+
#endif
|
|
1805
|
+
|
|
1806
|
+
#ifndef HUF_FORCE_DECOMPRESS_X1
|
|
1807
|
+
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1808
|
+
const void* cSrc, size_t cSrcSize)
|
|
1809
|
+
{
|
|
1810
|
+
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1811
|
+
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1812
|
+
workSpace, sizeof(workSpace));
|
|
1813
|
+
}
|
|
1814
|
+
|
|
1815
|
+
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1816
|
+
{
|
|
1817
|
+
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
|
1818
|
+
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
1819
|
+
}
|
|
1820
|
+
#endif
|
|
1821
|
+
|
|
1822
|
+
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
|
1823
|
+
|
|
1824
|
+
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1825
|
+
{
|
|
1826
|
+
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1827
|
+
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
|
|
1828
|
+
#endif
|
|
1829
|
+
|
|
1830
|
+
/* validation checks */
|
|
1831
|
+
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1832
|
+
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
|
1833
|
+
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1834
|
+
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1835
|
+
|
|
1836
|
+
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1837
|
+
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1838
|
+
(void)algoNb;
|
|
1839
|
+
assert(algoNb == 0);
|
|
1840
|
+
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
|
|
1841
|
+
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1842
|
+
(void)algoNb;
|
|
1843
|
+
assert(algoNb == 1);
|
|
1844
|
+
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
|
|
1845
|
+
#else
|
|
1846
|
+
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
|
1847
|
+
#endif
|
|
1848
|
+
}
|
|
1849
|
+
}
|
|
1850
|
+
|
|
1851
|
+
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1852
|
+
{
|
|
1853
|
+
/* validation checks */
|
|
1854
|
+
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1855
|
+
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
|
1856
|
+
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1857
|
+
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1858
|
+
|
|
1859
|
+
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1860
|
+
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1861
|
+
(void)algoNb;
|
|
1862
|
+
assert(algoNb == 0);
|
|
1863
|
+
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
|
1864
|
+
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1865
|
+
(void)algoNb;
|
|
1866
|
+
assert(algoNb == 1);
|
|
1867
|
+
return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
|
1868
|
+
#else
|
|
1869
|
+
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
|
|
1870
|
+
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
|
|
1871
|
+
#endif
|
|
1872
|
+
}
|
|
1873
|
+
}
|
|
1874
|
+
|
|
1875
|
+
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1876
|
+
{
|
|
1877
|
+
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1878
|
+
return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1879
|
+
workSpace, sizeof(workSpace));
|
|
1880
|
+
}
|
|
1881
|
+
|
|
1882
|
+
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1883
|
+
const void* cSrc, size_t cSrcSize)
|
|
1884
|
+
{
|
|
1885
|
+
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1886
|
+
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1887
|
+
workSpace, sizeof(workSpace));
|
|
1888
|
+
}
|
|
1889
|
+
#endif
|