extzstd 0.3.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
2
|
* huff0 huffman decoder,
|
|
3
3
|
* part of Finite State Entropy library
|
|
4
|
-
* Copyright (c)
|
|
4
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
5
5
|
*
|
|
6
6
|
* You can contact the author at :
|
|
7
7
|
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
@@ -19,14 +19,27 @@
|
|
|
19
19
|
#include "../common/compiler.h"
|
|
20
20
|
#include "../common/bitstream.h" /* BIT_* */
|
|
21
21
|
#include "../common/fse.h" /* to compress headers */
|
|
22
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
23
22
|
#include "../common/huf.h"
|
|
24
23
|
#include "../common/error_private.h"
|
|
24
|
+
#include "../common/zstd_internal.h"
|
|
25
|
+
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */
|
|
26
|
+
|
|
27
|
+
/* **************************************************************
|
|
28
|
+
* Constants
|
|
29
|
+
****************************************************************/
|
|
30
|
+
|
|
31
|
+
#define HUF_DECODER_FAST_TABLELOG 11
|
|
25
32
|
|
|
26
33
|
/* **************************************************************
|
|
27
34
|
* Macros
|
|
28
35
|
****************************************************************/
|
|
29
36
|
|
|
37
|
+
#ifdef HUF_DISABLE_FAST_DECODE
|
|
38
|
+
# define HUF_ENABLE_FAST_DECODE 0
|
|
39
|
+
#else
|
|
40
|
+
# define HUF_ENABLE_FAST_DECODE 1
|
|
41
|
+
#endif
|
|
42
|
+
|
|
30
43
|
/* These two optional macros force the use one way or another of the two
|
|
31
44
|
* Huffman decompression implementations. You can't force in both directions
|
|
32
45
|
* at the same time.
|
|
@@ -36,6 +49,28 @@
|
|
|
36
49
|
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
|
|
37
50
|
#endif
|
|
38
51
|
|
|
52
|
+
/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is
|
|
53
|
+
* supported at runtime, so we can add the BMI2 target attribute.
|
|
54
|
+
* When it is disabled, we will still get BMI2 if it is enabled statically.
|
|
55
|
+
*/
|
|
56
|
+
#if DYNAMIC_BMI2
|
|
57
|
+
# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
|
|
58
|
+
#else
|
|
59
|
+
# define HUF_FAST_BMI2_ATTRS
|
|
60
|
+
#endif
|
|
61
|
+
|
|
62
|
+
#ifdef __cplusplus
|
|
63
|
+
# define HUF_EXTERN_C extern "C"
|
|
64
|
+
#else
|
|
65
|
+
# define HUF_EXTERN_C
|
|
66
|
+
#endif
|
|
67
|
+
#define HUF_ASM_DECL HUF_EXTERN_C
|
|
68
|
+
|
|
69
|
+
#if DYNAMIC_BMI2
|
|
70
|
+
# define HUF_NEED_BMI2_FUNCTION 1
|
|
71
|
+
#else
|
|
72
|
+
# define HUF_NEED_BMI2_FUNCTION 0
|
|
73
|
+
#endif
|
|
39
74
|
|
|
40
75
|
/* **************************************************************
|
|
41
76
|
* Error Management
|
|
@@ -53,6 +88,11 @@
|
|
|
53
88
|
/* **************************************************************
|
|
54
89
|
* BMI2 Variant Wrappers
|
|
55
90
|
****************************************************************/
|
|
91
|
+
typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize,
|
|
92
|
+
const void *cSrc,
|
|
93
|
+
size_t cSrcSize,
|
|
94
|
+
const HUF_DTable *DTable);
|
|
95
|
+
|
|
56
96
|
#if DYNAMIC_BMI2
|
|
57
97
|
|
|
58
98
|
#define HUF_DGEN(fn) \
|
|
@@ -65,7 +105,7 @@
|
|
|
65
105
|
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
|
66
106
|
} \
|
|
67
107
|
\
|
|
68
|
-
static
|
|
108
|
+
static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
|
|
69
109
|
void* dst, size_t dstSize, \
|
|
70
110
|
const void* cSrc, size_t cSrcSize, \
|
|
71
111
|
const HUF_DTable* DTable) \
|
|
@@ -74,9 +114,9 @@
|
|
|
74
114
|
} \
|
|
75
115
|
\
|
|
76
116
|
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
|
77
|
-
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
|
117
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags) \
|
|
78
118
|
{ \
|
|
79
|
-
if (bmi2) { \
|
|
119
|
+
if (flags & HUF_flags_bmi2) { \
|
|
80
120
|
return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
|
|
81
121
|
} \
|
|
82
122
|
return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
|
|
@@ -86,9 +126,9 @@
|
|
|
86
126
|
|
|
87
127
|
#define HUF_DGEN(fn) \
|
|
88
128
|
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
|
89
|
-
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
|
129
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags) \
|
|
90
130
|
{ \
|
|
91
|
-
(void)bmi2; \
|
|
131
|
+
(void)flags; \
|
|
92
132
|
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
|
93
133
|
}
|
|
94
134
|
|
|
@@ -107,13 +147,186 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
|
|
107
147
|
return dtd;
|
|
108
148
|
}
|
|
109
149
|
|
|
150
|
+
static size_t HUF_initFastDStream(BYTE const* ip) {
|
|
151
|
+
BYTE const lastByte = ip[7];
|
|
152
|
+
size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
|
|
153
|
+
size_t const value = MEM_readLEST(ip) | 1;
|
|
154
|
+
assert(bitsConsumed <= 8);
|
|
155
|
+
assert(sizeof(size_t) == 8);
|
|
156
|
+
return value << bitsConsumed;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* The input/output arguments to the Huffman fast decoding loop:
|
|
162
|
+
*
|
|
163
|
+
* ip [in/out] - The input pointers, must be updated to reflect what is consumed.
|
|
164
|
+
* op [in/out] - The output pointers, must be updated to reflect what is written.
|
|
165
|
+
* bits [in/out] - The bitstream containers, must be updated to reflect the current state.
|
|
166
|
+
* dt [in] - The decoding table.
|
|
167
|
+
* ilowest [in] - The beginning of the valid range of the input. Decoders may read
|
|
168
|
+
* down to this pointer. It may be below iend[0].
|
|
169
|
+
* oend [in] - The end of the output stream. op[3] must not cross oend.
|
|
170
|
+
* iend [in] - The end of each input stream. ip[i] may cross iend[i],
|
|
171
|
+
* as long as it is above ilowest, but that indicates corruption.
|
|
172
|
+
*/
|
|
173
|
+
typedef struct {
|
|
174
|
+
BYTE const* ip[4];
|
|
175
|
+
BYTE* op[4];
|
|
176
|
+
U64 bits[4];
|
|
177
|
+
void const* dt;
|
|
178
|
+
BYTE const* ilowest;
|
|
179
|
+
BYTE* oend;
|
|
180
|
+
BYTE const* iend[4];
|
|
181
|
+
} HUF_DecompressFastArgs;
|
|
182
|
+
|
|
183
|
+
typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
|
|
184
|
+
|
|
185
|
+
/**
|
|
186
|
+
* Initializes args for the fast decoding loop.
|
|
187
|
+
* @returns 1 on success
|
|
188
|
+
* 0 if the fallback implementation should be used.
|
|
189
|
+
* Or an error code on failure.
|
|
190
|
+
*/
|
|
191
|
+
static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
|
|
192
|
+
{
|
|
193
|
+
void const* dt = DTable + 1;
|
|
194
|
+
U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
|
|
195
|
+
|
|
196
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
197
|
+
|
|
198
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
|
|
199
|
+
|
|
200
|
+
/* The fast decoding loop assumes 64-bit little-endian.
|
|
201
|
+
* This condition is false on x32.
|
|
202
|
+
*/
|
|
203
|
+
if (!MEM_isLittleEndian() || MEM_32bits())
|
|
204
|
+
return 0;
|
|
205
|
+
|
|
206
|
+
/* Avoid nullptr addition */
|
|
207
|
+
if (dstSize == 0)
|
|
208
|
+
return 0;
|
|
209
|
+
assert(dst != NULL);
|
|
210
|
+
|
|
211
|
+
/* strict minimum : jump table + 1 byte per stream */
|
|
212
|
+
if (srcSize < 10)
|
|
213
|
+
return ERROR(corruption_detected);
|
|
214
|
+
|
|
215
|
+
/* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
|
|
216
|
+
* If table log is not correct at this point, fallback to the old decoder.
|
|
217
|
+
* On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
|
|
218
|
+
*/
|
|
219
|
+
if (dtLog != HUF_DECODER_FAST_TABLELOG)
|
|
220
|
+
return 0;
|
|
221
|
+
|
|
222
|
+
/* Read the jump table. */
|
|
223
|
+
{
|
|
224
|
+
size_t const length1 = MEM_readLE16(istart);
|
|
225
|
+
size_t const length2 = MEM_readLE16(istart+2);
|
|
226
|
+
size_t const length3 = MEM_readLE16(istart+4);
|
|
227
|
+
size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
|
|
228
|
+
args->iend[0] = istart + 6; /* jumpTable */
|
|
229
|
+
args->iend[1] = args->iend[0] + length1;
|
|
230
|
+
args->iend[2] = args->iend[1] + length2;
|
|
231
|
+
args->iend[3] = args->iend[2] + length3;
|
|
232
|
+
|
|
233
|
+
/* HUF_initFastDStream() requires this, and this small of an input
|
|
234
|
+
* won't benefit from the ASM loop anyways.
|
|
235
|
+
*/
|
|
236
|
+
if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
|
|
237
|
+
return 0;
|
|
238
|
+
if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
|
|
239
|
+
}
|
|
240
|
+
/* ip[] contains the position that is currently loaded into bits[]. */
|
|
241
|
+
args->ip[0] = args->iend[1] - sizeof(U64);
|
|
242
|
+
args->ip[1] = args->iend[2] - sizeof(U64);
|
|
243
|
+
args->ip[2] = args->iend[3] - sizeof(U64);
|
|
244
|
+
args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
|
|
245
|
+
|
|
246
|
+
/* op[] contains the output pointers. */
|
|
247
|
+
args->op[0] = (BYTE*)dst;
|
|
248
|
+
args->op[1] = args->op[0] + (dstSize+3)/4;
|
|
249
|
+
args->op[2] = args->op[1] + (dstSize+3)/4;
|
|
250
|
+
args->op[3] = args->op[2] + (dstSize+3)/4;
|
|
251
|
+
|
|
252
|
+
/* No point to call the ASM loop for tiny outputs. */
|
|
253
|
+
if (args->op[3] >= oend)
|
|
254
|
+
return 0;
|
|
255
|
+
|
|
256
|
+
/* bits[] is the bit container.
|
|
257
|
+
* It is read from the MSB down to the LSB.
|
|
258
|
+
* It is shifted left as it is read, and zeros are
|
|
259
|
+
* shifted in. After the lowest valid bit a 1 is
|
|
260
|
+
* set, so that CountTrailingZeros(bits[]) can be used
|
|
261
|
+
* to count how many bits we've consumed.
|
|
262
|
+
*/
|
|
263
|
+
args->bits[0] = HUF_initFastDStream(args->ip[0]);
|
|
264
|
+
args->bits[1] = HUF_initFastDStream(args->ip[1]);
|
|
265
|
+
args->bits[2] = HUF_initFastDStream(args->ip[2]);
|
|
266
|
+
args->bits[3] = HUF_initFastDStream(args->ip[3]);
|
|
267
|
+
|
|
268
|
+
/* The decoders must be sure to never read beyond ilowest.
|
|
269
|
+
* This is lower than iend[0], but allowing decoders to read
|
|
270
|
+
* down to ilowest can allow an extra iteration or two in the
|
|
271
|
+
* fast loop.
|
|
272
|
+
*/
|
|
273
|
+
args->ilowest = istart;
|
|
274
|
+
|
|
275
|
+
args->oend = oend;
|
|
276
|
+
args->dt = dt;
|
|
277
|
+
|
|
278
|
+
return 1;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd)
|
|
282
|
+
{
|
|
283
|
+
/* Validate that we haven't overwritten. */
|
|
284
|
+
if (args->op[stream] > segmentEnd)
|
|
285
|
+
return ERROR(corruption_detected);
|
|
286
|
+
/* Validate that we haven't read beyond iend[].
|
|
287
|
+
* Note that ip[] may be < iend[] because the MSB is
|
|
288
|
+
* the next bit to read, and we may have consumed 100%
|
|
289
|
+
* of the stream, so down to iend[i] - 8 is valid.
|
|
290
|
+
*/
|
|
291
|
+
if (args->ip[stream] < args->iend[stream] - 8)
|
|
292
|
+
return ERROR(corruption_detected);
|
|
293
|
+
|
|
294
|
+
/* Construct the BIT_DStream_t. */
|
|
295
|
+
assert(sizeof(size_t) == 8);
|
|
296
|
+
bit->bitContainer = MEM_readLEST(args->ip[stream]);
|
|
297
|
+
bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
|
|
298
|
+
bit->start = (const char*)args->ilowest;
|
|
299
|
+
bit->limitPtr = bit->start + sizeof(size_t);
|
|
300
|
+
bit->ptr = (const char*)args->ip[stream];
|
|
301
|
+
|
|
302
|
+
return 0;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
/* Calls X(N) for each stream 0, 1, 2, 3. */
|
|
306
|
+
#define HUF_4X_FOR_EACH_STREAM(X) \
|
|
307
|
+
do { \
|
|
308
|
+
X(0); \
|
|
309
|
+
X(1); \
|
|
310
|
+
X(2); \
|
|
311
|
+
X(3); \
|
|
312
|
+
} while (0)
|
|
313
|
+
|
|
314
|
+
/* Calls X(N, var) for each stream 0, 1, 2, 3. */
|
|
315
|
+
#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
|
|
316
|
+
do { \
|
|
317
|
+
X(0, (var)); \
|
|
318
|
+
X(1, (var)); \
|
|
319
|
+
X(2, (var)); \
|
|
320
|
+
X(3, (var)); \
|
|
321
|
+
} while (0)
|
|
322
|
+
|
|
110
323
|
|
|
111
324
|
#ifndef HUF_FORCE_DECOMPRESS_X2
|
|
112
325
|
|
|
113
326
|
/*-***************************/
|
|
114
327
|
/* single-symbol decoding */
|
|
115
328
|
/*-***************************/
|
|
116
|
-
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
|
|
329
|
+
typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */
|
|
117
330
|
|
|
118
331
|
/**
|
|
119
332
|
* Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
|
|
@@ -122,14 +335,45 @@ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decodi
|
|
|
122
335
|
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
|
|
123
336
|
U64 D4;
|
|
124
337
|
if (MEM_isLittleEndian()) {
|
|
125
|
-
D4 = symbol + (nbBits << 8);
|
|
338
|
+
D4 = (U64)((symbol << 8) + nbBits);
|
|
126
339
|
} else {
|
|
127
|
-
D4 = (symbol << 8) + nbBits;
|
|
340
|
+
D4 = (U64)(symbol + (nbBits << 8));
|
|
128
341
|
}
|
|
342
|
+
assert(D4 < (1U << 16));
|
|
129
343
|
D4 *= 0x0001000100010001ULL;
|
|
130
344
|
return D4;
|
|
131
345
|
}
|
|
132
346
|
|
|
347
|
+
/**
|
|
348
|
+
* Increase the tableLog to targetTableLog and rescales the stats.
|
|
349
|
+
* If tableLog > targetTableLog this is a no-op.
|
|
350
|
+
* @returns New tableLog
|
|
351
|
+
*/
|
|
352
|
+
static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
|
|
353
|
+
{
|
|
354
|
+
if (tableLog > targetTableLog)
|
|
355
|
+
return tableLog;
|
|
356
|
+
if (tableLog < targetTableLog) {
|
|
357
|
+
U32 const scale = targetTableLog - tableLog;
|
|
358
|
+
U32 s;
|
|
359
|
+
/* Increase the weight for all non-zero probability symbols by scale. */
|
|
360
|
+
for (s = 0; s < nbSymbols; ++s) {
|
|
361
|
+
huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
|
|
362
|
+
}
|
|
363
|
+
/* Update rankVal to reflect the new weights.
|
|
364
|
+
* All weights except 0 get moved to weight + scale.
|
|
365
|
+
* Weights [1, scale] are empty.
|
|
366
|
+
*/
|
|
367
|
+
for (s = targetTableLog; s > scale; --s) {
|
|
368
|
+
rankVal[s] = rankVal[s - scale];
|
|
369
|
+
}
|
|
370
|
+
for (s = scale; s > 0; --s) {
|
|
371
|
+
rankVal[s] = 0;
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
return targetTableLog;
|
|
375
|
+
}
|
|
376
|
+
|
|
133
377
|
typedef struct {
|
|
134
378
|
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
|
135
379
|
U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
|
@@ -138,13 +382,7 @@ typedef struct {
|
|
|
138
382
|
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
|
|
139
383
|
} HUF_ReadDTableX1_Workspace;
|
|
140
384
|
|
|
141
|
-
|
|
142
|
-
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
|
|
143
|
-
{
|
|
144
|
-
return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
|
|
385
|
+
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags)
|
|
148
386
|
{
|
|
149
387
|
U32 tableLog = 0;
|
|
150
388
|
U32 nbSymbols = 0;
|
|
@@ -159,11 +397,15 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
|
|
|
159
397
|
DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
|
|
160
398
|
/* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
|
|
161
399
|
|
|
162
|
-
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
|
|
400
|
+
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags);
|
|
163
401
|
if (HUF_isError(iSize)) return iSize;
|
|
164
402
|
|
|
403
|
+
|
|
165
404
|
/* Table header */
|
|
166
405
|
{ DTableDesc dtd = HUF_getDTableDesc(DTable);
|
|
406
|
+
U32 const maxTableLog = dtd.maxTableLog + 1;
|
|
407
|
+
U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
|
|
408
|
+
tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
|
|
167
409
|
if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
|
|
168
410
|
dtd.tableType = 0;
|
|
169
411
|
dtd.tableLog = (BYTE)tableLog;
|
|
@@ -182,9 +424,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
|
|
|
182
424
|
* rankStart[0] is not filled because there are no entries in the table for
|
|
183
425
|
* weight 0.
|
|
184
426
|
*/
|
|
185
|
-
{
|
|
186
|
-
|
|
187
|
-
int nextRankStart = 0;
|
|
427
|
+
{ int n;
|
|
428
|
+
U32 nextRankStart = 0;
|
|
188
429
|
int const unroll = 4;
|
|
189
430
|
int const nLimit = (int)nbSymbols - unroll + 1;
|
|
190
431
|
for (n=0; n<(int)tableLog+1; n++) {
|
|
@@ -207,14 +448,13 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
|
|
|
207
448
|
|
|
208
449
|
/* fill DTable
|
|
209
450
|
* We fill all entries of each weight in order.
|
|
210
|
-
* That way length is a constant for each iteration of the
|
|
451
|
+
* That way length is a constant for each iteration of the outer loop.
|
|
211
452
|
* We can switch based on the length to a different inner loop which is
|
|
212
453
|
* optimized for that particular case.
|
|
213
454
|
*/
|
|
214
|
-
{
|
|
215
|
-
|
|
216
|
-
int
|
|
217
|
-
int rankStart=0;
|
|
455
|
+
{ U32 w;
|
|
456
|
+
int symbol = wksp->rankVal[0];
|
|
457
|
+
int rankStart = 0;
|
|
218
458
|
for (w=1; w<tableLog+1; ++w) {
|
|
219
459
|
int const symbolCount = wksp->rankVal[w];
|
|
220
460
|
int const length = (1 << w) >> 1;
|
|
@@ -288,15 +528,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog
|
|
|
288
528
|
}
|
|
289
529
|
|
|
290
530
|
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
|
|
291
|
-
*ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
|
|
531
|
+
do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
|
|
292
532
|
|
|
293
|
-
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)
|
|
294
|
-
|
|
295
|
-
|
|
533
|
+
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
|
|
534
|
+
do { \
|
|
535
|
+
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
|
536
|
+
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
|
|
537
|
+
} while (0)
|
|
296
538
|
|
|
297
|
-
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr)
|
|
298
|
-
|
|
299
|
-
|
|
539
|
+
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
|
|
540
|
+
do { \
|
|
541
|
+
if (MEM_64bits()) \
|
|
542
|
+
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
|
|
543
|
+
} while (0)
|
|
300
544
|
|
|
301
545
|
HINT_INLINE size_t
|
|
302
546
|
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
|
|
@@ -304,11 +548,15 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
|
|
|
304
548
|
BYTE* const pStart = p;
|
|
305
549
|
|
|
306
550
|
/* up to 4 symbols at a time */
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
551
|
+
if ((pEnd - p) > 3) {
|
|
552
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
|
|
553
|
+
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
|
|
554
|
+
HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
|
|
555
|
+
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
|
|
556
|
+
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
|
557
|
+
}
|
|
558
|
+
} else {
|
|
559
|
+
BIT_reloadDStream(bitDPtr);
|
|
312
560
|
}
|
|
313
561
|
|
|
314
562
|
/* [0-3] symbols remaining */
|
|
@@ -320,7 +568,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
|
|
|
320
568
|
while (p < pEnd)
|
|
321
569
|
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
|
322
570
|
|
|
323
|
-
return pEnd-pStart;
|
|
571
|
+
return (size_t)(pEnd-pStart);
|
|
324
572
|
}
|
|
325
573
|
|
|
326
574
|
FORCE_INLINE_TEMPLATE size_t
|
|
@@ -330,7 +578,7 @@ HUF_decompress1X1_usingDTable_internal_body(
|
|
|
330
578
|
const HUF_DTable* DTable)
|
|
331
579
|
{
|
|
332
580
|
BYTE* op = (BYTE*)dst;
|
|
333
|
-
BYTE* const oend = op + dstSize;
|
|
581
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
|
|
334
582
|
const void* dtPtr = DTable + 1;
|
|
335
583
|
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
|
|
336
584
|
BIT_DStream_t bitD;
|
|
@@ -346,6 +594,10 @@ HUF_decompress1X1_usingDTable_internal_body(
|
|
|
346
594
|
return dstSize;
|
|
347
595
|
}
|
|
348
596
|
|
|
597
|
+
/* HUF_decompress4X1_usingDTable_internal_body():
|
|
598
|
+
* Conditions :
|
|
599
|
+
* @dstSize >= 6
|
|
600
|
+
*/
|
|
349
601
|
FORCE_INLINE_TEMPLATE size_t
|
|
350
602
|
HUF_decompress4X1_usingDTable_internal_body(
|
|
351
603
|
void* dst, size_t dstSize,
|
|
@@ -354,6 +606,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
354
606
|
{
|
|
355
607
|
/* Check */
|
|
356
608
|
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
|
609
|
+
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
|
|
357
610
|
|
|
358
611
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
|
359
612
|
BYTE* const ostart = (BYTE*) dst;
|
|
@@ -388,33 +641,37 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
388
641
|
U32 endSignal = 1;
|
|
389
642
|
|
|
390
643
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
|
644
|
+
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
|
645
|
+
assert(dstSize >= 6); /* validated above */
|
|
391
646
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
|
392
647
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
|
393
648
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
|
394
649
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
|
395
650
|
|
|
396
651
|
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
652
|
+
if ((size_t)(oend - op4) >= sizeof(size_t)) {
|
|
653
|
+
for ( ; (endSignal) & (op4 < olimit) ; ) {
|
|
654
|
+
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
|
655
|
+
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
|
656
|
+
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
|
657
|
+
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
|
|
658
|
+
HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
|
|
659
|
+
HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
|
|
660
|
+
HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
|
|
661
|
+
HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
|
|
662
|
+
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
|
663
|
+
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
|
664
|
+
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
|
665
|
+
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
|
|
666
|
+
HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
|
|
667
|
+
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
|
|
668
|
+
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
|
|
669
|
+
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
|
|
670
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
|
671
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
|
672
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
|
673
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
|
674
|
+
}
|
|
418
675
|
}
|
|
419
676
|
|
|
420
677
|
/* check corruption */
|
|
@@ -440,74 +697,250 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
440
697
|
}
|
|
441
698
|
}
|
|
442
699
|
|
|
700
|
+
#if HUF_NEED_BMI2_FUNCTION
|
|
701
|
+
static BMI2_TARGET_ATTRIBUTE
|
|
702
|
+
size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
|
|
703
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
704
|
+
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
705
|
+
}
|
|
706
|
+
#endif
|
|
443
707
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
708
|
+
static
|
|
709
|
+
size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
|
|
710
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
711
|
+
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
712
|
+
}
|
|
448
713
|
|
|
449
|
-
|
|
450
|
-
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
|
|
714
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
451
715
|
|
|
716
|
+
HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
|
|
452
717
|
|
|
718
|
+
#endif
|
|
453
719
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
const void* cSrc, size_t cSrcSize,
|
|
457
|
-
const HUF_DTable* DTable)
|
|
720
|
+
static HUF_FAST_BMI2_ATTRS
|
|
721
|
+
void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
|
|
458
722
|
{
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
723
|
+
U64 bits[4];
|
|
724
|
+
BYTE const* ip[4];
|
|
725
|
+
BYTE* op[4];
|
|
726
|
+
U16 const* const dtable = (U16 const*)args->dt;
|
|
727
|
+
BYTE* const oend = args->oend;
|
|
728
|
+
BYTE const* const ilowest = args->ilowest;
|
|
729
|
+
|
|
730
|
+
/* Copy the arguments to local variables */
|
|
731
|
+
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
|
|
732
|
+
ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
|
|
733
|
+
ZSTD_memcpy(&op, &args->op, sizeof(op));
|
|
734
|
+
|
|
735
|
+
assert(MEM_isLittleEndian());
|
|
736
|
+
assert(!MEM_32bits());
|
|
737
|
+
|
|
738
|
+
for (;;) {
|
|
739
|
+
BYTE* olimit;
|
|
740
|
+
int stream;
|
|
741
|
+
|
|
742
|
+
/* Assert loop preconditions */
|
|
743
|
+
#ifndef NDEBUG
|
|
744
|
+
for (stream = 0; stream < 4; ++stream) {
|
|
745
|
+
assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
|
|
746
|
+
assert(ip[stream] >= ilowest);
|
|
747
|
+
}
|
|
748
|
+
#endif
|
|
749
|
+
/* Compute olimit */
|
|
750
|
+
{
|
|
751
|
+
/* Each iteration produces 5 output symbols per stream */
|
|
752
|
+
size_t const oiters = (size_t)(oend - op[3]) / 5;
|
|
753
|
+
/* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
|
|
754
|
+
* per stream.
|
|
755
|
+
*/
|
|
756
|
+
size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
|
|
757
|
+
/* We can safely run iters iterations before running bounds checks */
|
|
758
|
+
size_t const iters = MIN(oiters, iiters);
|
|
759
|
+
size_t const symbols = iters * 5;
|
|
760
|
+
|
|
761
|
+
/* We can simply check that op[3] < olimit, instead of checking all
|
|
762
|
+
* of our bounds, since we can't hit the other bounds until we've run
|
|
763
|
+
* iters iterations, which only happens when op[3] == olimit.
|
|
764
|
+
*/
|
|
765
|
+
olimit = op[3] + symbols;
|
|
766
|
+
|
|
767
|
+
/* Exit fast decoding loop once we reach the end. */
|
|
768
|
+
if (op[3] == olimit)
|
|
769
|
+
break;
|
|
770
|
+
|
|
771
|
+
/* Exit the decoding loop if any input pointer has crossed the
|
|
772
|
+
* previous one. This indicates corruption, and a precondition
|
|
773
|
+
* to our loop is that ip[i] >= ip[0].
|
|
774
|
+
*/
|
|
775
|
+
for (stream = 1; stream < 4; ++stream) {
|
|
776
|
+
if (ip[stream] < ip[stream - 1])
|
|
777
|
+
goto _out;
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
#ifndef NDEBUG
|
|
782
|
+
for (stream = 1; stream < 4; ++stream) {
|
|
783
|
+
assert(ip[stream] >= ip[stream - 1]);
|
|
784
|
+
}
|
|
785
|
+
#endif
|
|
786
|
+
|
|
787
|
+
#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \
|
|
788
|
+
do { \
|
|
789
|
+
int const index = (int)(bits[(_stream)] >> 53); \
|
|
790
|
+
int const entry = (int)dtable[index]; \
|
|
791
|
+
bits[(_stream)] <<= (entry & 0x3F); \
|
|
792
|
+
op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
|
|
793
|
+
} while (0)
|
|
794
|
+
|
|
795
|
+
#define HUF_4X1_RELOAD_STREAM(_stream) \
|
|
796
|
+
do { \
|
|
797
|
+
int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
|
|
798
|
+
int const nbBits = ctz & 7; \
|
|
799
|
+
int const nbBytes = ctz >> 3; \
|
|
800
|
+
op[(_stream)] += 5; \
|
|
801
|
+
ip[(_stream)] -= nbBytes; \
|
|
802
|
+
bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
|
|
803
|
+
bits[(_stream)] <<= nbBits; \
|
|
804
|
+
} while (0)
|
|
805
|
+
|
|
806
|
+
/* Manually unroll the loop because compilers don't consistently
|
|
807
|
+
* unroll the inner loops, which destroys performance.
|
|
808
|
+
*/
|
|
809
|
+
do {
|
|
810
|
+
/* Decode 5 symbols in each of the 4 streams */
|
|
811
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0);
|
|
812
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1);
|
|
813
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2);
|
|
814
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3);
|
|
815
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4);
|
|
816
|
+
|
|
817
|
+
/* Reload each of the 4 bitstreams */
|
|
818
|
+
HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM);
|
|
819
|
+
} while (op[3] < olimit);
|
|
820
|
+
|
|
821
|
+
#undef HUF_4X1_DECODE_SYMBOL
|
|
822
|
+
#undef HUF_4X1_RELOAD_STREAM
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
_out:
|
|
826
|
+
|
|
827
|
+
/* Save the final values of each of the state variables back to args. */
|
|
828
|
+
ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
|
|
829
|
+
ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
|
|
830
|
+
ZSTD_memcpy(&args->op, &op, sizeof(op));
|
|
462
831
|
}
|
|
463
832
|
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
833
|
+
/**
|
|
834
|
+
* @returns @p dstSize on success (>= 6)
|
|
835
|
+
* 0 if the fallback implementation should be used
|
|
836
|
+
* An error if an error occurred
|
|
837
|
+
*/
|
|
838
|
+
static HUF_FAST_BMI2_ATTRS
|
|
839
|
+
size_t
|
|
840
|
+
HUF_decompress4X1_usingDTable_internal_fast(
|
|
841
|
+
void* dst, size_t dstSize,
|
|
842
|
+
const void* cSrc, size_t cSrcSize,
|
|
843
|
+
const HUF_DTable* DTable,
|
|
844
|
+
HUF_DecompressFastLoopFn loopFn)
|
|
467
845
|
{
|
|
468
|
-
const
|
|
846
|
+
void const* dt = DTable + 1;
|
|
847
|
+
BYTE const* const ilowest = (BYTE const*)cSrc;
|
|
848
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
|
|
849
|
+
HUF_DecompressFastArgs args;
|
|
850
|
+
{ size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
|
851
|
+
FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
|
|
852
|
+
if (ret == 0)
|
|
853
|
+
return 0;
|
|
854
|
+
}
|
|
469
855
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
ip
|
|
856
|
+
assert(args.ip[0] >= args.ilowest);
|
|
857
|
+
loopFn(&args);
|
|
858
|
+
|
|
859
|
+
/* Our loop guarantees that ip[] >= ilowest and that we haven't
|
|
860
|
+
* overwritten any op[].
|
|
861
|
+
*/
|
|
862
|
+
assert(args.ip[0] >= ilowest);
|
|
863
|
+
assert(args.ip[0] >= ilowest);
|
|
864
|
+
assert(args.ip[1] >= ilowest);
|
|
865
|
+
assert(args.ip[2] >= ilowest);
|
|
866
|
+
assert(args.ip[3] >= ilowest);
|
|
867
|
+
assert(args.op[3] <= oend);
|
|
868
|
+
|
|
869
|
+
assert(ilowest == args.ilowest);
|
|
870
|
+
assert(ilowest + 6 == args.iend[0]);
|
|
871
|
+
(void)ilowest;
|
|
872
|
+
|
|
873
|
+
/* finish bit streams one by one. */
|
|
874
|
+
{ size_t const segmentSize = (dstSize+3) / 4;
|
|
875
|
+
BYTE* segmentEnd = (BYTE*)dst;
|
|
876
|
+
int i;
|
|
877
|
+
for (i = 0; i < 4; ++i) {
|
|
878
|
+
BIT_DStream_t bit;
|
|
879
|
+
if (segmentSize <= (size_t)(oend - segmentEnd))
|
|
880
|
+
segmentEnd += segmentSize;
|
|
881
|
+
else
|
|
882
|
+
segmentEnd = oend;
|
|
883
|
+
FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
|
|
884
|
+
/* Decompress and validate that we've produced exactly the expected length. */
|
|
885
|
+
args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
|
|
886
|
+
if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
|
|
887
|
+
}
|
|
888
|
+
}
|
|
474
889
|
|
|
475
|
-
|
|
890
|
+
/* decoded size */
|
|
891
|
+
assert(dstSize != 0);
|
|
892
|
+
return dstSize;
|
|
476
893
|
}
|
|
477
894
|
|
|
895
|
+
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
|
|
478
896
|
|
|
479
|
-
size_t
|
|
480
|
-
|
|
481
|
-
const void* cSrc, size_t cSrcSize,
|
|
482
|
-
const HUF_DTable* DTable)
|
|
897
|
+
static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
|
|
898
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags)
|
|
483
899
|
{
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
900
|
+
HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default;
|
|
901
|
+
HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop;
|
|
902
|
+
|
|
903
|
+
#if DYNAMIC_BMI2
|
|
904
|
+
if (flags & HUF_flags_bmi2) {
|
|
905
|
+
fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
|
|
906
|
+
# if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
907
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
|
908
|
+
loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
|
|
909
|
+
}
|
|
910
|
+
# endif
|
|
911
|
+
} else {
|
|
912
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
913
|
+
}
|
|
914
|
+
#endif
|
|
915
|
+
|
|
916
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
|
|
917
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
|
918
|
+
loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
|
|
919
|
+
}
|
|
920
|
+
#endif
|
|
921
|
+
|
|
922
|
+
if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
|
|
923
|
+
size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
|
|
924
|
+
if (ret != 0)
|
|
925
|
+
return ret;
|
|
926
|
+
}
|
|
927
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
487
928
|
}
|
|
488
929
|
|
|
489
|
-
static size_t
|
|
930
|
+
static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
490
931
|
const void* cSrc, size_t cSrcSize,
|
|
491
|
-
void* workSpace, size_t wkspSize, int
|
|
932
|
+
void* workSpace, size_t wkspSize, int flags)
|
|
492
933
|
{
|
|
493
934
|
const BYTE* ip = (const BYTE*) cSrc;
|
|
494
935
|
|
|
495
|
-
size_t const hSize =
|
|
936
|
+
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
|
496
937
|
if (HUF_isError(hSize)) return hSize;
|
|
497
938
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
|
498
939
|
ip += hSize; cSrcSize -= hSize;
|
|
499
940
|
|
|
500
|
-
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,
|
|
941
|
+
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
|
501
942
|
}
|
|
502
943
|
|
|
503
|
-
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
504
|
-
const void* cSrc, size_t cSrcSize,
|
|
505
|
-
void* workSpace, size_t wkspSize)
|
|
506
|
-
{
|
|
507
|
-
return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
|
|
508
|
-
}
|
|
509
|
-
|
|
510
|
-
|
|
511
944
|
#endif /* HUF_FORCE_DECOMPRESS_X2 */
|
|
512
945
|
|
|
513
946
|
|
|
@@ -518,188 +951,308 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
|
518
951
|
/* *************************/
|
|
519
952
|
|
|
520
953
|
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
|
|
521
|
-
typedef struct { BYTE symbol;
|
|
954
|
+
typedef struct { BYTE symbol; } sortedSymbol_t;
|
|
522
955
|
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
|
|
523
956
|
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
|
|
524
957
|
|
|
958
|
+
/**
|
|
959
|
+
* Constructs a HUF_DEltX2 in a U32.
|
|
960
|
+
*/
|
|
961
|
+
static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
|
|
962
|
+
{
|
|
963
|
+
U32 seq;
|
|
964
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
|
|
965
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
|
|
966
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
|
|
967
|
+
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
|
|
968
|
+
if (MEM_isLittleEndian()) {
|
|
969
|
+
seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
|
|
970
|
+
return seq + (nbBits << 16) + ((U32)level << 24);
|
|
971
|
+
} else {
|
|
972
|
+
seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
|
|
973
|
+
return (seq << 16) + (nbBits << 8) + (U32)level;
|
|
974
|
+
}
|
|
975
|
+
}
|
|
525
976
|
|
|
526
|
-
|
|
527
|
-
*
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
|
|
531
|
-
U32 nbBitsBaseline, U16 baseSeq)
|
|
977
|
+
/**
|
|
978
|
+
* Constructs a HUF_DEltX2.
|
|
979
|
+
*/
|
|
980
|
+
static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
|
|
532
981
|
{
|
|
533
982
|
HUF_DEltX2 DElt;
|
|
534
|
-
U32
|
|
983
|
+
U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
|
|
984
|
+
DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
|
|
985
|
+
ZSTD_memcpy(&DElt, &val, sizeof(val));
|
|
986
|
+
return DElt;
|
|
987
|
+
}
|
|
988
|
+
|
|
989
|
+
/**
|
|
990
|
+
* Constructs 2 HUF_DEltX2s and packs them into a U64.
|
|
991
|
+
*/
|
|
992
|
+
static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
|
|
993
|
+
{
|
|
994
|
+
U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
|
|
995
|
+
return (U64)DElt + ((U64)DElt << 32);
|
|
996
|
+
}
|
|
535
997
|
|
|
536
|
-
|
|
537
|
-
|
|
998
|
+
/**
|
|
999
|
+
* Fills the DTable rank with all the symbols from [begin, end) that are each
|
|
1000
|
+
* nbBits long.
|
|
1001
|
+
*
|
|
1002
|
+
* @param DTableRank The start of the rank in the DTable.
|
|
1003
|
+
* @param begin The first symbol to fill (inclusive).
|
|
1004
|
+
* @param end The last symbol to fill (exclusive).
|
|
1005
|
+
* @param nbBits Each symbol is nbBits long.
|
|
1006
|
+
* @param tableLog The table log.
|
|
1007
|
+
* @param baseSeq If level == 1 { 0 } else { the first level symbol }
|
|
1008
|
+
* @param level The level in the table. Must be 1 or 2.
|
|
1009
|
+
*/
|
|
1010
|
+
static void HUF_fillDTableX2ForWeight(
|
|
1011
|
+
HUF_DEltX2* DTableRank,
|
|
1012
|
+
sortedSymbol_t const* begin, sortedSymbol_t const* end,
|
|
1013
|
+
U32 nbBits, U32 tableLog,
|
|
1014
|
+
U16 baseSeq, int const level)
|
|
1015
|
+
{
|
|
1016
|
+
U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
|
|
1017
|
+
const sortedSymbol_t* ptr;
|
|
1018
|
+
assert(level >= 1 && level <= 2);
|
|
1019
|
+
switch (length) {
|
|
1020
|
+
case 1:
|
|
1021
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
1022
|
+
HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
|
|
1023
|
+
*DTableRank++ = DElt;
|
|
1024
|
+
}
|
|
1025
|
+
break;
|
|
1026
|
+
case 2:
|
|
1027
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
1028
|
+
HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
|
|
1029
|
+
DTableRank[0] = DElt;
|
|
1030
|
+
DTableRank[1] = DElt;
|
|
1031
|
+
DTableRank += 2;
|
|
1032
|
+
}
|
|
1033
|
+
break;
|
|
1034
|
+
case 4:
|
|
1035
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
1036
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
|
1037
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
|
1038
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
|
1039
|
+
DTableRank += 4;
|
|
1040
|
+
}
|
|
1041
|
+
break;
|
|
1042
|
+
case 8:
|
|
1043
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
1044
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
|
1045
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
|
1046
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
|
1047
|
+
ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
|
|
1048
|
+
ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
|
|
1049
|
+
DTableRank += 8;
|
|
1050
|
+
}
|
|
1051
|
+
break;
|
|
1052
|
+
default:
|
|
1053
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
1054
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
|
1055
|
+
HUF_DEltX2* const DTableRankEnd = DTableRank + length;
|
|
1056
|
+
for (; DTableRank != DTableRankEnd; DTableRank += 8) {
|
|
1057
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
|
1058
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
|
1059
|
+
ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
|
|
1060
|
+
ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
break;
|
|
1064
|
+
}
|
|
1065
|
+
}
|
|
538
1066
|
|
|
539
|
-
|
|
1067
|
+
/* HUF_fillDTableX2Level2() :
|
|
1068
|
+
* `rankVal` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
|
|
1069
|
+
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
|
|
1070
|
+
const U32* rankVal, const int minWeight, const int maxWeight1,
|
|
1071
|
+
const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
|
|
1072
|
+
U32 nbBitsBaseline, U16 baseSeq)
|
|
1073
|
+
{
|
|
1074
|
+
/* Fill skipped values (all positions up to rankVal[minWeight]).
|
|
1075
|
+
* These positions only get a single symbol because the combined weight
|
|
1076
|
+
* is too large.
|
|
1077
|
+
*/
|
|
540
1078
|
if (minWeight>1) {
|
|
541
|
-
U32
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
1079
|
+
U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
|
|
1080
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
|
|
1081
|
+
int const skipSize = rankVal[minWeight];
|
|
1082
|
+
assert(length > 1);
|
|
1083
|
+
assert((U32)skipSize < length);
|
|
1084
|
+
switch (length) {
|
|
1085
|
+
case 2:
|
|
1086
|
+
assert(skipSize == 1);
|
|
1087
|
+
ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
|
|
1088
|
+
break;
|
|
1089
|
+
case 4:
|
|
1090
|
+
assert(skipSize <= 4);
|
|
1091
|
+
ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
|
|
1092
|
+
ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
|
|
1093
|
+
break;
|
|
1094
|
+
default:
|
|
1095
|
+
{
|
|
1096
|
+
int i;
|
|
1097
|
+
for (i = 0; i < skipSize; i += 8) {
|
|
1098
|
+
ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
|
|
1099
|
+
ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
|
|
1100
|
+
ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
|
|
1101
|
+
ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
|
|
1102
|
+
}
|
|
1103
|
+
}
|
|
1104
|
+
}
|
|
547
1105
|
}
|
|
548
1106
|
|
|
549
|
-
/*
|
|
550
|
-
{
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
const
|
|
554
|
-
const
|
|
555
|
-
const
|
|
556
|
-
U32
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
rankVal[weight] += length;
|
|
565
|
-
} }
|
|
1107
|
+
/* Fill each of the second level symbols by weight. */
|
|
1108
|
+
{
|
|
1109
|
+
int w;
|
|
1110
|
+
for (w = minWeight; w < maxWeight1; ++w) {
|
|
1111
|
+
int const begin = rankStart[w];
|
|
1112
|
+
int const end = rankStart[w+1];
|
|
1113
|
+
U32 const nbBits = nbBitsBaseline - w;
|
|
1114
|
+
U32 const totalBits = nbBits + consumedBits;
|
|
1115
|
+
HUF_fillDTableX2ForWeight(
|
|
1116
|
+
DTable + rankVal[w],
|
|
1117
|
+
sortedSymbols + begin, sortedSymbols + end,
|
|
1118
|
+
totalBits, targetLog,
|
|
1119
|
+
baseSeq, /* level */ 2);
|
|
1120
|
+
}
|
|
1121
|
+
}
|
|
566
1122
|
}
|
|
567
1123
|
|
|
568
|
-
|
|
569
1124
|
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
|
|
570
|
-
const sortedSymbol_t* sortedList,
|
|
571
|
-
const U32* rankStart,
|
|
1125
|
+
const sortedSymbol_t* sortedList,
|
|
1126
|
+
const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight,
|
|
572
1127
|
const U32 nbBitsBaseline)
|
|
573
1128
|
{
|
|
574
|
-
U32 rankVal
|
|
1129
|
+
U32* const rankVal = rankValOrigin[0];
|
|
575
1130
|
const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
|
|
576
1131
|
const U32 minBits = nbBitsBaseline - maxWeight;
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
const
|
|
584
|
-
const
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
U32 sortedRank;
|
|
1132
|
+
int w;
|
|
1133
|
+
int const wEnd = (int)maxWeight + 1;
|
|
1134
|
+
|
|
1135
|
+
/* Fill DTable in order of weight. */
|
|
1136
|
+
for (w = 1; w < wEnd; ++w) {
|
|
1137
|
+
int const begin = (int)rankStart[w];
|
|
1138
|
+
int const end = (int)rankStart[w+1];
|
|
1139
|
+
U32 const nbBits = nbBitsBaseline - w;
|
|
1140
|
+
|
|
1141
|
+
if (targetLog-nbBits >= minBits) {
|
|
1142
|
+
/* Enough room for a second symbol. */
|
|
1143
|
+
int start = rankVal[w];
|
|
1144
|
+
U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
|
|
591
1145
|
int minWeight = nbBits + scaleLog;
|
|
1146
|
+
int s;
|
|
592
1147
|
if (minWeight < 1) minWeight = 1;
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
1148
|
+
/* Fill the DTable for every symbol of weight w.
|
|
1149
|
+
* These symbols get at least 1 second symbol.
|
|
1150
|
+
*/
|
|
1151
|
+
for (s = begin; s != end; ++s) {
|
|
1152
|
+
HUF_fillDTableX2Level2(
|
|
1153
|
+
DTable + start, targetLog, nbBits,
|
|
1154
|
+
rankValOrigin[nbBits], minWeight, wEnd,
|
|
1155
|
+
sortedList, rankStart,
|
|
1156
|
+
nbBitsBaseline, sortedList[s].symbol);
|
|
1157
|
+
start += length;
|
|
1158
|
+
}
|
|
598
1159
|
} else {
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
} }
|
|
607
|
-
rankVal[weight] += length;
|
|
1160
|
+
/* Only a single symbol. */
|
|
1161
|
+
HUF_fillDTableX2ForWeight(
|
|
1162
|
+
DTable + rankVal[w],
|
|
1163
|
+
sortedList + begin, sortedList + end,
|
|
1164
|
+
nbBits, targetLog,
|
|
1165
|
+
/* baseSeq */ 0, /* level */ 1);
|
|
1166
|
+
}
|
|
608
1167
|
}
|
|
609
1168
|
}
|
|
610
1169
|
|
|
1170
|
+
typedef struct {
|
|
1171
|
+
rankValCol_t rankVal[HUF_TABLELOG_MAX];
|
|
1172
|
+
U32 rankStats[HUF_TABLELOG_MAX + 1];
|
|
1173
|
+
U32 rankStart0[HUF_TABLELOG_MAX + 3];
|
|
1174
|
+
sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
|
|
1175
|
+
BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
|
|
1176
|
+
U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
|
1177
|
+
} HUF_ReadDTableX2_Workspace;
|
|
1178
|
+
|
|
611
1179
|
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
|
|
612
1180
|
const void* src, size_t srcSize,
|
|
613
|
-
void* workSpace, size_t wkspSize)
|
|
1181
|
+
void* workSpace, size_t wkspSize, int flags)
|
|
614
1182
|
{
|
|
615
|
-
U32 tableLog, maxW,
|
|
1183
|
+
U32 tableLog, maxW, nbSymbols;
|
|
616
1184
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
|
617
|
-
U32
|
|
1185
|
+
U32 maxTableLog = dtd.maxTableLog;
|
|
618
1186
|
size_t iSize;
|
|
619
1187
|
void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
|
|
620
1188
|
HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
|
|
621
1189
|
U32 *rankStart;
|
|
622
1190
|
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
|
|
631
|
-
spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
|
|
632
|
-
rankStats = (U32 *)workSpace + spaceUsed32;
|
|
633
|
-
spaceUsed32 += HUF_TABLELOG_MAX + 1;
|
|
634
|
-
rankStart0 = (U32 *)workSpace + spaceUsed32;
|
|
635
|
-
spaceUsed32 += HUF_TABLELOG_MAX + 2;
|
|
636
|
-
sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
|
|
637
|
-
spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
|
|
638
|
-
weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
|
639
|
-
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
|
640
|
-
|
|
641
|
-
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
|
642
|
-
|
|
643
|
-
rankStart = rankStart0 + 1;
|
|
644
|
-
ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
|
|
1191
|
+
HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
|
|
1192
|
+
|
|
1193
|
+
if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
|
|
1194
|
+
|
|
1195
|
+
rankStart = wksp->rankStart0 + 1;
|
|
1196
|
+
ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
|
|
1197
|
+
ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
|
|
645
1198
|
|
|
646
1199
|
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
|
|
647
1200
|
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
|
648
1201
|
/* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
|
|
649
1202
|
|
|
650
|
-
iSize =
|
|
1203
|
+
iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags);
|
|
651
1204
|
if (HUF_isError(iSize)) return iSize;
|
|
652
1205
|
|
|
653
1206
|
/* check result */
|
|
654
1207
|
if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
|
|
1208
|
+
if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
|
|
655
1209
|
|
|
656
1210
|
/* find maxWeight */
|
|
657
|
-
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
|
|
1211
|
+
for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
|
|
658
1212
|
|
|
659
1213
|
/* Get start index of each weight */
|
|
660
1214
|
{ U32 w, nextRankStart = 0;
|
|
661
1215
|
for (w=1; w<maxW+1; w++) {
|
|
662
1216
|
U32 curr = nextRankStart;
|
|
663
|
-
nextRankStart += rankStats[w];
|
|
1217
|
+
nextRankStart += wksp->rankStats[w];
|
|
664
1218
|
rankStart[w] = curr;
|
|
665
1219
|
}
|
|
666
1220
|
rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
|
|
667
|
-
|
|
1221
|
+
rankStart[maxW+1] = nextRankStart;
|
|
668
1222
|
}
|
|
669
1223
|
|
|
670
1224
|
/* sort symbols by weight */
|
|
671
1225
|
{ U32 s;
|
|
672
1226
|
for (s=0; s<nbSymbols; s++) {
|
|
673
|
-
U32 const w = weightList[s];
|
|
1227
|
+
U32 const w = wksp->weightList[s];
|
|
674
1228
|
U32 const r = rankStart[w]++;
|
|
675
|
-
sortedSymbol[r].symbol = (BYTE)s;
|
|
676
|
-
sortedSymbol[r].weight = (BYTE)w;
|
|
1229
|
+
wksp->sortedSymbol[r].symbol = (BYTE)s;
|
|
677
1230
|
}
|
|
678
1231
|
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
|
|
679
1232
|
}
|
|
680
1233
|
|
|
681
1234
|
/* Build rankVal */
|
|
682
|
-
{ U32* const rankVal0 = rankVal[0];
|
|
1235
|
+
{ U32* const rankVal0 = wksp->rankVal[0];
|
|
683
1236
|
{ int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
|
|
684
1237
|
U32 nextRankVal = 0;
|
|
685
1238
|
U32 w;
|
|
686
1239
|
for (w=1; w<maxW+1; w++) {
|
|
687
1240
|
U32 curr = nextRankVal;
|
|
688
|
-
nextRankVal += rankStats[w] << (w+rescale);
|
|
1241
|
+
nextRankVal += wksp->rankStats[w] << (w+rescale);
|
|
689
1242
|
rankVal0[w] = curr;
|
|
690
1243
|
} }
|
|
691
1244
|
{ U32 const minBits = tableLog+1 - maxW;
|
|
692
1245
|
U32 consumed;
|
|
693
1246
|
for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
|
|
694
|
-
U32* const rankValPtr = rankVal[consumed];
|
|
1247
|
+
U32* const rankValPtr = wksp->rankVal[consumed];
|
|
695
1248
|
U32 w;
|
|
696
1249
|
for (w = 1; w < maxW+1; w++) {
|
|
697
1250
|
rankValPtr[w] = rankVal0[w] >> consumed;
|
|
698
1251
|
} } } }
|
|
699
1252
|
|
|
700
1253
|
HUF_fillDTableX2(dt, maxTableLog,
|
|
701
|
-
sortedSymbol,
|
|
702
|
-
rankStart0, rankVal, maxW,
|
|
1254
|
+
wksp->sortedSymbol,
|
|
1255
|
+
wksp->rankStart0, wksp->rankVal, maxW,
|
|
703
1256
|
tableLog+1);
|
|
704
1257
|
|
|
705
1258
|
dtd.tableLog = (BYTE)maxTableLog;
|
|
@@ -713,7 +1266,7 @@ FORCE_INLINE_TEMPLATE U32
|
|
|
713
1266
|
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
|
714
1267
|
{
|
|
715
1268
|
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
|
716
|
-
ZSTD_memcpy(op, dt
|
|
1269
|
+
ZSTD_memcpy(op, &dt[val].sequence, 2);
|
|
717
1270
|
BIT_skipBits(DStream, dt[val].nbBits);
|
|
718
1271
|
return dt[val].length;
|
|
719
1272
|
}
|
|
@@ -722,28 +1275,34 @@ FORCE_INLINE_TEMPLATE U32
|
|
|
722
1275
|
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
|
723
1276
|
{
|
|
724
1277
|
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
|
725
|
-
ZSTD_memcpy(op, dt
|
|
726
|
-
if (dt[val].length==1)
|
|
727
|
-
|
|
1278
|
+
ZSTD_memcpy(op, &dt[val].sequence, 1);
|
|
1279
|
+
if (dt[val].length==1) {
|
|
1280
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
|
1281
|
+
} else {
|
|
728
1282
|
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
|
729
1283
|
BIT_skipBits(DStream, dt[val].nbBits);
|
|
730
1284
|
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
|
731
1285
|
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
|
732
1286
|
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
|
733
|
-
|
|
1287
|
+
}
|
|
1288
|
+
}
|
|
734
1289
|
return 1;
|
|
735
1290
|
}
|
|
736
1291
|
|
|
737
1292
|
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
|
|
738
|
-
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
|
|
1293
|
+
do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
|
|
739
1294
|
|
|
740
|
-
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)
|
|
741
|
-
|
|
742
|
-
|
|
1295
|
+
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
|
|
1296
|
+
do { \
|
|
1297
|
+
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
|
1298
|
+
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
|
|
1299
|
+
} while (0)
|
|
743
1300
|
|
|
744
|
-
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)
|
|
745
|
-
|
|
746
|
-
|
|
1301
|
+
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
|
|
1302
|
+
do { \
|
|
1303
|
+
if (MEM_64bits()) \
|
|
1304
|
+
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
|
|
1305
|
+
} while (0)
|
|
747
1306
|
|
|
748
1307
|
HINT_INLINE size_t
|
|
749
1308
|
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
|
@@ -752,19 +1311,37 @@ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
|
|
752
1311
|
BYTE* const pStart = p;
|
|
753
1312
|
|
|
754
1313
|
/* up to 8 symbols at a time */
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
1314
|
+
if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
|
|
1315
|
+
if (dtLog <= 11 && MEM_64bits()) {
|
|
1316
|
+
/* up to 10 symbols at a time */
|
|
1317
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
|
|
1318
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1319
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1320
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1321
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1322
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1323
|
+
}
|
|
1324
|
+
} else {
|
|
1325
|
+
/* up to 8 symbols at a time */
|
|
1326
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
|
1327
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
|
1328
|
+
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
|
|
1329
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
|
1330
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1331
|
+
}
|
|
1332
|
+
}
|
|
1333
|
+
} else {
|
|
1334
|
+
BIT_reloadDStream(bitDPtr);
|
|
760
1335
|
}
|
|
761
1336
|
|
|
762
1337
|
/* closer to end : up to 2 symbols at a time */
|
|
763
|
-
|
|
764
|
-
|
|
1338
|
+
if ((size_t)(pEnd - p) >= 2) {
|
|
1339
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
|
1340
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
765
1341
|
|
|
766
|
-
|
|
767
|
-
|
|
1342
|
+
while (p <= pEnd-2)
|
|
1343
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
|
1344
|
+
}
|
|
768
1345
|
|
|
769
1346
|
if (p < pEnd)
|
|
770
1347
|
p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
|
|
@@ -785,7 +1362,7 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
|
785
1362
|
|
|
786
1363
|
/* decode */
|
|
787
1364
|
{ BYTE* const ostart = (BYTE*) dst;
|
|
788
|
-
BYTE* const oend = ostart
|
|
1365
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize);
|
|
789
1366
|
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
|
|
790
1367
|
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
|
|
791
1368
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
@@ -799,6 +1376,10 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
|
799
1376
|
return dstSize;
|
|
800
1377
|
}
|
|
801
1378
|
|
|
1379
|
+
/* HUF_decompress4X2_usingDTable_internal_body():
|
|
1380
|
+
* Conditions:
|
|
1381
|
+
* @dstSize >= 6
|
|
1382
|
+
*/
|
|
802
1383
|
FORCE_INLINE_TEMPLATE size_t
|
|
803
1384
|
HUF_decompress4X2_usingDTable_internal_body(
|
|
804
1385
|
void* dst, size_t dstSize,
|
|
@@ -806,6 +1387,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
806
1387
|
const HUF_DTable* DTable)
|
|
807
1388
|
{
|
|
808
1389
|
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
|
1390
|
+
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
|
|
809
1391
|
|
|
810
1392
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
|
811
1393
|
BYTE* const ostart = (BYTE*) dst;
|
|
@@ -839,58 +1421,62 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
839
1421
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
840
1422
|
U32 const dtLog = dtd.tableLog;
|
|
841
1423
|
|
|
842
|
-
if (length4 > cSrcSize) return ERROR(corruption_detected);
|
|
1424
|
+
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
|
1425
|
+
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
|
1426
|
+
assert(dstSize >= 6 /* validated above */);
|
|
843
1427
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
|
844
1428
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
|
845
1429
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
|
846
1430
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
|
847
1431
|
|
|
848
1432
|
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
|
849
|
-
|
|
1433
|
+
if ((size_t)(oend - op4) >= sizeof(size_t)) {
|
|
1434
|
+
for ( ; (endSignal) & (op4 < olimit); ) {
|
|
850
1435
|
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
1436
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1437
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
|
1438
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1439
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
|
1440
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1441
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
|
1442
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1443
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
|
1444
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
|
1445
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
|
1446
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1447
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
|
1448
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1449
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
|
1450
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1451
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
|
1452
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1453
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
|
1454
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
|
1455
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
|
871
1456
|
#else
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
1457
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1458
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1459
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1460
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1461
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
|
1462
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
|
1463
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
|
1464
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
|
1465
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1466
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1467
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1468
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1469
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
|
1470
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
|
1471
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
|
1472
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
|
1473
|
+
endSignal = (U32)LIKELY((U32)
|
|
1474
|
+
(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
|
|
1475
|
+
& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
|
|
1476
|
+
& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
|
|
1477
|
+
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
|
|
893
1478
|
#endif
|
|
1479
|
+
}
|
|
894
1480
|
}
|
|
895
1481
|
|
|
896
1482
|
/* check corruption */
|
|
@@ -914,68 +1500,287 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
914
1500
|
}
|
|
915
1501
|
}
|
|
916
1502
|
|
|
917
|
-
|
|
918
|
-
|
|
1503
|
+
#if HUF_NEED_BMI2_FUNCTION
|
|
1504
|
+
static BMI2_TARGET_ATTRIBUTE
|
|
1505
|
+
size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
|
|
1506
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
1507
|
+
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1508
|
+
}
|
|
1509
|
+
#endif
|
|
1510
|
+
|
|
1511
|
+
static
|
|
1512
|
+
size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
|
|
1513
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
1514
|
+
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1515
|
+
}
|
|
1516
|
+
|
|
1517
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
1518
|
+
|
|
1519
|
+
HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
|
|
1520
|
+
|
|
1521
|
+
#endif
|
|
1522
|
+
|
|
1523
|
+
static HUF_FAST_BMI2_ATTRS
|
|
1524
|
+
void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
|
|
1525
|
+
{
|
|
1526
|
+
U64 bits[4];
|
|
1527
|
+
BYTE const* ip[4];
|
|
1528
|
+
BYTE* op[4];
|
|
1529
|
+
BYTE* oend[4];
|
|
1530
|
+
HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
|
|
1531
|
+
BYTE const* const ilowest = args->ilowest;
|
|
1532
|
+
|
|
1533
|
+
/* Copy the arguments to local registers. */
|
|
1534
|
+
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
|
|
1535
|
+
ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
|
|
1536
|
+
ZSTD_memcpy(&op, &args->op, sizeof(op));
|
|
1537
|
+
|
|
1538
|
+
oend[0] = op[1];
|
|
1539
|
+
oend[1] = op[2];
|
|
1540
|
+
oend[2] = op[3];
|
|
1541
|
+
oend[3] = args->oend;
|
|
1542
|
+
|
|
1543
|
+
assert(MEM_isLittleEndian());
|
|
1544
|
+
assert(!MEM_32bits());
|
|
1545
|
+
|
|
1546
|
+
for (;;) {
|
|
1547
|
+
BYTE* olimit;
|
|
1548
|
+
int stream;
|
|
1549
|
+
|
|
1550
|
+
/* Assert loop preconditions */
|
|
1551
|
+
#ifndef NDEBUG
|
|
1552
|
+
for (stream = 0; stream < 4; ++stream) {
|
|
1553
|
+
assert(op[stream] <= oend[stream]);
|
|
1554
|
+
assert(ip[stream] >= ilowest);
|
|
1555
|
+
}
|
|
1556
|
+
#endif
|
|
1557
|
+
/* Compute olimit */
|
|
1558
|
+
{
|
|
1559
|
+
/* Each loop does 5 table lookups for each of the 4 streams.
|
|
1560
|
+
* Each table lookup consumes up to 11 bits of input, and produces
|
|
1561
|
+
* up to 2 bytes of output.
|
|
1562
|
+
*/
|
|
1563
|
+
/* We can consume up to 7 bytes of input per iteration per stream.
|
|
1564
|
+
* We also know that each input pointer is >= ip[0]. So we can run
|
|
1565
|
+
* iters loops before running out of input.
|
|
1566
|
+
*/
|
|
1567
|
+
size_t iters = (size_t)(ip[0] - ilowest) / 7;
|
|
1568
|
+
/* Each iteration can produce up to 10 bytes of output per stream.
|
|
1569
|
+
* Each output stream may advance at different rates. So take the
|
|
1570
|
+
* minimum number of safe iterations among all the output streams.
|
|
1571
|
+
*/
|
|
1572
|
+
for (stream = 0; stream < 4; ++stream) {
|
|
1573
|
+
size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
|
|
1574
|
+
iters = MIN(iters, oiters);
|
|
1575
|
+
}
|
|
1576
|
+
|
|
1577
|
+
/* Each iteration produces at least 5 output symbols. So until
|
|
1578
|
+
* op[3] crosses olimit, we know we haven't executed iters
|
|
1579
|
+
* iterations yet. This saves us maintaining an iters counter,
|
|
1580
|
+
* at the expense of computing the remaining # of iterations
|
|
1581
|
+
* more frequently.
|
|
1582
|
+
*/
|
|
1583
|
+
olimit = op[3] + (iters * 5);
|
|
1584
|
+
|
|
1585
|
+
/* Exit the fast decoding loop once we reach the end. */
|
|
1586
|
+
if (op[3] == olimit)
|
|
1587
|
+
break;
|
|
1588
|
+
|
|
1589
|
+
/* Exit the decoding loop if any input pointer has crossed the
|
|
1590
|
+
* previous one. This indicates corruption, and a precondition
|
|
1591
|
+
* to our loop is that ip[i] >= ip[0].
|
|
1592
|
+
*/
|
|
1593
|
+
for (stream = 1; stream < 4; ++stream) {
|
|
1594
|
+
if (ip[stream] < ip[stream - 1])
|
|
1595
|
+
goto _out;
|
|
1596
|
+
}
|
|
1597
|
+
}
|
|
1598
|
+
|
|
1599
|
+
#ifndef NDEBUG
|
|
1600
|
+
for (stream = 1; stream < 4; ++stream) {
|
|
1601
|
+
assert(ip[stream] >= ip[stream - 1]);
|
|
1602
|
+
}
|
|
1603
|
+
#endif
|
|
1604
|
+
|
|
1605
|
+
#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \
|
|
1606
|
+
do { \
|
|
1607
|
+
if ((_decode3) || (_stream) != 3) { \
|
|
1608
|
+
int const index = (int)(bits[(_stream)] >> 53); \
|
|
1609
|
+
HUF_DEltX2 const entry = dtable[index]; \
|
|
1610
|
+
MEM_write16(op[(_stream)], entry.sequence); \
|
|
1611
|
+
bits[(_stream)] <<= (entry.nbBits) & 0x3F; \
|
|
1612
|
+
op[(_stream)] += (entry.length); \
|
|
1613
|
+
} \
|
|
1614
|
+
} while (0)
|
|
1615
|
+
|
|
1616
|
+
#define HUF_4X2_RELOAD_STREAM(_stream) \
|
|
1617
|
+
do { \
|
|
1618
|
+
HUF_4X2_DECODE_SYMBOL(3, 1); \
|
|
1619
|
+
{ \
|
|
1620
|
+
int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
|
|
1621
|
+
int const nbBits = ctz & 7; \
|
|
1622
|
+
int const nbBytes = ctz >> 3; \
|
|
1623
|
+
ip[(_stream)] -= nbBytes; \
|
|
1624
|
+
bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
|
|
1625
|
+
bits[(_stream)] <<= nbBits; \
|
|
1626
|
+
} \
|
|
1627
|
+
} while (0)
|
|
1628
|
+
|
|
1629
|
+
/* Manually unroll the loop because compilers don't consistently
|
|
1630
|
+
* unroll the inner loops, which destroys performance.
|
|
1631
|
+
*/
|
|
1632
|
+
do {
|
|
1633
|
+
/* Decode 5 symbols from each of the first 3 streams.
|
|
1634
|
+
* The final stream will be decoded during the reload phase
|
|
1635
|
+
* to reduce register pressure.
|
|
1636
|
+
*/
|
|
1637
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
|
|
1638
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
|
|
1639
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
|
|
1640
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
|
|
1641
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
|
|
1642
|
+
|
|
1643
|
+
/* Decode one symbol from the final stream */
|
|
1644
|
+
HUF_4X2_DECODE_SYMBOL(3, 1);
|
|
1645
|
+
|
|
1646
|
+
/* Decode 4 symbols from the final stream & reload bitstreams.
|
|
1647
|
+
* The final stream is reloaded last, meaning that all 5 symbols
|
|
1648
|
+
* are decoded from the final stream before it is reloaded.
|
|
1649
|
+
*/
|
|
1650
|
+
HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM);
|
|
1651
|
+
} while (op[3] < olimit);
|
|
1652
|
+
}
|
|
919
1653
|
|
|
920
|
-
|
|
1654
|
+
#undef HUF_4X2_DECODE_SYMBOL
|
|
1655
|
+
#undef HUF_4X2_RELOAD_STREAM
|
|
1656
|
+
|
|
1657
|
+
_out:
|
|
1658
|
+
|
|
1659
|
+
/* Save the final values of each of the state variables back to args. */
|
|
1660
|
+
ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
|
|
1661
|
+
ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
|
|
1662
|
+
ZSTD_memcpy(&args->op, &op, sizeof(op));
|
|
1663
|
+
}
|
|
1664
|
+
|
|
1665
|
+
|
|
1666
|
+
static HUF_FAST_BMI2_ATTRS size_t
|
|
1667
|
+
HUF_decompress4X2_usingDTable_internal_fast(
|
|
921
1668
|
void* dst, size_t dstSize,
|
|
922
1669
|
const void* cSrc, size_t cSrcSize,
|
|
923
|
-
const HUF_DTable* DTable
|
|
1670
|
+
const HUF_DTable* DTable,
|
|
1671
|
+
HUF_DecompressFastLoopFn loopFn) {
|
|
1672
|
+
void const* dt = DTable + 1;
|
|
1673
|
+
const BYTE* const ilowest = (const BYTE*)cSrc;
|
|
1674
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
|
|
1675
|
+
HUF_DecompressFastArgs args;
|
|
1676
|
+
{
|
|
1677
|
+
size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1678
|
+
FORWARD_IF_ERROR(ret, "Failed to init asm args");
|
|
1679
|
+
if (ret == 0)
|
|
1680
|
+
return 0;
|
|
1681
|
+
}
|
|
1682
|
+
|
|
1683
|
+
assert(args.ip[0] >= args.ilowest);
|
|
1684
|
+
loopFn(&args);
|
|
1685
|
+
|
|
1686
|
+
/* note : op4 already verified within main loop */
|
|
1687
|
+
assert(args.ip[0] >= ilowest);
|
|
1688
|
+
assert(args.ip[1] >= ilowest);
|
|
1689
|
+
assert(args.ip[2] >= ilowest);
|
|
1690
|
+
assert(args.ip[3] >= ilowest);
|
|
1691
|
+
assert(args.op[3] <= oend);
|
|
1692
|
+
|
|
1693
|
+
assert(ilowest == args.ilowest);
|
|
1694
|
+
assert(ilowest + 6 == args.iend[0]);
|
|
1695
|
+
(void)ilowest;
|
|
1696
|
+
|
|
1697
|
+
/* finish bitStreams one by one */
|
|
1698
|
+
{
|
|
1699
|
+
size_t const segmentSize = (dstSize+3) / 4;
|
|
1700
|
+
BYTE* segmentEnd = (BYTE*)dst;
|
|
1701
|
+
int i;
|
|
1702
|
+
for (i = 0; i < 4; ++i) {
|
|
1703
|
+
BIT_DStream_t bit;
|
|
1704
|
+
if (segmentSize <= (size_t)(oend - segmentEnd))
|
|
1705
|
+
segmentEnd += segmentSize;
|
|
1706
|
+
else
|
|
1707
|
+
segmentEnd = oend;
|
|
1708
|
+
FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
|
|
1709
|
+
args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
|
|
1710
|
+
if (args.op[i] != segmentEnd)
|
|
1711
|
+
return ERROR(corruption_detected);
|
|
1712
|
+
}
|
|
1713
|
+
}
|
|
1714
|
+
|
|
1715
|
+
/* decoded size */
|
|
1716
|
+
return dstSize;
|
|
1717
|
+
}
|
|
1718
|
+
|
|
1719
|
+
static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
|
|
1720
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags)
|
|
924
1721
|
{
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
1722
|
+
HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default;
|
|
1723
|
+
HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop;
|
|
1724
|
+
|
|
1725
|
+
#if DYNAMIC_BMI2
|
|
1726
|
+
if (flags & HUF_flags_bmi2) {
|
|
1727
|
+
fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
|
|
1728
|
+
# if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
1729
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
|
1730
|
+
loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
|
|
1731
|
+
}
|
|
1732
|
+
# endif
|
|
1733
|
+
} else {
|
|
1734
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1735
|
+
}
|
|
1736
|
+
#endif
|
|
1737
|
+
|
|
1738
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
|
|
1739
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
|
1740
|
+
loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
|
|
1741
|
+
}
|
|
1742
|
+
#endif
|
|
1743
|
+
|
|
1744
|
+
if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
|
|
1745
|
+
size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
|
|
1746
|
+
if (ret != 0)
|
|
1747
|
+
return ret;
|
|
1748
|
+
}
|
|
1749
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
928
1750
|
}
|
|
929
1751
|
|
|
1752
|
+
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
|
|
1753
|
+
|
|
930
1754
|
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
931
1755
|
const void* cSrc, size_t cSrcSize,
|
|
932
|
-
void* workSpace, size_t wkspSize)
|
|
1756
|
+
void* workSpace, size_t wkspSize, int flags)
|
|
933
1757
|
{
|
|
934
1758
|
const BYTE* ip = (const BYTE*) cSrc;
|
|
935
1759
|
|
|
936
1760
|
size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
|
|
937
|
-
workSpace, wkspSize);
|
|
1761
|
+
workSpace, wkspSize, flags);
|
|
938
1762
|
if (HUF_isError(hSize)) return hSize;
|
|
939
1763
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
|
940
1764
|
ip += hSize; cSrcSize -= hSize;
|
|
941
1765
|
|
|
942
|
-
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx,
|
|
1766
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
|
|
943
1767
|
}
|
|
944
1768
|
|
|
945
|
-
|
|
946
|
-
size_t HUF_decompress4X2_usingDTable(
|
|
947
|
-
void* dst, size_t dstSize,
|
|
948
|
-
const void* cSrc, size_t cSrcSize,
|
|
949
|
-
const HUF_DTable* DTable)
|
|
950
|
-
{
|
|
951
|
-
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
|
952
|
-
if (dtd.tableType != 1) return ERROR(GENERIC);
|
|
953
|
-
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
954
|
-
}
|
|
955
|
-
|
|
956
|
-
static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1769
|
+
static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
957
1770
|
const void* cSrc, size_t cSrcSize,
|
|
958
|
-
void* workSpace, size_t wkspSize, int
|
|
1771
|
+
void* workSpace, size_t wkspSize, int flags)
|
|
959
1772
|
{
|
|
960
1773
|
const BYTE* ip = (const BYTE*) cSrc;
|
|
961
1774
|
|
|
962
1775
|
size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
|
|
963
|
-
workSpace, wkspSize);
|
|
1776
|
+
workSpace, wkspSize, flags);
|
|
964
1777
|
if (HUF_isError(hSize)) return hSize;
|
|
965
1778
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
|
966
1779
|
ip += hSize; cSrcSize -= hSize;
|
|
967
1780
|
|
|
968
|
-
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,
|
|
969
|
-
}
|
|
970
|
-
|
|
971
|
-
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
972
|
-
const void* cSrc, size_t cSrcSize,
|
|
973
|
-
void* workSpace, size_t wkspSize)
|
|
974
|
-
{
|
|
975
|
-
return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
|
1781
|
+
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
|
976
1782
|
}
|
|
977
1783
|
|
|
978
|
-
|
|
979
1784
|
#endif /* HUF_FORCE_DECOMPRESS_X1 */
|
|
980
1785
|
|
|
981
1786
|
|
|
@@ -983,66 +1788,28 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
|
983
1788
|
/* Universal decompression selectors */
|
|
984
1789
|
/* ***********************************/
|
|
985
1790
|
|
|
986
|
-
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
|
|
987
|
-
const void* cSrc, size_t cSrcSize,
|
|
988
|
-
const HUF_DTable* DTable)
|
|
989
|
-
{
|
|
990
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
991
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
992
|
-
(void)dtd;
|
|
993
|
-
assert(dtd.tableType == 0);
|
|
994
|
-
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
995
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
996
|
-
(void)dtd;
|
|
997
|
-
assert(dtd.tableType == 1);
|
|
998
|
-
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
999
|
-
#else
|
|
1000
|
-
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
|
1001
|
-
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
1002
|
-
#endif
|
|
1003
|
-
}
|
|
1004
|
-
|
|
1005
|
-
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
|
1006
|
-
const void* cSrc, size_t cSrcSize,
|
|
1007
|
-
const HUF_DTable* DTable)
|
|
1008
|
-
{
|
|
1009
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
1010
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1011
|
-
(void)dtd;
|
|
1012
|
-
assert(dtd.tableType == 0);
|
|
1013
|
-
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
1014
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1015
|
-
(void)dtd;
|
|
1016
|
-
assert(dtd.tableType == 1);
|
|
1017
|
-
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
1018
|
-
#else
|
|
1019
|
-
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
|
1020
|
-
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
1021
|
-
#endif
|
|
1022
|
-
}
|
|
1023
|
-
|
|
1024
1791
|
|
|
1025
1792
|
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1026
1793
|
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
|
|
1027
|
-
static const algo_time_t algoTime[16 /* Quantization */][
|
|
1794
|
+
static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
|
|
1028
1795
|
{
|
|
1029
1796
|
/* single, double, quad */
|
|
1030
|
-
{{0,0}, {1,1}
|
|
1031
|
-
{{0,0}, {1,1}
|
|
1032
|
-
{{
|
|
1033
|
-
{{
|
|
1034
|
-
{{
|
|
1035
|
-
{{
|
|
1036
|
-
{{
|
|
1037
|
-
{{
|
|
1038
|
-
{{
|
|
1039
|
-
{{
|
|
1040
|
-
{{
|
|
1041
|
-
{{
|
|
1042
|
-
{{
|
|
1043
|
-
{{
|
|
1044
|
-
{{
|
|
1045
|
-
{{
|
|
1797
|
+
{{0,0}, {1,1}}, /* Q==0 : impossible */
|
|
1798
|
+
{{0,0}, {1,1}}, /* Q==1 : impossible */
|
|
1799
|
+
{{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */
|
|
1800
|
+
{{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */
|
|
1801
|
+
{{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */
|
|
1802
|
+
{{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */
|
|
1803
|
+
{{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */
|
|
1804
|
+
{{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */
|
|
1805
|
+
{{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */
|
|
1806
|
+
{{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */
|
|
1807
|
+
{{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */
|
|
1808
|
+
{{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */
|
|
1809
|
+
{{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */
|
|
1810
|
+
{{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */
|
|
1811
|
+
{{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */
|
|
1812
|
+
{{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */
|
|
1046
1813
|
};
|
|
1047
1814
|
#endif
|
|
1048
1815
|
|
|
@@ -1069,42 +1836,15 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
|
|
1069
1836
|
U32 const D256 = (U32)(dstSize >> 8);
|
|
1070
1837
|
U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
|
|
1071
1838
|
U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
|
|
1072
|
-
DTime1 += DTime1 >>
|
|
1839
|
+
DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
|
|
1073
1840
|
return DTime1 < DTime0;
|
|
1074
1841
|
}
|
|
1075
1842
|
#endif
|
|
1076
1843
|
}
|
|
1077
1844
|
|
|
1078
|
-
|
|
1079
|
-
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
|
|
1080
|
-
size_t dstSize, const void* cSrc,
|
|
1081
|
-
size_t cSrcSize, void* workSpace,
|
|
1082
|
-
size_t wkspSize)
|
|
1083
|
-
{
|
|
1084
|
-
/* validation checks */
|
|
1085
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1086
|
-
if (cSrcSize == 0) return ERROR(corruption_detected);
|
|
1087
|
-
|
|
1088
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1089
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1090
|
-
(void)algoNb;
|
|
1091
|
-
assert(algoNb == 0);
|
|
1092
|
-
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
|
1093
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1094
|
-
(void)algoNb;
|
|
1095
|
-
assert(algoNb == 1);
|
|
1096
|
-
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
|
1097
|
-
#else
|
|
1098
|
-
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
|
1099
|
-
cSrcSize, workSpace, wkspSize):
|
|
1100
|
-
HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
|
1101
|
-
#endif
|
|
1102
|
-
}
|
|
1103
|
-
}
|
|
1104
|
-
|
|
1105
1845
|
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1106
1846
|
const void* cSrc, size_t cSrcSize,
|
|
1107
|
-
void* workSpace, size_t wkspSize)
|
|
1847
|
+
void* workSpace, size_t wkspSize, int flags)
|
|
1108
1848
|
{
|
|
1109
1849
|
/* validation checks */
|
|
1110
1850
|
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
@@ -1117,71 +1857,71 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
|
1117
1857
|
(void)algoNb;
|
|
1118
1858
|
assert(algoNb == 0);
|
|
1119
1859
|
return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
|
1120
|
-
cSrcSize, workSpace, wkspSize);
|
|
1860
|
+
cSrcSize, workSpace, wkspSize, flags);
|
|
1121
1861
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1122
1862
|
(void)algoNb;
|
|
1123
1863
|
assert(algoNb == 1);
|
|
1124
1864
|
return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
|
1125
|
-
cSrcSize, workSpace, wkspSize);
|
|
1865
|
+
cSrcSize, workSpace, wkspSize, flags);
|
|
1126
1866
|
#else
|
|
1127
1867
|
return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
|
1128
|
-
cSrcSize, workSpace, wkspSize):
|
|
1868
|
+
cSrcSize, workSpace, wkspSize, flags):
|
|
1129
1869
|
HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
|
1130
|
-
cSrcSize, workSpace, wkspSize);
|
|
1870
|
+
cSrcSize, workSpace, wkspSize, flags);
|
|
1131
1871
|
#endif
|
|
1132
1872
|
}
|
|
1133
1873
|
}
|
|
1134
1874
|
|
|
1135
1875
|
|
|
1136
|
-
size_t
|
|
1876
|
+
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
|
|
1137
1877
|
{
|
|
1138
1878
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
1139
1879
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1140
1880
|
(void)dtd;
|
|
1141
1881
|
assert(dtd.tableType == 0);
|
|
1142
|
-
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1882
|
+
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1143
1883
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1144
1884
|
(void)dtd;
|
|
1145
1885
|
assert(dtd.tableType == 1);
|
|
1146
|
-
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1886
|
+
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1147
1887
|
#else
|
|
1148
|
-
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1149
|
-
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1888
|
+
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
|
|
1889
|
+
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1150
1890
|
#endif
|
|
1151
1891
|
}
|
|
1152
1892
|
|
|
1153
1893
|
#ifndef HUF_FORCE_DECOMPRESS_X2
|
|
1154
|
-
size_t
|
|
1894
|
+
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
|
|
1155
1895
|
{
|
|
1156
1896
|
const BYTE* ip = (const BYTE*) cSrc;
|
|
1157
1897
|
|
|
1158
|
-
size_t const hSize =
|
|
1898
|
+
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
|
1159
1899
|
if (HUF_isError(hSize)) return hSize;
|
|
1160
1900
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
|
1161
1901
|
ip += hSize; cSrcSize -= hSize;
|
|
1162
1902
|
|
|
1163
|
-
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,
|
|
1903
|
+
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
|
1164
1904
|
}
|
|
1165
1905
|
#endif
|
|
1166
1906
|
|
|
1167
|
-
size_t
|
|
1907
|
+
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
|
|
1168
1908
|
{
|
|
1169
1909
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
1170
1910
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1171
1911
|
(void)dtd;
|
|
1172
1912
|
assert(dtd.tableType == 0);
|
|
1173
|
-
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1913
|
+
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1174
1914
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1175
1915
|
(void)dtd;
|
|
1176
1916
|
assert(dtd.tableType == 1);
|
|
1177
|
-
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1917
|
+
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1178
1918
|
#else
|
|
1179
|
-
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1180
|
-
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1919
|
+
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
|
|
1920
|
+
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1181
1921
|
#endif
|
|
1182
1922
|
}
|
|
1183
1923
|
|
|
1184
|
-
size_t
|
|
1924
|
+
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
|
|
1185
1925
|
{
|
|
1186
1926
|
/* validation checks */
|
|
1187
1927
|
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
@@ -1191,160 +1931,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
|
|
|
1191
1931
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1192
1932
|
(void)algoNb;
|
|
1193
1933
|
assert(algoNb == 0);
|
|
1194
|
-
return
|
|
1195
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1196
|
-
(void)algoNb;
|
|
1197
|
-
assert(algoNb == 1);
|
|
1198
|
-
return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
|
1199
|
-
#else
|
|
1200
|
-
return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
|
|
1201
|
-
HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
|
1202
|
-
#endif
|
|
1203
|
-
}
|
|
1204
|
-
}
|
|
1205
|
-
|
|
1206
|
-
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
|
1207
|
-
#ifndef HUF_FORCE_DECOMPRESS_X2
|
|
1208
|
-
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
1209
|
-
{
|
|
1210
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1211
|
-
return HUF_readDTableX1_wksp(DTable, src, srcSize,
|
|
1212
|
-
workSpace, sizeof(workSpace));
|
|
1213
|
-
}
|
|
1214
|
-
|
|
1215
|
-
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
1216
|
-
const void* cSrc, size_t cSrcSize)
|
|
1217
|
-
{
|
|
1218
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1219
|
-
return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
|
1220
|
-
workSpace, sizeof(workSpace));
|
|
1221
|
-
}
|
|
1222
|
-
|
|
1223
|
-
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1224
|
-
{
|
|
1225
|
-
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
|
1226
|
-
return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
|
1227
|
-
}
|
|
1228
|
-
#endif
|
|
1229
|
-
|
|
1230
|
-
#ifndef HUF_FORCE_DECOMPRESS_X1
|
|
1231
|
-
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
1232
|
-
{
|
|
1233
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1234
|
-
return HUF_readDTableX2_wksp(DTable, src, srcSize,
|
|
1235
|
-
workSpace, sizeof(workSpace));
|
|
1236
|
-
}
|
|
1237
|
-
|
|
1238
|
-
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
1239
|
-
const void* cSrc, size_t cSrcSize)
|
|
1240
|
-
{
|
|
1241
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1242
|
-
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
|
1243
|
-
workSpace, sizeof(workSpace));
|
|
1244
|
-
}
|
|
1245
|
-
|
|
1246
|
-
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1247
|
-
{
|
|
1248
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
|
1249
|
-
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
1250
|
-
}
|
|
1251
|
-
#endif
|
|
1252
|
-
|
|
1253
|
-
#ifndef HUF_FORCE_DECOMPRESS_X2
|
|
1254
|
-
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1255
|
-
{
|
|
1256
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1257
|
-
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1258
|
-
workSpace, sizeof(workSpace));
|
|
1259
|
-
}
|
|
1260
|
-
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1261
|
-
{
|
|
1262
|
-
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
|
1263
|
-
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
1264
|
-
}
|
|
1265
|
-
#endif
|
|
1266
|
-
|
|
1267
|
-
#ifndef HUF_FORCE_DECOMPRESS_X1
|
|
1268
|
-
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1269
|
-
const void* cSrc, size_t cSrcSize)
|
|
1270
|
-
{
|
|
1271
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1272
|
-
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1273
|
-
workSpace, sizeof(workSpace));
|
|
1274
|
-
}
|
|
1275
|
-
|
|
1276
|
-
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1277
|
-
{
|
|
1278
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
|
1279
|
-
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
1280
|
-
}
|
|
1281
|
-
#endif
|
|
1282
|
-
|
|
1283
|
-
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
|
1284
|
-
|
|
1285
|
-
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1286
|
-
{
|
|
1287
|
-
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1288
|
-
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
|
|
1289
|
-
#endif
|
|
1290
|
-
|
|
1291
|
-
/* validation checks */
|
|
1292
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1293
|
-
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
|
1294
|
-
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1295
|
-
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1296
|
-
|
|
1297
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1298
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1299
|
-
(void)algoNb;
|
|
1300
|
-
assert(algoNb == 0);
|
|
1301
|
-
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
|
|
1302
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1303
|
-
(void)algoNb;
|
|
1304
|
-
assert(algoNb == 1);
|
|
1305
|
-
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
|
|
1306
|
-
#else
|
|
1307
|
-
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
|
1308
|
-
#endif
|
|
1309
|
-
}
|
|
1310
|
-
}
|
|
1311
|
-
|
|
1312
|
-
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1313
|
-
{
|
|
1314
|
-
/* validation checks */
|
|
1315
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1316
|
-
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
|
1317
|
-
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1318
|
-
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1319
|
-
|
|
1320
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1321
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1322
|
-
(void)algoNb;
|
|
1323
|
-
assert(algoNb == 0);
|
|
1324
|
-
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
|
1934
|
+
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
|
1325
1935
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1326
1936
|
(void)algoNb;
|
|
1327
1937
|
assert(algoNb == 1);
|
|
1328
|
-
return
|
|
1938
|
+
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
|
1329
1939
|
#else
|
|
1330
|
-
return algoNb ?
|
|
1331
|
-
|
|
1940
|
+
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) :
|
|
1941
|
+
HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
|
1332
1942
|
#endif
|
|
1333
1943
|
}
|
|
1334
1944
|
}
|
|
1335
|
-
|
|
1336
|
-
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1337
|
-
{
|
|
1338
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1339
|
-
return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1340
|
-
workSpace, sizeof(workSpace));
|
|
1341
|
-
}
|
|
1342
|
-
|
|
1343
|
-
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1344
|
-
const void* cSrc, size_t cSrcSize)
|
|
1345
|
-
{
|
|
1346
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1347
|
-
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1348
|
-
workSpace, sizeof(workSpace));
|
|
1349
|
-
}
|
|
1350
|
-
#endif
|