zstdlib 0.10.0-arm64-darwin → 0.11.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +8 -0
- data/ext/zstdlib_c/extconf.rb +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.2/zstdlib.c +5090 -0
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/allocations.h +55 -0
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/bits.h +200 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/bitstream.h +19 -60
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/compiler.h +26 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/cpu.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/entropy_common.c +12 -40
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.c +9 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse.h +5 -83
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse_decompress.c +7 -99
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/huf.h +65 -156
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/mem.h +39 -46
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.c +26 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.h +7 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/portability_macros.h +22 -3
- data/ext/zstdlib_c/zstd-1.5.5/lib/common/threading.c +176 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/threading.h +5 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.c +2 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.h +8 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_common.c +1 -36
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_deps.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_internal.h +17 -118
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_trace.h +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/clevels.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/fse_compress.c +7 -124
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/huf_compress.c +234 -169
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress.c +1243 -538
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_internal.h +225 -151
- data/ext/zstdlib_c/zstd-1.5.5/lib/compress/zstd_compress_literals.c +235 -0
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_literals.h +16 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.c +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_cwksp.h +128 -62
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.c +95 -33
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.c +433 -148
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.c +398 -345
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.h +4 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.c +5 -5
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm_geartab.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.c +106 -80
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.c +17 -9
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress.c +434 -441
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress_amd64.S +30 -39
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.c +4 -4
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress.c +205 -80
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.c +201 -81
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.h +6 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_internal.h +4 -2
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zdict.h +53 -31
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd.h +580 -135
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd_errors.h +27 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzcompatibility.h +8 -8
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzguts.h +10 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzlib.c +3 -3
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzread.c +10 -10
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzwrite.c +5 -5
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.c +46 -44
- data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.h +4 -1
- data/lib/2.4/zstdlib_c.bundle +0 -0
- data/lib/2.5/zstdlib_c.bundle +0 -0
- data/lib/2.6/zstdlib_c.bundle +0 -0
- data/lib/2.7/zstdlib_c.bundle +0 -0
- data/lib/3.0/zstdlib_c.bundle +0 -0
- data/lib/3.1/zstdlib_c.bundle +0 -0
- data/lib/3.2/zstdlib_c.bundle +0 -0
- metadata +82 -78
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/threading.c +0 -122
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_literals.c +0 -159
@@ -1,7 +1,7 @@
|
|
1
1
|
/* ******************************************************************
|
2
2
|
* huff0 huffman decoder,
|
3
3
|
* part of Finite State Entropy library
|
4
|
-
* Copyright (c)
|
4
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
5
5
|
*
|
6
6
|
* You can contact the author at :
|
7
7
|
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
@@ -19,10 +19,10 @@
|
|
19
19
|
#include "../common/compiler.h"
|
20
20
|
#include "../common/bitstream.h" /* BIT_* */
|
21
21
|
#include "../common/fse.h" /* to compress headers */
|
22
|
-
#define HUF_STATIC_LINKING_ONLY
|
23
22
|
#include "../common/huf.h"
|
24
23
|
#include "../common/error_private.h"
|
25
24
|
#include "../common/zstd_internal.h"
|
25
|
+
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */
|
26
26
|
|
27
27
|
/* **************************************************************
|
28
28
|
* Constants
|
@@ -43,10 +43,14 @@
|
|
43
43
|
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
|
44
44
|
#endif
|
45
45
|
|
46
|
-
|
47
|
-
|
46
|
+
/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is
|
47
|
+
* supported at runtime, so we can add the BMI2 target attribute.
|
48
|
+
* When it is disabled, we will still get BMI2 if it is enabled statically.
|
49
|
+
*/
|
50
|
+
#if DYNAMIC_BMI2
|
51
|
+
# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
|
48
52
|
#else
|
49
|
-
# define
|
53
|
+
# define HUF_FAST_BMI2_ATTRS
|
50
54
|
#endif
|
51
55
|
|
52
56
|
#ifdef __cplusplus
|
@@ -56,18 +60,12 @@
|
|
56
60
|
#endif
|
57
61
|
#define HUF_ASM_DECL HUF_EXTERN_C
|
58
62
|
|
59
|
-
#if DYNAMIC_BMI2
|
63
|
+
#if DYNAMIC_BMI2
|
60
64
|
# define HUF_NEED_BMI2_FUNCTION 1
|
61
65
|
#else
|
62
66
|
# define HUF_NEED_BMI2_FUNCTION 0
|
63
67
|
#endif
|
64
68
|
|
65
|
-
#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
|
66
|
-
# define HUF_NEED_DEFAULT_FUNCTION 1
|
67
|
-
#else
|
68
|
-
# define HUF_NEED_DEFAULT_FUNCTION 0
|
69
|
-
#endif
|
70
|
-
|
71
69
|
/* **************************************************************
|
72
70
|
* Error Management
|
73
71
|
****************************************************************/
|
@@ -84,6 +82,11 @@
|
|
84
82
|
/* **************************************************************
|
85
83
|
* BMI2 Variant Wrappers
|
86
84
|
****************************************************************/
|
85
|
+
typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize,
|
86
|
+
const void *cSrc,
|
87
|
+
size_t cSrcSize,
|
88
|
+
const HUF_DTable *DTable);
|
89
|
+
|
87
90
|
#if DYNAMIC_BMI2
|
88
91
|
|
89
92
|
#define HUF_DGEN(fn) \
|
@@ -105,9 +108,9 @@
|
|
105
108
|
} \
|
106
109
|
\
|
107
110
|
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
108
|
-
size_t cSrcSize, HUF_DTable const* DTable, int
|
111
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags) \
|
109
112
|
{ \
|
110
|
-
if (
|
113
|
+
if (flags & HUF_flags_bmi2) { \
|
111
114
|
return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
|
112
115
|
} \
|
113
116
|
return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
|
@@ -117,9 +120,9 @@
|
|
117
120
|
|
118
121
|
#define HUF_DGEN(fn) \
|
119
122
|
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
120
|
-
size_t cSrcSize, HUF_DTable const* DTable, int
|
123
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags) \
|
121
124
|
{ \
|
122
|
-
(void)
|
125
|
+
(void)flags; \
|
123
126
|
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
124
127
|
}
|
125
128
|
|
@@ -138,15 +141,28 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
|
138
141
|
return dtd;
|
139
142
|
}
|
140
143
|
|
141
|
-
|
142
|
-
|
143
|
-
static size_t HUF_initDStream(BYTE const* ip) {
|
144
|
+
static size_t HUF_initFastDStream(BYTE const* ip) {
|
144
145
|
BYTE const lastByte = ip[7];
|
145
|
-
size_t const bitsConsumed = lastByte ? 8 -
|
146
|
+
size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
|
146
147
|
size_t const value = MEM_readLEST(ip) | 1;
|
147
148
|
assert(bitsConsumed <= 8);
|
149
|
+
assert(sizeof(size_t) == 8);
|
148
150
|
return value << bitsConsumed;
|
149
151
|
}
|
152
|
+
|
153
|
+
|
154
|
+
/**
|
155
|
+
* The input/output arguments to the Huffman fast decoding loop:
|
156
|
+
*
|
157
|
+
* ip [in/out] - The input pointers, must be updated to reflect what is consumed.
|
158
|
+
* op [in/out] - The output pointers, must be updated to reflect what is written.
|
159
|
+
* bits [in/out] - The bitstream containers, must be updated to reflect the current state.
|
160
|
+
* dt [in] - The decoding table.
|
161
|
+
* ilimit [in] - The input limit, stop when any input pointer is below ilimit.
|
162
|
+
* oend [in] - The end of the output stream. op[3] must not cross oend.
|
163
|
+
* iend [in] - The end of each input stream. ip[i] may cross iend[i],
|
164
|
+
* as long as it is above ilimit, but that indicates corruption.
|
165
|
+
*/
|
150
166
|
typedef struct {
|
151
167
|
BYTE const* ip[4];
|
152
168
|
BYTE* op[4];
|
@@ -155,15 +171,17 @@ typedef struct {
|
|
155
171
|
BYTE const* ilimit;
|
156
172
|
BYTE* oend;
|
157
173
|
BYTE const* iend[4];
|
158
|
-
}
|
174
|
+
} HUF_DecompressFastArgs;
|
175
|
+
|
176
|
+
typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
|
159
177
|
|
160
178
|
/**
|
161
|
-
* Initializes args for the
|
162
|
-
* @returns
|
163
|
-
*
|
179
|
+
* Initializes args for the fast decoding loop.
|
180
|
+
* @returns 1 on success
|
181
|
+
* 0 if the fallback implementation should be used.
|
164
182
|
* Or an error code on failure.
|
165
183
|
*/
|
166
|
-
static size_t
|
184
|
+
static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
|
167
185
|
{
|
168
186
|
void const* dt = DTable + 1;
|
169
187
|
U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
|
@@ -172,9 +190,11 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
|
|
172
190
|
|
173
191
|
BYTE* const oend = (BYTE*)dst + dstSize;
|
174
192
|
|
175
|
-
/* The
|
176
|
-
*
|
177
|
-
|
193
|
+
/* The fast decoding loop assumes 64-bit little-endian.
|
194
|
+
* This condition is false on x32.
|
195
|
+
*/
|
196
|
+
if (!MEM_isLittleEndian() || MEM_32bits())
|
197
|
+
return 0;
|
178
198
|
|
179
199
|
/* strict minimum : jump table + 1 byte per stream */
|
180
200
|
if (srcSize < 10)
|
@@ -185,7 +205,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
|
|
185
205
|
* On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
|
186
206
|
*/
|
187
207
|
if (dtLog != HUF_DECODER_FAST_TABLELOG)
|
188
|
-
return
|
208
|
+
return 0;
|
189
209
|
|
190
210
|
/* Read the jump table. */
|
191
211
|
{
|
@@ -199,13 +219,13 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
|
|
199
219
|
args->iend[2] = args->iend[1] + length2;
|
200
220
|
args->iend[3] = args->iend[2] + length3;
|
201
221
|
|
202
|
-
/*
|
222
|
+
/* HUF_initFastDStream() requires this, and this small of an input
|
203
223
|
* won't benefit from the ASM loop anyways.
|
204
224
|
* length1 must be >= 16 so that ip[0] >= ilimit before the loop
|
205
225
|
* starts.
|
206
226
|
*/
|
207
227
|
if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
|
208
|
-
return
|
228
|
+
return 0;
|
209
229
|
if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
|
210
230
|
}
|
211
231
|
/* ip[] contains the position that is currently loaded into bits[]. */
|
@@ -222,7 +242,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
|
|
222
242
|
|
223
243
|
/* No point to call the ASM loop for tiny outputs. */
|
224
244
|
if (args->op[3] >= oend)
|
225
|
-
return
|
245
|
+
return 0;
|
226
246
|
|
227
247
|
/* bits[] is the bit container.
|
228
248
|
* It is read from the MSB down to the LSB.
|
@@ -231,10 +251,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
|
|
231
251
|
* set, so that CountTrailingZeros(bits[]) can be used
|
232
252
|
* to count how many bits we've consumed.
|
233
253
|
*/
|
234
|
-
args->bits[0] =
|
235
|
-
args->bits[1] =
|
236
|
-
args->bits[2] =
|
237
|
-
args->bits[3] =
|
254
|
+
args->bits[0] = HUF_initFastDStream(args->ip[0]);
|
255
|
+
args->bits[1] = HUF_initFastDStream(args->ip[1]);
|
256
|
+
args->bits[2] = HUF_initFastDStream(args->ip[2]);
|
257
|
+
args->bits[3] = HUF_initFastDStream(args->ip[3]);
|
238
258
|
|
239
259
|
/* If ip[] >= ilimit, it is guaranteed to be safe to
|
240
260
|
* reload bits[]. It may be beyond its section, but is
|
@@ -245,10 +265,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
|
|
245
265
|
args->oend = oend;
|
246
266
|
args->dt = dt;
|
247
267
|
|
248
|
-
return
|
268
|
+
return 1;
|
249
269
|
}
|
250
270
|
|
251
|
-
static size_t HUF_initRemainingDStream(BIT_DStream_t* bit,
|
271
|
+
static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd)
|
252
272
|
{
|
253
273
|
/* Validate that we haven't overwritten. */
|
254
274
|
if (args->op[stream] > segmentEnd)
|
@@ -262,15 +282,15 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs
|
|
262
282
|
return ERROR(corruption_detected);
|
263
283
|
|
264
284
|
/* Construct the BIT_DStream_t. */
|
265
|
-
|
266
|
-
bit->
|
285
|
+
assert(sizeof(size_t) == 8);
|
286
|
+
bit->bitContainer = MEM_readLEST(args->ip[stream]);
|
287
|
+
bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
|
267
288
|
bit->start = (const char*)args->iend[0];
|
268
289
|
bit->limitPtr = bit->start + sizeof(size_t);
|
269
290
|
bit->ptr = (const char*)args->ip[stream];
|
270
291
|
|
271
292
|
return 0;
|
272
293
|
}
|
273
|
-
#endif
|
274
294
|
|
275
295
|
|
276
296
|
#ifndef HUF_FORCE_DECOMPRESS_X2
|
@@ -287,10 +307,11 @@ typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decodi
|
|
287
307
|
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
|
288
308
|
U64 D4;
|
289
309
|
if (MEM_isLittleEndian()) {
|
290
|
-
D4 = (symbol << 8) + nbBits;
|
310
|
+
D4 = (U64)((symbol << 8) + nbBits);
|
291
311
|
} else {
|
292
|
-
D4 = symbol + (nbBits << 8);
|
312
|
+
D4 = (U64)(symbol + (nbBits << 8));
|
293
313
|
}
|
314
|
+
assert(D4 < (1U << 16));
|
294
315
|
D4 *= 0x0001000100010001ULL;
|
295
316
|
return D4;
|
296
317
|
}
|
@@ -333,13 +354,7 @@ typedef struct {
|
|
333
354
|
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
|
334
355
|
} HUF_ReadDTableX1_Workspace;
|
335
356
|
|
336
|
-
|
337
|
-
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
|
338
|
-
{
|
339
|
-
return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
340
|
-
}
|
341
|
-
|
342
|
-
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
|
357
|
+
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags)
|
343
358
|
{
|
344
359
|
U32 tableLog = 0;
|
345
360
|
U32 nbSymbols = 0;
|
@@ -354,7 +369,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
|
|
354
369
|
DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
|
355
370
|
/* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
|
356
371
|
|
357
|
-
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp),
|
372
|
+
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags);
|
358
373
|
if (HUF_isError(iSize)) return iSize;
|
359
374
|
|
360
375
|
|
@@ -381,9 +396,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
|
|
381
396
|
* rankStart[0] is not filled because there are no entries in the table for
|
382
397
|
* weight 0.
|
383
398
|
*/
|
384
|
-
{
|
385
|
-
|
386
|
-
int nextRankStart = 0;
|
399
|
+
{ int n;
|
400
|
+
U32 nextRankStart = 0;
|
387
401
|
int const unroll = 4;
|
388
402
|
int const nLimit = (int)nbSymbols - unroll + 1;
|
389
403
|
for (n=0; n<(int)tableLog+1; n++) {
|
@@ -410,10 +424,9 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
|
|
410
424
|
* We can switch based on the length to a different inner loop which is
|
411
425
|
* optimized for that particular case.
|
412
426
|
*/
|
413
|
-
{
|
414
|
-
|
415
|
-
int
|
416
|
-
int rankStart=0;
|
427
|
+
{ U32 w;
|
428
|
+
int symbol = wksp->rankVal[0];
|
429
|
+
int rankStart = 0;
|
417
430
|
for (w=1; w<tableLog+1; ++w) {
|
418
431
|
int const symbolCount = wksp->rankVal[w];
|
419
432
|
int const length = (1 << w) >> 1;
|
@@ -523,7 +536,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
|
|
523
536
|
while (p < pEnd)
|
524
537
|
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
525
538
|
|
526
|
-
return pEnd-pStart;
|
539
|
+
return (size_t)(pEnd-pStart);
|
527
540
|
}
|
528
541
|
|
529
542
|
FORCE_INLINE_TEMPLATE size_t
|
@@ -549,6 +562,10 @@ HUF_decompress1X1_usingDTable_internal_body(
|
|
549
562
|
return dstSize;
|
550
563
|
}
|
551
564
|
|
565
|
+
/* HUF_decompress4X1_usingDTable_internal_body():
|
566
|
+
* Conditions :
|
567
|
+
* @dstSize >= 6
|
568
|
+
*/
|
552
569
|
FORCE_INLINE_TEMPLATE size_t
|
553
570
|
HUF_decompress4X1_usingDTable_internal_body(
|
554
571
|
void* dst, size_t dstSize,
|
@@ -592,6 +609,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
592
609
|
|
593
610
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
594
611
|
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
612
|
+
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
|
595
613
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
596
614
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
597
615
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
@@ -654,38 +672,142 @@ size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo
|
|
654
672
|
}
|
655
673
|
#endif
|
656
674
|
|
657
|
-
#if HUF_NEED_DEFAULT_FUNCTION
|
658
675
|
static
|
659
676
|
size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
|
660
677
|
size_t cSrcSize, HUF_DTable const* DTable) {
|
661
678
|
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
662
679
|
}
|
663
|
-
#endif
|
664
680
|
|
665
681
|
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
666
682
|
|
667
|
-
HUF_ASM_DECL void
|
683
|
+
HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
|
668
684
|
|
669
|
-
|
685
|
+
#endif
|
686
|
+
|
687
|
+
static HUF_FAST_BMI2_ATTRS
|
688
|
+
void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
|
689
|
+
{
|
690
|
+
U64 bits[4];
|
691
|
+
BYTE const* ip[4];
|
692
|
+
BYTE* op[4];
|
693
|
+
U16 const* const dtable = (U16 const*)args->dt;
|
694
|
+
BYTE* const oend = args->oend;
|
695
|
+
BYTE const* const ilimit = args->ilimit;
|
696
|
+
|
697
|
+
/* Copy the arguments to local variables */
|
698
|
+
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
|
699
|
+
ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
|
700
|
+
ZSTD_memcpy(&op, &args->op, sizeof(op));
|
701
|
+
|
702
|
+
assert(MEM_isLittleEndian());
|
703
|
+
assert(!MEM_32bits());
|
704
|
+
|
705
|
+
for (;;) {
|
706
|
+
BYTE* olimit;
|
707
|
+
int stream;
|
708
|
+
int symbol;
|
709
|
+
|
710
|
+
/* Assert loop preconditions */
|
711
|
+
#ifndef NDEBUG
|
712
|
+
for (stream = 0; stream < 4; ++stream) {
|
713
|
+
assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
|
714
|
+
assert(ip[stream] >= ilimit);
|
715
|
+
}
|
716
|
+
#endif
|
717
|
+
/* Compute olimit */
|
718
|
+
{
|
719
|
+
/* Each iteration produces 5 output symbols per stream */
|
720
|
+
size_t const oiters = (size_t)(oend - op[3]) / 5;
|
721
|
+
/* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
|
722
|
+
* per stream.
|
723
|
+
*/
|
724
|
+
size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
|
725
|
+
/* We can safely run iters iterations before running bounds checks */
|
726
|
+
size_t const iters = MIN(oiters, iiters);
|
727
|
+
size_t const symbols = iters * 5;
|
728
|
+
|
729
|
+
/* We can simply check that op[3] < olimit, instead of checking all
|
730
|
+
* of our bounds, since we can't hit the other bounds until we've run
|
731
|
+
* iters iterations, which only happens when op[3] == olimit.
|
732
|
+
*/
|
733
|
+
olimit = op[3] + symbols;
|
734
|
+
|
735
|
+
/* Exit fast decoding loop once we get close to the end. */
|
736
|
+
if (op[3] + 20 > olimit)
|
737
|
+
break;
|
738
|
+
|
739
|
+
/* Exit the decoding loop if any input pointer has crossed the
|
740
|
+
* previous one. This indicates corruption, and a precondition
|
741
|
+
* to our loop is that ip[i] >= ip[0].
|
742
|
+
*/
|
743
|
+
for (stream = 1; stream < 4; ++stream) {
|
744
|
+
if (ip[stream] < ip[stream - 1])
|
745
|
+
goto _out;
|
746
|
+
}
|
747
|
+
}
|
748
|
+
|
749
|
+
#ifndef NDEBUG
|
750
|
+
for (stream = 1; stream < 4; ++stream) {
|
751
|
+
assert(ip[stream] >= ip[stream - 1]);
|
752
|
+
}
|
753
|
+
#endif
|
754
|
+
|
755
|
+
do {
|
756
|
+
/* Decode 5 symbols in each of the 4 streams */
|
757
|
+
for (symbol = 0; symbol < 5; ++symbol) {
|
758
|
+
for (stream = 0; stream < 4; ++stream) {
|
759
|
+
int const index = (int)(bits[stream] >> 53);
|
760
|
+
int const entry = (int)dtable[index];
|
761
|
+
bits[stream] <<= (entry & 63);
|
762
|
+
op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
|
763
|
+
}
|
764
|
+
}
|
765
|
+
/* Reload the bitstreams */
|
766
|
+
for (stream = 0; stream < 4; ++stream) {
|
767
|
+
int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
|
768
|
+
int const nbBits = ctz & 7;
|
769
|
+
int const nbBytes = ctz >> 3;
|
770
|
+
op[stream] += 5;
|
771
|
+
ip[stream] -= nbBytes;
|
772
|
+
bits[stream] = MEM_read64(ip[stream]) | 1;
|
773
|
+
bits[stream] <<= nbBits;
|
774
|
+
}
|
775
|
+
} while (op[3] < olimit);
|
776
|
+
}
|
777
|
+
|
778
|
+
_out:
|
779
|
+
|
780
|
+
/* Save the final values of each of the state variables back to args. */
|
781
|
+
ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
|
782
|
+
ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
|
783
|
+
ZSTD_memcpy(&args->op, &op, sizeof(op));
|
784
|
+
}
|
785
|
+
|
786
|
+
/**
|
787
|
+
* @returns @p dstSize on success (>= 6)
|
788
|
+
* 0 if the fallback implementation should be used
|
789
|
+
* An error if an error occurred
|
790
|
+
*/
|
791
|
+
static HUF_FAST_BMI2_ATTRS
|
670
792
|
size_t
|
671
|
-
|
793
|
+
HUF_decompress4X1_usingDTable_internal_fast(
|
672
794
|
void* dst, size_t dstSize,
|
673
795
|
const void* cSrc, size_t cSrcSize,
|
674
|
-
const HUF_DTable* DTable
|
796
|
+
const HUF_DTable* DTable,
|
797
|
+
HUF_DecompressFastLoopFn loopFn)
|
675
798
|
{
|
676
799
|
void const* dt = DTable + 1;
|
677
800
|
const BYTE* const iend = (const BYTE*)cSrc + 6;
|
678
801
|
BYTE* const oend = (BYTE*)dst + dstSize;
|
679
|
-
|
680
|
-
{
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
|
802
|
+
HUF_DecompressFastArgs args;
|
803
|
+
{ size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
804
|
+
FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
|
805
|
+
if (ret == 0)
|
806
|
+
return 0;
|
685
807
|
}
|
686
808
|
|
687
809
|
assert(args.ip[0] >= args.ilimit);
|
688
|
-
|
810
|
+
loopFn(&args);
|
689
811
|
|
690
812
|
/* Our loop guarantees that ip[] >= ilimit and that we haven't
|
691
813
|
* overwritten any op[].
|
@@ -698,8 +820,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
|
|
698
820
|
(void)iend;
|
699
821
|
|
700
822
|
/* finish bit streams one by one. */
|
701
|
-
{
|
702
|
-
size_t const segmentSize = (dstSize+3) / 4;
|
823
|
+
{ size_t const segmentSize = (dstSize+3) / 4;
|
703
824
|
BYTE* segmentEnd = (BYTE*)dst;
|
704
825
|
int i;
|
705
826
|
for (i = 0; i < 4; ++i) {
|
@@ -716,97 +837,59 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
|
|
716
837
|
}
|
717
838
|
|
718
839
|
/* decoded size */
|
840
|
+
assert(dstSize != 0);
|
719
841
|
return dstSize;
|
720
842
|
}
|
721
|
-
#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
|
722
|
-
|
723
|
-
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
|
724
|
-
const void *cSrc,
|
725
|
-
size_t cSrcSize,
|
726
|
-
const HUF_DTable *DTable);
|
727
843
|
|
728
844
|
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
|
729
845
|
|
730
846
|
static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
|
731
|
-
size_t cSrcSize, HUF_DTable const* DTable, int
|
847
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags)
|
732
848
|
{
|
849
|
+
HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default;
|
850
|
+
HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop;
|
851
|
+
|
733
852
|
#if DYNAMIC_BMI2
|
734
|
-
if (
|
853
|
+
if (flags & HUF_flags_bmi2) {
|
854
|
+
fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
|
735
855
|
# if ZSTD_ENABLE_ASM_X86_64_BMI2
|
736
|
-
|
737
|
-
|
738
|
-
|
856
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
857
|
+
loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
|
858
|
+
}
|
739
859
|
# endif
|
860
|
+
} else {
|
861
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
740
862
|
}
|
741
|
-
#else
|
742
|
-
(void)bmi2;
|
743
863
|
#endif
|
744
864
|
|
745
865
|
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
|
746
|
-
|
747
|
-
|
748
|
-
|
866
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
867
|
+
loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
|
868
|
+
}
|
749
869
|
#endif
|
750
|
-
}
|
751
870
|
|
752
|
-
|
753
|
-
size_t
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
759
|
-
if (dtd.tableType != 0) return ERROR(GENERIC);
|
760
|
-
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
761
|
-
}
|
762
|
-
|
763
|
-
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
764
|
-
const void* cSrc, size_t cSrcSize,
|
765
|
-
void* workSpace, size_t wkspSize)
|
766
|
-
{
|
767
|
-
const BYTE* ip = (const BYTE*) cSrc;
|
768
|
-
|
769
|
-
size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
|
770
|
-
if (HUF_isError(hSize)) return hSize;
|
771
|
-
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
772
|
-
ip += hSize; cSrcSize -= hSize;
|
773
|
-
|
774
|
-
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
|
775
|
-
}
|
776
|
-
|
777
|
-
|
778
|
-
size_t HUF_decompress4X1_usingDTable(
|
779
|
-
void* dst, size_t dstSize,
|
780
|
-
const void* cSrc, size_t cSrcSize,
|
781
|
-
const HUF_DTable* DTable)
|
782
|
-
{
|
783
|
-
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
784
|
-
if (dtd.tableType != 0) return ERROR(GENERIC);
|
785
|
-
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
871
|
+
if (!(flags & HUF_flags_disableFast)) {
|
872
|
+
size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
|
873
|
+
if (ret != 0)
|
874
|
+
return ret;
|
875
|
+
}
|
876
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
786
877
|
}
|
787
878
|
|
788
|
-
static size_t
|
879
|
+
static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
789
880
|
const void* cSrc, size_t cSrcSize,
|
790
|
-
void* workSpace, size_t wkspSize, int
|
881
|
+
void* workSpace, size_t wkspSize, int flags)
|
791
882
|
{
|
792
883
|
const BYTE* ip = (const BYTE*) cSrc;
|
793
884
|
|
794
|
-
size_t const hSize =
|
885
|
+
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
795
886
|
if (HUF_isError(hSize)) return hSize;
|
796
887
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
797
888
|
ip += hSize; cSrcSize -= hSize;
|
798
889
|
|
799
|
-
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,
|
800
|
-
}
|
801
|
-
|
802
|
-
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
803
|
-
const void* cSrc, size_t cSrcSize,
|
804
|
-
void* workSpace, size_t wkspSize)
|
805
|
-
{
|
806
|
-
return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
|
890
|
+
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
807
891
|
}
|
808
892
|
|
809
|
-
|
810
893
|
#endif /* HUF_FORCE_DECOMPRESS_X2 */
|
811
894
|
|
812
895
|
|
@@ -989,7 +1072,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32
|
|
989
1072
|
|
990
1073
|
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
|
991
1074
|
const sortedSymbol_t* sortedList,
|
992
|
-
const U32* rankStart,
|
1075
|
+
const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight,
|
993
1076
|
const U32 nbBitsBaseline)
|
994
1077
|
{
|
995
1078
|
U32* const rankVal = rankValOrigin[0];
|
@@ -1044,14 +1127,7 @@ typedef struct {
|
|
1044
1127
|
|
1045
1128
|
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
|
1046
1129
|
const void* src, size_t srcSize,
|
1047
|
-
void* workSpace, size_t wkspSize)
|
1048
|
-
{
|
1049
|
-
return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
1050
|
-
}
|
1051
|
-
|
1052
|
-
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
|
1053
|
-
const void* src, size_t srcSize,
|
1054
|
-
void* workSpace, size_t wkspSize, int bmi2)
|
1130
|
+
void* workSpace, size_t wkspSize, int flags)
|
1055
1131
|
{
|
1056
1132
|
U32 tableLog, maxW, nbSymbols;
|
1057
1133
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
@@ -1073,7 +1149,7 @@ size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
|
|
1073
1149
|
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
1074
1150
|
/* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
|
1075
1151
|
|
1076
|
-
iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp),
|
1152
|
+
iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags);
|
1077
1153
|
if (HUF_isError(iSize)) return iSize;
|
1078
1154
|
|
1079
1155
|
/* check result */
|
@@ -1244,6 +1320,11 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
1244
1320
|
/* decoded size */
|
1245
1321
|
return dstSize;
|
1246
1322
|
}
|
1323
|
+
|
1324
|
+
/* HUF_decompress4X2_usingDTable_internal_body():
|
1325
|
+
* Conditions:
|
1326
|
+
* @dstSize >= 6
|
1327
|
+
*/
|
1247
1328
|
FORCE_INLINE_TEMPLATE size_t
|
1248
1329
|
HUF_decompress4X2_usingDTable_internal_body(
|
1249
1330
|
void* dst, size_t dstSize,
|
@@ -1284,8 +1365,9 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
1284
1365
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1285
1366
|
U32 const dtLog = dtd.tableLog;
|
1286
1367
|
|
1287
|
-
if (length4 > cSrcSize) return ERROR(corruption_detected);
|
1288
|
-
if (opStart4 > oend) return ERROR(corruption_detected);
|
1368
|
+
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
1369
|
+
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
1370
|
+
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
|
1289
1371
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
1290
1372
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
1291
1373
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
@@ -1370,36 +1452,177 @@ size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo
|
|
1370
1452
|
}
|
1371
1453
|
#endif
|
1372
1454
|
|
1373
|
-
#if HUF_NEED_DEFAULT_FUNCTION
|
1374
1455
|
static
|
1375
1456
|
size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
|
1376
1457
|
size_t cSrcSize, HUF_DTable const* DTable) {
|
1377
1458
|
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
1378
1459
|
}
|
1379
|
-
#endif
|
1380
1460
|
|
1381
1461
|
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
1382
1462
|
|
1383
|
-
HUF_ASM_DECL void
|
1463
|
+
HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
|
1464
|
+
|
1465
|
+
#endif
|
1466
|
+
|
1467
|
+
static HUF_FAST_BMI2_ATTRS
|
1468
|
+
void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
|
1469
|
+
{
|
1470
|
+
U64 bits[4];
|
1471
|
+
BYTE const* ip[4];
|
1472
|
+
BYTE* op[4];
|
1473
|
+
BYTE* oend[4];
|
1474
|
+
HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
|
1475
|
+
BYTE const* const ilimit = args->ilimit;
|
1476
|
+
|
1477
|
+
/* Copy the arguments to local registers. */
|
1478
|
+
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
|
1479
|
+
ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
|
1480
|
+
ZSTD_memcpy(&op, &args->op, sizeof(op));
|
1481
|
+
|
1482
|
+
oend[0] = op[1];
|
1483
|
+
oend[1] = op[2];
|
1484
|
+
oend[2] = op[3];
|
1485
|
+
oend[3] = args->oend;
|
1486
|
+
|
1487
|
+
assert(MEM_isLittleEndian());
|
1488
|
+
assert(!MEM_32bits());
|
1489
|
+
|
1490
|
+
for (;;) {
|
1491
|
+
BYTE* olimit;
|
1492
|
+
int stream;
|
1493
|
+
int symbol;
|
1494
|
+
|
1495
|
+
/* Assert loop preconditions */
|
1496
|
+
#ifndef NDEBUG
|
1497
|
+
for (stream = 0; stream < 4; ++stream) {
|
1498
|
+
assert(op[stream] <= oend[stream]);
|
1499
|
+
assert(ip[stream] >= ilimit);
|
1500
|
+
}
|
1501
|
+
#endif
|
1502
|
+
/* Compute olimit */
|
1503
|
+
{
|
1504
|
+
/* Each loop does 5 table lookups for each of the 4 streams.
|
1505
|
+
* Each table lookup consumes up to 11 bits of input, and produces
|
1506
|
+
* up to 2 bytes of output.
|
1507
|
+
*/
|
1508
|
+
/* We can consume up to 7 bytes of input per iteration per stream.
|
1509
|
+
* We also know that each input pointer is >= ip[0]. So we can run
|
1510
|
+
* iters loops before running out of input.
|
1511
|
+
*/
|
1512
|
+
size_t iters = (size_t)(ip[0] - ilimit) / 7;
|
1513
|
+
/* Each iteration can produce up to 10 bytes of output per stream.
|
1514
|
+
* Each output stream may advance at different rates. So take the
|
1515
|
+
* minimum number of safe iterations among all the output streams.
|
1516
|
+
*/
|
1517
|
+
for (stream = 0; stream < 4; ++stream) {
|
1518
|
+
size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
|
1519
|
+
iters = MIN(iters, oiters);
|
1520
|
+
}
|
1521
|
+
|
1522
|
+
/* Each iteration produces at least 5 output symbols. So until
|
1523
|
+
* op[3] crosses olimit, we know we haven't executed iters
|
1524
|
+
* iterations yet. This saves us maintaining an iters counter,
|
1525
|
+
* at the expense of computing the remaining # of iterations
|
1526
|
+
* more frequently.
|
1527
|
+
*/
|
1528
|
+
olimit = op[3] + (iters * 5);
|
1529
|
+
|
1530
|
+
/* Exit the fast decoding loop if we are too close to the end. */
|
1531
|
+
if (op[3] + 10 > olimit)
|
1532
|
+
break;
|
1533
|
+
|
1534
|
+
/* Exit the decoding loop if any input pointer has crossed the
|
1535
|
+
* previous one. This indicates corruption, and a precondition
|
1536
|
+
* to our loop is that ip[i] >= ip[0].
|
1537
|
+
*/
|
1538
|
+
for (stream = 1; stream < 4; ++stream) {
|
1539
|
+
if (ip[stream] < ip[stream - 1])
|
1540
|
+
goto _out;
|
1541
|
+
}
|
1542
|
+
}
|
1543
|
+
|
1544
|
+
#ifndef NDEBUG
|
1545
|
+
for (stream = 1; stream < 4; ++stream) {
|
1546
|
+
assert(ip[stream] >= ip[stream - 1]);
|
1547
|
+
}
|
1548
|
+
#endif
|
1549
|
+
|
1550
|
+
do {
|
1551
|
+
/* Do 5 table lookups for each of the first 3 streams */
|
1552
|
+
for (symbol = 0; symbol < 5; ++symbol) {
|
1553
|
+
for (stream = 0; stream < 3; ++stream) {
|
1554
|
+
int const index = (int)(bits[stream] >> 53);
|
1555
|
+
HUF_DEltX2 const entry = dtable[index];
|
1556
|
+
MEM_write16(op[stream], entry.sequence);
|
1557
|
+
bits[stream] <<= (entry.nbBits);
|
1558
|
+
op[stream] += (entry.length);
|
1559
|
+
}
|
1560
|
+
}
|
1561
|
+
/* Do 1 table lookup from the final stream */
|
1562
|
+
{
|
1563
|
+
int const index = (int)(bits[3] >> 53);
|
1564
|
+
HUF_DEltX2 const entry = dtable[index];
|
1565
|
+
MEM_write16(op[3], entry.sequence);
|
1566
|
+
bits[3] <<= (entry.nbBits);
|
1567
|
+
op[3] += (entry.length);
|
1568
|
+
}
|
1569
|
+
/* Do 4 table lookups from the final stream & reload bitstreams */
|
1570
|
+
for (stream = 0; stream < 4; ++stream) {
|
1571
|
+
/* Do a table lookup from the final stream.
|
1572
|
+
* This is interleaved with the reloading to reduce register
|
1573
|
+
* pressure. This shouldn't be necessary, but compilers can
|
1574
|
+
* struggle with codegen with high register pressure.
|
1575
|
+
*/
|
1576
|
+
{
|
1577
|
+
int const index = (int)(bits[3] >> 53);
|
1578
|
+
HUF_DEltX2 const entry = dtable[index];
|
1579
|
+
MEM_write16(op[3], entry.sequence);
|
1580
|
+
bits[3] <<= (entry.nbBits);
|
1581
|
+
op[3] += (entry.length);
|
1582
|
+
}
|
1583
|
+
/* Reload the bitstreams. The final bitstream must be reloaded
|
1584
|
+
* after the 5th symbol was decoded.
|
1585
|
+
*/
|
1586
|
+
{
|
1587
|
+
int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
|
1588
|
+
int const nbBits = ctz & 7;
|
1589
|
+
int const nbBytes = ctz >> 3;
|
1590
|
+
ip[stream] -= nbBytes;
|
1591
|
+
bits[stream] = MEM_read64(ip[stream]) | 1;
|
1592
|
+
bits[stream] <<= nbBits;
|
1593
|
+
}
|
1594
|
+
}
|
1595
|
+
} while (op[3] < olimit);
|
1596
|
+
}
|
1384
1597
|
|
1385
|
-
|
1386
|
-
|
1598
|
+
_out:
|
1599
|
+
|
1600
|
+
/* Save the final values of each of the state variables back to args. */
|
1601
|
+
ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
|
1602
|
+
ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
|
1603
|
+
ZSTD_memcpy(&args->op, &op, sizeof(op));
|
1604
|
+
}
|
1605
|
+
|
1606
|
+
|
1607
|
+
static HUF_FAST_BMI2_ATTRS size_t
|
1608
|
+
HUF_decompress4X2_usingDTable_internal_fast(
|
1387
1609
|
void* dst, size_t dstSize,
|
1388
1610
|
const void* cSrc, size_t cSrcSize,
|
1389
|
-
const HUF_DTable* DTable
|
1611
|
+
const HUF_DTable* DTable,
|
1612
|
+
HUF_DecompressFastLoopFn loopFn) {
|
1390
1613
|
void const* dt = DTable + 1;
|
1391
1614
|
const BYTE* const iend = (const BYTE*)cSrc + 6;
|
1392
1615
|
BYTE* const oend = (BYTE*)dst + dstSize;
|
1393
|
-
|
1616
|
+
HUF_DecompressFastArgs args;
|
1394
1617
|
{
|
1395
|
-
size_t const ret =
|
1618
|
+
size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
1396
1619
|
FORWARD_IF_ERROR(ret, "Failed to init asm args");
|
1397
|
-
if (ret
|
1398
|
-
return
|
1620
|
+
if (ret == 0)
|
1621
|
+
return 0;
|
1399
1622
|
}
|
1400
1623
|
|
1401
1624
|
assert(args.ip[0] >= args.ilimit);
|
1402
|
-
|
1625
|
+
loopFn(&args);
|
1403
1626
|
|
1404
1627
|
/* note : op4 already verified within main loop */
|
1405
1628
|
assert(args.ip[0] >= iend);
|
@@ -1430,91 +1653,72 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm(
|
|
1430
1653
|
/* decoded size */
|
1431
1654
|
return dstSize;
|
1432
1655
|
}
|
1433
|
-
#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
|
1434
1656
|
|
1435
1657
|
static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
|
1436
|
-
size_t cSrcSize, HUF_DTable const* DTable, int
|
1658
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags)
|
1437
1659
|
{
|
1660
|
+
HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default;
|
1661
|
+
HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop;
|
1662
|
+
|
1438
1663
|
#if DYNAMIC_BMI2
|
1439
|
-
if (
|
1664
|
+
if (flags & HUF_flags_bmi2) {
|
1665
|
+
fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
|
1440
1666
|
# if ZSTD_ENABLE_ASM_X86_64_BMI2
|
1441
|
-
|
1442
|
-
|
1443
|
-
|
1667
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
1668
|
+
loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
|
1669
|
+
}
|
1444
1670
|
# endif
|
1671
|
+
} else {
|
1672
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
1445
1673
|
}
|
1446
|
-
#else
|
1447
|
-
(void)bmi2;
|
1448
1674
|
#endif
|
1449
1675
|
|
1450
1676
|
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
|
1451
|
-
|
1452
|
-
|
1453
|
-
|
1677
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
1678
|
+
loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
|
1679
|
+
}
|
1454
1680
|
#endif
|
1681
|
+
|
1682
|
+
if (!(flags & HUF_flags_disableFast)) {
|
1683
|
+
size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
|
1684
|
+
if (ret != 0)
|
1685
|
+
return ret;
|
1686
|
+
}
|
1687
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
1455
1688
|
}
|
1456
1689
|
|
1457
1690
|
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
|
1458
1691
|
|
1459
|
-
size_t HUF_decompress1X2_usingDTable(
|
1460
|
-
void* dst, size_t dstSize,
|
1461
|
-
const void* cSrc, size_t cSrcSize,
|
1462
|
-
const HUF_DTable* DTable)
|
1463
|
-
{
|
1464
|
-
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
1465
|
-
if (dtd.tableType != 1) return ERROR(GENERIC);
|
1466
|
-
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
1467
|
-
}
|
1468
|
-
|
1469
1692
|
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
1470
1693
|
const void* cSrc, size_t cSrcSize,
|
1471
|
-
void* workSpace, size_t wkspSize)
|
1694
|
+
void* workSpace, size_t wkspSize, int flags)
|
1472
1695
|
{
|
1473
1696
|
const BYTE* ip = (const BYTE*) cSrc;
|
1474
1697
|
|
1475
1698
|
size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
|
1476
|
-
workSpace, wkspSize);
|
1699
|
+
workSpace, wkspSize, flags);
|
1477
1700
|
if (HUF_isError(hSize)) return hSize;
|
1478
1701
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
1479
1702
|
ip += hSize; cSrcSize -= hSize;
|
1480
1703
|
|
1481
|
-
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx,
|
1482
|
-
}
|
1483
|
-
|
1484
|
-
|
1485
|
-
size_t HUF_decompress4X2_usingDTable(
|
1486
|
-
void* dst, size_t dstSize,
|
1487
|
-
const void* cSrc, size_t cSrcSize,
|
1488
|
-
const HUF_DTable* DTable)
|
1489
|
-
{
|
1490
|
-
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
1491
|
-
if (dtd.tableType != 1) return ERROR(GENERIC);
|
1492
|
-
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
1704
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
|
1493
1705
|
}
|
1494
1706
|
|
1495
|
-
static size_t
|
1707
|
+
static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
1496
1708
|
const void* cSrc, size_t cSrcSize,
|
1497
|
-
void* workSpace, size_t wkspSize, int
|
1709
|
+
void* workSpace, size_t wkspSize, int flags)
|
1498
1710
|
{
|
1499
1711
|
const BYTE* ip = (const BYTE*) cSrc;
|
1500
1712
|
|
1501
1713
|
size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
|
1502
|
-
workSpace, wkspSize);
|
1714
|
+
workSpace, wkspSize, flags);
|
1503
1715
|
if (HUF_isError(hSize)) return hSize;
|
1504
1716
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
1505
1717
|
ip += hSize; cSrcSize -= hSize;
|
1506
1718
|
|
1507
|
-
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,
|
1719
|
+
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
1508
1720
|
}
|
1509
1721
|
|
1510
|
-
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
1511
|
-
const void* cSrc, size_t cSrcSize,
|
1512
|
-
void* workSpace, size_t wkspSize)
|
1513
|
-
{
|
1514
|
-
return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
1515
|
-
}
|
1516
|
-
|
1517
|
-
|
1518
1722
|
#endif /* HUF_FORCE_DECOMPRESS_X1 */
|
1519
1723
|
|
1520
1724
|
|
@@ -1522,44 +1726,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1522
1726
|
/* Universal decompression selectors */
|
1523
1727
|
/* ***********************************/
|
1524
1728
|
|
1525
|
-
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
|
1526
|
-
const void* cSrc, size_t cSrcSize,
|
1527
|
-
const HUF_DTable* DTable)
|
1528
|
-
{
|
1529
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1530
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1531
|
-
(void)dtd;
|
1532
|
-
assert(dtd.tableType == 0);
|
1533
|
-
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
1534
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1535
|
-
(void)dtd;
|
1536
|
-
assert(dtd.tableType == 1);
|
1537
|
-
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
1538
|
-
#else
|
1539
|
-
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
1540
|
-
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
1541
|
-
#endif
|
1542
|
-
}
|
1543
|
-
|
1544
|
-
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
1545
|
-
const void* cSrc, size_t cSrcSize,
|
1546
|
-
const HUF_DTable* DTable)
|
1547
|
-
{
|
1548
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1549
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1550
|
-
(void)dtd;
|
1551
|
-
assert(dtd.tableType == 0);
|
1552
|
-
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
1553
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1554
|
-
(void)dtd;
|
1555
|
-
assert(dtd.tableType == 1);
|
1556
|
-
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
1557
|
-
#else
|
1558
|
-
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
1559
|
-
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
1560
|
-
#endif
|
1561
|
-
}
|
1562
|
-
|
1563
1729
|
|
1564
1730
|
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
1565
1731
|
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
|
@@ -1614,36 +1780,9 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
|
1614
1780
|
#endif
|
1615
1781
|
}
|
1616
1782
|
|
1617
|
-
|
1618
|
-
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
|
1619
|
-
size_t dstSize, const void* cSrc,
|
1620
|
-
size_t cSrcSize, void* workSpace,
|
1621
|
-
size_t wkspSize)
|
1622
|
-
{
|
1623
|
-
/* validation checks */
|
1624
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
1625
|
-
if (cSrcSize == 0) return ERROR(corruption_detected);
|
1626
|
-
|
1627
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1628
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1629
|
-
(void)algoNb;
|
1630
|
-
assert(algoNb == 0);
|
1631
|
-
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
1632
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1633
|
-
(void)algoNb;
|
1634
|
-
assert(algoNb == 1);
|
1635
|
-
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
1636
|
-
#else
|
1637
|
-
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1638
|
-
cSrcSize, workSpace, wkspSize):
|
1639
|
-
HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
1640
|
-
#endif
|
1641
|
-
}
|
1642
|
-
}
|
1643
|
-
|
1644
1783
|
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
1645
1784
|
const void* cSrc, size_t cSrcSize,
|
1646
|
-
void* workSpace, size_t wkspSize)
|
1785
|
+
void* workSpace, size_t wkspSize, int flags)
|
1647
1786
|
{
|
1648
1787
|
/* validation checks */
|
1649
1788
|
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
@@ -1656,71 +1795,71 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1656
1795
|
(void)algoNb;
|
1657
1796
|
assert(algoNb == 0);
|
1658
1797
|
return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1659
|
-
cSrcSize, workSpace, wkspSize);
|
1798
|
+
cSrcSize, workSpace, wkspSize, flags);
|
1660
1799
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1661
1800
|
(void)algoNb;
|
1662
1801
|
assert(algoNb == 1);
|
1663
1802
|
return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1664
|
-
cSrcSize, workSpace, wkspSize);
|
1803
|
+
cSrcSize, workSpace, wkspSize, flags);
|
1665
1804
|
#else
|
1666
1805
|
return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1667
|
-
cSrcSize, workSpace, wkspSize):
|
1806
|
+
cSrcSize, workSpace, wkspSize, flags):
|
1668
1807
|
HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1669
|
-
cSrcSize, workSpace, wkspSize);
|
1808
|
+
cSrcSize, workSpace, wkspSize, flags);
|
1670
1809
|
#endif
|
1671
1810
|
}
|
1672
1811
|
}
|
1673
1812
|
|
1674
1813
|
|
1675
|
-
size_t
|
1814
|
+
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
|
1676
1815
|
{
|
1677
1816
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1678
1817
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1679
1818
|
(void)dtd;
|
1680
1819
|
assert(dtd.tableType == 0);
|
1681
|
-
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1820
|
+
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1682
1821
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1683
1822
|
(void)dtd;
|
1684
1823
|
assert(dtd.tableType == 1);
|
1685
|
-
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1824
|
+
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1686
1825
|
#else
|
1687
|
-
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1688
|
-
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1826
|
+
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
|
1827
|
+
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1689
1828
|
#endif
|
1690
1829
|
}
|
1691
1830
|
|
1692
1831
|
#ifndef HUF_FORCE_DECOMPRESS_X2
|
1693
|
-
size_t
|
1832
|
+
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
|
1694
1833
|
{
|
1695
1834
|
const BYTE* ip = (const BYTE*) cSrc;
|
1696
1835
|
|
1697
|
-
size_t const hSize =
|
1836
|
+
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
1698
1837
|
if (HUF_isError(hSize)) return hSize;
|
1699
1838
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
1700
1839
|
ip += hSize; cSrcSize -= hSize;
|
1701
1840
|
|
1702
|
-
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,
|
1841
|
+
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
1703
1842
|
}
|
1704
1843
|
#endif
|
1705
1844
|
|
1706
|
-
size_t
|
1845
|
+
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
|
1707
1846
|
{
|
1708
1847
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1709
1848
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1710
1849
|
(void)dtd;
|
1711
1850
|
assert(dtd.tableType == 0);
|
1712
|
-
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1851
|
+
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1713
1852
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1714
1853
|
(void)dtd;
|
1715
1854
|
assert(dtd.tableType == 1);
|
1716
|
-
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1855
|
+
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1717
1856
|
#else
|
1718
|
-
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1719
|
-
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1857
|
+
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
|
1858
|
+
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1720
1859
|
#endif
|
1721
1860
|
}
|
1722
1861
|
|
1723
|
-
size_t
|
1862
|
+
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
|
1724
1863
|
{
|
1725
1864
|
/* validation checks */
|
1726
1865
|
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
@@ -1730,160 +1869,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
|
|
1730
1869
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1731
1870
|
(void)algoNb;
|
1732
1871
|
assert(algoNb == 0);
|
1733
|
-
return
|
1872
|
+
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
1734
1873
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1735
1874
|
(void)algoNb;
|
1736
1875
|
assert(algoNb == 1);
|
1737
|
-
return
|
1876
|
+
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
1738
1877
|
#else
|
1739
|
-
return algoNb ?
|
1740
|
-
|
1878
|
+
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) :
|
1879
|
+
HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
1741
1880
|
#endif
|
1742
1881
|
}
|
1743
1882
|
}
|
1744
|
-
|
1745
|
-
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
1746
|
-
#ifndef HUF_FORCE_DECOMPRESS_X2
|
1747
|
-
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
|
1748
|
-
{
|
1749
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
1750
|
-
return HUF_readDTableX1_wksp(DTable, src, srcSize,
|
1751
|
-
workSpace, sizeof(workSpace));
|
1752
|
-
}
|
1753
|
-
|
1754
|
-
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
1755
|
-
const void* cSrc, size_t cSrcSize)
|
1756
|
-
{
|
1757
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
1758
|
-
return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
1759
|
-
workSpace, sizeof(workSpace));
|
1760
|
-
}
|
1761
|
-
|
1762
|
-
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1763
|
-
{
|
1764
|
-
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
1765
|
-
return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
1766
|
-
}
|
1767
|
-
#endif
|
1768
|
-
|
1769
|
-
#ifndef HUF_FORCE_DECOMPRESS_X1
|
1770
|
-
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
1771
|
-
{
|
1772
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
1773
|
-
return HUF_readDTableX2_wksp(DTable, src, srcSize,
|
1774
|
-
workSpace, sizeof(workSpace));
|
1775
|
-
}
|
1776
|
-
|
1777
|
-
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
1778
|
-
const void* cSrc, size_t cSrcSize)
|
1779
|
-
{
|
1780
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
1781
|
-
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
1782
|
-
workSpace, sizeof(workSpace));
|
1783
|
-
}
|
1784
|
-
|
1785
|
-
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1786
|
-
{
|
1787
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
1788
|
-
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
1789
|
-
}
|
1790
|
-
#endif
|
1791
|
-
|
1792
|
-
#ifndef HUF_FORCE_DECOMPRESS_X2
|
1793
|
-
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1794
|
-
{
|
1795
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
1796
|
-
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
1797
|
-
workSpace, sizeof(workSpace));
|
1798
|
-
}
|
1799
|
-
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1800
|
-
{
|
1801
|
-
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
1802
|
-
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
1803
|
-
}
|
1804
|
-
#endif
|
1805
|
-
|
1806
|
-
#ifndef HUF_FORCE_DECOMPRESS_X1
|
1807
|
-
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
1808
|
-
const void* cSrc, size_t cSrcSize)
|
1809
|
-
{
|
1810
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
1811
|
-
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
1812
|
-
workSpace, sizeof(workSpace));
|
1813
|
-
}
|
1814
|
-
|
1815
|
-
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1816
|
-
{
|
1817
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
1818
|
-
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
1819
|
-
}
|
1820
|
-
#endif
|
1821
|
-
|
1822
|
-
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
1823
|
-
|
1824
|
-
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1825
|
-
{
|
1826
|
-
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
1827
|
-
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
|
1828
|
-
#endif
|
1829
|
-
|
1830
|
-
/* validation checks */
|
1831
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
1832
|
-
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
1833
|
-
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
1834
|
-
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
1835
|
-
|
1836
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1837
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1838
|
-
(void)algoNb;
|
1839
|
-
assert(algoNb == 0);
|
1840
|
-
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
|
1841
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1842
|
-
(void)algoNb;
|
1843
|
-
assert(algoNb == 1);
|
1844
|
-
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
|
1845
|
-
#else
|
1846
|
-
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
1847
|
-
#endif
|
1848
|
-
}
|
1849
|
-
}
|
1850
|
-
|
1851
|
-
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1852
|
-
{
|
1853
|
-
/* validation checks */
|
1854
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
1855
|
-
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
1856
|
-
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
1857
|
-
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
1858
|
-
|
1859
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1860
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1861
|
-
(void)algoNb;
|
1862
|
-
assert(algoNb == 0);
|
1863
|
-
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
1864
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1865
|
-
(void)algoNb;
|
1866
|
-
assert(algoNb == 1);
|
1867
|
-
return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
1868
|
-
#else
|
1869
|
-
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
|
1870
|
-
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
|
1871
|
-
#endif
|
1872
|
-
}
|
1873
|
-
}
|
1874
|
-
|
1875
|
-
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1876
|
-
{
|
1877
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
1878
|
-
return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
1879
|
-
workSpace, sizeof(workSpace));
|
1880
|
-
}
|
1881
|
-
|
1882
|
-
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
1883
|
-
const void* cSrc, size_t cSrcSize)
|
1884
|
-
{
|
1885
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
1886
|
-
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
1887
|
-
workSpace, sizeof(workSpace));
|
1888
|
-
}
|
1889
|
-
#endif
|