zstd-ruby 1.4.5.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
- data/ext/zstdruby/libzstd/common/compiler.h +205 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
- data/ext/zstdruby/libzstd/common/error_private.c +10 -2
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +37 -86
- data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
- data/ext/zstdruby/libzstd/common/huf.h +99 -166
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +10 -4
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +74 -19
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
- data/ext/zstdruby/libzstd/zstd.h +1217 -287
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +19 -36
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -354
- data/ext/zstdruby/libzstd/README.md +0 -179
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
2
|
* huff0 huffman decoder,
|
|
3
3
|
* part of Finite State Entropy library
|
|
4
|
-
* Copyright (c)
|
|
4
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
5
5
|
*
|
|
6
6
|
* You can contact the author at :
|
|
7
7
|
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
@@ -15,13 +15,20 @@
|
|
|
15
15
|
/* **************************************************************
|
|
16
16
|
* Dependencies
|
|
17
17
|
****************************************************************/
|
|
18
|
-
#include
|
|
18
|
+
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
|
|
19
19
|
#include "../common/compiler.h"
|
|
20
20
|
#include "../common/bitstream.h" /* BIT_* */
|
|
21
21
|
#include "../common/fse.h" /* to compress headers */
|
|
22
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
23
22
|
#include "../common/huf.h"
|
|
24
23
|
#include "../common/error_private.h"
|
|
24
|
+
#include "../common/zstd_internal.h"
|
|
25
|
+
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */
|
|
26
|
+
|
|
27
|
+
/* **************************************************************
|
|
28
|
+
* Constants
|
|
29
|
+
****************************************************************/
|
|
30
|
+
|
|
31
|
+
#define HUF_DECODER_FAST_TABLELOG 11
|
|
25
32
|
|
|
26
33
|
/* **************************************************************
|
|
27
34
|
* Macros
|
|
@@ -36,6 +43,28 @@
|
|
|
36
43
|
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
|
|
37
44
|
#endif
|
|
38
45
|
|
|
46
|
+
/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is
|
|
47
|
+
* supported at runtime, so we can add the BMI2 target attribute.
|
|
48
|
+
* When it is disabled, we will still get BMI2 if it is enabled statically.
|
|
49
|
+
*/
|
|
50
|
+
#if DYNAMIC_BMI2
|
|
51
|
+
# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
|
|
52
|
+
#else
|
|
53
|
+
# define HUF_FAST_BMI2_ATTRS
|
|
54
|
+
#endif
|
|
55
|
+
|
|
56
|
+
#ifdef __cplusplus
|
|
57
|
+
# define HUF_EXTERN_C extern "C"
|
|
58
|
+
#else
|
|
59
|
+
# define HUF_EXTERN_C
|
|
60
|
+
#endif
|
|
61
|
+
#define HUF_ASM_DECL HUF_EXTERN_C
|
|
62
|
+
|
|
63
|
+
#if DYNAMIC_BMI2
|
|
64
|
+
# define HUF_NEED_BMI2_FUNCTION 1
|
|
65
|
+
#else
|
|
66
|
+
# define HUF_NEED_BMI2_FUNCTION 0
|
|
67
|
+
#endif
|
|
39
68
|
|
|
40
69
|
/* **************************************************************
|
|
41
70
|
* Error Management
|
|
@@ -53,6 +82,11 @@
|
|
|
53
82
|
/* **************************************************************
|
|
54
83
|
* BMI2 Variant Wrappers
|
|
55
84
|
****************************************************************/
|
|
85
|
+
typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize,
|
|
86
|
+
const void *cSrc,
|
|
87
|
+
size_t cSrcSize,
|
|
88
|
+
const HUF_DTable *DTable);
|
|
89
|
+
|
|
56
90
|
#if DYNAMIC_BMI2
|
|
57
91
|
|
|
58
92
|
#define HUF_DGEN(fn) \
|
|
@@ -65,7 +99,7 @@
|
|
|
65
99
|
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
|
66
100
|
} \
|
|
67
101
|
\
|
|
68
|
-
static
|
|
102
|
+
static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
|
|
69
103
|
void* dst, size_t dstSize, \
|
|
70
104
|
const void* cSrc, size_t cSrcSize, \
|
|
71
105
|
const HUF_DTable* DTable) \
|
|
@@ -74,9 +108,9 @@
|
|
|
74
108
|
} \
|
|
75
109
|
\
|
|
76
110
|
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
|
77
|
-
size_t cSrcSize, HUF_DTable const* DTable, int
|
|
111
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags) \
|
|
78
112
|
{ \
|
|
79
|
-
if (
|
|
113
|
+
if (flags & HUF_flags_bmi2) { \
|
|
80
114
|
return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
|
|
81
115
|
} \
|
|
82
116
|
return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
|
|
@@ -86,9 +120,9 @@
|
|
|
86
120
|
|
|
87
121
|
#define HUF_DGEN(fn) \
|
|
88
122
|
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
|
89
|
-
size_t cSrcSize, HUF_DTable const* DTable, int
|
|
123
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags) \
|
|
90
124
|
{ \
|
|
91
|
-
(void)
|
|
125
|
+
(void)flags; \
|
|
92
126
|
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
|
93
127
|
}
|
|
94
128
|
|
|
@@ -103,92 +137,357 @@ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved;
|
|
|
103
137
|
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
|
104
138
|
{
|
|
105
139
|
DTableDesc dtd;
|
|
106
|
-
|
|
140
|
+
ZSTD_memcpy(&dtd, table, sizeof(dtd));
|
|
107
141
|
return dtd;
|
|
108
142
|
}
|
|
109
143
|
|
|
144
|
+
static size_t HUF_initFastDStream(BYTE const* ip) {
|
|
145
|
+
BYTE const lastByte = ip[7];
|
|
146
|
+
size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
|
|
147
|
+
size_t const value = MEM_readLEST(ip) | 1;
|
|
148
|
+
assert(bitsConsumed <= 8);
|
|
149
|
+
assert(sizeof(size_t) == 8);
|
|
150
|
+
return value << bitsConsumed;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
/**
|
|
155
|
+
* The input/output arguments to the Huffman fast decoding loop:
|
|
156
|
+
*
|
|
157
|
+
* ip [in/out] - The input pointers, must be updated to reflect what is consumed.
|
|
158
|
+
* op [in/out] - The output pointers, must be updated to reflect what is written.
|
|
159
|
+
* bits [in/out] - The bitstream containers, must be updated to reflect the current state.
|
|
160
|
+
* dt [in] - The decoding table.
|
|
161
|
+
* ilimit [in] - The input limit, stop when any input pointer is below ilimit.
|
|
162
|
+
* oend [in] - The end of the output stream. op[3] must not cross oend.
|
|
163
|
+
* iend [in] - The end of each input stream. ip[i] may cross iend[i],
|
|
164
|
+
* as long as it is above ilimit, but that indicates corruption.
|
|
165
|
+
*/
|
|
166
|
+
typedef struct {
|
|
167
|
+
BYTE const* ip[4];
|
|
168
|
+
BYTE* op[4];
|
|
169
|
+
U64 bits[4];
|
|
170
|
+
void const* dt;
|
|
171
|
+
BYTE const* ilimit;
|
|
172
|
+
BYTE* oend;
|
|
173
|
+
BYTE const* iend[4];
|
|
174
|
+
} HUF_DecompressFastArgs;
|
|
175
|
+
|
|
176
|
+
typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Initializes args for the fast decoding loop.
|
|
180
|
+
* @returns 1 on success
|
|
181
|
+
* 0 if the fallback implementation should be used.
|
|
182
|
+
* Or an error code on failure.
|
|
183
|
+
*/
|
|
184
|
+
static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
|
|
185
|
+
{
|
|
186
|
+
void const* dt = DTable + 1;
|
|
187
|
+
U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
|
|
188
|
+
|
|
189
|
+
const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
|
|
190
|
+
|
|
191
|
+
BYTE* const oend = (BYTE*)dst + dstSize;
|
|
192
|
+
|
|
193
|
+
/* The fast decoding loop assumes 64-bit little-endian.
|
|
194
|
+
* This condition is false on x32.
|
|
195
|
+
*/
|
|
196
|
+
if (!MEM_isLittleEndian() || MEM_32bits())
|
|
197
|
+
return 0;
|
|
198
|
+
|
|
199
|
+
/* strict minimum : jump table + 1 byte per stream */
|
|
200
|
+
if (srcSize < 10)
|
|
201
|
+
return ERROR(corruption_detected);
|
|
202
|
+
|
|
203
|
+
/* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
|
|
204
|
+
* If table log is not correct at this point, fall back to the old decoder.
|
|
205
|
+
* On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
|
|
206
|
+
*/
|
|
207
|
+
if (dtLog != HUF_DECODER_FAST_TABLELOG)
|
|
208
|
+
return 0;
|
|
209
|
+
|
|
210
|
+
/* Read the jump table. */
|
|
211
|
+
{
|
|
212
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
213
|
+
size_t const length1 = MEM_readLE16(istart);
|
|
214
|
+
size_t const length2 = MEM_readLE16(istart+2);
|
|
215
|
+
size_t const length3 = MEM_readLE16(istart+4);
|
|
216
|
+
size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
|
|
217
|
+
args->iend[0] = istart + 6; /* jumpTable */
|
|
218
|
+
args->iend[1] = args->iend[0] + length1;
|
|
219
|
+
args->iend[2] = args->iend[1] + length2;
|
|
220
|
+
args->iend[3] = args->iend[2] + length3;
|
|
221
|
+
|
|
222
|
+
/* HUF_initFastDStream() requires this, and an input this small
|
|
223
|
+
* won't benefit from the ASM loop anyway.
|
|
224
|
+
* length1 must be >= 16 so that ip[0] >= ilimit before the loop
|
|
225
|
+
* starts.
|
|
226
|
+
*/
|
|
227
|
+
if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
|
|
228
|
+
return 0;
|
|
229
|
+
if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
|
|
230
|
+
}
|
|
231
|
+
/* ip[] contains the position that is currently loaded into bits[]. */
|
|
232
|
+
args->ip[0] = args->iend[1] - sizeof(U64);
|
|
233
|
+
args->ip[1] = args->iend[2] - sizeof(U64);
|
|
234
|
+
args->ip[2] = args->iend[3] - sizeof(U64);
|
|
235
|
+
args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
|
|
236
|
+
|
|
237
|
+
/* op[] contains the output pointers. */
|
|
238
|
+
args->op[0] = (BYTE*)dst;
|
|
239
|
+
args->op[1] = args->op[0] + (dstSize+3)/4;
|
|
240
|
+
args->op[2] = args->op[1] + (dstSize+3)/4;
|
|
241
|
+
args->op[3] = args->op[2] + (dstSize+3)/4;
|
|
242
|
+
|
|
243
|
+
/* No point in calling the ASM loop for tiny outputs. */
|
|
244
|
+
if (args->op[3] >= oend)
|
|
245
|
+
return 0;
|
|
246
|
+
|
|
247
|
+
/* bits[] is the bit container.
|
|
248
|
+
* It is read from the MSB down to the LSB.
|
|
249
|
+
* It is shifted left as it is read, and zeros are
|
|
250
|
+
* shifted in. After the lowest valid bit a 1 is
|
|
251
|
+
* set, so that CountTrailingZeros(bits[]) can be used
|
|
252
|
+
* to count how many bits we've consumed.
|
|
253
|
+
*/
|
|
254
|
+
args->bits[0] = HUF_initFastDStream(args->ip[0]);
|
|
255
|
+
args->bits[1] = HUF_initFastDStream(args->ip[1]);
|
|
256
|
+
args->bits[2] = HUF_initFastDStream(args->ip[2]);
|
|
257
|
+
args->bits[3] = HUF_initFastDStream(args->ip[3]);
|
|
258
|
+
|
|
259
|
+
/* If ip[] >= ilimit, it is guaranteed to be safe to
|
|
260
|
+
* reload bits[]. It may be beyond its section, but is
|
|
261
|
+
* guaranteed to be valid (>= istart).
|
|
262
|
+
*/
|
|
263
|
+
args->ilimit = ilimit;
|
|
264
|
+
|
|
265
|
+
args->oend = oend;
|
|
266
|
+
args->dt = dt;
|
|
267
|
+
|
|
268
|
+
return 1;
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd)
|
|
272
|
+
{
|
|
273
|
+
/* Validate that we haven't written past the end of this stream's output segment. */
|
|
274
|
+
if (args->op[stream] > segmentEnd)
|
|
275
|
+
return ERROR(corruption_detected);
|
|
276
|
+
/* Validate that we haven't read beyond iend[].
|
|
277
|
+
* Note that ip[] may be < iend[] because the MSB is
|
|
278
|
+
* the next bit to read, and we may have consumed 100%
|
|
279
|
+
* of the stream, so down to iend[i] - 8 is valid.
|
|
280
|
+
*/
|
|
281
|
+
if (args->ip[stream] < args->iend[stream] - 8)
|
|
282
|
+
return ERROR(corruption_detected);
|
|
283
|
+
|
|
284
|
+
/* Construct the BIT_DStream_t. */
|
|
285
|
+
assert(sizeof(size_t) == 8);
|
|
286
|
+
bit->bitContainer = MEM_readLEST(args->ip[stream]);
|
|
287
|
+
bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
|
|
288
|
+
bit->start = (const char*)args->iend[0];
|
|
289
|
+
bit->limitPtr = bit->start + sizeof(size_t);
|
|
290
|
+
bit->ptr = (const char*)args->ip[stream];
|
|
291
|
+
|
|
292
|
+
return 0;
|
|
293
|
+
}
|
|
294
|
+
|
|
110
295
|
|
|
111
296
|
#ifndef HUF_FORCE_DECOMPRESS_X2
|
|
112
297
|
|
|
113
298
|
/*-***************************/
|
|
114
299
|
/* single-symbol decoding */
|
|
115
300
|
/*-***************************/
|
|
116
|
-
typedef struct { BYTE
|
|
301
|
+
typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */
|
|
117
302
|
|
|
118
|
-
|
|
303
|
+
/**
|
|
304
|
+
* Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
|
|
305
|
+
* a time.
|
|
306
|
+
*/
|
|
307
|
+
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
|
|
308
|
+
U64 D4;
|
|
309
|
+
if (MEM_isLittleEndian()) {
|
|
310
|
+
D4 = (U64)((symbol << 8) + nbBits);
|
|
311
|
+
} else {
|
|
312
|
+
D4 = (U64)(symbol + (nbBits << 8));
|
|
313
|
+
}
|
|
314
|
+
assert(D4 < (1U << 16));
|
|
315
|
+
D4 *= 0x0001000100010001ULL;
|
|
316
|
+
return D4;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
/**
|
|
320
|
+
* Increases the tableLog to targetTableLog and rescales the stats.
|
|
321
|
+
* If tableLog > targetTableLog this is a no-op.
|
|
322
|
+
* @returns New tableLog
|
|
323
|
+
*/
|
|
324
|
+
static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
|
|
325
|
+
{
|
|
326
|
+
if (tableLog > targetTableLog)
|
|
327
|
+
return tableLog;
|
|
328
|
+
if (tableLog < targetTableLog) {
|
|
329
|
+
U32 const scale = targetTableLog - tableLog;
|
|
330
|
+
U32 s;
|
|
331
|
+
/* Increase the weight for all non-zero probability symbols by scale. */
|
|
332
|
+
for (s = 0; s < nbSymbols; ++s) {
|
|
333
|
+
huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
|
|
334
|
+
}
|
|
335
|
+
/* Update rankVal to reflect the new weights.
|
|
336
|
+
* All weights except 0 get moved to weight + scale.
|
|
337
|
+
* Weights [1, scale] are empty.
|
|
338
|
+
*/
|
|
339
|
+
for (s = targetTableLog; s > scale; --s) {
|
|
340
|
+
rankVal[s] = rankVal[s - scale];
|
|
341
|
+
}
|
|
342
|
+
for (s = scale; s > 0; --s) {
|
|
343
|
+
rankVal[s] = 0;
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
return targetTableLog;
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
typedef struct {
|
|
350
|
+
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
|
351
|
+
U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
|
352
|
+
U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
|
353
|
+
BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
|
|
354
|
+
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
|
|
355
|
+
} HUF_ReadDTableX1_Workspace;
|
|
356
|
+
|
|
357
|
+
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags)
|
|
119
358
|
{
|
|
120
359
|
U32 tableLog = 0;
|
|
121
360
|
U32 nbSymbols = 0;
|
|
122
361
|
size_t iSize;
|
|
123
362
|
void* const dtPtr = DTable + 1;
|
|
124
363
|
HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
|
|
364
|
+
HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
|
|
125
365
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
size_t spaceUsed32 = 0;
|
|
129
|
-
|
|
130
|
-
rankVal = (U32 *)workSpace + spaceUsed32;
|
|
131
|
-
spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
|
|
132
|
-
huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
|
133
|
-
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
|
134
|
-
|
|
135
|
-
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
|
366
|
+
DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
|
|
367
|
+
if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
|
|
136
368
|
|
|
137
369
|
DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
|
|
138
|
-
/*
|
|
370
|
+
/* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
|
|
139
371
|
|
|
140
|
-
iSize =
|
|
372
|
+
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags);
|
|
141
373
|
if (HUF_isError(iSize)) return iSize;
|
|
142
374
|
|
|
375
|
+
|
|
143
376
|
/* Table header */
|
|
144
377
|
{ DTableDesc dtd = HUF_getDTableDesc(DTable);
|
|
378
|
+
U32 const maxTableLog = dtd.maxTableLog + 1;
|
|
379
|
+
U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
|
|
380
|
+
tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
|
|
145
381
|
if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
|
|
146
382
|
dtd.tableType = 0;
|
|
147
383
|
dtd.tableLog = (BYTE)tableLog;
|
|
148
|
-
|
|
384
|
+
ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
|
|
149
385
|
}
|
|
150
386
|
|
|
151
|
-
/*
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
}
|
|
184
|
-
return iSize;
|
|
185
|
-
}
|
|
387
|
+
/* Compute symbols and rankStart given rankVal:
|
|
388
|
+
*
|
|
389
|
+
* rankVal already contains the number of values of each weight.
|
|
390
|
+
*
|
|
391
|
+
* symbols contains the symbols ordered by weight. First are the rankVal[0]
|
|
392
|
+
* weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
|
|
393
|
+
* symbols[0] is filled (but unused) to avoid a branch.
|
|
394
|
+
*
|
|
395
|
+
* rankStart contains the offset where each rank belongs in the DTable.
|
|
396
|
+
* rankStart[0] is not filled because there are no entries in the table for
|
|
397
|
+
* weight 0.
|
|
398
|
+
*/
|
|
399
|
+
{ int n;
|
|
400
|
+
U32 nextRankStart = 0;
|
|
401
|
+
int const unroll = 4;
|
|
402
|
+
int const nLimit = (int)nbSymbols - unroll + 1;
|
|
403
|
+
for (n=0; n<(int)tableLog+1; n++) {
|
|
404
|
+
U32 const curr = nextRankStart;
|
|
405
|
+
nextRankStart += wksp->rankVal[n];
|
|
406
|
+
wksp->rankStart[n] = curr;
|
|
407
|
+
}
|
|
408
|
+
for (n=0; n < nLimit; n += unroll) {
|
|
409
|
+
int u;
|
|
410
|
+
for (u=0; u < unroll; ++u) {
|
|
411
|
+
size_t const w = wksp->huffWeight[n+u];
|
|
412
|
+
wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
for (; n < (int)nbSymbols; ++n) {
|
|
416
|
+
size_t const w = wksp->huffWeight[n];
|
|
417
|
+
wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
|
|
418
|
+
}
|
|
419
|
+
}
|
|
186
420
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
421
|
+
/* fill DTable
|
|
422
|
+
* We fill all entries of each weight in order.
|
|
423
|
+
* That way length is a constant for each iteration of the outer loop.
|
|
424
|
+
* We can switch based on the length to a different inner loop which is
|
|
425
|
+
* optimized for that particular case.
|
|
426
|
+
*/
|
|
427
|
+
{ U32 w;
|
|
428
|
+
int symbol = wksp->rankVal[0];
|
|
429
|
+
int rankStart = 0;
|
|
430
|
+
for (w=1; w<tableLog+1; ++w) {
|
|
431
|
+
int const symbolCount = wksp->rankVal[w];
|
|
432
|
+
int const length = (1 << w) >> 1;
|
|
433
|
+
int uStart = rankStart;
|
|
434
|
+
BYTE const nbBits = (BYTE)(tableLog + 1 - w);
|
|
435
|
+
int s;
|
|
436
|
+
int u;
|
|
437
|
+
switch (length) {
|
|
438
|
+
case 1:
|
|
439
|
+
for (s=0; s<symbolCount; ++s) {
|
|
440
|
+
HUF_DEltX1 D;
|
|
441
|
+
D.byte = wksp->symbols[symbol + s];
|
|
442
|
+
D.nbBits = nbBits;
|
|
443
|
+
dt[uStart] = D;
|
|
444
|
+
uStart += 1;
|
|
445
|
+
}
|
|
446
|
+
break;
|
|
447
|
+
case 2:
|
|
448
|
+
for (s=0; s<symbolCount; ++s) {
|
|
449
|
+
HUF_DEltX1 D;
|
|
450
|
+
D.byte = wksp->symbols[symbol + s];
|
|
451
|
+
D.nbBits = nbBits;
|
|
452
|
+
dt[uStart+0] = D;
|
|
453
|
+
dt[uStart+1] = D;
|
|
454
|
+
uStart += 2;
|
|
455
|
+
}
|
|
456
|
+
break;
|
|
457
|
+
case 4:
|
|
458
|
+
for (s=0; s<symbolCount; ++s) {
|
|
459
|
+
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
|
460
|
+
MEM_write64(dt + uStart, D4);
|
|
461
|
+
uStart += 4;
|
|
462
|
+
}
|
|
463
|
+
break;
|
|
464
|
+
case 8:
|
|
465
|
+
for (s=0; s<symbolCount; ++s) {
|
|
466
|
+
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
|
467
|
+
MEM_write64(dt + uStart, D4);
|
|
468
|
+
MEM_write64(dt + uStart + 4, D4);
|
|
469
|
+
uStart += 8;
|
|
470
|
+
}
|
|
471
|
+
break;
|
|
472
|
+
default:
|
|
473
|
+
for (s=0; s<symbolCount; ++s) {
|
|
474
|
+
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
|
475
|
+
for (u=0; u < length; u += 16) {
|
|
476
|
+
MEM_write64(dt + uStart + u + 0, D4);
|
|
477
|
+
MEM_write64(dt + uStart + u + 4, D4);
|
|
478
|
+
MEM_write64(dt + uStart + u + 8, D4);
|
|
479
|
+
MEM_write64(dt + uStart + u + 12, D4);
|
|
480
|
+
}
|
|
481
|
+
assert(u == length);
|
|
482
|
+
uStart += length;
|
|
483
|
+
}
|
|
484
|
+
break;
|
|
485
|
+
}
|
|
486
|
+
symbol += symbolCount;
|
|
487
|
+
rankStart += symbolCount * length;
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
return iSize;
|
|
192
491
|
}
|
|
193
492
|
|
|
194
493
|
FORCE_INLINE_TEMPLATE BYTE
|
|
@@ -217,11 +516,15 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
|
|
|
217
516
|
BYTE* const pStart = p;
|
|
218
517
|
|
|
219
518
|
/* up to 4 symbols at a time */
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
519
|
+
if ((pEnd - p) > 3) {
|
|
520
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
|
|
521
|
+
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
|
|
522
|
+
HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
|
|
523
|
+
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
|
|
524
|
+
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
|
525
|
+
}
|
|
526
|
+
} else {
|
|
527
|
+
BIT_reloadDStream(bitDPtr);
|
|
225
528
|
}
|
|
226
529
|
|
|
227
530
|
/* [0-3] symbols remaining */
|
|
@@ -233,7 +536,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
|
|
|
233
536
|
while (p < pEnd)
|
|
234
537
|
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
|
235
538
|
|
|
236
|
-
return pEnd-pStart;
|
|
539
|
+
return (size_t)(pEnd-pStart);
|
|
237
540
|
}
|
|
238
541
|
|
|
239
542
|
FORCE_INLINE_TEMPLATE size_t
|
|
@@ -259,6 +562,10 @@ HUF_decompress1X1_usingDTable_internal_body(
|
|
|
259
562
|
return dstSize;
|
|
260
563
|
}
|
|
261
564
|
|
|
565
|
+
/* HUF_decompress4X1_usingDTable_internal_body():
|
|
566
|
+
* Conditions :
|
|
567
|
+
* @dstSize >= 6
|
|
568
|
+
*/
|
|
262
569
|
FORCE_INLINE_TEMPLATE size_t
|
|
263
570
|
HUF_decompress4X1_usingDTable_internal_body(
|
|
264
571
|
void* dst, size_t dstSize,
|
|
@@ -301,33 +608,37 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
301
608
|
U32 endSignal = 1;
|
|
302
609
|
|
|
303
610
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
|
611
|
+
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
|
612
|
+
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
|
|
304
613
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
|
305
614
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
|
306
615
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
|
307
616
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
|
308
617
|
|
|
309
618
|
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
619
|
+
if ((size_t)(oend - op4) >= sizeof(size_t)) {
|
|
620
|
+
for ( ; (endSignal) & (op4 < olimit) ; ) {
|
|
621
|
+
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
|
622
|
+
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
|
623
|
+
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
|
624
|
+
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
|
|
625
|
+
HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
|
|
626
|
+
HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
|
|
627
|
+
HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
|
|
628
|
+
HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
|
|
629
|
+
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
|
630
|
+
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
|
631
|
+
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
|
632
|
+
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
|
|
633
|
+
HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
|
|
634
|
+
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
|
|
635
|
+
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
|
|
636
|
+
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
|
|
637
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
|
638
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
|
639
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
|
640
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
|
641
|
+
}
|
|
331
642
|
}
|
|
332
643
|
|
|
333
644
|
/* check corruption */
|
|
@@ -353,99 +664,230 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
353
664
|
}
|
|
354
665
|
}
|
|
355
666
|
|
|
667
|
+
#if HUF_NEED_BMI2_FUNCTION
|
|
668
|
+
static BMI2_TARGET_ATTRIBUTE
|
|
669
|
+
size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
|
|
670
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
671
|
+
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
672
|
+
}
|
|
673
|
+
#endif
|
|
356
674
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
675
|
+
static
|
|
676
|
+
size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
|
|
677
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
678
|
+
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
679
|
+
}
|
|
361
680
|
|
|
362
|
-
|
|
363
|
-
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
|
|
681
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
364
682
|
|
|
683
|
+
HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
|
|
365
684
|
|
|
685
|
+
#endif
|
|
366
686
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
const void* cSrc, size_t cSrcSize,
|
|
370
|
-
const HUF_DTable* DTable)
|
|
687
|
+
static HUF_FAST_BMI2_ATTRS
|
|
688
|
+
void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
|
|
371
689
|
{
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
690
|
+
U64 bits[4];
|
|
691
|
+
BYTE const* ip[4];
|
|
692
|
+
BYTE* op[4];
|
|
693
|
+
U16 const* const dtable = (U16 const*)args->dt;
|
|
694
|
+
BYTE* const oend = args->oend;
|
|
695
|
+
BYTE const* const ilimit = args->ilimit;
|
|
696
|
+
|
|
697
|
+
/* Copy the arguments to local variables */
|
|
698
|
+
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
|
|
699
|
+
ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
|
|
700
|
+
ZSTD_memcpy(&op, &args->op, sizeof(op));
|
|
701
|
+
|
|
702
|
+
assert(MEM_isLittleEndian());
|
|
703
|
+
assert(!MEM_32bits());
|
|
704
|
+
|
|
705
|
+
for (;;) {
|
|
706
|
+
BYTE* olimit;
|
|
707
|
+
int stream;
|
|
708
|
+
int symbol;
|
|
709
|
+
|
|
710
|
+
/* Assert loop preconditions */
|
|
711
|
+
#ifndef NDEBUG
|
|
712
|
+
for (stream = 0; stream < 4; ++stream) {
|
|
713
|
+
assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
|
|
714
|
+
assert(ip[stream] >= ilimit);
|
|
715
|
+
}
|
|
716
|
+
#endif
|
|
717
|
+
/* Compute olimit */
|
|
718
|
+
{
|
|
719
|
+
/* Each iteration produces 5 output symbols per stream */
|
|
720
|
+
size_t const oiters = (size_t)(oend - op[3]) / 5;
|
|
721
|
+
/* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
|
|
722
|
+
* per stream.
|
|
723
|
+
*/
|
|
724
|
+
size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
|
|
725
|
+
/* We can safely run iters iterations before running bounds checks */
|
|
726
|
+
size_t const iters = MIN(oiters, iiters);
|
|
727
|
+
size_t const symbols = iters * 5;
|
|
728
|
+
|
|
729
|
+
/* We can simply check that op[3] < olimit, instead of checking all
|
|
730
|
+
* of our bounds, since we can't hit the other bounds until we've run
|
|
731
|
+
* iters iterations, which only happens when op[3] == olimit.
|
|
732
|
+
*/
|
|
733
|
+
olimit = op[3] + symbols;
|
|
734
|
+
|
|
735
|
+
/* Exit fast decoding loop once we get close to the end. */
|
|
736
|
+
if (op[3] + 20 > olimit)
|
|
737
|
+
break;
|
|
738
|
+
|
|
739
|
+
/* Exit the decoding loop if any input pointer has crossed the
|
|
740
|
+
* previous one. This indicates corruption, and a precondition
|
|
741
|
+
* to our loop is that ip[i] >= ip[0].
|
|
742
|
+
*/
|
|
743
|
+
for (stream = 1; stream < 4; ++stream) {
|
|
744
|
+
if (ip[stream] < ip[stream - 1])
|
|
745
|
+
goto _out;
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
#ifndef NDEBUG
|
|
750
|
+
for (stream = 1; stream < 4; ++stream) {
|
|
751
|
+
assert(ip[stream] >= ip[stream - 1]);
|
|
752
|
+
}
|
|
753
|
+
#endif
|
|
754
|
+
|
|
755
|
+
do {
|
|
756
|
+
/* Decode 5 symbols in each of the 4 streams */
|
|
757
|
+
for (symbol = 0; symbol < 5; ++symbol) {
|
|
758
|
+
for (stream = 0; stream < 4; ++stream) {
|
|
759
|
+
int const index = (int)(bits[stream] >> 53);
|
|
760
|
+
int const entry = (int)dtable[index];
|
|
761
|
+
bits[stream] <<= (entry & 63);
|
|
762
|
+
op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
/* Reload the bitstreams */
|
|
766
|
+
for (stream = 0; stream < 4; ++stream) {
|
|
767
|
+
int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
|
|
768
|
+
int const nbBits = ctz & 7;
|
|
769
|
+
int const nbBytes = ctz >> 3;
|
|
770
|
+
op[stream] += 5;
|
|
771
|
+
ip[stream] -= nbBytes;
|
|
772
|
+
bits[stream] = MEM_read64(ip[stream]) | 1;
|
|
773
|
+
bits[stream] <<= nbBits;
|
|
774
|
+
}
|
|
775
|
+
} while (op[3] < olimit);
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
_out:
|
|
779
|
+
|
|
780
|
+
/* Save the final values of each of the state variables back to args. */
|
|
781
|
+
ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
|
|
782
|
+
ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
|
|
783
|
+
ZSTD_memcpy(&args->op, &op, sizeof(op));
|
|
375
784
|
}
|
|
376
785
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
786
|
+
/**
|
|
787
|
+
* @returns @p dstSize on success (>= 6)
|
|
788
|
+
* 0 if the fallback implementation should be used
|
|
789
|
+
* An error if an error occurred
|
|
790
|
+
*/
|
|
791
|
+
static HUF_FAST_BMI2_ATTRS
|
|
792
|
+
size_t
|
|
793
|
+
HUF_decompress4X1_usingDTable_internal_fast(
|
|
794
|
+
void* dst, size_t dstSize,
|
|
795
|
+
const void* cSrc, size_t cSrcSize,
|
|
796
|
+
const HUF_DTable* DTable,
|
|
797
|
+
HUF_DecompressFastLoopFn loopFn)
|
|
380
798
|
{
|
|
381
|
-
const
|
|
799
|
+
void const* dt = DTable + 1;
|
|
800
|
+
const BYTE* const iend = (const BYTE*)cSrc + 6;
|
|
801
|
+
BYTE* const oend = (BYTE*)dst + dstSize;
|
|
802
|
+
HUF_DecompressFastArgs args;
|
|
803
|
+
{ size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
|
804
|
+
FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
|
|
805
|
+
if (ret == 0)
|
|
806
|
+
return 0;
|
|
807
|
+
}
|
|
382
808
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
ip
|
|
809
|
+
assert(args.ip[0] >= args.ilimit);
|
|
810
|
+
loopFn(&args);
|
|
811
|
+
|
|
812
|
+
/* Our loop guarantees that ip[] >= ilimit and that we haven't
|
|
813
|
+
* overwritten any op[].
|
|
814
|
+
*/
|
|
815
|
+
assert(args.ip[0] >= iend);
|
|
816
|
+
assert(args.ip[1] >= iend);
|
|
817
|
+
assert(args.ip[2] >= iend);
|
|
818
|
+
assert(args.ip[3] >= iend);
|
|
819
|
+
assert(args.op[3] <= oend);
|
|
820
|
+
(void)iend;
|
|
821
|
+
|
|
822
|
+
/* finish bit streams one by one. */
|
|
823
|
+
{ size_t const segmentSize = (dstSize+3) / 4;
|
|
824
|
+
BYTE* segmentEnd = (BYTE*)dst;
|
|
825
|
+
int i;
|
|
826
|
+
for (i = 0; i < 4; ++i) {
|
|
827
|
+
BIT_DStream_t bit;
|
|
828
|
+
if (segmentSize <= (size_t)(oend - segmentEnd))
|
|
829
|
+
segmentEnd += segmentSize;
|
|
830
|
+
else
|
|
831
|
+
segmentEnd = oend;
|
|
832
|
+
FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
|
|
833
|
+
/* Decompress and validate that we've produced exactly the expected length. */
|
|
834
|
+
args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
|
|
835
|
+
if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
|
|
836
|
+
}
|
|
837
|
+
}
|
|
387
838
|
|
|
388
|
-
|
|
839
|
+
/* decoded size */
|
|
840
|
+
assert(dstSize != 0);
|
|
841
|
+
return dstSize;
|
|
389
842
|
}
|
|
390
843
|
|
|
844
|
+
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
|
|
391
845
|
|
|
392
|
-
size_t
|
|
393
|
-
|
|
846
|
+
static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
|
|
847
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags)
|
|
394
848
|
{
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
workSpace, sizeof(workSpace));
|
|
398
|
-
}
|
|
849
|
+
HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default;
|
|
850
|
+
HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop;
|
|
399
851
|
|
|
400
|
-
|
|
401
|
-
{
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
852
|
+
#if DYNAMIC_BMI2
|
|
853
|
+
if (flags & HUF_flags_bmi2) {
|
|
854
|
+
fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
|
|
855
|
+
# if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
856
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
|
857
|
+
loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
|
|
858
|
+
}
|
|
859
|
+
# endif
|
|
860
|
+
} else {
|
|
861
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
862
|
+
}
|
|
863
|
+
#endif
|
|
405
864
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
if (
|
|
413
|
-
|
|
865
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
|
|
866
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
|
867
|
+
loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
|
|
868
|
+
}
|
|
869
|
+
#endif
|
|
870
|
+
|
|
871
|
+
if (!(flags & HUF_flags_disableFast)) {
|
|
872
|
+
size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
|
|
873
|
+
if (ret != 0)
|
|
874
|
+
return ret;
|
|
875
|
+
}
|
|
876
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
414
877
|
}
|
|
415
878
|
|
|
416
|
-
static size_t
|
|
879
|
+
static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
417
880
|
const void* cSrc, size_t cSrcSize,
|
|
418
|
-
void* workSpace, size_t wkspSize, int
|
|
881
|
+
void* workSpace, size_t wkspSize, int flags)
|
|
419
882
|
{
|
|
420
883
|
const BYTE* ip = (const BYTE*) cSrc;
|
|
421
884
|
|
|
422
|
-
size_t const hSize = HUF_readDTableX1_wksp
|
|
423
|
-
workSpace, wkspSize);
|
|
885
|
+
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
|
424
886
|
if (HUF_isError(hSize)) return hSize;
|
|
425
887
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
|
426
888
|
ip += hSize; cSrcSize -= hSize;
|
|
427
889
|
|
|
428
|
-
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,
|
|
429
|
-
}
|
|
430
|
-
|
|
431
|
-
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
432
|
-
const void* cSrc, size_t cSrcSize,
|
|
433
|
-
void* workSpace, size_t wkspSize)
|
|
434
|
-
{
|
|
435
|
-
return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
|
|
436
|
-
}
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
440
|
-
{
|
|
441
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
442
|
-
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
443
|
-
workSpace, sizeof(workSpace));
|
|
444
|
-
}
|
|
445
|
-
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
446
|
-
{
|
|
447
|
-
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
|
448
|
-
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
890
|
+
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
|
449
891
|
}
|
|
450
892
|
|
|
451
893
|
#endif /* HUF_FORCE_DECOMPRESS_X2 */
|
|
@@ -458,209 +900,322 @@ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
|
458
900
|
/* *************************/
|
|
459
901
|
|
|
460
902
|
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
|
|
461
|
-
typedef struct { BYTE symbol;
|
|
903
|
+
typedef struct { BYTE symbol; } sortedSymbol_t;
|
|
462
904
|
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
|
|
463
905
|
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
|
|
464
906
|
|
|
907
|
+
/**
|
|
908
|
+
* Constructs a HUF_DEltX2 in a U32.
|
|
909
|
+
*/
|
|
910
|
+
static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
|
|
911
|
+
{
|
|
912
|
+
U32 seq;
|
|
913
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
|
|
914
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
|
|
915
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
|
|
916
|
+
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
|
|
917
|
+
if (MEM_isLittleEndian()) {
|
|
918
|
+
seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
|
|
919
|
+
return seq + (nbBits << 16) + ((U32)level << 24);
|
|
920
|
+
} else {
|
|
921
|
+
seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
|
|
922
|
+
return (seq << 16) + (nbBits << 8) + (U32)level;
|
|
923
|
+
}
|
|
924
|
+
}
|
|
465
925
|
|
|
466
|
-
|
|
467
|
-
*
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
|
|
471
|
-
U32 nbBitsBaseline, U16 baseSeq)
|
|
926
|
+
/**
|
|
927
|
+
* Constructs a HUF_DEltX2.
|
|
928
|
+
*/
|
|
929
|
+
static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
|
|
472
930
|
{
|
|
473
931
|
HUF_DEltX2 DElt;
|
|
474
|
-
U32
|
|
932
|
+
U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
|
|
933
|
+
DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
|
|
934
|
+
ZSTD_memcpy(&DElt, &val, sizeof(val));
|
|
935
|
+
return DElt;
|
|
936
|
+
}
|
|
937
|
+
|
|
938
|
+
/**
|
|
939
|
+
* Constructs 2 HUF_DEltX2s and packs them into a U64.
|
|
940
|
+
*/
|
|
941
|
+
static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
|
|
942
|
+
{
|
|
943
|
+
U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
|
|
944
|
+
return (U64)DElt + ((U64)DElt << 32);
|
|
945
|
+
}
|
|
475
946
|
|
|
476
|
-
|
|
477
|
-
|
|
947
|
+
/**
|
|
948
|
+
* Fills the DTable rank with all the symbols from [begin, end) that are each
|
|
949
|
+
* nbBits long.
|
|
950
|
+
*
|
|
951
|
+
* @param DTableRank The start of the rank in the DTable.
|
|
952
|
+
* @param begin The first symbol to fill (inclusive).
|
|
953
|
+
* @param end The last symbol to fill (exclusive).
|
|
954
|
+
* @param nbBits Each symbol is nbBits long.
|
|
955
|
+
* @param tableLog The table log.
|
|
956
|
+
* @param baseSeq If level == 1 { 0 } else { the first level symbol }
|
|
957
|
+
* @param level The level in the table. Must be 1 or 2.
|
|
958
|
+
*/
|
|
959
|
+
static void HUF_fillDTableX2ForWeight(
|
|
960
|
+
HUF_DEltX2* DTableRank,
|
|
961
|
+
sortedSymbol_t const* begin, sortedSymbol_t const* end,
|
|
962
|
+
U32 nbBits, U32 tableLog,
|
|
963
|
+
U16 baseSeq, int const level)
|
|
964
|
+
{
|
|
965
|
+
U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
|
|
966
|
+
const sortedSymbol_t* ptr;
|
|
967
|
+
assert(level >= 1 && level <= 2);
|
|
968
|
+
switch (length) {
|
|
969
|
+
case 1:
|
|
970
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
971
|
+
HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
|
|
972
|
+
*DTableRank++ = DElt;
|
|
973
|
+
}
|
|
974
|
+
break;
|
|
975
|
+
case 2:
|
|
976
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
977
|
+
HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
|
|
978
|
+
DTableRank[0] = DElt;
|
|
979
|
+
DTableRank[1] = DElt;
|
|
980
|
+
DTableRank += 2;
|
|
981
|
+
}
|
|
982
|
+
break;
|
|
983
|
+
case 4:
|
|
984
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
985
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
|
986
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
|
987
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
|
988
|
+
DTableRank += 4;
|
|
989
|
+
}
|
|
990
|
+
break;
|
|
991
|
+
case 8:
|
|
992
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
993
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
|
994
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
|
995
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
|
996
|
+
ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
|
|
997
|
+
ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
|
|
998
|
+
DTableRank += 8;
|
|
999
|
+
}
|
|
1000
|
+
break;
|
|
1001
|
+
default:
|
|
1002
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
|
1003
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
|
1004
|
+
HUF_DEltX2* const DTableRankEnd = DTableRank + length;
|
|
1005
|
+
for (; DTableRank != DTableRankEnd; DTableRank += 8) {
|
|
1006
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
|
1007
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
|
1008
|
+
ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
|
|
1009
|
+
ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
|
|
1010
|
+
}
|
|
1011
|
+
}
|
|
1012
|
+
break;
|
|
1013
|
+
}
|
|
1014
|
+
}
|
|
478
1015
|
|
|
479
|
-
|
|
1016
|
+
/* HUF_fillDTableX2Level2() :
|
|
1017
|
+
* `rankVal` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
|
|
1018
|
+
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
|
|
1019
|
+
const U32* rankVal, const int minWeight, const int maxWeight1,
|
|
1020
|
+
const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
|
|
1021
|
+
U32 nbBitsBaseline, U16 baseSeq)
|
|
1022
|
+
{
|
|
1023
|
+
/* Fill skipped values (all positions up to rankVal[minWeight]).
|
|
1024
|
+
* These positions only get a single symbol because the combined weight
|
|
1025
|
+
* is too large.
|
|
1026
|
+
*/
|
|
480
1027
|
if (minWeight>1) {
|
|
481
|
-
U32
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
1028
|
+
U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
|
|
1029
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
|
|
1030
|
+
int const skipSize = rankVal[minWeight];
|
|
1031
|
+
assert(length > 1);
|
|
1032
|
+
assert((U32)skipSize < length);
|
|
1033
|
+
switch (length) {
|
|
1034
|
+
case 2:
|
|
1035
|
+
assert(skipSize == 1);
|
|
1036
|
+
ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
|
|
1037
|
+
break;
|
|
1038
|
+
case 4:
|
|
1039
|
+
assert(skipSize <= 4);
|
|
1040
|
+
ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
|
|
1041
|
+
ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
|
|
1042
|
+
break;
|
|
1043
|
+
default:
|
|
1044
|
+
{
|
|
1045
|
+
int i;
|
|
1046
|
+
for (i = 0; i < skipSize; i += 8) {
|
|
1047
|
+
ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
|
|
1048
|
+
ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
|
|
1049
|
+
ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
|
|
1050
|
+
ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
|
|
1051
|
+
}
|
|
1052
|
+
}
|
|
1053
|
+
}
|
|
487
1054
|
}
|
|
488
1055
|
|
|
489
|
-
/*
|
|
490
|
-
{
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
const
|
|
494
|
-
const
|
|
495
|
-
const
|
|
496
|
-
U32
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
rankVal[weight] += length;
|
|
505
|
-
} }
|
|
1056
|
+
/* Fill each of the second level symbols by weight. */
|
|
1057
|
+
{
|
|
1058
|
+
int w;
|
|
1059
|
+
for (w = minWeight; w < maxWeight1; ++w) {
|
|
1060
|
+
int const begin = rankStart[w];
|
|
1061
|
+
int const end = rankStart[w+1];
|
|
1062
|
+
U32 const nbBits = nbBitsBaseline - w;
|
|
1063
|
+
U32 const totalBits = nbBits + consumedBits;
|
|
1064
|
+
HUF_fillDTableX2ForWeight(
|
|
1065
|
+
DTable + rankVal[w],
|
|
1066
|
+
sortedSymbols + begin, sortedSymbols + end,
|
|
1067
|
+
totalBits, targetLog,
|
|
1068
|
+
baseSeq, /* level */ 2);
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
506
1071
|
}
|
|
507
1072
|
|
|
508
|
-
|
|
509
1073
|
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
|
|
510
|
-
const sortedSymbol_t* sortedList,
|
|
511
|
-
const U32* rankStart,
|
|
1074
|
+
const sortedSymbol_t* sortedList,
|
|
1075
|
+
const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight,
|
|
512
1076
|
const U32 nbBitsBaseline)
|
|
513
1077
|
{
|
|
514
|
-
U32 rankVal
|
|
1078
|
+
U32* const rankVal = rankValOrigin[0];
|
|
515
1079
|
const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
|
|
516
1080
|
const U32 minBits = nbBitsBaseline - maxWeight;
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
const
|
|
524
|
-
const
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
U32 sortedRank;
|
|
1081
|
+
int w;
|
|
1082
|
+
int const wEnd = (int)maxWeight + 1;
|
|
1083
|
+
|
|
1084
|
+
/* Fill DTable in order of weight. */
|
|
1085
|
+
for (w = 1; w < wEnd; ++w) {
|
|
1086
|
+
int const begin = (int)rankStart[w];
|
|
1087
|
+
int const end = (int)rankStart[w+1];
|
|
1088
|
+
U32 const nbBits = nbBitsBaseline - w;
|
|
1089
|
+
|
|
1090
|
+
if (targetLog-nbBits >= minBits) {
|
|
1091
|
+
/* Enough room for a second symbol. */
|
|
1092
|
+
int start = rankVal[w];
|
|
1093
|
+
U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
|
|
531
1094
|
int minWeight = nbBits + scaleLog;
|
|
1095
|
+
int s;
|
|
532
1096
|
if (minWeight < 1) minWeight = 1;
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
1097
|
+
/* Fill the DTable for every symbol of weight w.
|
|
1098
|
+
* These symbols get at least 1 second symbol.
|
|
1099
|
+
*/
|
|
1100
|
+
for (s = begin; s != end; ++s) {
|
|
1101
|
+
HUF_fillDTableX2Level2(
|
|
1102
|
+
DTable + start, targetLog, nbBits,
|
|
1103
|
+
rankValOrigin[nbBits], minWeight, wEnd,
|
|
1104
|
+
sortedList, rankStart,
|
|
1105
|
+
nbBitsBaseline, sortedList[s].symbol);
|
|
1106
|
+
start += length;
|
|
1107
|
+
}
|
|
538
1108
|
} else {
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
} }
|
|
547
|
-
rankVal[weight] += length;
|
|
1109
|
+
/* Only a single symbol. */
|
|
1110
|
+
HUF_fillDTableX2ForWeight(
|
|
1111
|
+
DTable + rankVal[w],
|
|
1112
|
+
sortedList + begin, sortedList + end,
|
|
1113
|
+
nbBits, targetLog,
|
|
1114
|
+
/* baseSeq */ 0, /* level */ 1);
|
|
1115
|
+
}
|
|
548
1116
|
}
|
|
549
1117
|
}
|
|
550
1118
|
|
|
1119
|
+
typedef struct {
|
|
1120
|
+
rankValCol_t rankVal[HUF_TABLELOG_MAX];
|
|
1121
|
+
U32 rankStats[HUF_TABLELOG_MAX + 1];
|
|
1122
|
+
U32 rankStart0[HUF_TABLELOG_MAX + 3];
|
|
1123
|
+
sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
|
|
1124
|
+
BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
|
|
1125
|
+
U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
|
1126
|
+
} HUF_ReadDTableX2_Workspace;
|
|
1127
|
+
|
|
551
1128
|
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
|
|
552
1129
|
const void* src, size_t srcSize,
|
|
553
|
-
void* workSpace, size_t wkspSize)
|
|
1130
|
+
void* workSpace, size_t wkspSize, int flags)
|
|
554
1131
|
{
|
|
555
|
-
U32 tableLog, maxW,
|
|
1132
|
+
U32 tableLog, maxW, nbSymbols;
|
|
556
1133
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
|
557
|
-
U32
|
|
1134
|
+
U32 maxTableLog = dtd.maxTableLog;
|
|
558
1135
|
size_t iSize;
|
|
559
1136
|
void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
|
|
560
1137
|
HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
|
|
561
1138
|
U32 *rankStart;
|
|
562
1139
|
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
|
|
571
|
-
spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
|
|
572
|
-
rankStats = (U32 *)workSpace + spaceUsed32;
|
|
573
|
-
spaceUsed32 += HUF_TABLELOG_MAX + 1;
|
|
574
|
-
rankStart0 = (U32 *)workSpace + spaceUsed32;
|
|
575
|
-
spaceUsed32 += HUF_TABLELOG_MAX + 2;
|
|
576
|
-
sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
|
|
577
|
-
spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
|
|
578
|
-
weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
|
579
|
-
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
|
580
|
-
|
|
581
|
-
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
|
582
|
-
|
|
583
|
-
rankStart = rankStart0 + 1;
|
|
584
|
-
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
|
|
1140
|
+
HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
|
|
1141
|
+
|
|
1142
|
+
if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
|
|
1143
|
+
|
|
1144
|
+
rankStart = wksp->rankStart0 + 1;
|
|
1145
|
+
ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
|
|
1146
|
+
ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
|
|
585
1147
|
|
|
586
1148
|
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
|
|
587
1149
|
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
|
588
|
-
/*
|
|
1150
|
+
/* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
|
|
589
1151
|
|
|
590
|
-
iSize =
|
|
1152
|
+
iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags);
|
|
591
1153
|
if (HUF_isError(iSize)) return iSize;
|
|
592
1154
|
|
|
593
1155
|
/* check result */
|
|
594
1156
|
if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
|
|
1157
|
+
if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
|
|
595
1158
|
|
|
596
1159
|
/* find maxWeight */
|
|
597
|
-
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
|
|
1160
|
+
for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
|
|
598
1161
|
|
|
599
1162
|
/* Get start index of each weight */
|
|
600
1163
|
{ U32 w, nextRankStart = 0;
|
|
601
1164
|
for (w=1; w<maxW+1; w++) {
|
|
602
|
-
U32
|
|
603
|
-
nextRankStart += rankStats[w];
|
|
604
|
-
rankStart[w] =
|
|
1165
|
+
U32 curr = nextRankStart;
|
|
1166
|
+
nextRankStart += wksp->rankStats[w];
|
|
1167
|
+
rankStart[w] = curr;
|
|
605
1168
|
}
|
|
606
1169
|
rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
|
|
607
|
-
|
|
1170
|
+
rankStart[maxW+1] = nextRankStart;
|
|
608
1171
|
}
|
|
609
1172
|
|
|
610
1173
|
/* sort symbols by weight */
|
|
611
1174
|
{ U32 s;
|
|
612
1175
|
for (s=0; s<nbSymbols; s++) {
|
|
613
|
-
U32 const w = weightList[s];
|
|
1176
|
+
U32 const w = wksp->weightList[s];
|
|
614
1177
|
U32 const r = rankStart[w]++;
|
|
615
|
-
sortedSymbol[r].symbol = (BYTE)s;
|
|
616
|
-
sortedSymbol[r].weight = (BYTE)w;
|
|
1178
|
+
wksp->sortedSymbol[r].symbol = (BYTE)s;
|
|
617
1179
|
}
|
|
618
1180
|
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
|
|
619
1181
|
}
|
|
620
1182
|
|
|
621
1183
|
/* Build rankVal */
|
|
622
|
-
{ U32* const rankVal0 = rankVal[0];
|
|
1184
|
+
{ U32* const rankVal0 = wksp->rankVal[0];
|
|
623
1185
|
{ int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
|
|
624
1186
|
U32 nextRankVal = 0;
|
|
625
1187
|
U32 w;
|
|
626
1188
|
for (w=1; w<maxW+1; w++) {
|
|
627
|
-
U32
|
|
628
|
-
nextRankVal += rankStats[w] << (w+rescale);
|
|
629
|
-
rankVal0[w] =
|
|
1189
|
+
U32 curr = nextRankVal;
|
|
1190
|
+
nextRankVal += wksp->rankStats[w] << (w+rescale);
|
|
1191
|
+
rankVal0[w] = curr;
|
|
630
1192
|
} }
|
|
631
1193
|
{ U32 const minBits = tableLog+1 - maxW;
|
|
632
1194
|
U32 consumed;
|
|
633
1195
|
for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
|
|
634
|
-
U32* const rankValPtr = rankVal[consumed];
|
|
1196
|
+
U32* const rankValPtr = wksp->rankVal[consumed];
|
|
635
1197
|
U32 w;
|
|
636
1198
|
for (w = 1; w < maxW+1; w++) {
|
|
637
1199
|
rankValPtr[w] = rankVal0[w] >> consumed;
|
|
638
1200
|
} } } }
|
|
639
1201
|
|
|
640
1202
|
HUF_fillDTableX2(dt, maxTableLog,
|
|
641
|
-
sortedSymbol,
|
|
642
|
-
rankStart0, rankVal, maxW,
|
|
1203
|
+
wksp->sortedSymbol,
|
|
1204
|
+
wksp->rankStart0, wksp->rankVal, maxW,
|
|
643
1205
|
tableLog+1);
|
|
644
1206
|
|
|
645
1207
|
dtd.tableLog = (BYTE)maxTableLog;
|
|
646
1208
|
dtd.tableType = 1;
|
|
647
|
-
|
|
1209
|
+
ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
|
|
648
1210
|
return iSize;
|
|
649
1211
|
}
|
|
650
1212
|
|
|
651
|
-
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
652
|
-
{
|
|
653
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
654
|
-
return HUF_readDTableX2_wksp(DTable, src, srcSize,
|
|
655
|
-
workSpace, sizeof(workSpace));
|
|
656
|
-
}
|
|
657
|
-
|
|
658
1213
|
|
|
659
1214
|
FORCE_INLINE_TEMPLATE U32
|
|
660
1215
|
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
|
661
1216
|
{
|
|
662
1217
|
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
|
663
|
-
|
|
1218
|
+
ZSTD_memcpy(op, &dt[val].sequence, 2);
|
|
664
1219
|
BIT_skipBits(DStream, dt[val].nbBits);
|
|
665
1220
|
return dt[val].length;
|
|
666
1221
|
}
|
|
@@ -669,15 +1224,17 @@ FORCE_INLINE_TEMPLATE U32
|
|
|
669
1224
|
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
|
670
1225
|
{
|
|
671
1226
|
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
|
672
|
-
|
|
673
|
-
if (dt[val].length==1)
|
|
674
|
-
|
|
1227
|
+
ZSTD_memcpy(op, &dt[val].sequence, 1);
|
|
1228
|
+
if (dt[val].length==1) {
|
|
1229
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
|
1230
|
+
} else {
|
|
675
1231
|
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
|
676
1232
|
BIT_skipBits(DStream, dt[val].nbBits);
|
|
677
1233
|
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
|
678
1234
|
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
|
679
1235
|
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
|
680
|
-
|
|
1236
|
+
}
|
|
1237
|
+
}
|
|
681
1238
|
return 1;
|
|
682
1239
|
}
|
|
683
1240
|
|
|
@@ -699,19 +1256,37 @@ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
|
|
699
1256
|
BYTE* const pStart = p;
|
|
700
1257
|
|
|
701
1258
|
/* up to 8 symbols at a time */
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
1259
|
+
if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
|
|
1260
|
+
if (dtLog <= 11 && MEM_64bits()) {
|
|
1261
|
+
/* up to 10 symbols at a time */
|
|
1262
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
|
|
1263
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1264
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1265
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1266
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1267
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1268
|
+
}
|
|
1269
|
+
} else {
|
|
1270
|
+
/* up to 8 symbols at a time */
|
|
1271
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
|
1272
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
|
1273
|
+
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
|
|
1274
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
|
1275
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
1276
|
+
}
|
|
1277
|
+
}
|
|
1278
|
+
} else {
|
|
1279
|
+
BIT_reloadDStream(bitDPtr);
|
|
707
1280
|
}
|
|
708
1281
|
|
|
709
1282
|
/* closer to end : up to 2 symbols at a time */
|
|
710
|
-
|
|
711
|
-
|
|
1283
|
+
if ((size_t)(pEnd - p) >= 2) {
|
|
1284
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
|
1285
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
|
712
1286
|
|
|
713
|
-
|
|
714
|
-
|
|
1287
|
+
while (p <= pEnd-2)
|
|
1288
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
|
1289
|
+
}
|
|
715
1290
|
|
|
716
1291
|
if (p < pEnd)
|
|
717
1292
|
p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
|
|
@@ -746,6 +1321,10 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
|
746
1321
|
return dstSize;
|
|
747
1322
|
}
|
|
748
1323
|
|
|
1324
|
+
/* HUF_decompress4X2_usingDTable_internal_body():
|
|
1325
|
+
* Conditions:
|
|
1326
|
+
* @dstSize >= 6
|
|
1327
|
+
*/
|
|
749
1328
|
FORCE_INLINE_TEMPLATE size_t
|
|
750
1329
|
HUF_decompress4X2_usingDTable_internal_body(
|
|
751
1330
|
void* dst, size_t dstSize,
|
|
@@ -786,58 +1365,62 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
786
1365
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
787
1366
|
U32 const dtLog = dtd.tableLog;
|
|
788
1367
|
|
|
789
|
-
if (length4 > cSrcSize) return ERROR(corruption_detected);
|
|
1368
|
+
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
|
1369
|
+
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
|
1370
|
+
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
|
|
790
1371
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
|
791
1372
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
|
792
1373
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
|
793
1374
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
|
794
1375
|
|
|
795
1376
|
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
|
796
|
-
|
|
1377
|
+
if ((size_t)(oend - op4) >= sizeof(size_t)) {
|
|
1378
|
+
for ( ; (endSignal) & (op4 < olimit); ) {
|
|
797
1379
|
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
1380
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1381
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
|
1382
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1383
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
|
1384
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1385
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
|
1386
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1387
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
|
1388
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
|
1389
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
|
1390
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1391
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
|
1392
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1393
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
|
1394
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1395
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
|
1396
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1397
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
|
1398
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
|
1399
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
|
818
1400
|
#else
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
1401
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1402
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1403
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1404
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1405
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
|
1406
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
|
1407
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
|
1408
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
|
1409
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
1410
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
1411
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
1412
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
1413
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
|
1414
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
|
1415
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
|
1416
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
|
1417
|
+
endSignal = (U32)LIKELY((U32)
|
|
1418
|
+
(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
|
|
1419
|
+
& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
|
|
1420
|
+
& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
|
|
1421
|
+
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
|
|
840
1422
|
#endif
|
|
1423
|
+
}
|
|
841
1424
|
}
|
|
842
1425
|
|
|
843
1426
|
/* check corruption */
|
|
@@ -861,94 +1444,279 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
861
1444
|
}
|
|
862
1445
|
}
|
|
863
1446
|
|
|
864
|
-
|
|
865
|
-
|
|
1447
|
+
#if HUF_NEED_BMI2_FUNCTION
|
|
1448
|
+
static BMI2_TARGET_ATTRIBUTE
|
|
1449
|
+
size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
|
|
1450
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
1451
|
+
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1452
|
+
}
|
|
1453
|
+
#endif
|
|
866
1454
|
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
{
|
|
872
|
-
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
|
873
|
-
if (dtd.tableType != 1) return ERROR(GENERIC);
|
|
874
|
-
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
1455
|
+
static
|
|
1456
|
+
size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
|
|
1457
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
|
1458
|
+
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
875
1459
|
}
|
|
876
1460
|
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
1461
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
1462
|
+
|
|
1463
|
+
HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
|
|
1464
|
+
|
|
1465
|
+
#endif
|
|
1466
|
+
|
|
1467
|
+
static HUF_FAST_BMI2_ATTRS
|
|
1468
|
+
void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
|
|
880
1469
|
{
|
|
881
|
-
|
|
1470
|
+
U64 bits[4];
|
|
1471
|
+
BYTE const* ip[4];
|
|
1472
|
+
BYTE* op[4];
|
|
1473
|
+
BYTE* oend[4];
|
|
1474
|
+
HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
|
|
1475
|
+
BYTE const* const ilimit = args->ilimit;
|
|
1476
|
+
|
|
1477
|
+
/* Copy the arguments to local registers. */
|
|
1478
|
+
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
|
|
1479
|
+
ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
|
|
1480
|
+
ZSTD_memcpy(&op, &args->op, sizeof(op));
|
|
1481
|
+
|
|
1482
|
+
oend[0] = op[1];
|
|
1483
|
+
oend[1] = op[2];
|
|
1484
|
+
oend[2] = op[3];
|
|
1485
|
+
oend[3] = args->oend;
|
|
1486
|
+
|
|
1487
|
+
assert(MEM_isLittleEndian());
|
|
1488
|
+
assert(!MEM_32bits());
|
|
1489
|
+
|
|
1490
|
+
for (;;) {
|
|
1491
|
+
BYTE* olimit;
|
|
1492
|
+
int stream;
|
|
1493
|
+
int symbol;
|
|
1494
|
+
|
|
1495
|
+
/* Assert loop preconditions */
|
|
1496
|
+
#ifndef NDEBUG
|
|
1497
|
+
for (stream = 0; stream < 4; ++stream) {
|
|
1498
|
+
assert(op[stream] <= oend[stream]);
|
|
1499
|
+
assert(ip[stream] >= ilimit);
|
|
1500
|
+
}
|
|
1501
|
+
#endif
|
|
1502
|
+
/* Compute olimit */
|
|
1503
|
+
{
|
|
1504
|
+
/* Each loop does 5 table lookups for each of the 4 streams.
|
|
1505
|
+
* Each table lookup consumes up to 11 bits of input, and produces
|
|
1506
|
+
* up to 2 bytes of output.
|
|
1507
|
+
*/
|
|
1508
|
+
/* We can consume up to 7 bytes of input per iteration per stream.
|
|
1509
|
+
* We also know that each input pointer is >= ip[0]. So we can run
|
|
1510
|
+
* iters loops before running out of input.
|
|
1511
|
+
*/
|
|
1512
|
+
size_t iters = (size_t)(ip[0] - ilimit) / 7;
|
|
1513
|
+
/* Each iteration can produce up to 10 bytes of output per stream.
|
|
1514
|
+
* Each output stream may advance at different rates. So take the
|
|
1515
|
+
* minimum number of safe iterations among all the output streams.
|
|
1516
|
+
*/
|
|
1517
|
+
for (stream = 0; stream < 4; ++stream) {
|
|
1518
|
+
size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
|
|
1519
|
+
iters = MIN(iters, oiters);
|
|
1520
|
+
}
|
|
1521
|
+
|
|
1522
|
+
/* Each iteration produces at least 5 output symbols. So until
|
|
1523
|
+
* op[3] crosses olimit, we know we haven't executed iters
|
|
1524
|
+
* iterations yet. This saves us maintaining an iters counter,
|
|
1525
|
+
* at the expense of computing the remaining # of iterations
|
|
1526
|
+
* more frequently.
|
|
1527
|
+
*/
|
|
1528
|
+
olimit = op[3] + (iters * 5);
|
|
1529
|
+
|
|
1530
|
+
/* Exit the fast decoding loop if we are too close to the end. */
|
|
1531
|
+
if (op[3] + 10 > olimit)
|
|
1532
|
+
break;
|
|
1533
|
+
|
|
1534
|
+
/* Exit the decoding loop if any input pointer has crossed the
|
|
1535
|
+
* previous one. This indicates corruption, and a precondition
|
|
1536
|
+
* to our loop is that ip[i] >= ip[0].
|
|
1537
|
+
*/
|
|
1538
|
+
for (stream = 1; stream < 4; ++stream) {
|
|
1539
|
+
if (ip[stream] < ip[stream - 1])
|
|
1540
|
+
goto _out;
|
|
1541
|
+
}
|
|
1542
|
+
}
|
|
882
1543
|
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
1544
|
+
#ifndef NDEBUG
|
|
1545
|
+
for (stream = 1; stream < 4; ++stream) {
|
|
1546
|
+
assert(ip[stream] >= ip[stream - 1]);
|
|
1547
|
+
}
|
|
1548
|
+
#endif
|
|
888
1549
|
|
|
889
|
-
|
|
890
|
-
|
|
1550
|
+
do {
|
|
1551
|
+
/* Do 5 table lookups for each of the first 3 streams */
|
|
1552
|
+
for (symbol = 0; symbol < 5; ++symbol) {
|
|
1553
|
+
for (stream = 0; stream < 3; ++stream) {
|
|
1554
|
+
int const index = (int)(bits[stream] >> 53);
|
|
1555
|
+
HUF_DEltX2 const entry = dtable[index];
|
|
1556
|
+
MEM_write16(op[stream], entry.sequence);
|
|
1557
|
+
bits[stream] <<= (entry.nbBits);
|
|
1558
|
+
op[stream] += (entry.length);
|
|
1559
|
+
}
|
|
1560
|
+
}
|
|
1561
|
+
/* Do 1 table lookup from the final stream */
|
|
1562
|
+
{
|
|
1563
|
+
int const index = (int)(bits[3] >> 53);
|
|
1564
|
+
HUF_DEltX2 const entry = dtable[index];
|
|
1565
|
+
MEM_write16(op[3], entry.sequence);
|
|
1566
|
+
bits[3] <<= (entry.nbBits);
|
|
1567
|
+
op[3] += (entry.length);
|
|
1568
|
+
}
|
|
1569
|
+
/* Do 4 table lookups from the final stream & reload bitstreams */
|
|
1570
|
+
for (stream = 0; stream < 4; ++stream) {
|
|
1571
|
+
/* Do a table lookup from the final stream.
|
|
1572
|
+
* This is interleaved with the reloading to reduce register
|
|
1573
|
+
* pressure. This shouldn't be necessary, but compilers can
|
|
1574
|
+
* struggle with codegen with high register pressure.
|
|
1575
|
+
*/
|
|
1576
|
+
{
|
|
1577
|
+
int const index = (int)(bits[3] >> 53);
|
|
1578
|
+
HUF_DEltX2 const entry = dtable[index];
|
|
1579
|
+
MEM_write16(op[3], entry.sequence);
|
|
1580
|
+
bits[3] <<= (entry.nbBits);
|
|
1581
|
+
op[3] += (entry.length);
|
|
1582
|
+
}
|
|
1583
|
+
/* Reload the bitstreams. The final bitstream must be reloaded
|
|
1584
|
+
* after the 5th symbol was decoded.
|
|
1585
|
+
*/
|
|
1586
|
+
{
|
|
1587
|
+
int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
|
|
1588
|
+
int const nbBits = ctz & 7;
|
|
1589
|
+
int const nbBytes = ctz >> 3;
|
|
1590
|
+
ip[stream] -= nbBytes;
|
|
1591
|
+
bits[stream] = MEM_read64(ip[stream]) | 1;
|
|
1592
|
+
bits[stream] <<= nbBits;
|
|
1593
|
+
}
|
|
1594
|
+
}
|
|
1595
|
+
} while (op[3] < olimit);
|
|
1596
|
+
}
|
|
891
1597
|
|
|
1598
|
+
_out:
|
|
892
1599
|
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
|
898
|
-
workSpace, sizeof(workSpace));
|
|
1600
|
+
/* Save the final values of each of the state variables back to args. */
|
|
1601
|
+
ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
|
|
1602
|
+
ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
|
|
1603
|
+
ZSTD_memcpy(&args->op, &op, sizeof(op));
|
|
899
1604
|
}
|
|
900
1605
|
|
|
901
|
-
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
902
|
-
{
|
|
903
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
|
904
|
-
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
905
|
-
}
|
|
906
1606
|
|
|
907
|
-
size_t
|
|
1607
|
+
static HUF_FAST_BMI2_ATTRS size_t
|
|
1608
|
+
HUF_decompress4X2_usingDTable_internal_fast(
|
|
908
1609
|
void* dst, size_t dstSize,
|
|
909
1610
|
const void* cSrc, size_t cSrcSize,
|
|
910
|
-
const HUF_DTable* DTable
|
|
1611
|
+
const HUF_DTable* DTable,
|
|
1612
|
+
HUF_DecompressFastLoopFn loopFn) {
|
|
1613
|
+
void const* dt = DTable + 1;
|
|
1614
|
+
const BYTE* const iend = (const BYTE*)cSrc + 6;
|
|
1615
|
+
BYTE* const oend = (BYTE*)dst + dstSize;
|
|
1616
|
+
HUF_DecompressFastArgs args;
|
|
1617
|
+
{
|
|
1618
|
+
size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1619
|
+
FORWARD_IF_ERROR(ret, "Failed to init asm args");
|
|
1620
|
+
if (ret == 0)
|
|
1621
|
+
return 0;
|
|
1622
|
+
}
|
|
1623
|
+
|
|
1624
|
+
assert(args.ip[0] >= args.ilimit);
|
|
1625
|
+
loopFn(&args);
|
|
1626
|
+
|
|
1627
|
+
/* note : op4 already verified within main loop */
|
|
1628
|
+
assert(args.ip[0] >= iend);
|
|
1629
|
+
assert(args.ip[1] >= iend);
|
|
1630
|
+
assert(args.ip[2] >= iend);
|
|
1631
|
+
assert(args.ip[3] >= iend);
|
|
1632
|
+
assert(args.op[3] <= oend);
|
|
1633
|
+
(void)iend;
|
|
1634
|
+
|
|
1635
|
+
/* finish bitStreams one by one */
|
|
1636
|
+
{
|
|
1637
|
+
size_t const segmentSize = (dstSize+3) / 4;
|
|
1638
|
+
BYTE* segmentEnd = (BYTE*)dst;
|
|
1639
|
+
int i;
|
|
1640
|
+
for (i = 0; i < 4; ++i) {
|
|
1641
|
+
BIT_DStream_t bit;
|
|
1642
|
+
if (segmentSize <= (size_t)(oend - segmentEnd))
|
|
1643
|
+
segmentEnd += segmentSize;
|
|
1644
|
+
else
|
|
1645
|
+
segmentEnd = oend;
|
|
1646
|
+
FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
|
|
1647
|
+
args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
|
|
1648
|
+
if (args.op[i] != segmentEnd)
|
|
1649
|
+
return ERROR(corruption_detected);
|
|
1650
|
+
}
|
|
1651
|
+
}
|
|
1652
|
+
|
|
1653
|
+
/* decoded size */
|
|
1654
|
+
return dstSize;
|
|
1655
|
+
}
|
|
1656
|
+
|
|
1657
|
+
static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
|
|
1658
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags)
|
|
911
1659
|
{
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
1660
|
+
HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default;
|
|
1661
|
+
HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop;
|
|
1662
|
+
|
|
1663
|
+
#if DYNAMIC_BMI2
|
|
1664
|
+
if (flags & HUF_flags_bmi2) {
|
|
1665
|
+
fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
|
|
1666
|
+
# if ZSTD_ENABLE_ASM_X86_64_BMI2
|
|
1667
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
|
1668
|
+
loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
|
|
1669
|
+
}
|
|
1670
|
+
# endif
|
|
1671
|
+
} else {
|
|
1672
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
1673
|
+
}
|
|
1674
|
+
#endif
|
|
1675
|
+
|
|
1676
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
|
|
1677
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
|
1678
|
+
loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
|
|
1679
|
+
}
|
|
1680
|
+
#endif
|
|
1681
|
+
|
|
1682
|
+
if (!(flags & HUF_flags_disableFast)) {
|
|
1683
|
+
size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
|
|
1684
|
+
if (ret != 0)
|
|
1685
|
+
return ret;
|
|
1686
|
+
}
|
|
1687
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
|
915
1688
|
}
|
|
916
1689
|
|
|
917
|
-
|
|
1690
|
+
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
|
|
1691
|
+
|
|
1692
|
+
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
918
1693
|
const void* cSrc, size_t cSrcSize,
|
|
919
|
-
void* workSpace, size_t wkspSize, int
|
|
1694
|
+
void* workSpace, size_t wkspSize, int flags)
|
|
920
1695
|
{
|
|
921
1696
|
const BYTE* ip = (const BYTE*) cSrc;
|
|
922
1697
|
|
|
923
|
-
size_t hSize = HUF_readDTableX2_wksp(
|
|
924
|
-
|
|
1698
|
+
size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
|
|
1699
|
+
workSpace, wkspSize, flags);
|
|
925
1700
|
if (HUF_isError(hSize)) return hSize;
|
|
926
1701
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
|
927
1702
|
ip += hSize; cSrcSize -= hSize;
|
|
928
1703
|
|
|
929
|
-
return
|
|
1704
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
|
|
930
1705
|
}
|
|
931
1706
|
|
|
932
|
-
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1707
|
+
static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
933
1708
|
const void* cSrc, size_t cSrcSize,
|
|
934
|
-
void* workSpace, size_t wkspSize)
|
|
1709
|
+
void* workSpace, size_t wkspSize, int flags)
|
|
935
1710
|
{
|
|
936
|
-
|
|
937
|
-
}
|
|
938
|
-
|
|
1711
|
+
const BYTE* ip = (const BYTE*) cSrc;
|
|
939
1712
|
|
|
940
|
-
size_t
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
workSpace, sizeof(workSpace));
|
|
946
|
-
}
|
|
1713
|
+
size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
|
|
1714
|
+
workSpace, wkspSize, flags);
|
|
1715
|
+
if (HUF_isError(hSize)) return hSize;
|
|
1716
|
+
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
|
1717
|
+
ip += hSize; cSrcSize -= hSize;
|
|
947
1718
|
|
|
948
|
-
|
|
949
|
-
{
|
|
950
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
|
951
|
-
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
|
1719
|
+
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
|
952
1720
|
}
|
|
953
1721
|
|
|
954
1722
|
#endif /* HUF_FORCE_DECOMPRESS_X1 */
|
|
@@ -958,66 +1726,28 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
|
958
1726
|
/* Universal decompression selectors */
|
|
959
1727
|
/* ***********************************/
|
|
960
1728
|
|
|
961
|
-
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
|
|
962
|
-
const void* cSrc, size_t cSrcSize,
|
|
963
|
-
const HUF_DTable* DTable)
|
|
964
|
-
{
|
|
965
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
966
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
967
|
-
(void)dtd;
|
|
968
|
-
assert(dtd.tableType == 0);
|
|
969
|
-
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
970
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
971
|
-
(void)dtd;
|
|
972
|
-
assert(dtd.tableType == 1);
|
|
973
|
-
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
974
|
-
#else
|
|
975
|
-
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
|
976
|
-
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
977
|
-
#endif
|
|
978
|
-
}
|
|
979
|
-
|
|
980
|
-
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
|
981
|
-
const void* cSrc, size_t cSrcSize,
|
|
982
|
-
const HUF_DTable* DTable)
|
|
983
|
-
{
|
|
984
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
985
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
986
|
-
(void)dtd;
|
|
987
|
-
assert(dtd.tableType == 0);
|
|
988
|
-
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
989
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
990
|
-
(void)dtd;
|
|
991
|
-
assert(dtd.tableType == 1);
|
|
992
|
-
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
993
|
-
#else
|
|
994
|
-
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
|
995
|
-
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
|
996
|
-
#endif
|
|
997
|
-
}
|
|
998
|
-
|
|
999
1729
|
|
|
1000
1730
|
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1001
1731
|
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
|
|
1002
|
-
static const algo_time_t algoTime[16 /* Quantization */][
|
|
1732
|
+
static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
|
|
1003
1733
|
{
|
|
1004
1734
|
/* single, double */
|
|
1005
|
-
{{0,0}, {1,1}
|
|
1006
|
-
{{0,0}, {1,1}
|
|
1007
|
-
{{
|
|
1008
|
-
{{
|
|
1009
|
-
{{
|
|
1010
|
-
{{
|
|
1011
|
-
{{
|
|
1012
|
-
{{
|
|
1013
|
-
{{
|
|
1014
|
-
{{
|
|
1015
|
-
{{
|
|
1016
|
-
{{
|
|
1017
|
-
{{
|
|
1018
|
-
{{
|
|
1019
|
-
{{
|
|
1020
|
-
{{
|
|
1735
|
+
{{0,0}, {1,1}}, /* Q==0 : impossible */
|
|
1736
|
+
{{0,0}, {1,1}}, /* Q==1 : impossible */
|
|
1737
|
+
{{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */
|
|
1738
|
+
{{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */
|
|
1739
|
+
{{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */
|
|
1740
|
+
{{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */
|
|
1741
|
+
{{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */
|
|
1742
|
+
{{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */
|
|
1743
|
+
{{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */
|
|
1744
|
+
{{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */
|
|
1745
|
+
{{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */
|
|
1746
|
+
{{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */
|
|
1747
|
+
{{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */
|
|
1748
|
+
{{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */
|
|
1749
|
+
{{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */
|
|
1750
|
+
{{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */
|
|
1021
1751
|
};
|
|
1022
1752
|
#endif
|
|
1023
1753
|
|
|
@@ -1044,188 +1774,92 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
|
|
1044
1774
|
U32 const D256 = (U32)(dstSize >> 8);
|
|
1045
1775
|
U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
|
|
1046
1776
|
U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
|
|
1047
|
-
DTime1 += DTime1 >>
|
|
1777
|
+
DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
|
|
1048
1778
|
return DTime1 < DTime0;
|
|
1049
1779
|
}
|
|
1050
1780
|
#endif
|
|
1051
1781
|
}
|
|
1052
1782
|
|
|
1053
|
-
|
|
1054
|
-
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
|
1055
|
-
|
|
1056
|
-
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1057
|
-
{
|
|
1058
|
-
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1059
|
-
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
|
|
1060
|
-
#endif
|
|
1061
|
-
|
|
1062
|
-
/* validation checks */
|
|
1063
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1064
|
-
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
|
1065
|
-
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1066
|
-
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1067
|
-
|
|
1068
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1069
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1070
|
-
(void)algoNb;
|
|
1071
|
-
assert(algoNb == 0);
|
|
1072
|
-
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
|
|
1073
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1074
|
-
(void)algoNb;
|
|
1075
|
-
assert(algoNb == 1);
|
|
1076
|
-
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
|
|
1077
|
-
#else
|
|
1078
|
-
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
|
1079
|
-
#endif
|
|
1080
|
-
}
|
|
1081
|
-
}
|
|
1082
|
-
|
|
1083
|
-
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1084
|
-
{
|
|
1085
|
-
/* validation checks */
|
|
1086
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1087
|
-
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
|
1088
|
-
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1089
|
-
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1090
|
-
|
|
1091
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1092
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1093
|
-
(void)algoNb;
|
|
1094
|
-
assert(algoNb == 0);
|
|
1095
|
-
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
|
1096
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1097
|
-
(void)algoNb;
|
|
1098
|
-
assert(algoNb == 1);
|
|
1099
|
-
return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
|
1100
|
-
#else
|
|
1101
|
-
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
|
|
1102
|
-
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
|
|
1103
|
-
#endif
|
|
1104
|
-
}
|
|
1105
|
-
}
|
|
1106
|
-
|
|
1107
|
-
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
1108
|
-
{
|
|
1109
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1110
|
-
return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1111
|
-
workSpace, sizeof(workSpace));
|
|
1112
|
-
}
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
|
|
1116
|
-
size_t dstSize, const void* cSrc,
|
|
1117
|
-
size_t cSrcSize, void* workSpace,
|
|
1118
|
-
size_t wkspSize)
|
|
1119
|
-
{
|
|
1120
|
-
/* validation checks */
|
|
1121
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1122
|
-
if (cSrcSize == 0) return ERROR(corruption_detected);
|
|
1123
|
-
|
|
1124
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1125
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1126
|
-
(void)algoNb;
|
|
1127
|
-
assert(algoNb == 0);
|
|
1128
|
-
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
|
1129
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1130
|
-
(void)algoNb;
|
|
1131
|
-
assert(algoNb == 1);
|
|
1132
|
-
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
|
1133
|
-
#else
|
|
1134
|
-
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
|
1135
|
-
cSrcSize, workSpace, wkspSize):
|
|
1136
|
-
HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
|
1137
|
-
#endif
|
|
1138
|
-
}
|
|
1139
|
-
}
|
|
1140
|
-
|
|
1141
1783
|
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1142
1784
|
const void* cSrc, size_t cSrcSize,
|
|
1143
|
-
void* workSpace, size_t wkspSize)
|
|
1785
|
+
void* workSpace, size_t wkspSize, int flags)
|
|
1144
1786
|
{
|
|
1145
1787
|
/* validation checks */
|
|
1146
1788
|
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
1147
1789
|
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
|
1148
|
-
if (cSrcSize == dstSize) {
|
|
1149
|
-
if (cSrcSize == 1) {
|
|
1790
|
+
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
|
1791
|
+
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
|
1150
1792
|
|
|
1151
1793
|
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
|
1152
1794
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1153
1795
|
(void)algoNb;
|
|
1154
1796
|
assert(algoNb == 0);
|
|
1155
1797
|
return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
|
1156
|
-
cSrcSize, workSpace, wkspSize);
|
|
1798
|
+
cSrcSize, workSpace, wkspSize, flags);
|
|
1157
1799
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1158
1800
|
(void)algoNb;
|
|
1159
1801
|
assert(algoNb == 1);
|
|
1160
1802
|
return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
|
1161
|
-
cSrcSize, workSpace, wkspSize);
|
|
1803
|
+
cSrcSize, workSpace, wkspSize, flags);
|
|
1162
1804
|
#else
|
|
1163
1805
|
return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
|
1164
|
-
cSrcSize, workSpace, wkspSize):
|
|
1806
|
+
cSrcSize, workSpace, wkspSize, flags):
|
|
1165
1807
|
HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
|
1166
|
-
cSrcSize, workSpace, wkspSize);
|
|
1808
|
+
cSrcSize, workSpace, wkspSize, flags);
|
|
1167
1809
|
#endif
|
|
1168
1810
|
}
|
|
1169
1811
|
}
|
|
1170
1812
|
|
|
1171
|
-
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1172
|
-
const void* cSrc, size_t cSrcSize)
|
|
1173
|
-
{
|
|
1174
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
|
1175
|
-
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
|
1176
|
-
workSpace, sizeof(workSpace));
|
|
1177
|
-
}
|
|
1178
|
-
|
|
1179
1813
|
|
|
1180
|
-
size_t
|
|
1814
|
+
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
|
|
1181
1815
|
{
|
|
1182
1816
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
1183
1817
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1184
1818
|
(void)dtd;
|
|
1185
1819
|
assert(dtd.tableType == 0);
|
|
1186
|
-
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1820
|
+
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1187
1821
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1188
1822
|
(void)dtd;
|
|
1189
1823
|
assert(dtd.tableType == 1);
|
|
1190
|
-
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1824
|
+
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1191
1825
|
#else
|
|
1192
|
-
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1193
|
-
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1826
|
+
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
|
|
1827
|
+
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1194
1828
|
#endif
|
|
1195
1829
|
}
|
|
1196
1830
|
|
|
1197
1831
|
#ifndef HUF_FORCE_DECOMPRESS_X2
|
|
1198
|
-
size_t
|
|
1832
|
+
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
|
|
1199
1833
|
{
|
|
1200
1834
|
const BYTE* ip = (const BYTE*) cSrc;
|
|
1201
1835
|
|
|
1202
|
-
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
|
|
1836
|
+
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
|
1203
1837
|
if (HUF_isError(hSize)) return hSize;
|
|
1204
1838
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
|
1205
1839
|
ip += hSize; cSrcSize -= hSize;
|
|
1206
1840
|
|
|
1207
|
-
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,
|
|
1841
|
+
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
|
1208
1842
|
}
|
|
1209
1843
|
#endif
|
|
1210
1844
|
|
|
1211
|
-
size_t
|
|
1845
|
+
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
|
|
1212
1846
|
{
|
|
1213
1847
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
1214
1848
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1215
1849
|
(void)dtd;
|
|
1216
1850
|
assert(dtd.tableType == 0);
|
|
1217
|
-
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1851
|
+
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1218
1852
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1219
1853
|
(void)dtd;
|
|
1220
1854
|
assert(dtd.tableType == 1);
|
|
1221
|
-
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1855
|
+
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1222
1856
|
#else
|
|
1223
|
-
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1224
|
-
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
|
1857
|
+
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
|
|
1858
|
+
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
|
1225
1859
|
#endif
|
|
1226
1860
|
}
|
|
1227
1861
|
|
|
1228
|
-
size_t
|
|
1862
|
+
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
|
|
1229
1863
|
{
|
|
1230
1864
|
/* validation checks */
|
|
1231
1865
|
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
|
@@ -1235,14 +1869,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
|
|
|
1235
1869
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
|
1236
1870
|
(void)algoNb;
|
|
1237
1871
|
assert(algoNb == 0);
|
|
1238
|
-
return
|
|
1872
|
+
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
|
1239
1873
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
|
1240
1874
|
(void)algoNb;
|
|
1241
1875
|
assert(algoNb == 1);
|
|
1242
|
-
return
|
|
1876
|
+
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
|
1243
1877
|
#else
|
|
1244
|
-
return algoNb ?
|
|
1245
|
-
|
|
1878
|
+
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) :
|
|
1879
|
+
HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
|
1246
1880
|
#endif
|
|
1247
1881
|
}
|
|
1248
1882
|
}
|