zstd-ruby 1.4.4.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
- data/ext/zstdruby/libzstd/common/compiler.h +219 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
- data/ext/zstdruby/libzstd/common/error_private.c +11 -2
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +47 -116
- data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
- data/ext/zstdruby/libzstd/common/huf.h +112 -197
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +11 -5
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +78 -22
- data/ext/zstdruby/libzstd/common/threading.h +9 -13
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
- data/ext/zstdruby/libzstd/zstd.h +1277 -306
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +24 -39
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -289
- data/ext/zstdruby/libzstd/README.md +0 -159
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,47 +1,34 @@
|
|
1
1
|
/* ******************************************************************
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
notice, this list of conditions and the following disclaimer.
|
14
|
-
* Redistributions in binary form must reproduce the above
|
15
|
-
copyright notice, this list of conditions and the following disclaimer
|
16
|
-
in the documentation and/or other materials provided with the
|
17
|
-
distribution.
|
18
|
-
|
19
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
20
|
-
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
21
|
-
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
22
|
-
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
23
|
-
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
24
|
-
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
25
|
-
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
26
|
-
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
27
|
-
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
28
|
-
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
30
|
-
|
31
|
-
You can contact the author at :
|
32
|
-
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
2
|
+
* huff0 huffman decoder,
|
3
|
+
* part of Finite State Entropy library
|
4
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
5
|
+
*
|
6
|
+
* You can contact the author at :
|
7
|
+
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
8
|
+
*
|
9
|
+
* This source code is licensed under both the BSD-style license (found in the
|
10
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
11
|
+
* in the COPYING file in the root directory of this source tree).
|
12
|
+
* You may select, at your option, one of the above-listed licenses.
|
33
13
|
****************************************************************** */
|
34
14
|
|
35
15
|
/* **************************************************************
|
36
16
|
* Dependencies
|
37
17
|
****************************************************************/
|
38
|
-
#include
|
39
|
-
#include "compiler.h"
|
40
|
-
#include "bitstream.h" /* BIT_* */
|
41
|
-
#include "fse.h" /* to compress headers */
|
42
|
-
#
|
43
|
-
#include "
|
44
|
-
#include "
|
18
|
+
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
|
19
|
+
#include "../common/compiler.h"
|
20
|
+
#include "../common/bitstream.h" /* BIT_* */
|
21
|
+
#include "../common/fse.h" /* to compress headers */
|
22
|
+
#include "../common/huf.h"
|
23
|
+
#include "../common/error_private.h"
|
24
|
+
#include "../common/zstd_internal.h"
|
25
|
+
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */
|
26
|
+
|
27
|
+
/* **************************************************************
|
28
|
+
* Constants
|
29
|
+
****************************************************************/
|
30
|
+
|
31
|
+
#define HUF_DECODER_FAST_TABLELOG 11
|
45
32
|
|
46
33
|
/* **************************************************************
|
47
34
|
* Macros
|
@@ -56,14 +43,33 @@
|
|
56
43
|
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
|
57
44
|
#endif
|
58
45
|
|
46
|
+
/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is
|
47
|
+
* supported at runtime, so we can add the BMI2 target attribute.
|
48
|
+
* When it is disabled, we will still get BMI2 if it is enabled statically.
|
49
|
+
*/
|
50
|
+
#if DYNAMIC_BMI2
|
51
|
+
# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
|
52
|
+
#else
|
53
|
+
# define HUF_FAST_BMI2_ATTRS
|
54
|
+
#endif
|
55
|
+
|
56
|
+
#ifdef __cplusplus
|
57
|
+
# define HUF_EXTERN_C extern "C"
|
58
|
+
#else
|
59
|
+
# define HUF_EXTERN_C
|
60
|
+
#endif
|
61
|
+
#define HUF_ASM_DECL HUF_EXTERN_C
|
62
|
+
|
63
|
+
#if DYNAMIC_BMI2
|
64
|
+
# define HUF_NEED_BMI2_FUNCTION 1
|
65
|
+
#else
|
66
|
+
# define HUF_NEED_BMI2_FUNCTION 0
|
67
|
+
#endif
|
59
68
|
|
60
69
|
/* **************************************************************
|
61
70
|
* Error Management
|
62
71
|
****************************************************************/
|
63
72
|
#define HUF_isError ERR_isError
|
64
|
-
#ifndef CHECK_F
|
65
|
-
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
|
66
|
-
#endif
|
67
73
|
|
68
74
|
|
69
75
|
/* **************************************************************
|
@@ -76,6 +82,11 @@
|
|
76
82
|
/* **************************************************************
|
77
83
|
* BMI2 Variant Wrappers
|
78
84
|
****************************************************************/
|
85
|
+
typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize,
|
86
|
+
const void *cSrc,
|
87
|
+
size_t cSrcSize,
|
88
|
+
const HUF_DTable *DTable);
|
89
|
+
|
79
90
|
#if DYNAMIC_BMI2
|
80
91
|
|
81
92
|
#define HUF_DGEN(fn) \
|
@@ -88,7 +99,7 @@
|
|
88
99
|
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
89
100
|
} \
|
90
101
|
\
|
91
|
-
static
|
102
|
+
static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
|
92
103
|
void* dst, size_t dstSize, \
|
93
104
|
const void* cSrc, size_t cSrcSize, \
|
94
105
|
const HUF_DTable* DTable) \
|
@@ -97,9 +108,9 @@
|
|
97
108
|
} \
|
98
109
|
\
|
99
110
|
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
100
|
-
size_t cSrcSize, HUF_DTable const* DTable, int
|
111
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags) \
|
101
112
|
{ \
|
102
|
-
if (
|
113
|
+
if (flags & HUF_flags_bmi2) { \
|
103
114
|
return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
|
104
115
|
} \
|
105
116
|
return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
|
@@ -109,9 +120,9 @@
|
|
109
120
|
|
110
121
|
#define HUF_DGEN(fn) \
|
111
122
|
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
112
|
-
size_t cSrcSize, HUF_DTable const* DTable, int
|
123
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags) \
|
113
124
|
{ \
|
114
|
-
(void)
|
125
|
+
(void)flags; \
|
115
126
|
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
116
127
|
}
|
117
128
|
|
@@ -126,82 +137,359 @@ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved;
|
|
126
137
|
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
127
138
|
{
|
128
139
|
DTableDesc dtd;
|
129
|
-
|
140
|
+
ZSTD_memcpy(&dtd, table, sizeof(dtd));
|
130
141
|
return dtd;
|
131
142
|
}
|
132
143
|
|
144
|
+
static size_t HUF_initFastDStream(BYTE const* ip) {
|
145
|
+
BYTE const lastByte = ip[7];
|
146
|
+
size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
|
147
|
+
size_t const value = MEM_readLEST(ip) | 1;
|
148
|
+
assert(bitsConsumed <= 8);
|
149
|
+
assert(sizeof(size_t) == 8);
|
150
|
+
return value << bitsConsumed;
|
151
|
+
}
|
152
|
+
|
153
|
+
|
154
|
+
/**
|
155
|
+
* The input/output arguments to the Huffman fast decoding loop:
|
156
|
+
*
|
157
|
+
* ip [in/out] - The input pointers, must be updated to reflect what is consumed.
|
158
|
+
* op [in/out] - The output pointers, must be updated to reflect what is written.
|
159
|
+
* bits [in/out] - The bitstream containers, must be updated to reflect the current state.
|
160
|
+
* dt [in] - The decoding table.
|
161
|
+
* ilimit [in] - The input limit, stop when any input pointer is below ilimit.
|
162
|
+
* oend [in] - The end of the output stream. op[3] must not cross oend.
|
163
|
+
* iend [in] - The end of each input stream. ip[i] may cross iend[i],
|
164
|
+
* as long as it is above ilimit, but that indicates corruption.
|
165
|
+
*/
|
166
|
+
typedef struct {
|
167
|
+
BYTE const* ip[4];
|
168
|
+
BYTE* op[4];
|
169
|
+
U64 bits[4];
|
170
|
+
void const* dt;
|
171
|
+
BYTE const* ilimit;
|
172
|
+
BYTE* oend;
|
173
|
+
BYTE const* iend[4];
|
174
|
+
} HUF_DecompressFastArgs;
|
175
|
+
|
176
|
+
typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
|
177
|
+
|
178
|
+
/**
|
179
|
+
* Initializes args for the fast decoding loop.
|
180
|
+
* @returns 1 on success
|
181
|
+
* 0 if the fallback implementation should be used.
|
182
|
+
* Or an error code on failure.
|
183
|
+
*/
|
184
|
+
static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
|
185
|
+
{
|
186
|
+
void const* dt = DTable + 1;
|
187
|
+
U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
|
188
|
+
|
189
|
+
const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
|
190
|
+
|
191
|
+
BYTE* const oend = (BYTE*)dst + dstSize;
|
192
|
+
|
193
|
+
/* The fast decoding loop assumes 64-bit little-endian.
|
194
|
+
* This condition is false on x32.
|
195
|
+
*/
|
196
|
+
if (!MEM_isLittleEndian() || MEM_32bits())
|
197
|
+
return 0;
|
198
|
+
|
199
|
+
/* strict minimum : jump table + 1 byte per stream */
|
200
|
+
if (srcSize < 10)
|
201
|
+
return ERROR(corruption_detected);
|
202
|
+
|
203
|
+
/* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
|
204
|
+
* If table log is not correct at this point, fallback to the old decoder.
|
205
|
+
* On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
|
206
|
+
*/
|
207
|
+
if (dtLog != HUF_DECODER_FAST_TABLELOG)
|
208
|
+
return 0;
|
209
|
+
|
210
|
+
/* Read the jump table. */
|
211
|
+
{
|
212
|
+
const BYTE* const istart = (const BYTE*)src;
|
213
|
+
size_t const length1 = MEM_readLE16(istart);
|
214
|
+
size_t const length2 = MEM_readLE16(istart+2);
|
215
|
+
size_t const length3 = MEM_readLE16(istart+4);
|
216
|
+
size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
|
217
|
+
args->iend[0] = istart + 6; /* jumpTable */
|
218
|
+
args->iend[1] = args->iend[0] + length1;
|
219
|
+
args->iend[2] = args->iend[1] + length2;
|
220
|
+
args->iend[3] = args->iend[2] + length3;
|
221
|
+
|
222
|
+
/* HUF_initFastDStream() requires this, and this small of an input
|
223
|
+
* won't benefit from the ASM loop anyways.
|
224
|
+
* length1 must be >= 16 so that ip[0] >= ilimit before the loop
|
225
|
+
* starts.
|
226
|
+
*/
|
227
|
+
if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
|
228
|
+
return 0;
|
229
|
+
if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
|
230
|
+
}
|
231
|
+
/* ip[] contains the position that is currently loaded into bits[]. */
|
232
|
+
args->ip[0] = args->iend[1] - sizeof(U64);
|
233
|
+
args->ip[1] = args->iend[2] - sizeof(U64);
|
234
|
+
args->ip[2] = args->iend[3] - sizeof(U64);
|
235
|
+
args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
|
236
|
+
|
237
|
+
/* op[] contains the output pointers. */
|
238
|
+
args->op[0] = (BYTE*)dst;
|
239
|
+
args->op[1] = args->op[0] + (dstSize+3)/4;
|
240
|
+
args->op[2] = args->op[1] + (dstSize+3)/4;
|
241
|
+
args->op[3] = args->op[2] + (dstSize+3)/4;
|
242
|
+
|
243
|
+
/* No point in calling the ASM loop for tiny outputs. */
|
244
|
+
if (args->op[3] >= oend)
|
245
|
+
return 0;
|
246
|
+
|
247
|
+
/* bits[] is the bit container.
|
248
|
+
* It is read from the MSB down to the LSB.
|
249
|
+
* It is shifted left as it is read, and zeros are
|
250
|
+
* shifted in. After the lowest valid bit a 1 is
|
251
|
+
* set, so that CountTrailingZeros(bits[]) can be used
|
252
|
+
* to count how many bits we've consumed.
|
253
|
+
*/
|
254
|
+
args->bits[0] = HUF_initFastDStream(args->ip[0]);
|
255
|
+
args->bits[1] = HUF_initFastDStream(args->ip[1]);
|
256
|
+
args->bits[2] = HUF_initFastDStream(args->ip[2]);
|
257
|
+
args->bits[3] = HUF_initFastDStream(args->ip[3]);
|
258
|
+
|
259
|
+
/* If ip[] >= ilimit, it is guaranteed to be safe to
|
260
|
+
* reload bits[]. It may be beyond its section, but is
|
261
|
+
* guaranteed to be valid (>= istart).
|
262
|
+
*/
|
263
|
+
args->ilimit = ilimit;
|
264
|
+
|
265
|
+
args->oend = oend;
|
266
|
+
args->dt = dt;
|
267
|
+
|
268
|
+
return 1;
|
269
|
+
}
|
270
|
+
|
271
|
+
static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd)
|
272
|
+
{
|
273
|
+
/* Validate that we haven't written past the end of this stream's output segment. */
|
274
|
+
if (args->op[stream] > segmentEnd)
|
275
|
+
return ERROR(corruption_detected);
|
276
|
+
/* Validate that we haven't read beyond iend[].
|
277
|
+
* Note that ip[] may be < iend[] because the MSB is
|
278
|
+
* the next bit to read, and we may have consumed 100%
|
279
|
+
* of the stream, so down to iend[i] - 8 is valid.
|
280
|
+
*/
|
281
|
+
if (args->ip[stream] < args->iend[stream] - 8)
|
282
|
+
return ERROR(corruption_detected);
|
283
|
+
|
284
|
+
/* Construct the BIT_DStream_t. */
|
285
|
+
assert(sizeof(size_t) == 8);
|
286
|
+
bit->bitContainer = MEM_readLEST(args->ip[stream]);
|
287
|
+
bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
|
288
|
+
bit->start = (const char*)args->iend[0];
|
289
|
+
bit->limitPtr = bit->start + sizeof(size_t);
|
290
|
+
bit->ptr = (const char*)args->ip[stream];
|
291
|
+
|
292
|
+
return 0;
|
293
|
+
}
|
294
|
+
|
133
295
|
|
134
296
|
#ifndef HUF_FORCE_DECOMPRESS_X2
|
135
297
|
|
136
298
|
/*-***************************/
|
137
299
|
/* single-symbol decoding */
|
138
300
|
/*-***************************/
|
139
|
-
typedef struct { BYTE
|
301
|
+
typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */
|
140
302
|
|
141
|
-
|
303
|
+
/**
|
304
|
+
* Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
|
305
|
+
* a time.
|
306
|
+
*/
|
307
|
+
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
|
308
|
+
U64 D4;
|
309
|
+
if (MEM_isLittleEndian()) {
|
310
|
+
D4 = (U64)((symbol << 8) + nbBits);
|
311
|
+
} else {
|
312
|
+
D4 = (U64)(symbol + (nbBits << 8));
|
313
|
+
}
|
314
|
+
assert(D4 < (1U << 16));
|
315
|
+
D4 *= 0x0001000100010001ULL;
|
316
|
+
return D4;
|
317
|
+
}
|
318
|
+
|
319
|
+
/**
|
320
|
+
* Increases the tableLog to targetTableLog and rescales the stats.
|
321
|
+
* If tableLog > targetTableLog this is a no-op.
|
322
|
+
* @returns New tableLog
|
323
|
+
*/
|
324
|
+
static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
|
325
|
+
{
|
326
|
+
if (tableLog > targetTableLog)
|
327
|
+
return tableLog;
|
328
|
+
if (tableLog < targetTableLog) {
|
329
|
+
U32 const scale = targetTableLog - tableLog;
|
330
|
+
U32 s;
|
331
|
+
/* Increase the weight for all non-zero probability symbols by scale. */
|
332
|
+
for (s = 0; s < nbSymbols; ++s) {
|
333
|
+
huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
|
334
|
+
}
|
335
|
+
/* Update rankVal to reflect the new weights.
|
336
|
+
* All weights except 0 get moved to weight + scale.
|
337
|
+
* Weights [1, scale] are empty.
|
338
|
+
*/
|
339
|
+
for (s = targetTableLog; s > scale; --s) {
|
340
|
+
rankVal[s] = rankVal[s - scale];
|
341
|
+
}
|
342
|
+
for (s = scale; s > 0; --s) {
|
343
|
+
rankVal[s] = 0;
|
344
|
+
}
|
345
|
+
}
|
346
|
+
return targetTableLog;
|
347
|
+
}
|
348
|
+
|
349
|
+
typedef struct {
|
350
|
+
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
351
|
+
U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
352
|
+
U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
353
|
+
BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
|
354
|
+
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
|
355
|
+
} HUF_ReadDTableX1_Workspace;
|
356
|
+
|
357
|
+
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags)
|
142
358
|
{
|
143
359
|
U32 tableLog = 0;
|
144
360
|
U32 nbSymbols = 0;
|
145
361
|
size_t iSize;
|
146
362
|
void* const dtPtr = DTable + 1;
|
147
363
|
HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
|
364
|
+
HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
|
148
365
|
|
149
|
-
|
150
|
-
|
151
|
-
size_t spaceUsed32 = 0;
|
152
|
-
|
153
|
-
rankVal = (U32 *)workSpace + spaceUsed32;
|
154
|
-
spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
|
155
|
-
huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
156
|
-
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
157
|
-
|
158
|
-
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
366
|
+
DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
|
367
|
+
if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
|
159
368
|
|
160
369
|
DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
|
161
|
-
/*
|
370
|
+
/* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
|
162
371
|
|
163
|
-
iSize =
|
372
|
+
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags);
|
164
373
|
if (HUF_isError(iSize)) return iSize;
|
165
374
|
|
375
|
+
|
166
376
|
/* Table header */
|
167
377
|
{ DTableDesc dtd = HUF_getDTableDesc(DTable);
|
378
|
+
U32 const maxTableLog = dtd.maxTableLog + 1;
|
379
|
+
U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
|
380
|
+
tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
|
168
381
|
if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
|
169
382
|
dtd.tableType = 0;
|
170
383
|
dtd.tableLog = (BYTE)tableLog;
|
171
|
-
|
384
|
+
ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
|
172
385
|
}
|
173
386
|
|
174
|
-
/*
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
rankVal[
|
193
|
-
|
387
|
+
/* Compute symbols and rankStart given rankVal:
|
388
|
+
*
|
389
|
+
* rankVal already contains the number of values of each weight.
|
390
|
+
*
|
391
|
+
* symbols contains the symbols ordered by weight. First are the rankVal[0]
|
392
|
+
* weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
|
393
|
+
* symbols[0] is filled (but unused) to avoid a branch.
|
394
|
+
*
|
395
|
+
* rankStart contains the offset where each rank belongs in the DTable.
|
396
|
+
* rankStart[0] is not filled because there are no entries in the table for
|
397
|
+
* weight 0.
|
398
|
+
*/
|
399
|
+
{ int n;
|
400
|
+
U32 nextRankStart = 0;
|
401
|
+
int const unroll = 4;
|
402
|
+
int const nLimit = (int)nbSymbols - unroll + 1;
|
403
|
+
for (n=0; n<(int)tableLog+1; n++) {
|
404
|
+
U32 const curr = nextRankStart;
|
405
|
+
nextRankStart += wksp->rankVal[n];
|
406
|
+
wksp->rankStart[n] = curr;
|
407
|
+
}
|
408
|
+
for (n=0; n < nLimit; n += unroll) {
|
409
|
+
int u;
|
410
|
+
for (u=0; u < unroll; ++u) {
|
411
|
+
size_t const w = wksp->huffWeight[n+u];
|
412
|
+
wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
|
413
|
+
}
|
414
|
+
}
|
415
|
+
for (; n < (int)nbSymbols; ++n) {
|
416
|
+
size_t const w = wksp->huffWeight[n];
|
417
|
+
wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
|
418
|
+
}
|
419
|
+
}
|
194
420
|
|
421
|
+
/* fill DTable
|
422
|
+
* We fill all entries of each weight in order.
|
423
|
+
* That way length is a constant for each iteration of the outer loop.
|
424
|
+
* We can switch based on the length to a different inner loop which is
|
425
|
+
* optimized for that particular case.
|
426
|
+
*/
|
427
|
+
{ U32 w;
|
428
|
+
int symbol = wksp->rankVal[0];
|
429
|
+
int rankStart = 0;
|
430
|
+
for (w=1; w<tableLog+1; ++w) {
|
431
|
+
int const symbolCount = wksp->rankVal[w];
|
432
|
+
int const length = (1 << w) >> 1;
|
433
|
+
int uStart = rankStart;
|
434
|
+
BYTE const nbBits = (BYTE)(tableLog + 1 - w);
|
435
|
+
int s;
|
436
|
+
int u;
|
437
|
+
switch (length) {
|
438
|
+
case 1:
|
439
|
+
for (s=0; s<symbolCount; ++s) {
|
440
|
+
HUF_DEltX1 D;
|
441
|
+
D.byte = wksp->symbols[symbol + s];
|
442
|
+
D.nbBits = nbBits;
|
443
|
+
dt[uStart] = D;
|
444
|
+
uStart += 1;
|
445
|
+
}
|
446
|
+
break;
|
447
|
+
case 2:
|
448
|
+
for (s=0; s<symbolCount; ++s) {
|
449
|
+
HUF_DEltX1 D;
|
450
|
+
D.byte = wksp->symbols[symbol + s];
|
451
|
+
D.nbBits = nbBits;
|
452
|
+
dt[uStart+0] = D;
|
453
|
+
dt[uStart+1] = D;
|
454
|
+
uStart += 2;
|
455
|
+
}
|
456
|
+
break;
|
457
|
+
case 4:
|
458
|
+
for (s=0; s<symbolCount; ++s) {
|
459
|
+
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
460
|
+
MEM_write64(dt + uStart, D4);
|
461
|
+
uStart += 4;
|
462
|
+
}
|
463
|
+
break;
|
464
|
+
case 8:
|
465
|
+
for (s=0; s<symbolCount; ++s) {
|
466
|
+
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
467
|
+
MEM_write64(dt + uStart, D4);
|
468
|
+
MEM_write64(dt + uStart + 4, D4);
|
469
|
+
uStart += 8;
|
470
|
+
}
|
471
|
+
break;
|
472
|
+
default:
|
473
|
+
for (s=0; s<symbolCount; ++s) {
|
474
|
+
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
|
475
|
+
for (u=0; u < length; u += 16) {
|
476
|
+
MEM_write64(dt + uStart + u + 0, D4);
|
477
|
+
MEM_write64(dt + uStart + u + 4, D4);
|
478
|
+
MEM_write64(dt + uStart + u + 8, D4);
|
479
|
+
MEM_write64(dt + uStart + u + 12, D4);
|
480
|
+
}
|
481
|
+
assert(u == length);
|
482
|
+
uStart += length;
|
483
|
+
}
|
484
|
+
break;
|
485
|
+
}
|
486
|
+
symbol += symbolCount;
|
487
|
+
rankStart += symbolCount * length;
|
488
|
+
}
|
489
|
+
}
|
195
490
|
return iSize;
|
196
491
|
}
|
197
492
|
|
198
|
-
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
|
199
|
-
{
|
200
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
201
|
-
return HUF_readDTableX1_wksp(DTable, src, srcSize,
|
202
|
-
workSpace, sizeof(workSpace));
|
203
|
-
}
|
204
|
-
|
205
493
|
FORCE_INLINE_TEMPLATE BYTE
|
206
494
|
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
|
207
495
|
{
|
@@ -228,11 +516,15 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
|
|
228
516
|
BYTE* const pStart = p;
|
229
517
|
|
230
518
|
/* up to 4 symbols at a time */
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
519
|
+
if ((pEnd - p) > 3) {
|
520
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
|
521
|
+
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
|
522
|
+
HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
|
523
|
+
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
|
524
|
+
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
525
|
+
}
|
526
|
+
} else {
|
527
|
+
BIT_reloadDStream(bitDPtr);
|
236
528
|
}
|
237
529
|
|
238
530
|
/* [0-3] symbols remaining */
|
@@ -244,7 +536,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
|
|
244
536
|
while (p < pEnd)
|
245
537
|
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
246
538
|
|
247
|
-
return pEnd-pStart;
|
539
|
+
return (size_t)(pEnd-pStart);
|
248
540
|
}
|
249
541
|
|
250
542
|
FORCE_INLINE_TEMPLATE size_t
|
@@ -270,6 +562,10 @@ HUF_decompress1X1_usingDTable_internal_body(
|
|
270
562
|
return dstSize;
|
271
563
|
}
|
272
564
|
|
565
|
+
/* HUF_decompress4X1_usingDTable_internal_body():
|
566
|
+
* Conditions :
|
567
|
+
* @dstSize >= 6
|
568
|
+
*/
|
273
569
|
FORCE_INLINE_TEMPLATE size_t
|
274
570
|
HUF_decompress4X1_usingDTable_internal_body(
|
275
571
|
void* dst, size_t dstSize,
|
@@ -282,6 +578,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
282
578
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
283
579
|
BYTE* const ostart = (BYTE*) dst;
|
284
580
|
BYTE* const oend = ostart + dstSize;
|
581
|
+
BYTE* const olimit = oend - 3;
|
285
582
|
const void* const dtPtr = DTable + 1;
|
286
583
|
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
|
287
584
|
|
@@ -306,39 +603,42 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
306
603
|
BYTE* op2 = opStart2;
|
307
604
|
BYTE* op3 = opStart3;
|
308
605
|
BYTE* op4 = opStart4;
|
309
|
-
U32 endSignal = BIT_DStream_unfinished;
|
310
606
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
311
607
|
U32 const dtLog = dtd.tableLog;
|
608
|
+
U32 endSignal = 1;
|
312
609
|
|
313
610
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
611
|
+
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
612
|
+
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
|
314
613
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
315
614
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
316
615
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
317
616
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
318
617
|
|
319
618
|
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
619
|
+
if ((size_t)(oend - op4) >= sizeof(size_t)) {
|
620
|
+
for ( ; (endSignal) & (op4 < olimit) ; ) {
|
621
|
+
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
622
|
+
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
623
|
+
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
624
|
+
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
|
625
|
+
HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
|
626
|
+
HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
|
627
|
+
HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
|
628
|
+
HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
|
629
|
+
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
630
|
+
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
631
|
+
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
632
|
+
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
|
633
|
+
HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
|
634
|
+
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
|
635
|
+
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
|
636
|
+
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
|
637
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
638
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
639
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
640
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
641
|
+
}
|
342
642
|
}
|
343
643
|
|
344
644
|
/* check corruption */
|
@@ -364,99 +664,230 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
364
664
|
}
|
365
665
|
}
|
366
666
|
|
667
|
+
#if HUF_NEED_BMI2_FUNCTION
|
668
|
+
static BMI2_TARGET_ATTRIBUTE
|
669
|
+
size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
|
670
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
671
|
+
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
672
|
+
}
|
673
|
+
#endif
|
367
674
|
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
675
|
+
static
|
676
|
+
size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
|
677
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
678
|
+
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
679
|
+
}
|
372
680
|
|
373
|
-
|
374
|
-
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
|
681
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
375
682
|
|
683
|
+
HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
|
376
684
|
|
685
|
+
#endif
|
377
686
|
|
378
|
-
|
379
|
-
|
380
|
-
const void* cSrc, size_t cSrcSize,
|
381
|
-
const HUF_DTable* DTable)
|
687
|
+
static HUF_FAST_BMI2_ATTRS
|
688
|
+
void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
|
382
689
|
{
|
383
|
-
|
384
|
-
|
385
|
-
|
690
|
+
U64 bits[4];
|
691
|
+
BYTE const* ip[4];
|
692
|
+
BYTE* op[4];
|
693
|
+
U16 const* const dtable = (U16 const*)args->dt;
|
694
|
+
BYTE* const oend = args->oend;
|
695
|
+
BYTE const* const ilimit = args->ilimit;
|
696
|
+
|
697
|
+
/* Copy the arguments to local variables */
|
698
|
+
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
|
699
|
+
ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
|
700
|
+
ZSTD_memcpy(&op, &args->op, sizeof(op));
|
701
|
+
|
702
|
+
assert(MEM_isLittleEndian());
|
703
|
+
assert(!MEM_32bits());
|
704
|
+
|
705
|
+
for (;;) {
|
706
|
+
BYTE* olimit;
|
707
|
+
int stream;
|
708
|
+
int symbol;
|
709
|
+
|
710
|
+
/* Assert loop preconditions */
|
711
|
+
#ifndef NDEBUG
|
712
|
+
for (stream = 0; stream < 4; ++stream) {
|
713
|
+
assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
|
714
|
+
assert(ip[stream] >= ilimit);
|
715
|
+
}
|
716
|
+
#endif
|
717
|
+
/* Compute olimit */
|
718
|
+
{
|
719
|
+
/* Each iteration produces 5 output symbols per stream */
|
720
|
+
size_t const oiters = (size_t)(oend - op[3]) / 5;
|
721
|
+
/* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
|
722
|
+
* per stream.
|
723
|
+
*/
|
724
|
+
size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
|
725
|
+
/* We can safely run iters iterations before running bounds checks */
|
726
|
+
size_t const iters = MIN(oiters, iiters);
|
727
|
+
size_t const symbols = iters * 5;
|
728
|
+
|
729
|
+
/* We can simply check that op[3] < olimit, instead of checking all
|
730
|
+
* of our bounds, since we can't hit the other bounds until we've run
|
731
|
+
* iters iterations, which only happens when op[3] == olimit.
|
732
|
+
*/
|
733
|
+
olimit = op[3] + symbols;
|
734
|
+
|
735
|
+
/* Exit fast decoding loop once we get close to the end. */
|
736
|
+
if (op[3] + 20 > olimit)
|
737
|
+
break;
|
738
|
+
|
739
|
+
/* Exit the decoding loop if any input pointer has crossed the
|
740
|
+
* previous one. This indicates corruption, and a precondition
|
741
|
+
* to our loop is that ip[i] >= ip[0].
|
742
|
+
*/
|
743
|
+
for (stream = 1; stream < 4; ++stream) {
|
744
|
+
if (ip[stream] < ip[stream - 1])
|
745
|
+
goto _out;
|
746
|
+
}
|
747
|
+
}
|
748
|
+
|
749
|
+
#ifndef NDEBUG
|
750
|
+
for (stream = 1; stream < 4; ++stream) {
|
751
|
+
assert(ip[stream] >= ip[stream - 1]);
|
752
|
+
}
|
753
|
+
#endif
|
754
|
+
|
755
|
+
do {
|
756
|
+
/* Decode 5 symbols in each of the 4 streams */
|
757
|
+
for (symbol = 0; symbol < 5; ++symbol) {
|
758
|
+
for (stream = 0; stream < 4; ++stream) {
|
759
|
+
int const index = (int)(bits[stream] >> 53);
|
760
|
+
int const entry = (int)dtable[index];
|
761
|
+
bits[stream] <<= (entry & 63);
|
762
|
+
op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
|
763
|
+
}
|
764
|
+
}
|
765
|
+
/* Reload the bitstreams */
|
766
|
+
for (stream = 0; stream < 4; ++stream) {
|
767
|
+
int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
|
768
|
+
int const nbBits = ctz & 7;
|
769
|
+
int const nbBytes = ctz >> 3;
|
770
|
+
op[stream] += 5;
|
771
|
+
ip[stream] -= nbBytes;
|
772
|
+
bits[stream] = MEM_read64(ip[stream]) | 1;
|
773
|
+
bits[stream] <<= nbBits;
|
774
|
+
}
|
775
|
+
} while (op[3] < olimit);
|
776
|
+
}
|
777
|
+
|
778
|
+
_out:
|
779
|
+
|
780
|
+
/* Save the final values of each of the state variables back to args. */
|
781
|
+
ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
|
782
|
+
ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
|
783
|
+
ZSTD_memcpy(&args->op, &op, sizeof(op));
|
386
784
|
}
|
387
785
|
|
388
|
-
|
389
|
-
|
390
|
-
|
786
|
+
/**
|
787
|
+
* @returns @p dstSize on success (>= 6)
|
788
|
+
* 0 if the fallback implementation should be used
|
789
|
+
* An error if an error occurred
|
790
|
+
*/
|
791
|
+
static HUF_FAST_BMI2_ATTRS
|
792
|
+
size_t
|
793
|
+
HUF_decompress4X1_usingDTable_internal_fast(
|
794
|
+
void* dst, size_t dstSize,
|
795
|
+
const void* cSrc, size_t cSrcSize,
|
796
|
+
const HUF_DTable* DTable,
|
797
|
+
HUF_DecompressFastLoopFn loopFn)
|
391
798
|
{
|
392
|
-
const
|
799
|
+
void const* dt = DTable + 1;
|
800
|
+
const BYTE* const iend = (const BYTE*)cSrc + 6;
|
801
|
+
BYTE* const oend = (BYTE*)dst + dstSize;
|
802
|
+
HUF_DecompressFastArgs args;
|
803
|
+
{ size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
804
|
+
FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
|
805
|
+
if (ret == 0)
|
806
|
+
return 0;
|
807
|
+
}
|
393
808
|
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
ip
|
809
|
+
assert(args.ip[0] >= args.ilimit);
|
810
|
+
loopFn(&args);
|
811
|
+
|
812
|
+
/* Our loop guarantees that ip[] >= ilimit and that we haven't
|
813
|
+
* overwritten any op[].
|
814
|
+
*/
|
815
|
+
assert(args.ip[0] >= iend);
|
816
|
+
assert(args.ip[1] >= iend);
|
817
|
+
assert(args.ip[2] >= iend);
|
818
|
+
assert(args.ip[3] >= iend);
|
819
|
+
assert(args.op[3] <= oend);
|
820
|
+
(void)iend;
|
821
|
+
|
822
|
+
/* finish bit streams one by one. */
|
823
|
+
{ size_t const segmentSize = (dstSize+3) / 4;
|
824
|
+
BYTE* segmentEnd = (BYTE*)dst;
|
825
|
+
int i;
|
826
|
+
for (i = 0; i < 4; ++i) {
|
827
|
+
BIT_DStream_t bit;
|
828
|
+
if (segmentSize <= (size_t)(oend - segmentEnd))
|
829
|
+
segmentEnd += segmentSize;
|
830
|
+
else
|
831
|
+
segmentEnd = oend;
|
832
|
+
FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
|
833
|
+
/* Decompress and validate that we've produced exactly the expected length. */
|
834
|
+
args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
|
835
|
+
if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
|
836
|
+
}
|
837
|
+
}
|
398
838
|
|
399
|
-
|
839
|
+
/* decoded size */
|
840
|
+
assert(dstSize != 0);
|
841
|
+
return dstSize;
|
400
842
|
}
|
401
843
|
|
844
|
+
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
|
402
845
|
|
403
|
-
size_t
|
404
|
-
|
846
|
+
static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
|
847
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags)
|
405
848
|
{
|
406
|
-
|
407
|
-
|
408
|
-
workSpace, sizeof(workSpace));
|
409
|
-
}
|
849
|
+
HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default;
|
850
|
+
HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop;
|
410
851
|
|
411
|
-
|
412
|
-
{
|
413
|
-
|
414
|
-
|
415
|
-
|
852
|
+
#if DYNAMIC_BMI2
|
853
|
+
if (flags & HUF_flags_bmi2) {
|
854
|
+
fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
|
855
|
+
# if ZSTD_ENABLE_ASM_X86_64_BMI2
|
856
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
857
|
+
loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
|
858
|
+
}
|
859
|
+
# endif
|
860
|
+
} else {
|
861
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
862
|
+
}
|
863
|
+
#endif
|
416
864
|
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
if (
|
424
|
-
|
865
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
|
866
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
867
|
+
loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
|
868
|
+
}
|
869
|
+
#endif
|
870
|
+
|
871
|
+
if (!(flags & HUF_flags_disableFast)) {
|
872
|
+
size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
|
873
|
+
if (ret != 0)
|
874
|
+
return ret;
|
875
|
+
}
|
876
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
425
877
|
}
|
426
878
|
|
427
|
-
static size_t
|
879
|
+
static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
428
880
|
const void* cSrc, size_t cSrcSize,
|
429
|
-
void* workSpace, size_t wkspSize, int
|
881
|
+
void* workSpace, size_t wkspSize, int flags)
|
430
882
|
{
|
431
883
|
const BYTE* ip = (const BYTE*) cSrc;
|
432
884
|
|
433
|
-
size_t const hSize = HUF_readDTableX1_wksp
|
434
|
-
workSpace, wkspSize);
|
885
|
+
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
435
886
|
if (HUF_isError(hSize)) return hSize;
|
436
887
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
437
888
|
ip += hSize; cSrcSize -= hSize;
|
438
889
|
|
439
|
-
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,
|
440
|
-
}
|
441
|
-
|
442
|
-
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
443
|
-
const void* cSrc, size_t cSrcSize,
|
444
|
-
void* workSpace, size_t wkspSize)
|
445
|
-
{
|
446
|
-
return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
|
447
|
-
}
|
448
|
-
|
449
|
-
|
450
|
-
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
451
|
-
{
|
452
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
453
|
-
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
454
|
-
workSpace, sizeof(workSpace));
|
455
|
-
}
|
456
|
-
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
457
|
-
{
|
458
|
-
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
459
|
-
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
890
|
+
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
460
891
|
}
|
461
892
|
|
462
893
|
#endif /* HUF_FORCE_DECOMPRESS_X2 */
|
@@ -469,209 +900,322 @@ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
469
900
|
/* *************************/
|
470
901
|
|
471
902
|
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
|
472
|
-
typedef struct { BYTE symbol;
|
903
|
+
typedef struct { BYTE symbol; } sortedSymbol_t;
|
473
904
|
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
|
474
905
|
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
|
475
906
|
|
907
|
+
/**
|
908
|
+
* Constructs a HUF_DEltX2 in a U32.
|
909
|
+
*/
|
910
|
+
static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
|
911
|
+
{
|
912
|
+
U32 seq;
|
913
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
|
914
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
|
915
|
+
DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
|
916
|
+
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
|
917
|
+
if (MEM_isLittleEndian()) {
|
918
|
+
seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
|
919
|
+
return seq + (nbBits << 16) + ((U32)level << 24);
|
920
|
+
} else {
|
921
|
+
seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
|
922
|
+
return (seq << 16) + (nbBits << 8) + (U32)level;
|
923
|
+
}
|
924
|
+
}
|
476
925
|
|
477
|
-
|
478
|
-
*
|
479
|
-
|
480
|
-
|
481
|
-
const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
|
482
|
-
U32 nbBitsBaseline, U16 baseSeq)
|
926
|
+
/**
|
927
|
+
* Constructs a HUF_DEltX2.
|
928
|
+
*/
|
929
|
+
static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
|
483
930
|
{
|
484
931
|
HUF_DEltX2 DElt;
|
485
|
-
U32
|
932
|
+
U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
|
933
|
+
DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
|
934
|
+
ZSTD_memcpy(&DElt, &val, sizeof(val));
|
935
|
+
return DElt;
|
936
|
+
}
|
937
|
+
|
938
|
+
/**
|
939
|
+
* Constructs 2 HUF_DEltX2s and packs them into a U64.
|
940
|
+
*/
|
941
|
+
static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
|
942
|
+
{
|
943
|
+
U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
|
944
|
+
return (U64)DElt + ((U64)DElt << 32);
|
945
|
+
}
|
486
946
|
|
487
|
-
|
488
|
-
|
947
|
+
/**
|
948
|
+
* Fills the DTable rank with all the symbols from [begin, end) that are each
|
949
|
+
* nbBits long.
|
950
|
+
*
|
951
|
+
* @param DTableRank The start of the rank in the DTable.
|
952
|
+
* @param begin The first symbol to fill (inclusive).
|
953
|
+
* @param end The last symbol to fill (exclusive).
|
954
|
+
* @param nbBits Each symbol is nbBits long.
|
955
|
+
* @param tableLog The table log.
|
956
|
+
* @param baseSeq If level == 1 { 0 } else { the first level symbol }
|
957
|
+
* @param level The level in the table. Must be 1 or 2.
|
958
|
+
*/
|
959
|
+
static void HUF_fillDTableX2ForWeight(
|
960
|
+
HUF_DEltX2* DTableRank,
|
961
|
+
sortedSymbol_t const* begin, sortedSymbol_t const* end,
|
962
|
+
U32 nbBits, U32 tableLog,
|
963
|
+
U16 baseSeq, int const level)
|
964
|
+
{
|
965
|
+
U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
|
966
|
+
const sortedSymbol_t* ptr;
|
967
|
+
assert(level >= 1 && level <= 2);
|
968
|
+
switch (length) {
|
969
|
+
case 1:
|
970
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
971
|
+
HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
|
972
|
+
*DTableRank++ = DElt;
|
973
|
+
}
|
974
|
+
break;
|
975
|
+
case 2:
|
976
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
977
|
+
HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
|
978
|
+
DTableRank[0] = DElt;
|
979
|
+
DTableRank[1] = DElt;
|
980
|
+
DTableRank += 2;
|
981
|
+
}
|
982
|
+
break;
|
983
|
+
case 4:
|
984
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
985
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
986
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
987
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
988
|
+
DTableRank += 4;
|
989
|
+
}
|
990
|
+
break;
|
991
|
+
case 8:
|
992
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
993
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
994
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
995
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
996
|
+
ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
|
997
|
+
ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
|
998
|
+
DTableRank += 8;
|
999
|
+
}
|
1000
|
+
break;
|
1001
|
+
default:
|
1002
|
+
for (ptr = begin; ptr != end; ++ptr) {
|
1003
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
|
1004
|
+
HUF_DEltX2* const DTableRankEnd = DTableRank + length;
|
1005
|
+
for (; DTableRank != DTableRankEnd; DTableRank += 8) {
|
1006
|
+
ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
|
1007
|
+
ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
|
1008
|
+
ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
|
1009
|
+
ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
|
1010
|
+
}
|
1011
|
+
}
|
1012
|
+
break;
|
1013
|
+
}
|
1014
|
+
}
|
489
1015
|
|
490
|
-
|
1016
|
+
/* HUF_fillDTableX2Level2() :
|
1017
|
+
* `rankVal` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
|
1018
|
+
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
|
1019
|
+
const U32* rankVal, const int minWeight, const int maxWeight1,
|
1020
|
+
const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
|
1021
|
+
U32 nbBitsBaseline, U16 baseSeq)
|
1022
|
+
{
|
1023
|
+
/* Fill skipped values (all positions up to rankVal[minWeight]).
|
1024
|
+
* These positions only get a single symbol because the combined weight
|
1025
|
+
* is too large.
|
1026
|
+
*/
|
491
1027
|
if (minWeight>1) {
|
492
|
-
U32
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
1028
|
+
U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
|
1029
|
+
U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
|
1030
|
+
int const skipSize = rankVal[minWeight];
|
1031
|
+
assert(length > 1);
|
1032
|
+
assert((U32)skipSize < length);
|
1033
|
+
switch (length) {
|
1034
|
+
case 2:
|
1035
|
+
assert(skipSize == 1);
|
1036
|
+
ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
|
1037
|
+
break;
|
1038
|
+
case 4:
|
1039
|
+
assert(skipSize <= 4);
|
1040
|
+
ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
|
1041
|
+
ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
|
1042
|
+
break;
|
1043
|
+
default:
|
1044
|
+
{
|
1045
|
+
int i;
|
1046
|
+
for (i = 0; i < skipSize; i += 8) {
|
1047
|
+
ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
|
1048
|
+
ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
|
1049
|
+
ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
|
1050
|
+
ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
|
1051
|
+
}
|
1052
|
+
}
|
1053
|
+
}
|
498
1054
|
}
|
499
1055
|
|
500
|
-
/*
|
501
|
-
{
|
502
|
-
|
503
|
-
|
504
|
-
const
|
505
|
-
const
|
506
|
-
const
|
507
|
-
U32
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
rankVal[weight] += length;
|
516
|
-
} }
|
1056
|
+
/* Fill each of the second level symbols by weight. */
|
1057
|
+
{
|
1058
|
+
int w;
|
1059
|
+
for (w = minWeight; w < maxWeight1; ++w) {
|
1060
|
+
int const begin = rankStart[w];
|
1061
|
+
int const end = rankStart[w+1];
|
1062
|
+
U32 const nbBits = nbBitsBaseline - w;
|
1063
|
+
U32 const totalBits = nbBits + consumedBits;
|
1064
|
+
HUF_fillDTableX2ForWeight(
|
1065
|
+
DTable + rankVal[w],
|
1066
|
+
sortedSymbols + begin, sortedSymbols + end,
|
1067
|
+
totalBits, targetLog,
|
1068
|
+
baseSeq, /* level */ 2);
|
1069
|
+
}
|
1070
|
+
}
|
517
1071
|
}
|
518
1072
|
|
519
|
-
|
520
1073
|
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
|
521
|
-
const sortedSymbol_t* sortedList,
|
522
|
-
const U32* rankStart,
|
1074
|
+
const sortedSymbol_t* sortedList,
|
1075
|
+
const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight,
|
523
1076
|
const U32 nbBitsBaseline)
|
524
1077
|
{
|
525
|
-
U32 rankVal
|
1078
|
+
U32* const rankVal = rankValOrigin[0];
|
526
1079
|
const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
|
527
1080
|
const U32 minBits = nbBitsBaseline - maxWeight;
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
const
|
535
|
-
const
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
U32 sortedRank;
|
1081
|
+
int w;
|
1082
|
+
int const wEnd = (int)maxWeight + 1;
|
1083
|
+
|
1084
|
+
/* Fill DTable in order of weight. */
|
1085
|
+
for (w = 1; w < wEnd; ++w) {
|
1086
|
+
int const begin = (int)rankStart[w];
|
1087
|
+
int const end = (int)rankStart[w+1];
|
1088
|
+
U32 const nbBits = nbBitsBaseline - w;
|
1089
|
+
|
1090
|
+
if (targetLog-nbBits >= minBits) {
|
1091
|
+
/* Enough room for a second symbol. */
|
1092
|
+
int start = rankVal[w];
|
1093
|
+
U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
|
542
1094
|
int minWeight = nbBits + scaleLog;
|
1095
|
+
int s;
|
543
1096
|
if (minWeight < 1) minWeight = 1;
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
1097
|
+
/* Fill the DTable for every symbol of weight w.
|
1098
|
+
* These symbols get at least 1 second symbol.
|
1099
|
+
*/
|
1100
|
+
for (s = begin; s != end; ++s) {
|
1101
|
+
HUF_fillDTableX2Level2(
|
1102
|
+
DTable + start, targetLog, nbBits,
|
1103
|
+
rankValOrigin[nbBits], minWeight, wEnd,
|
1104
|
+
sortedList, rankStart,
|
1105
|
+
nbBitsBaseline, sortedList[s].symbol);
|
1106
|
+
start += length;
|
1107
|
+
}
|
549
1108
|
} else {
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
} }
|
558
|
-
rankVal[weight] += length;
|
1109
|
+
/* Only a single symbol. */
|
1110
|
+
HUF_fillDTableX2ForWeight(
|
1111
|
+
DTable + rankVal[w],
|
1112
|
+
sortedList + begin, sortedList + end,
|
1113
|
+
nbBits, targetLog,
|
1114
|
+
/* baseSeq */ 0, /* level */ 1);
|
1115
|
+
}
|
559
1116
|
}
|
560
1117
|
}
|
561
1118
|
|
1119
|
+
typedef struct {
|
1120
|
+
rankValCol_t rankVal[HUF_TABLELOG_MAX];
|
1121
|
+
U32 rankStats[HUF_TABLELOG_MAX + 1];
|
1122
|
+
U32 rankStart0[HUF_TABLELOG_MAX + 3];
|
1123
|
+
sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
|
1124
|
+
BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
|
1125
|
+
U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
|
1126
|
+
} HUF_ReadDTableX2_Workspace;
|
1127
|
+
|
562
1128
|
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
|
563
1129
|
const void* src, size_t srcSize,
|
564
|
-
void* workSpace, size_t wkspSize)
|
1130
|
+
void* workSpace, size_t wkspSize, int flags)
|
565
1131
|
{
|
566
|
-
U32 tableLog, maxW,
|
1132
|
+
U32 tableLog, maxW, nbSymbols;
|
567
1133
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
568
|
-
U32
|
1134
|
+
U32 maxTableLog = dtd.maxTableLog;
|
569
1135
|
size_t iSize;
|
570
1136
|
void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
|
571
1137
|
HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
|
572
1138
|
U32 *rankStart;
|
573
1139
|
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
|
582
|
-
spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
|
583
|
-
rankStats = (U32 *)workSpace + spaceUsed32;
|
584
|
-
spaceUsed32 += HUF_TABLELOG_MAX + 1;
|
585
|
-
rankStart0 = (U32 *)workSpace + spaceUsed32;
|
586
|
-
spaceUsed32 += HUF_TABLELOG_MAX + 2;
|
587
|
-
sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
|
588
|
-
spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
|
589
|
-
weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
590
|
-
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
591
|
-
|
592
|
-
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
593
|
-
|
594
|
-
rankStart = rankStart0 + 1;
|
595
|
-
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
|
1140
|
+
HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
|
1141
|
+
|
1142
|
+
if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
|
1143
|
+
|
1144
|
+
rankStart = wksp->rankStart0 + 1;
|
1145
|
+
ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
|
1146
|
+
ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
|
596
1147
|
|
597
1148
|
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
|
598
1149
|
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
599
|
-
/*
|
1150
|
+
/* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
|
600
1151
|
|
601
|
-
iSize =
|
1152
|
+
iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags);
|
602
1153
|
if (HUF_isError(iSize)) return iSize;
|
603
1154
|
|
604
1155
|
/* check result */
|
605
1156
|
if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
|
1157
|
+
if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
|
606
1158
|
|
607
1159
|
/* find maxWeight */
|
608
|
-
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
|
1160
|
+
for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
|
609
1161
|
|
610
1162
|
/* Get start index of each weight */
|
611
1163
|
{ U32 w, nextRankStart = 0;
|
612
1164
|
for (w=1; w<maxW+1; w++) {
|
613
|
-
U32
|
614
|
-
nextRankStart += rankStats[w];
|
615
|
-
rankStart[w] =
|
1165
|
+
U32 curr = nextRankStart;
|
1166
|
+
nextRankStart += wksp->rankStats[w];
|
1167
|
+
rankStart[w] = curr;
|
616
1168
|
}
|
617
1169
|
rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
|
618
|
-
|
1170
|
+
rankStart[maxW+1] = nextRankStart;
|
619
1171
|
}
|
620
1172
|
|
621
1173
|
/* sort symbols by weight */
|
622
1174
|
{ U32 s;
|
623
1175
|
for (s=0; s<nbSymbols; s++) {
|
624
|
-
U32 const w = weightList[s];
|
1176
|
+
U32 const w = wksp->weightList[s];
|
625
1177
|
U32 const r = rankStart[w]++;
|
626
|
-
sortedSymbol[r].symbol = (BYTE)s;
|
627
|
-
sortedSymbol[r].weight = (BYTE)w;
|
1178
|
+
wksp->sortedSymbol[r].symbol = (BYTE)s;
|
628
1179
|
}
|
629
1180
|
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
|
630
1181
|
}
|
631
1182
|
|
632
1183
|
/* Build rankVal */
|
633
|
-
{ U32* const rankVal0 = rankVal[0];
|
1184
|
+
{ U32* const rankVal0 = wksp->rankVal[0];
|
634
1185
|
{ int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
|
635
1186
|
U32 nextRankVal = 0;
|
636
1187
|
U32 w;
|
637
1188
|
for (w=1; w<maxW+1; w++) {
|
638
|
-
U32
|
639
|
-
nextRankVal += rankStats[w] << (w+rescale);
|
640
|
-
rankVal0[w] =
|
1189
|
+
U32 curr = nextRankVal;
|
1190
|
+
nextRankVal += wksp->rankStats[w] << (w+rescale);
|
1191
|
+
rankVal0[w] = curr;
|
641
1192
|
} }
|
642
1193
|
{ U32 const minBits = tableLog+1 - maxW;
|
643
1194
|
U32 consumed;
|
644
1195
|
for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
|
645
|
-
U32* const rankValPtr = rankVal[consumed];
|
1196
|
+
U32* const rankValPtr = wksp->rankVal[consumed];
|
646
1197
|
U32 w;
|
647
1198
|
for (w = 1; w < maxW+1; w++) {
|
648
1199
|
rankValPtr[w] = rankVal0[w] >> consumed;
|
649
1200
|
} } } }
|
650
1201
|
|
651
1202
|
HUF_fillDTableX2(dt, maxTableLog,
|
652
|
-
sortedSymbol,
|
653
|
-
rankStart0, rankVal, maxW,
|
1203
|
+
wksp->sortedSymbol,
|
1204
|
+
wksp->rankStart0, wksp->rankVal, maxW,
|
654
1205
|
tableLog+1);
|
655
1206
|
|
656
1207
|
dtd.tableLog = (BYTE)maxTableLog;
|
657
1208
|
dtd.tableType = 1;
|
658
|
-
|
1209
|
+
ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
|
659
1210
|
return iSize;
|
660
1211
|
}
|
661
1212
|
|
662
|
-
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
663
|
-
{
|
664
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
665
|
-
return HUF_readDTableX2_wksp(DTable, src, srcSize,
|
666
|
-
workSpace, sizeof(workSpace));
|
667
|
-
}
|
668
|
-
|
669
1213
|
|
670
1214
|
FORCE_INLINE_TEMPLATE U32
|
671
1215
|
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
672
1216
|
{
|
673
1217
|
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
674
|
-
|
1218
|
+
ZSTD_memcpy(op, &dt[val].sequence, 2);
|
675
1219
|
BIT_skipBits(DStream, dt[val].nbBits);
|
676
1220
|
return dt[val].length;
|
677
1221
|
}
|
@@ -680,15 +1224,17 @@ FORCE_INLINE_TEMPLATE U32
|
|
680
1224
|
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
681
1225
|
{
|
682
1226
|
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
683
|
-
|
684
|
-
if (dt[val].length==1)
|
685
|
-
|
1227
|
+
ZSTD_memcpy(op, &dt[val].sequence, 1);
|
1228
|
+
if (dt[val].length==1) {
|
1229
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
1230
|
+
} else {
|
686
1231
|
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
687
1232
|
BIT_skipBits(DStream, dt[val].nbBits);
|
688
1233
|
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
689
1234
|
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
690
1235
|
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
691
|
-
|
1236
|
+
}
|
1237
|
+
}
|
692
1238
|
return 1;
|
693
1239
|
}
|
694
1240
|
|
@@ -710,19 +1256,37 @@ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
|
710
1256
|
BYTE* const pStart = p;
|
711
1257
|
|
712
1258
|
/* up to 8 symbols at a time */
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
1259
|
+
if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
|
1260
|
+
if (dtLog <= 11 && MEM_64bits()) {
|
1261
|
+
/* up to 10 symbols at a time */
|
1262
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
|
1263
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
1264
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
1265
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
1266
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
1267
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
1268
|
+
}
|
1269
|
+
} else {
|
1270
|
+
/* up to 8 symbols at a time */
|
1271
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
1272
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
1273
|
+
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
|
1274
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
1275
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
1276
|
+
}
|
1277
|
+
}
|
1278
|
+
} else {
|
1279
|
+
BIT_reloadDStream(bitDPtr);
|
718
1280
|
}
|
719
1281
|
|
720
1282
|
/* closer to end : up to 2 symbols at a time */
|
721
|
-
|
722
|
-
|
1283
|
+
if ((size_t)(pEnd - p) >= 2) {
|
1284
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
1285
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
723
1286
|
|
724
|
-
|
725
|
-
|
1287
|
+
while (p <= pEnd-2)
|
1288
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
1289
|
+
}
|
726
1290
|
|
727
1291
|
if (p < pEnd)
|
728
1292
|
p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
|
@@ -757,7 +1321,10 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
757
1321
|
return dstSize;
|
758
1322
|
}
|
759
1323
|
|
760
|
-
|
1324
|
+
/* HUF_decompress4X2_usingDTable_internal_body():
|
1325
|
+
* Conditions:
|
1326
|
+
* @dstSize >= 6
|
1327
|
+
*/
|
761
1328
|
FORCE_INLINE_TEMPLATE size_t
|
762
1329
|
HUF_decompress4X2_usingDTable_internal_body(
|
763
1330
|
void* dst, size_t dstSize,
|
@@ -769,6 +1336,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
769
1336
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
770
1337
|
BYTE* const ostart = (BYTE*) dst;
|
771
1338
|
BYTE* const oend = ostart + dstSize;
|
1339
|
+
BYTE* const olimit = oend - (sizeof(size_t)-1);
|
772
1340
|
const void* const dtPtr = DTable+1;
|
773
1341
|
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
|
774
1342
|
|
@@ -793,37 +1361,66 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
793
1361
|
BYTE* op2 = opStart2;
|
794
1362
|
BYTE* op3 = opStart3;
|
795
1363
|
BYTE* op4 = opStart4;
|
796
|
-
U32 endSignal;
|
1364
|
+
U32 endSignal = 1;
|
797
1365
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
798
1366
|
U32 const dtLog = dtd.tableLog;
|
799
1367
|
|
800
|
-
if (length4 > cSrcSize) return ERROR(corruption_detected);
|
1368
|
+
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
1369
|
+
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
1370
|
+
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
|
801
1371
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
802
1372
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
803
1373
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
804
1374
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
805
1375
|
|
806
1376
|
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
1377
|
+
if ((size_t)(oend - op4) >= sizeof(size_t)) {
|
1378
|
+
for ( ; (endSignal) & (op4 < olimit); ) {
|
1379
|
+
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
1380
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
1381
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
1382
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
1383
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
1384
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
1385
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
1386
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
1387
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
1388
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
1389
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
1390
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
1391
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
1392
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
1393
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
1394
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
1395
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
1396
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
1397
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
1398
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
1399
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
1400
|
+
#else
|
1401
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
1402
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
1403
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
1404
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
1405
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
1406
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
1407
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
1408
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
1409
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
1410
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
1411
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
1412
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
1413
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
1414
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
1415
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
1416
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
1417
|
+
endSignal = (U32)LIKELY((U32)
|
1418
|
+
(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
|
1419
|
+
& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
|
1420
|
+
& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
|
1421
|
+
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
|
1422
|
+
#endif
|
1423
|
+
}
|
827
1424
|
}
|
828
1425
|
|
829
1426
|
/* check corruption */
|
@@ -847,94 +1444,279 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
847
1444
|
}
|
848
1445
|
}
|
849
1446
|
|
850
|
-
|
851
|
-
|
1447
|
+
#if HUF_NEED_BMI2_FUNCTION
|
1448
|
+
static BMI2_TARGET_ATTRIBUTE
|
1449
|
+
size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
|
1450
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
1451
|
+
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
1452
|
+
}
|
1453
|
+
#endif
|
852
1454
|
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
{
|
858
|
-
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
859
|
-
if (dtd.tableType != 1) return ERROR(GENERIC);
|
860
|
-
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
1455
|
+
static
|
1456
|
+
size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
|
1457
|
+
size_t cSrcSize, HUF_DTable const* DTable) {
|
1458
|
+
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
861
1459
|
}
|
862
1460
|
|
863
|
-
|
864
|
-
|
865
|
-
|
1461
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2
|
1462
|
+
|
1463
|
+
HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
|
1464
|
+
|
1465
|
+
#endif
|
1466
|
+
|
1467
|
+
static HUF_FAST_BMI2_ATTRS
|
1468
|
+
void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
|
866
1469
|
{
|
867
|
-
|
1470
|
+
U64 bits[4];
|
1471
|
+
BYTE const* ip[4];
|
1472
|
+
BYTE* op[4];
|
1473
|
+
BYTE* oend[4];
|
1474
|
+
HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
|
1475
|
+
BYTE const* const ilimit = args->ilimit;
|
1476
|
+
|
1477
|
+
/* Copy the arguments to local registers. */
|
1478
|
+
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
|
1479
|
+
ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
|
1480
|
+
ZSTD_memcpy(&op, &args->op, sizeof(op));
|
1481
|
+
|
1482
|
+
oend[0] = op[1];
|
1483
|
+
oend[1] = op[2];
|
1484
|
+
oend[2] = op[3];
|
1485
|
+
oend[3] = args->oend;
|
1486
|
+
|
1487
|
+
assert(MEM_isLittleEndian());
|
1488
|
+
assert(!MEM_32bits());
|
1489
|
+
|
1490
|
+
for (;;) {
|
1491
|
+
BYTE* olimit;
|
1492
|
+
int stream;
|
1493
|
+
int symbol;
|
1494
|
+
|
1495
|
+
/* Assert loop preconditions */
|
1496
|
+
#ifndef NDEBUG
|
1497
|
+
for (stream = 0; stream < 4; ++stream) {
|
1498
|
+
assert(op[stream] <= oend[stream]);
|
1499
|
+
assert(ip[stream] >= ilimit);
|
1500
|
+
}
|
1501
|
+
#endif
|
1502
|
+
/* Compute olimit */
|
1503
|
+
{
|
1504
|
+
/* Each loop does 5 table lookups for each of the 4 streams.
|
1505
|
+
* Each table lookup consumes up to 11 bits of input, and produces
|
1506
|
+
* up to 2 bytes of output.
|
1507
|
+
*/
|
1508
|
+
/* We can consume up to 7 bytes of input per iteration per stream.
|
1509
|
+
* We also know that each input pointer is >= ip[0]. So we can run
|
1510
|
+
* iters loops before running out of input.
|
1511
|
+
*/
|
1512
|
+
size_t iters = (size_t)(ip[0] - ilimit) / 7;
|
1513
|
+
/* Each iteration can produce up to 10 bytes of output per stream.
|
1514
|
+
* Each output stream may advance at different rates. So take the
|
1515
|
+
* minimum number of safe iterations among all the output streams.
|
1516
|
+
*/
|
1517
|
+
for (stream = 0; stream < 4; ++stream) {
|
1518
|
+
size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
|
1519
|
+
iters = MIN(iters, oiters);
|
1520
|
+
}
|
1521
|
+
|
1522
|
+
/* Each iteration produces at least 5 output symbols. So until
|
1523
|
+
* op[3] crosses olimit, we know we haven't executed iters
|
1524
|
+
* iterations yet. This saves us maintaining an iters counter,
|
1525
|
+
* at the expense of computing the remaining # of iterations
|
1526
|
+
* more frequently.
|
1527
|
+
*/
|
1528
|
+
olimit = op[3] + (iters * 5);
|
1529
|
+
|
1530
|
+
/* Exit the fast decoding loop if we are too close to the end. */
|
1531
|
+
if (op[3] + 10 > olimit)
|
1532
|
+
break;
|
1533
|
+
|
1534
|
+
/* Exit the decoding loop if any input pointer has crossed the
|
1535
|
+
* previous one. This indicates corruption, and a precondition
|
1536
|
+
* to our loop is that ip[i] >= ip[0].
|
1537
|
+
*/
|
1538
|
+
for (stream = 1; stream < 4; ++stream) {
|
1539
|
+
if (ip[stream] < ip[stream - 1])
|
1540
|
+
goto _out;
|
1541
|
+
}
|
1542
|
+
}
|
868
1543
|
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
1544
|
+
#ifndef NDEBUG
|
1545
|
+
for (stream = 1; stream < 4; ++stream) {
|
1546
|
+
assert(ip[stream] >= ip[stream - 1]);
|
1547
|
+
}
|
1548
|
+
#endif
|
874
1549
|
|
875
|
-
|
876
|
-
|
1550
|
+
do {
|
1551
|
+
/* Do 5 table lookups for each of the first 3 streams */
|
1552
|
+
for (symbol = 0; symbol < 5; ++symbol) {
|
1553
|
+
for (stream = 0; stream < 3; ++stream) {
|
1554
|
+
int const index = (int)(bits[stream] >> 53);
|
1555
|
+
HUF_DEltX2 const entry = dtable[index];
|
1556
|
+
MEM_write16(op[stream], entry.sequence);
|
1557
|
+
bits[stream] <<= (entry.nbBits);
|
1558
|
+
op[stream] += (entry.length);
|
1559
|
+
}
|
1560
|
+
}
|
1561
|
+
/* Do 1 table lookup from the final stream */
|
1562
|
+
{
|
1563
|
+
int const index = (int)(bits[3] >> 53);
|
1564
|
+
HUF_DEltX2 const entry = dtable[index];
|
1565
|
+
MEM_write16(op[3], entry.sequence);
|
1566
|
+
bits[3] <<= (entry.nbBits);
|
1567
|
+
op[3] += (entry.length);
|
1568
|
+
}
|
1569
|
+
/* Do 4 table lookups from the final stream & reload bitstreams */
|
1570
|
+
for (stream = 0; stream < 4; ++stream) {
|
1571
|
+
/* Do a table lookup from the final stream.
|
1572
|
+
* This is interleaved with the reloading to reduce register
|
1573
|
+
* pressure. This shouldn't be necessary, but compilers can
|
1574
|
+
* struggle with codegen with high register pressure.
|
1575
|
+
*/
|
1576
|
+
{
|
1577
|
+
int const index = (int)(bits[3] >> 53);
|
1578
|
+
HUF_DEltX2 const entry = dtable[index];
|
1579
|
+
MEM_write16(op[3], entry.sequence);
|
1580
|
+
bits[3] <<= (entry.nbBits);
|
1581
|
+
op[3] += (entry.length);
|
1582
|
+
}
|
1583
|
+
/* Reload the bitstreams. The final bitstream must be reloaded
|
1584
|
+
* after the 5th symbol was decoded.
|
1585
|
+
*/
|
1586
|
+
{
|
1587
|
+
int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
|
1588
|
+
int const nbBits = ctz & 7;
|
1589
|
+
int const nbBytes = ctz >> 3;
|
1590
|
+
ip[stream] -= nbBytes;
|
1591
|
+
bits[stream] = MEM_read64(ip[stream]) | 1;
|
1592
|
+
bits[stream] <<= nbBits;
|
1593
|
+
}
|
1594
|
+
}
|
1595
|
+
} while (op[3] < olimit);
|
1596
|
+
}
|
877
1597
|
|
1598
|
+
_out:
|
878
1599
|
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
884
|
-
workSpace, sizeof(workSpace));
|
1600
|
+
/* Save the final values of each of the state variables back to args. */
|
1601
|
+
ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
|
1602
|
+
ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
|
1603
|
+
ZSTD_memcpy(&args->op, &op, sizeof(op));
|
885
1604
|
}
|
886
1605
|
|
887
|
-
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
888
|
-
{
|
889
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
890
|
-
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
891
|
-
}
|
892
1606
|
|
893
|
-
size_t
|
1607
|
+
static HUF_FAST_BMI2_ATTRS size_t
|
1608
|
+
HUF_decompress4X2_usingDTable_internal_fast(
|
894
1609
|
void* dst, size_t dstSize,
|
895
1610
|
const void* cSrc, size_t cSrcSize,
|
896
|
-
const HUF_DTable* DTable
|
1611
|
+
const HUF_DTable* DTable,
|
1612
|
+
HUF_DecompressFastLoopFn loopFn) {
|
1613
|
+
void const* dt = DTable + 1;
|
1614
|
+
const BYTE* const iend = (const BYTE*)cSrc + 6;
|
1615
|
+
BYTE* const oend = (BYTE*)dst + dstSize;
|
1616
|
+
HUF_DecompressFastArgs args;
|
1617
|
+
{
|
1618
|
+
size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
1619
|
+
FORWARD_IF_ERROR(ret, "Failed to init asm args");
|
1620
|
+
if (ret == 0)
|
1621
|
+
return 0;
|
1622
|
+
}
|
1623
|
+
|
1624
|
+
assert(args.ip[0] >= args.ilimit);
|
1625
|
+
loopFn(&args);
|
1626
|
+
|
1627
|
+
/* note : op4 already verified within main loop */
|
1628
|
+
assert(args.ip[0] >= iend);
|
1629
|
+
assert(args.ip[1] >= iend);
|
1630
|
+
assert(args.ip[2] >= iend);
|
1631
|
+
assert(args.ip[3] >= iend);
|
1632
|
+
assert(args.op[3] <= oend);
|
1633
|
+
(void)iend;
|
1634
|
+
|
1635
|
+
/* finish bitStreams one by one */
|
1636
|
+
{
|
1637
|
+
size_t const segmentSize = (dstSize+3) / 4;
|
1638
|
+
BYTE* segmentEnd = (BYTE*)dst;
|
1639
|
+
int i;
|
1640
|
+
for (i = 0; i < 4; ++i) {
|
1641
|
+
BIT_DStream_t bit;
|
1642
|
+
if (segmentSize <= (size_t)(oend - segmentEnd))
|
1643
|
+
segmentEnd += segmentSize;
|
1644
|
+
else
|
1645
|
+
segmentEnd = oend;
|
1646
|
+
FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
|
1647
|
+
args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
|
1648
|
+
if (args.op[i] != segmentEnd)
|
1649
|
+
return ERROR(corruption_detected);
|
1650
|
+
}
|
1651
|
+
}
|
1652
|
+
|
1653
|
+
/* decoded size */
|
1654
|
+
return dstSize;
|
1655
|
+
}
|
1656
|
+
|
1657
|
+
static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
|
1658
|
+
size_t cSrcSize, HUF_DTable const* DTable, int flags)
|
897
1659
|
{
|
898
|
-
|
899
|
-
|
900
|
-
|
1660
|
+
HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default;
|
1661
|
+
HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop;
|
1662
|
+
|
1663
|
+
#if DYNAMIC_BMI2
|
1664
|
+
if (flags & HUF_flags_bmi2) {
|
1665
|
+
fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
|
1666
|
+
# if ZSTD_ENABLE_ASM_X86_64_BMI2
|
1667
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
1668
|
+
loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
|
1669
|
+
}
|
1670
|
+
# endif
|
1671
|
+
} else {
|
1672
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
1673
|
+
}
|
1674
|
+
#endif
|
1675
|
+
|
1676
|
+
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
|
1677
|
+
if (!(flags & HUF_flags_disableAsm)) {
|
1678
|
+
loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
|
1679
|
+
}
|
1680
|
+
#endif
|
1681
|
+
|
1682
|
+
if (!(flags & HUF_flags_disableFast)) {
|
1683
|
+
size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
|
1684
|
+
if (ret != 0)
|
1685
|
+
return ret;
|
1686
|
+
}
|
1687
|
+
return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
|
901
1688
|
}
|
902
1689
|
|
903
|
-
|
1690
|
+
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
|
1691
|
+
|
1692
|
+
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
904
1693
|
const void* cSrc, size_t cSrcSize,
|
905
|
-
void* workSpace, size_t wkspSize, int
|
1694
|
+
void* workSpace, size_t wkspSize, int flags)
|
906
1695
|
{
|
907
1696
|
const BYTE* ip = (const BYTE*) cSrc;
|
908
1697
|
|
909
|
-
size_t hSize = HUF_readDTableX2_wksp(
|
910
|
-
|
1698
|
+
size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
|
1699
|
+
workSpace, wkspSize, flags);
|
911
1700
|
if (HUF_isError(hSize)) return hSize;
|
912
1701
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
913
1702
|
ip += hSize; cSrcSize -= hSize;
|
914
1703
|
|
915
|
-
return
|
1704
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
|
916
1705
|
}
|
917
1706
|
|
918
|
-
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
1707
|
+
static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
919
1708
|
const void* cSrc, size_t cSrcSize,
|
920
|
-
void* workSpace, size_t wkspSize)
|
1709
|
+
void* workSpace, size_t wkspSize, int flags)
|
921
1710
|
{
|
922
|
-
|
923
|
-
}
|
924
|
-
|
1711
|
+
const BYTE* ip = (const BYTE*) cSrc;
|
925
1712
|
|
926
|
-
size_t
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
workSpace, sizeof(workSpace));
|
932
|
-
}
|
1713
|
+
size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
|
1714
|
+
workSpace, wkspSize, flags);
|
1715
|
+
if (HUF_isError(hSize)) return hSize;
|
1716
|
+
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
1717
|
+
ip += hSize; cSrcSize -= hSize;
|
933
1718
|
|
934
|
-
|
935
|
-
{
|
936
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
937
|
-
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
1719
|
+
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
938
1720
|
}
|
939
1721
|
|
940
1722
|
#endif /* HUF_FORCE_DECOMPRESS_X1 */
|
@@ -944,66 +1726,28 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
944
1726
|
/* Universal decompression selectors */
|
945
1727
|
/* ***********************************/
|
946
1728
|
|
947
|
-
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
|
948
|
-
const void* cSrc, size_t cSrcSize,
|
949
|
-
const HUF_DTable* DTable)
|
950
|
-
{
|
951
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
952
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
953
|
-
(void)dtd;
|
954
|
-
assert(dtd.tableType == 0);
|
955
|
-
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
956
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
957
|
-
(void)dtd;
|
958
|
-
assert(dtd.tableType == 1);
|
959
|
-
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
960
|
-
#else
|
961
|
-
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
962
|
-
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
963
|
-
#endif
|
964
|
-
}
|
965
|
-
|
966
|
-
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
967
|
-
const void* cSrc, size_t cSrcSize,
|
968
|
-
const HUF_DTable* DTable)
|
969
|
-
{
|
970
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
971
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
972
|
-
(void)dtd;
|
973
|
-
assert(dtd.tableType == 0);
|
974
|
-
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
975
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
976
|
-
(void)dtd;
|
977
|
-
assert(dtd.tableType == 1);
|
978
|
-
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
979
|
-
#else
|
980
|
-
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
981
|
-
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
982
|
-
#endif
|
983
|
-
}
|
984
|
-
|
985
1729
|
|
986
1730
|
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
987
1731
|
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
|
988
|
-
static const algo_time_t algoTime[16 /* Quantization */][
|
1732
|
+
static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
|
989
1733
|
{
|
990
1734
|
/* single, double */
|
991
|
-
{{0,0}, {1,1}
|
992
|
-
{{0,0}, {1,1}
|
993
|
-
{{
|
994
|
-
{{
|
995
|
-
{{
|
996
|
-
{{
|
997
|
-
{{
|
998
|
-
{{
|
999
|
-
{{
|
1000
|
-
{{
|
1001
|
-
{{
|
1002
|
-
{{
|
1003
|
-
{{
|
1004
|
-
{{
|
1005
|
-
{{
|
1006
|
-
{{
|
1735
|
+
{{0,0}, {1,1}}, /* Q==0 : impossible */
|
1736
|
+
{{0,0}, {1,1}}, /* Q==1 : impossible */
|
1737
|
+
{{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */
|
1738
|
+
{{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */
|
1739
|
+
{{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */
|
1740
|
+
{{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */
|
1741
|
+
{{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */
|
1742
|
+
{{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */
|
1743
|
+
{{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */
|
1744
|
+
{{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */
|
1745
|
+
{{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */
|
1746
|
+
{{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */
|
1747
|
+
{{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */
|
1748
|
+
{{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */
|
1749
|
+
{{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */
|
1750
|
+
{{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */
|
1007
1751
|
};
|
1008
1752
|
#endif
|
1009
1753
|
|
@@ -1030,188 +1774,92 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
|
1030
1774
|
U32 const D256 = (U32)(dstSize >> 8);
|
1031
1775
|
U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
|
1032
1776
|
U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
|
1033
|
-
DTime1 += DTime1 >>
|
1777
|
+
DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
|
1034
1778
|
return DTime1 < DTime0;
|
1035
1779
|
}
|
1036
1780
|
#endif
|
1037
1781
|
}
|
1038
1782
|
|
1039
|
-
|
1040
|
-
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
1041
|
-
|
1042
|
-
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1043
|
-
{
|
1044
|
-
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
1045
|
-
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
|
1046
|
-
#endif
|
1047
|
-
|
1048
|
-
/* validation checks */
|
1049
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
1050
|
-
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
1051
|
-
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
1052
|
-
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
1053
|
-
|
1054
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1055
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1056
|
-
(void)algoNb;
|
1057
|
-
assert(algoNb == 0);
|
1058
|
-
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
|
1059
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1060
|
-
(void)algoNb;
|
1061
|
-
assert(algoNb == 1);
|
1062
|
-
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
|
1063
|
-
#else
|
1064
|
-
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
1065
|
-
#endif
|
1066
|
-
}
|
1067
|
-
}
|
1068
|
-
|
1069
|
-
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1070
|
-
{
|
1071
|
-
/* validation checks */
|
1072
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
1073
|
-
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
1074
|
-
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
1075
|
-
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
1076
|
-
|
1077
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1078
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1079
|
-
(void)algoNb;
|
1080
|
-
assert(algoNb == 0);
|
1081
|
-
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
1082
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1083
|
-
(void)algoNb;
|
1084
|
-
assert(algoNb == 1);
|
1085
|
-
return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
1086
|
-
#else
|
1087
|
-
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
|
1088
|
-
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
|
1089
|
-
#endif
|
1090
|
-
}
|
1091
|
-
}
|
1092
|
-
|
1093
|
-
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
1094
|
-
{
|
1095
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
1096
|
-
return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
1097
|
-
workSpace, sizeof(workSpace));
|
1098
|
-
}
|
1099
|
-
|
1100
|
-
|
1101
|
-
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
|
1102
|
-
size_t dstSize, const void* cSrc,
|
1103
|
-
size_t cSrcSize, void* workSpace,
|
1104
|
-
size_t wkspSize)
|
1105
|
-
{
|
1106
|
-
/* validation checks */
|
1107
|
-
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
1108
|
-
if (cSrcSize == 0) return ERROR(corruption_detected);
|
1109
|
-
|
1110
|
-
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1111
|
-
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1112
|
-
(void)algoNb;
|
1113
|
-
assert(algoNb == 0);
|
1114
|
-
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
1115
|
-
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1116
|
-
(void)algoNb;
|
1117
|
-
assert(algoNb == 1);
|
1118
|
-
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
1119
|
-
#else
|
1120
|
-
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1121
|
-
cSrcSize, workSpace, wkspSize):
|
1122
|
-
HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
1123
|
-
#endif
|
1124
|
-
}
|
1125
|
-
}
|
1126
|
-
|
1127
1783
|
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
1128
1784
|
const void* cSrc, size_t cSrcSize,
|
1129
|
-
void* workSpace, size_t wkspSize)
|
1785
|
+
void* workSpace, size_t wkspSize, int flags)
|
1130
1786
|
{
|
1131
1787
|
/* validation checks */
|
1132
1788
|
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
1133
1789
|
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
|
1134
|
-
if (cSrcSize == dstSize) {
|
1135
|
-
if (cSrcSize == 1) {
|
1790
|
+
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
|
1791
|
+
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
1136
1792
|
|
1137
1793
|
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1138
1794
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1139
1795
|
(void)algoNb;
|
1140
1796
|
assert(algoNb == 0);
|
1141
1797
|
return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1142
|
-
cSrcSize, workSpace, wkspSize);
|
1798
|
+
cSrcSize, workSpace, wkspSize, flags);
|
1143
1799
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1144
1800
|
(void)algoNb;
|
1145
1801
|
assert(algoNb == 1);
|
1146
1802
|
return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1147
|
-
cSrcSize, workSpace, wkspSize);
|
1803
|
+
cSrcSize, workSpace, wkspSize, flags);
|
1148
1804
|
#else
|
1149
1805
|
return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1150
|
-
cSrcSize, workSpace, wkspSize):
|
1806
|
+
cSrcSize, workSpace, wkspSize, flags):
|
1151
1807
|
HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1152
|
-
cSrcSize, workSpace, wkspSize);
|
1808
|
+
cSrcSize, workSpace, wkspSize, flags);
|
1153
1809
|
#endif
|
1154
1810
|
}
|
1155
1811
|
}
|
1156
1812
|
|
1157
|
-
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
1158
|
-
const void* cSrc, size_t cSrcSize)
|
1159
|
-
{
|
1160
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
1161
|
-
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
1162
|
-
workSpace, sizeof(workSpace));
|
1163
|
-
}
|
1164
|
-
|
1165
1813
|
|
1166
|
-
size_t
|
1814
|
+
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
|
1167
1815
|
{
|
1168
1816
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1169
1817
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1170
1818
|
(void)dtd;
|
1171
1819
|
assert(dtd.tableType == 0);
|
1172
|
-
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1820
|
+
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1173
1821
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1174
1822
|
(void)dtd;
|
1175
1823
|
assert(dtd.tableType == 1);
|
1176
|
-
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1824
|
+
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1177
1825
|
#else
|
1178
|
-
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1179
|
-
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1826
|
+
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
|
1827
|
+
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1180
1828
|
#endif
|
1181
1829
|
}
|
1182
1830
|
|
1183
1831
|
#ifndef HUF_FORCE_DECOMPRESS_X2
|
1184
|
-
size_t
|
1832
|
+
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
|
1185
1833
|
{
|
1186
1834
|
const BYTE* ip = (const BYTE*) cSrc;
|
1187
1835
|
|
1188
|
-
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
|
1836
|
+
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
1189
1837
|
if (HUF_isError(hSize)) return hSize;
|
1190
1838
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
1191
1839
|
ip += hSize; cSrcSize -= hSize;
|
1192
1840
|
|
1193
|
-
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx,
|
1841
|
+
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
|
1194
1842
|
}
|
1195
1843
|
#endif
|
1196
1844
|
|
1197
|
-
size_t
|
1845
|
+
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
|
1198
1846
|
{
|
1199
1847
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1200
1848
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1201
1849
|
(void)dtd;
|
1202
1850
|
assert(dtd.tableType == 0);
|
1203
|
-
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1851
|
+
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1204
1852
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1205
1853
|
(void)dtd;
|
1206
1854
|
assert(dtd.tableType == 1);
|
1207
|
-
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1855
|
+
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1208
1856
|
#else
|
1209
|
-
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1210
|
-
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable,
|
1857
|
+
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
|
1858
|
+
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
|
1211
1859
|
#endif
|
1212
1860
|
}
|
1213
1861
|
|
1214
|
-
size_t
|
1862
|
+
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
|
1215
1863
|
{
|
1216
1864
|
/* validation checks */
|
1217
1865
|
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
@@ -1221,14 +1869,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
|
|
1221
1869
|
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
1222
1870
|
(void)algoNb;
|
1223
1871
|
assert(algoNb == 0);
|
1224
|
-
return
|
1872
|
+
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
1225
1873
|
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
1226
1874
|
(void)algoNb;
|
1227
1875
|
assert(algoNb == 1);
|
1228
|
-
return
|
1876
|
+
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
1229
1877
|
#else
|
1230
|
-
return algoNb ?
|
1231
|
-
|
1878
|
+
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) :
|
1879
|
+
HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
|
1232
1880
|
#endif
|
1233
1881
|
}
|
1234
1882
|
}
|