zstdlib 0.12.0-x86_64-darwin → 0.13.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +7 -0
- data/Rakefile +1 -1
- data/ext/zstdlib_c/extconf.rb +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/allocations.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/bitstream.h +49 -29
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/compiler.h +114 -22
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/cpu.h +36 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/debug.c +6 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/debug.h +20 -11
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/error_private.h +45 -36
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/fse.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/fse_decompress.c +19 -17
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/huf.h +14 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/mem.h +0 -9
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/pool.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/pool.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/portability_macros.h +2 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/threading.c +8 -2
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/xxhash.c +5 -11
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/xxhash.h +2341 -1007
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_internal.h +5 -5
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/fse_compress.c +8 -7
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/huf_compress.c +54 -25
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress.c +282 -161
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_internal.h +29 -27
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_superblock.c +224 -113
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_cwksp.h +19 -13
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_double_fast.c +17 -5
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_double_fast.h +11 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_fast.c +14 -6
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_lazy.c +129 -87
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_lazy.h +103 -28
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_ldm.c +8 -2
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_opt.c +216 -112
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_opt.h +31 -7
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstdmt_compress.c +94 -79
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/huf_decompress.c +188 -126
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/huf_decompress_amd64.S +38 -19
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress.c +84 -32
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress_block.c +231 -208
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress_block.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress_internal.h +2 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/zstd.h +129 -60
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzclose.c +1 -3
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzlib.c +20 -73
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzread.c +17 -58
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzwrite.c +18 -58
- data/lib/2.4/zstdlib_c.bundle +0 -0
- data/lib/2.5/zstdlib_c.bundle +0 -0
- data/lib/2.6/zstdlib_c.bundle +0 -0
- data/lib/2.7/zstdlib_c.bundle +0 -0
- data/lib/3.0/zstdlib_c.bundle +0 -0
- data/lib/3.1/zstdlib_c.bundle +0 -0
- data/lib/3.2/zstdlib_c.bundle +0 -0
- data/lib/3.3/zstdlib_c.bundle +0 -0
- metadata +75 -75
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/bits.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/entropy_common.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/error_private.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/threading.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_common.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_deps.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_trace.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/clevels.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/hist.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/hist.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_literals.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_literals.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_sequences.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_sequences.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_superblock.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_fast.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_ldm.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_ldm_geartab.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstdmt_compress.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_ddict.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_ddict.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/zdict.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/zstd_errors.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzcompatibility.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzguts.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/zstd_zlibwrapper.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/zstd_zlibwrapper.h +0 -0
@@ -34,6 +34,12 @@
|
|
34
34
|
* Macros
|
35
35
|
****************************************************************/
|
36
36
|
|
37
|
+
#ifdef HUF_DISABLE_FAST_DECODE
|
38
|
+
# define HUF_ENABLE_FAST_DECODE 0
|
39
|
+
#else
|
40
|
+
# define HUF_ENABLE_FAST_DECODE 1
|
41
|
+
#endif
|
42
|
+
|
37
43
|
/* These two optional macros force the use one way or another of the two
|
38
44
|
* Huffman decompression implementations. You can't force in both directions
|
39
45
|
* at the same time.
|
@@ -158,17 +164,18 @@ static size_t HUF_initFastDStream(BYTE const* ip) {
|
|
158
164
|
* op [in/out] - The output pointers, must be updated to reflect what is written.
|
159
165
|
* bits [in/out] - The bitstream containers, must be updated to reflect the current state.
|
160
166
|
* dt [in] - The decoding table.
|
161
|
-
*
|
167
|
+
* ilowest [in] - The beginning of the valid range of the input. Decoders may read
|
168
|
+
* down to this pointer. It may be below iend[0].
|
162
169
|
* oend [in] - The end of the output stream. op[3] must not cross oend.
|
163
170
|
* iend [in] - The end of each input stream. ip[i] may cross iend[i],
|
164
|
-
* as long as it is above
|
171
|
+
* as long as it is above ilowest, but that indicates corruption.
|
165
172
|
*/
|
166
173
|
typedef struct {
|
167
174
|
BYTE const* ip[4];
|
168
175
|
BYTE* op[4];
|
169
176
|
U64 bits[4];
|
170
177
|
void const* dt;
|
171
|
-
BYTE const*
|
178
|
+
BYTE const* ilowest;
|
172
179
|
BYTE* oend;
|
173
180
|
BYTE const* iend[4];
|
174
181
|
} HUF_DecompressFastArgs;
|
@@ -186,9 +193,9 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
|
|
186
193
|
void const* dt = DTable + 1;
|
187
194
|
U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
|
188
195
|
|
189
|
-
const BYTE* const
|
196
|
+
const BYTE* const istart = (const BYTE*)src;
|
190
197
|
|
191
|
-
BYTE* const oend = (BYTE*)dst
|
198
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
|
192
199
|
|
193
200
|
/* The fast decoding loop assumes 64-bit little-endian.
|
194
201
|
* This condition is false on x32.
|
@@ -196,6 +203,11 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
|
|
196
203
|
if (!MEM_isLittleEndian() || MEM_32bits())
|
197
204
|
return 0;
|
198
205
|
|
206
|
+
/* Avoid nullptr addition */
|
207
|
+
if (dstSize == 0)
|
208
|
+
return 0;
|
209
|
+
assert(dst != NULL);
|
210
|
+
|
199
211
|
/* strict minimum : jump table + 1 byte per stream */
|
200
212
|
if (srcSize < 10)
|
201
213
|
return ERROR(corruption_detected);
|
@@ -209,7 +221,6 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
|
|
209
221
|
|
210
222
|
/* Read the jump table. */
|
211
223
|
{
|
212
|
-
const BYTE* const istart = (const BYTE*)src;
|
213
224
|
size_t const length1 = MEM_readLE16(istart);
|
214
225
|
size_t const length2 = MEM_readLE16(istart+2);
|
215
226
|
size_t const length3 = MEM_readLE16(istart+4);
|
@@ -221,10 +232,8 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
|
|
221
232
|
|
222
233
|
/* HUF_initFastDStream() requires this, and this small of an input
|
223
234
|
* won't benefit from the ASM loop anyways.
|
224
|
-
* length1 must be >= 16 so that ip[0] >= ilimit before the loop
|
225
|
-
* starts.
|
226
235
|
*/
|
227
|
-
if (length1 <
|
236
|
+
if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
|
228
237
|
return 0;
|
229
238
|
if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
|
230
239
|
}
|
@@ -256,11 +265,12 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
|
|
256
265
|
args->bits[2] = HUF_initFastDStream(args->ip[2]);
|
257
266
|
args->bits[3] = HUF_initFastDStream(args->ip[3]);
|
258
267
|
|
259
|
-
/*
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
268
|
+
/* The decoders must be sure to never read beyond ilowest.
|
269
|
+
* This is lower than iend[0], but allowing decoders to read
|
270
|
+
* down to ilowest can allow an extra iteration or two in the
|
271
|
+
* fast loop.
|
272
|
+
*/
|
273
|
+
args->ilowest = istart;
|
264
274
|
|
265
275
|
args->oend = oend;
|
266
276
|
args->dt = dt;
|
@@ -285,13 +295,31 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArg
|
|
285
295
|
assert(sizeof(size_t) == 8);
|
286
296
|
bit->bitContainer = MEM_readLEST(args->ip[stream]);
|
287
297
|
bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
|
288
|
-
bit->start = (const char*)args->
|
298
|
+
bit->start = (const char*)args->ilowest;
|
289
299
|
bit->limitPtr = bit->start + sizeof(size_t);
|
290
300
|
bit->ptr = (const char*)args->ip[stream];
|
291
301
|
|
292
302
|
return 0;
|
293
303
|
}
|
294
304
|
|
305
|
+
/* Calls X(N) for each stream 0, 1, 2, 3. */
|
306
|
+
#define HUF_4X_FOR_EACH_STREAM(X) \
|
307
|
+
do { \
|
308
|
+
X(0); \
|
309
|
+
X(1); \
|
310
|
+
X(2); \
|
311
|
+
X(3); \
|
312
|
+
} while (0)
|
313
|
+
|
314
|
+
/* Calls X(N, var) for each stream 0, 1, 2, 3. */
|
315
|
+
#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
|
316
|
+
do { \
|
317
|
+
X(0, (var)); \
|
318
|
+
X(1, (var)); \
|
319
|
+
X(2, (var)); \
|
320
|
+
X(3, (var)); \
|
321
|
+
} while (0)
|
322
|
+
|
295
323
|
|
296
324
|
#ifndef HUF_FORCE_DECOMPRESS_X2
|
297
325
|
|
@@ -500,15 +528,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog
|
|
500
528
|
}
|
501
529
|
|
502
530
|
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
|
503
|
-
*ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
|
531
|
+
do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
|
504
532
|
|
505
|
-
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)
|
506
|
-
|
507
|
-
|
533
|
+
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
|
534
|
+
do { \
|
535
|
+
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
536
|
+
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
|
537
|
+
} while (0)
|
508
538
|
|
509
|
-
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr)
|
510
|
-
|
511
|
-
|
539
|
+
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
|
540
|
+
do { \
|
541
|
+
if (MEM_64bits()) \
|
542
|
+
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
|
543
|
+
} while (0)
|
512
544
|
|
513
545
|
HINT_INLINE size_t
|
514
546
|
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
|
@@ -546,7 +578,7 @@ HUF_decompress1X1_usingDTable_internal_body(
|
|
546
578
|
const HUF_DTable* DTable)
|
547
579
|
{
|
548
580
|
BYTE* op = (BYTE*)dst;
|
549
|
-
BYTE* const oend = op
|
581
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
|
550
582
|
const void* dtPtr = DTable + 1;
|
551
583
|
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
|
552
584
|
BIT_DStream_t bitD;
|
@@ -574,6 +606,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
574
606
|
{
|
575
607
|
/* Check */
|
576
608
|
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
609
|
+
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
|
577
610
|
|
578
611
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
579
612
|
BYTE* const ostart = (BYTE*) dst;
|
@@ -609,7 +642,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
609
642
|
|
610
643
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
611
644
|
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
612
|
-
|
645
|
+
assert(dstSize >= 6); /* validated above */
|
613
646
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
614
647
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
615
648
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
@@ -692,7 +725,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
|
|
692
725
|
BYTE* op[4];
|
693
726
|
U16 const* const dtable = (U16 const*)args->dt;
|
694
727
|
BYTE* const oend = args->oend;
|
695
|
-
BYTE const* const
|
728
|
+
BYTE const* const ilowest = args->ilowest;
|
696
729
|
|
697
730
|
/* Copy the arguments to local variables */
|
698
731
|
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
|
@@ -705,13 +738,12 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
|
|
705
738
|
for (;;) {
|
706
739
|
BYTE* olimit;
|
707
740
|
int stream;
|
708
|
-
int symbol;
|
709
741
|
|
710
742
|
/* Assert loop preconditions */
|
711
743
|
#ifndef NDEBUG
|
712
744
|
for (stream = 0; stream < 4; ++stream) {
|
713
745
|
assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
|
714
|
-
assert(ip[stream] >=
|
746
|
+
assert(ip[stream] >= ilowest);
|
715
747
|
}
|
716
748
|
#endif
|
717
749
|
/* Compute olimit */
|
@@ -721,7 +753,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
|
|
721
753
|
/* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
|
722
754
|
* per stream.
|
723
755
|
*/
|
724
|
-
size_t const iiters = (size_t)(ip[0] -
|
756
|
+
size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
|
725
757
|
/* We can safely run iters iterations before running bounds checks */
|
726
758
|
size_t const iters = MIN(oiters, iiters);
|
727
759
|
size_t const symbols = iters * 5;
|
@@ -732,8 +764,8 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
|
|
732
764
|
*/
|
733
765
|
olimit = op[3] + symbols;
|
734
766
|
|
735
|
-
/* Exit fast decoding loop once we
|
736
|
-
if (op[3]
|
767
|
+
/* Exit fast decoding loop once we reach the end. */
|
768
|
+
if (op[3] == olimit)
|
737
769
|
break;
|
738
770
|
|
739
771
|
/* Exit the decoding loop if any input pointer has crossed the
|
@@ -752,27 +784,42 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
|
|
752
784
|
}
|
753
785
|
#endif
|
754
786
|
|
787
|
+
#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \
|
788
|
+
do { \
|
789
|
+
int const index = (int)(bits[(_stream)] >> 53); \
|
790
|
+
int const entry = (int)dtable[index]; \
|
791
|
+
bits[(_stream)] <<= (entry & 0x3F); \
|
792
|
+
op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
|
793
|
+
} while (0)
|
794
|
+
|
795
|
+
#define HUF_4X1_RELOAD_STREAM(_stream) \
|
796
|
+
do { \
|
797
|
+
int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
|
798
|
+
int const nbBits = ctz & 7; \
|
799
|
+
int const nbBytes = ctz >> 3; \
|
800
|
+
op[(_stream)] += 5; \
|
801
|
+
ip[(_stream)] -= nbBytes; \
|
802
|
+
bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
|
803
|
+
bits[(_stream)] <<= nbBits; \
|
804
|
+
} while (0)
|
805
|
+
|
806
|
+
/* Manually unroll the loop because compilers don't consistently
|
807
|
+
* unroll the inner loops, which destroys performance.
|
808
|
+
*/
|
755
809
|
do {
|
756
810
|
/* Decode 5 symbols in each of the 4 streams */
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
/* Reload the bitstreams */
|
766
|
-
for (stream = 0; stream < 4; ++stream) {
|
767
|
-
int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
|
768
|
-
int const nbBits = ctz & 7;
|
769
|
-
int const nbBytes = ctz >> 3;
|
770
|
-
op[stream] += 5;
|
771
|
-
ip[stream] -= nbBytes;
|
772
|
-
bits[stream] = MEM_read64(ip[stream]) | 1;
|
773
|
-
bits[stream] <<= nbBits;
|
774
|
-
}
|
811
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0);
|
812
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1);
|
813
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2);
|
814
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3);
|
815
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4);
|
816
|
+
|
817
|
+
/* Reload each of the 4 bitstreams */
|
818
|
+
HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM);
|
775
819
|
} while (op[3] < olimit);
|
820
|
+
|
821
|
+
#undef HUF_4X1_DECODE_SYMBOL
|
822
|
+
#undef HUF_4X1_RELOAD_STREAM
|
776
823
|
}
|
777
824
|
|
778
825
|
_out:
|
@@ -797,8 +844,8 @@ HUF_decompress4X1_usingDTable_internal_fast(
|
|
797
844
|
HUF_DecompressFastLoopFn loopFn)
|
798
845
|
{
|
799
846
|
void const* dt = DTable + 1;
|
800
|
-
const
|
801
|
-
BYTE* const oend = (BYTE*)dst
|
847
|
+
BYTE const* const ilowest = (BYTE const*)cSrc;
|
848
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
|
802
849
|
HUF_DecompressFastArgs args;
|
803
850
|
{ size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
804
851
|
FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
|
@@ -806,18 +853,22 @@ HUF_decompress4X1_usingDTable_internal_fast(
|
|
806
853
|
return 0;
|
807
854
|
}
|
808
855
|
|
809
|
-
assert(args.ip[0] >= args.
|
856
|
+
assert(args.ip[0] >= args.ilowest);
|
810
857
|
loopFn(&args);
|
811
858
|
|
812
|
-
/* Our loop guarantees that ip[] >=
|
859
|
+
/* Our loop guarantees that ip[] >= ilowest and that we haven't
|
813
860
|
* overwritten any op[].
|
814
861
|
*/
|
815
|
-
assert(args.ip[0] >=
|
816
|
-
assert(args.ip[
|
817
|
-
assert(args.ip[
|
818
|
-
assert(args.ip[
|
862
|
+
assert(args.ip[0] >= ilowest);
|
863
|
+
assert(args.ip[0] >= ilowest);
|
864
|
+
assert(args.ip[1] >= ilowest);
|
865
|
+
assert(args.ip[2] >= ilowest);
|
866
|
+
assert(args.ip[3] >= ilowest);
|
819
867
|
assert(args.op[3] <= oend);
|
820
|
-
|
868
|
+
|
869
|
+
assert(ilowest == args.ilowest);
|
870
|
+
assert(ilowest + 6 == args.iend[0]);
|
871
|
+
(void)ilowest;
|
821
872
|
|
822
873
|
/* finish bit streams one by one. */
|
823
874
|
{ size_t const segmentSize = (dstSize+3) / 4;
|
@@ -868,7 +919,7 @@ static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize,
|
|
868
919
|
}
|
869
920
|
#endif
|
870
921
|
|
871
|
-
if (!(flags & HUF_flags_disableFast)) {
|
922
|
+
if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
|
872
923
|
size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
|
873
924
|
if (ret != 0)
|
874
925
|
return ret;
|
@@ -1239,15 +1290,19 @@ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, c
|
|
1239
1290
|
}
|
1240
1291
|
|
1241
1292
|
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
|
1242
|
-
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
|
1293
|
+
do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
|
1243
1294
|
|
1244
|
-
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)
|
1245
|
-
|
1246
|
-
|
1295
|
+
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
|
1296
|
+
do { \
|
1297
|
+
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
1298
|
+
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
|
1299
|
+
} while (0)
|
1247
1300
|
|
1248
|
-
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)
|
1249
|
-
|
1250
|
-
|
1301
|
+
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
|
1302
|
+
do { \
|
1303
|
+
if (MEM_64bits()) \
|
1304
|
+
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
|
1305
|
+
} while (0)
|
1251
1306
|
|
1252
1307
|
HINT_INLINE size_t
|
1253
1308
|
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
@@ -1307,7 +1362,7 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
1307
1362
|
|
1308
1363
|
/* decode */
|
1309
1364
|
{ BYTE* const ostart = (BYTE*) dst;
|
1310
|
-
BYTE* const oend = ostart
|
1365
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize);
|
1311
1366
|
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
|
1312
1367
|
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
|
1313
1368
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
@@ -1332,6 +1387,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
1332
1387
|
const HUF_DTable* DTable)
|
1333
1388
|
{
|
1334
1389
|
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
1390
|
+
if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
|
1335
1391
|
|
1336
1392
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
1337
1393
|
BYTE* const ostart = (BYTE*) dst;
|
@@ -1367,7 +1423,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
1367
1423
|
|
1368
1424
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
1369
1425
|
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
|
1370
|
-
|
1426
|
+
assert(dstSize >= 6 /* validated above */);
|
1371
1427
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
1372
1428
|
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
1373
1429
|
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
@@ -1472,7 +1528,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
|
|
1472
1528
|
BYTE* op[4];
|
1473
1529
|
BYTE* oend[4];
|
1474
1530
|
HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
|
1475
|
-
BYTE const* const
|
1531
|
+
BYTE const* const ilowest = args->ilowest;
|
1476
1532
|
|
1477
1533
|
/* Copy the arguments to local registers. */
|
1478
1534
|
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
|
@@ -1490,13 +1546,12 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
|
|
1490
1546
|
for (;;) {
|
1491
1547
|
BYTE* olimit;
|
1492
1548
|
int stream;
|
1493
|
-
int symbol;
|
1494
1549
|
|
1495
1550
|
/* Assert loop preconditions */
|
1496
1551
|
#ifndef NDEBUG
|
1497
1552
|
for (stream = 0; stream < 4; ++stream) {
|
1498
1553
|
assert(op[stream] <= oend[stream]);
|
1499
|
-
assert(ip[stream] >=
|
1554
|
+
assert(ip[stream] >= ilowest);
|
1500
1555
|
}
|
1501
1556
|
#endif
|
1502
1557
|
/* Compute olimit */
|
@@ -1509,7 +1564,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
|
|
1509
1564
|
* We also know that each input pointer is >= ip[0]. So we can run
|
1510
1565
|
* iters loops before running out of input.
|
1511
1566
|
*/
|
1512
|
-
size_t iters = (size_t)(ip[0] -
|
1567
|
+
size_t iters = (size_t)(ip[0] - ilowest) / 7;
|
1513
1568
|
/* Each iteration can produce up to 10 bytes of output per stream.
|
1514
1569
|
* Each output stream may advance at different rates. So take the
|
1515
1570
|
* minimum number of safe iterations among all the output streams.
|
@@ -1527,8 +1582,8 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
|
|
1527
1582
|
*/
|
1528
1583
|
olimit = op[3] + (iters * 5);
|
1529
1584
|
|
1530
|
-
/* Exit the fast decoding loop
|
1531
|
-
if (op[3]
|
1585
|
+
/* Exit the fast decoding loop once we reach the end. */
|
1586
|
+
if (op[3] == olimit)
|
1532
1587
|
break;
|
1533
1588
|
|
1534
1589
|
/* Exit the decoding loop if any input pointer has crossed the
|
@@ -1547,54 +1602,58 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
|
|
1547
1602
|
}
|
1548
1603
|
#endif
|
1549
1604
|
|
1605
|
+
#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \
|
1606
|
+
do { \
|
1607
|
+
if ((_decode3) || (_stream) != 3) { \
|
1608
|
+
int const index = (int)(bits[(_stream)] >> 53); \
|
1609
|
+
HUF_DEltX2 const entry = dtable[index]; \
|
1610
|
+
MEM_write16(op[(_stream)], entry.sequence); \
|
1611
|
+
bits[(_stream)] <<= (entry.nbBits) & 0x3F; \
|
1612
|
+
op[(_stream)] += (entry.length); \
|
1613
|
+
} \
|
1614
|
+
} while (0)
|
1615
|
+
|
1616
|
+
#define HUF_4X2_RELOAD_STREAM(_stream) \
|
1617
|
+
do { \
|
1618
|
+
HUF_4X2_DECODE_SYMBOL(3, 1); \
|
1619
|
+
{ \
|
1620
|
+
int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
|
1621
|
+
int const nbBits = ctz & 7; \
|
1622
|
+
int const nbBytes = ctz >> 3; \
|
1623
|
+
ip[(_stream)] -= nbBytes; \
|
1624
|
+
bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
|
1625
|
+
bits[(_stream)] <<= nbBits; \
|
1626
|
+
} \
|
1627
|
+
} while (0)
|
1628
|
+
|
1629
|
+
/* Manually unroll the loop because compilers don't consistently
|
1630
|
+
* unroll the inner loops, which destroys performance.
|
1631
|
+
*/
|
1550
1632
|
do {
|
1551
|
-
/*
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1555
|
-
|
1556
|
-
|
1557
|
-
|
1558
|
-
|
1559
|
-
|
1560
|
-
|
1561
|
-
/*
|
1562
|
-
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1567
|
-
|
1568
|
-
|
1569
|
-
/* Do 4 table lookups from the final stream & reload bitstreams */
|
1570
|
-
for (stream = 0; stream < 4; ++stream) {
|
1571
|
-
/* Do a table lookup from the final stream.
|
1572
|
-
* This is interleaved with the reloading to reduce register
|
1573
|
-
* pressure. This shouldn't be necessary, but compilers can
|
1574
|
-
* struggle with codegen with high register pressure.
|
1575
|
-
*/
|
1576
|
-
{
|
1577
|
-
int const index = (int)(bits[3] >> 53);
|
1578
|
-
HUF_DEltX2 const entry = dtable[index];
|
1579
|
-
MEM_write16(op[3], entry.sequence);
|
1580
|
-
bits[3] <<= (entry.nbBits);
|
1581
|
-
op[3] += (entry.length);
|
1582
|
-
}
|
1583
|
-
/* Reload the bitstreams. The final bitstream must be reloaded
|
1584
|
-
* after the 5th symbol was decoded.
|
1585
|
-
*/
|
1586
|
-
{
|
1587
|
-
int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
|
1588
|
-
int const nbBits = ctz & 7;
|
1589
|
-
int const nbBytes = ctz >> 3;
|
1590
|
-
ip[stream] -= nbBytes;
|
1591
|
-
bits[stream] = MEM_read64(ip[stream]) | 1;
|
1592
|
-
bits[stream] <<= nbBits;
|
1593
|
-
}
|
1594
|
-
}
|
1633
|
+
/* Decode 5 symbols from each of the first 3 streams.
|
1634
|
+
* The final stream will be decoded during the reload phase
|
1635
|
+
* to reduce register pressure.
|
1636
|
+
*/
|
1637
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
|
1638
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
|
1639
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
|
1640
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
|
1641
|
+
HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
|
1642
|
+
|
1643
|
+
/* Decode one symbol from the final stream */
|
1644
|
+
HUF_4X2_DECODE_SYMBOL(3, 1);
|
1645
|
+
|
1646
|
+
/* Decode 4 symbols from the final stream & reload bitstreams.
|
1647
|
+
* The final stream is reloaded last, meaning that all 5 symbols
|
1648
|
+
* are decoded from the final stream before it is reloaded.
|
1649
|
+
*/
|
1650
|
+
HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM);
|
1595
1651
|
} while (op[3] < olimit);
|
1596
1652
|
}
|
1597
1653
|
|
1654
|
+
#undef HUF_4X2_DECODE_SYMBOL
|
1655
|
+
#undef HUF_4X2_RELOAD_STREAM
|
1656
|
+
|
1598
1657
|
_out:
|
1599
1658
|
|
1600
1659
|
/* Save the final values of each of the state variables back to args. */
|
@@ -1611,8 +1670,8 @@ HUF_decompress4X2_usingDTable_internal_fast(
|
|
1611
1670
|
const HUF_DTable* DTable,
|
1612
1671
|
HUF_DecompressFastLoopFn loopFn) {
|
1613
1672
|
void const* dt = DTable + 1;
|
1614
|
-
const BYTE* const
|
1615
|
-
BYTE* const oend = (BYTE*)dst
|
1673
|
+
const BYTE* const ilowest = (const BYTE*)cSrc;
|
1674
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
|
1616
1675
|
HUF_DecompressFastArgs args;
|
1617
1676
|
{
|
1618
1677
|
size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
|
@@ -1621,16 +1680,19 @@ HUF_decompress4X2_usingDTable_internal_fast(
|
|
1621
1680
|
return 0;
|
1622
1681
|
}
|
1623
1682
|
|
1624
|
-
assert(args.ip[0] >= args.
|
1683
|
+
assert(args.ip[0] >= args.ilowest);
|
1625
1684
|
loopFn(&args);
|
1626
1685
|
|
1627
1686
|
/* note : op4 already verified within main loop */
|
1628
|
-
assert(args.ip[0] >=
|
1629
|
-
assert(args.ip[1] >=
|
1630
|
-
assert(args.ip[2] >=
|
1631
|
-
assert(args.ip[3] >=
|
1687
|
+
assert(args.ip[0] >= ilowest);
|
1688
|
+
assert(args.ip[1] >= ilowest);
|
1689
|
+
assert(args.ip[2] >= ilowest);
|
1690
|
+
assert(args.ip[3] >= ilowest);
|
1632
1691
|
assert(args.op[3] <= oend);
|
1633
|
-
|
1692
|
+
|
1693
|
+
assert(ilowest == args.ilowest);
|
1694
|
+
assert(ilowest + 6 == args.iend[0]);
|
1695
|
+
(void)ilowest;
|
1634
1696
|
|
1635
1697
|
/* finish bitStreams one by one */
|
1636
1698
|
{
|
@@ -1679,7 +1741,7 @@ static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize,
|
|
1679
1741
|
}
|
1680
1742
|
#endif
|
1681
1743
|
|
1682
|
-
if (!(flags & HUF_flags_disableFast)) {
|
1744
|
+
if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
|
1683
1745
|
size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
|
1684
1746
|
if (ret != 0)
|
1685
1747
|
return ret;
|