zstdlib 0.6.0-x86-mingw32 → 0.7.0-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +5 -0
- data/ext/zstdlib/extconf.rb +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/bitstream.h +31 -37
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/compiler.h +19 -3
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/cpu.h +1 -1
- data/ext/zstdlib/zstd-1.4.5/lib/common/debug.c +24 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/debug.h +11 -31
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/entropy_common.c +13 -33
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/error_private.c +2 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/error_private.h +6 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/fse.h +11 -31
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/fse_decompress.c +12 -37
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/huf.h +15 -33
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/mem.h +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/pool.c +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/pool.h +2 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/threading.c +4 -3
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/threading.h +4 -3
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/xxhash.c +15 -33
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/xxhash.h +11 -31
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_common.c +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_errors.h +2 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_internal.h +112 -15
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/fse_compress.c +17 -40
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/hist.c +15 -35
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/hist.h +12 -32
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/huf_compress.c +92 -92
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress.c +450 -275
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_internal.h +136 -14
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_literals.c +10 -6
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_literals.h +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_sequences.c +24 -20
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.c +845 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_cwksp.h +3 -13
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_double_fast.c +11 -8
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_double_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_fast.c +36 -24
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_lazy.c +34 -11
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_lazy.h +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_ldm.c +27 -5
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_ldm.h +7 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_opt.c +38 -84
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstdmt_compress.c +48 -21
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstdmt_compress.h +2 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/huf_decompress.c +76 -62
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_ddict.c +12 -8
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_ddict.h +2 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress.c +264 -148
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.c +312 -203
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.h +3 -3
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_internal.h +18 -4
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/zstd.h +62 -21
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzclose.c +0 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzguts.h +0 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzlib.c +0 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzread.c +0 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzwrite.c +0 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.c +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
- metadata +64 -62
- data/ext/zstdlib/zstd-1.4.4/lib/common/debug.c +0 -44
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -15,14 +15,14 @@
|
|
15
15
|
* Dependencies
|
16
16
|
*********************************************************/
|
17
17
|
#include <string.h> /* memcpy, memmove, memset */
|
18
|
-
#include "compiler.h" /* prefetch */
|
19
|
-
#include "cpu.h" /* bmi2 */
|
20
|
-
#include "mem.h" /* low level memory routines */
|
18
|
+
#include "../common/compiler.h" /* prefetch */
|
19
|
+
#include "../common/cpu.h" /* bmi2 */
|
20
|
+
#include "../common/mem.h" /* low level memory routines */
|
21
21
|
#define FSE_STATIC_LINKING_ONLY
|
22
|
-
#include "fse.h"
|
22
|
+
#include "../common/fse.h"
|
23
23
|
#define HUF_STATIC_LINKING_ONLY
|
24
|
-
#include "huf.h"
|
25
|
-
#include "zstd_internal.h"
|
24
|
+
#include "../common/huf.h"
|
25
|
+
#include "../common/zstd_internal.h"
|
26
26
|
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
|
27
27
|
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
|
28
28
|
#include "zstd_decompress_block.h"
|
@@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
|
|
56
56
|
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
57
57
|
blockProperties_t* bpPtr)
|
58
58
|
{
|
59
|
-
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
|
59
|
+
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
|
60
60
|
|
61
61
|
{ U32 const cBlockHeader = MEM_readLE24(src);
|
62
62
|
U32 const cSize = cBlockHeader >> 3;
|
@@ -64,7 +64,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
64
64
|
bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
|
65
65
|
bpPtr->origSize = cSize; /* only useful for RLE */
|
66
66
|
if (bpPtr->blockType == bt_rle) return 1;
|
67
|
-
RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
|
67
|
+
RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
|
68
68
|
return cSize;
|
69
69
|
}
|
70
70
|
}
|
@@ -80,7 +80,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
80
80
|
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
|
81
81
|
{
|
82
82
|
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
|
83
|
-
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
|
83
|
+
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
|
84
84
|
|
85
85
|
{ const BYTE* const istart = (const BYTE*) src;
|
86
86
|
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
|
@@ -89,7 +89,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
89
89
|
{
|
90
90
|
case set_repeat:
|
91
91
|
DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
|
92
|
-
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
|
92
|
+
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
|
93
93
|
/* fall-through */
|
94
94
|
|
95
95
|
case set_compressed:
|
@@ -121,8 +121,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
121
121
|
litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
|
122
122
|
break;
|
123
123
|
}
|
124
|
-
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
|
125
|
-
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
|
124
|
+
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
125
|
+
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
|
126
126
|
|
127
127
|
/* prefetch huffman table if cold */
|
128
128
|
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
@@ -160,7 +160,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
160
160
|
}
|
161
161
|
}
|
162
162
|
|
163
|
-
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
|
163
|
+
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
|
164
164
|
|
165
165
|
dctx->litPtr = dctx->litBuffer;
|
166
166
|
dctx->litSize = litSize;
|
@@ -190,7 +190,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
190
190
|
}
|
191
191
|
|
192
192
|
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
193
|
-
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
|
193
|
+
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
|
194
194
|
memcpy(dctx->litBuffer, istart+lhSize, litSize);
|
195
195
|
dctx->litPtr = dctx->litBuffer;
|
196
196
|
dctx->litSize = litSize;
|
@@ -222,7 +222,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
222
222
|
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
223
223
|
break;
|
224
224
|
}
|
225
|
-
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
|
225
|
+
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
226
226
|
memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
|
227
227
|
dctx->litPtr = dctx->litBuffer;
|
228
228
|
dctx->litSize = litSize;
|
@@ -440,8 +440,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
440
440
|
switch(type)
|
441
441
|
{
|
442
442
|
case set_rle :
|
443
|
-
RETURN_ERROR_IF(!srcSize, srcSize_wrong);
|
444
|
-
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
|
443
|
+
RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
|
444
|
+
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
|
445
445
|
{ U32 const symbol = *(const BYTE*)src;
|
446
446
|
U32 const baseline = baseValue[symbol];
|
447
447
|
U32 const nbBits = nbAdditionalBits[symbol];
|
@@ -453,7 +453,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
453
453
|
*DTablePtr = defaultTable;
|
454
454
|
return 0;
|
455
455
|
case set_repeat:
|
456
|
-
RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
|
456
|
+
RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
|
457
457
|
/* prefetch FSE table if used */
|
458
458
|
if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
|
459
459
|
const void* const pStart = *DTablePtr;
|
@@ -465,8 +465,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
465
465
|
{ unsigned tableLog;
|
466
466
|
S16 norm[MaxSeq+1];
|
467
467
|
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
|
468
|
-
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
|
469
|
-
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
|
468
|
+
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
|
469
|
+
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
|
470
470
|
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
|
471
471
|
*DTablePtr = DTableSpace;
|
472
472
|
return headerSize;
|
@@ -487,28 +487,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
487
487
|
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
|
488
488
|
|
489
489
|
/* check */
|
490
|
-
RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
|
490
|
+
RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
|
491
491
|
|
492
492
|
/* SeqHead */
|
493
493
|
nbSeq = *ip++;
|
494
494
|
if (!nbSeq) {
|
495
495
|
*nbSeqPtr=0;
|
496
|
-
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
|
496
|
+
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
|
497
497
|
return 1;
|
498
498
|
}
|
499
499
|
if (nbSeq > 0x7F) {
|
500
500
|
if (nbSeq == 0xFF) {
|
501
|
-
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
|
501
|
+
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
|
502
502
|
nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
|
503
503
|
} else {
|
504
|
-
RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
|
504
|
+
RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
|
505
505
|
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
506
506
|
}
|
507
507
|
}
|
508
508
|
*nbSeqPtr = nbSeq;
|
509
509
|
|
510
510
|
/* FSE table descriptors */
|
511
|
-
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
|
511
|
+
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
|
512
512
|
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
|
513
513
|
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
|
514
514
|
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
|
@@ -521,7 +521,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
521
521
|
LL_base, LL_bits,
|
522
522
|
LL_defaultDTable, dctx->fseEntropy,
|
523
523
|
dctx->ddictIsCold, nbSeq);
|
524
|
-
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
|
524
|
+
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
525
525
|
ip += llhSize;
|
526
526
|
}
|
527
527
|
|
@@ -531,7 +531,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
531
531
|
OF_base, OF_bits,
|
532
532
|
OF_defaultDTable, dctx->fseEntropy,
|
533
533
|
dctx->ddictIsCold, nbSeq);
|
534
|
-
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
|
534
|
+
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
535
535
|
ip += ofhSize;
|
536
536
|
}
|
537
537
|
|
@@ -541,7 +541,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
541
541
|
ML_base, ML_bits,
|
542
542
|
ML_defaultDTable, dctx->fseEntropy,
|
543
543
|
dctx->ddictIsCold, nbSeq);
|
544
|
-
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
|
544
|
+
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
545
545
|
ip += mlhSize;
|
546
546
|
}
|
547
547
|
}
|
@@ -580,7 +580,7 @@ typedef struct {
|
|
580
580
|
* Precondition: *ip <= *op
|
581
581
|
* Postcondition: *op - *ip >= 8
|
582
582
|
*/
|
583
|
-
|
583
|
+
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
584
584
|
assert(*ip <= *op);
|
585
585
|
if (offset < 8) {
|
586
586
|
/* close range match, overlap */
|
@@ -665,15 +665,15 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
665
665
|
{
|
666
666
|
BYTE* const oLitEnd = op + sequence.litLength;
|
667
667
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
668
|
-
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
669
668
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
670
669
|
const BYTE* match = oLitEnd - sequence.offset;
|
671
670
|
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
672
671
|
|
673
|
-
/* bounds checks */
|
674
|
-
|
675
|
-
RETURN_ERROR_IF(
|
676
|
-
|
672
|
+
/* bounds checks : careful of address space overflow in 32-bit mode */
|
673
|
+
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
|
674
|
+
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
|
675
|
+
assert(op < op + sequenceLength);
|
676
|
+
assert(oLitEnd < op + sequenceLength);
|
677
677
|
|
678
678
|
/* copy literals */
|
679
679
|
ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
|
@@ -683,7 +683,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
683
683
|
/* copy Match */
|
684
684
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
685
685
|
/* offset beyond prefix */
|
686
|
-
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
|
686
|
+
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
687
687
|
match = dictEnd - (prefixStart-match);
|
688
688
|
if (match + sequence.matchLength <= dictEnd) {
|
689
689
|
memmove(oLitEnd, match, sequence.matchLength);
|
@@ -709,16 +709,27 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
709
709
|
BYTE* const oLitEnd = op + sequence.litLength;
|
710
710
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
711
711
|
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
712
|
-
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
712
|
+
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
|
713
713
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
714
714
|
const BYTE* match = oLitEnd - sequence.offset;
|
715
715
|
|
716
|
-
|
717
|
-
assert(
|
718
|
-
|
716
|
+
assert(op != NULL /* Precondition */);
|
717
|
+
assert(oend_w < oend /* No underflow */);
|
718
|
+
/* Handle edge cases in a slow path:
|
719
|
+
* - Read beyond end of literals
|
720
|
+
* - Match end is within WILDCOPY_OVERLENGTH of oend
|
721
|
+
* - 32-bit mode and the match length overflows
|
722
|
+
*/
|
723
|
+
if (UNLIKELY(
|
724
|
+
iLitEnd > litLimit ||
|
725
|
+
oMatchEnd > oend_w ||
|
726
|
+
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
719
727
|
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
720
728
|
|
721
729
|
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
730
|
+
assert(op <= oLitEnd /* No overflow */);
|
731
|
+
assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
|
732
|
+
assert(oMatchEnd <= oend /* No underflow */);
|
722
733
|
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
|
723
734
|
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
|
724
735
|
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
|
@@ -729,7 +740,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
729
740
|
*/
|
730
741
|
assert(WILDCOPY_OVERLENGTH >= 16);
|
731
742
|
ZSTD_copy16(op, (*litPtr));
|
732
|
-
if (sequence.litLength > 16) {
|
743
|
+
if (UNLIKELY(sequence.litLength > 16)) {
|
733
744
|
ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
|
734
745
|
}
|
735
746
|
op = oLitEnd;
|
@@ -738,7 +749,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
738
749
|
/* Copy Match */
|
739
750
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
740
751
|
/* offset beyond prefix -> go into extDict */
|
741
|
-
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
|
752
|
+
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
742
753
|
match = dictEnd + (match - prefixStart);
|
743
754
|
if (match + sequence.matchLength <= dictEnd) {
|
744
755
|
memmove(oLitEnd, match, sequence.matchLength);
|
@@ -760,7 +771,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
760
771
|
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
761
772
|
* without overlap checking.
|
762
773
|
*/
|
763
|
-
if (sequence.offset >= WILDCOPY_VECLEN) {
|
774
|
+
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
764
775
|
/* We bet on a full wildcopy for matches, since we expect matches to be
|
765
776
|
* longer than literals (in general). In silesia, ~10% of matches are longer
|
766
777
|
* than 16 bytes.
|
@@ -802,6 +813,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
|
802
813
|
DStatePtr->state = DInfo.nextState + lowBits;
|
803
814
|
}
|
804
815
|
|
816
|
+
FORCE_INLINE_TEMPLATE void
|
817
|
+
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
|
818
|
+
{
|
819
|
+
U32 const nbBits = DInfo.nbBits;
|
820
|
+
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
821
|
+
DStatePtr->state = DInfo.nextState + lowBits;
|
822
|
+
}
|
823
|
+
|
805
824
|
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
806
825
|
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
|
807
826
|
* bits before reloading. This value is the maximum number of bytes we read
|
@@ -813,25 +832,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
|
813
832
|
: 0)
|
814
833
|
|
815
834
|
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
|
835
|
+
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
|
816
836
|
|
817
|
-
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
818
837
|
FORCE_INLINE_TEMPLATE seq_t
|
819
|
-
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
838
|
+
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
|
820
839
|
{
|
821
840
|
seq_t seq;
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
U32 const
|
826
|
-
U32 const
|
827
|
-
U32 const
|
828
|
-
|
841
|
+
ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
|
842
|
+
ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
|
843
|
+
ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
|
844
|
+
U32 const llBase = llDInfo.baseValue;
|
845
|
+
U32 const mlBase = mlDInfo.baseValue;
|
846
|
+
U32 const ofBase = ofDInfo.baseValue;
|
847
|
+
BYTE const llBits = llDInfo.nbAdditionalBits;
|
848
|
+
BYTE const mlBits = mlDInfo.nbAdditionalBits;
|
849
|
+
BYTE const ofBits = ofDInfo.nbAdditionalBits;
|
850
|
+
BYTE const totalBits = llBits+mlBits+ofBits;
|
829
851
|
|
830
852
|
/* sequence */
|
831
853
|
{ size_t offset;
|
832
|
-
if (
|
833
|
-
offset = 0;
|
834
|
-
else {
|
854
|
+
if (ofBits > 1) {
|
835
855
|
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
836
856
|
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
837
857
|
assert(ofBits <= MaxOff);
|
@@ -845,59 +865,138 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
|
845
865
|
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
846
866
|
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
847
867
|
}
|
848
|
-
}
|
849
|
-
|
850
|
-
if (ofBits <= 1) {
|
851
|
-
offset += (llBase==0);
|
852
|
-
if (offset) {
|
853
|
-
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
854
|
-
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
855
|
-
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
856
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
857
|
-
seqState->prevOffset[0] = offset = temp;
|
858
|
-
} else { /* offset == 0 */
|
859
|
-
offset = seqState->prevOffset[0];
|
860
|
-
}
|
861
|
-
} else {
|
862
868
|
seqState->prevOffset[2] = seqState->prevOffset[1];
|
863
869
|
seqState->prevOffset[1] = seqState->prevOffset[0];
|
864
870
|
seqState->prevOffset[0] = offset;
|
865
|
-
}
|
871
|
+
} else {
|
872
|
+
U32 const ll0 = (llBase == 0);
|
873
|
+
if (LIKELY((ofBits == 0))) {
|
874
|
+
if (LIKELY(!ll0))
|
875
|
+
offset = seqState->prevOffset[0];
|
876
|
+
else {
|
877
|
+
offset = seqState->prevOffset[1];
|
878
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
879
|
+
seqState->prevOffset[0] = offset;
|
880
|
+
}
|
881
|
+
} else {
|
882
|
+
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
|
883
|
+
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
884
|
+
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
885
|
+
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
886
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
887
|
+
seqState->prevOffset[0] = offset = temp;
|
888
|
+
} } }
|
866
889
|
seq.offset = offset;
|
867
890
|
}
|
868
891
|
|
869
|
-
seq.matchLength = mlBase
|
870
|
-
|
892
|
+
seq.matchLength = mlBase;
|
893
|
+
if (mlBits > 0)
|
894
|
+
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
895
|
+
|
871
896
|
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
872
897
|
BIT_reloadDStream(&seqState->DStream);
|
873
|
-
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
898
|
+
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
874
899
|
BIT_reloadDStream(&seqState->DStream);
|
875
900
|
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
876
901
|
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
877
902
|
|
878
|
-
seq.litLength = llBase
|
879
|
-
|
903
|
+
seq.litLength = llBase;
|
904
|
+
if (llBits > 0)
|
905
|
+
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
906
|
+
|
880
907
|
if (MEM_32bits())
|
881
908
|
BIT_reloadDStream(&seqState->DStream);
|
882
909
|
|
883
910
|
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
884
911
|
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
885
912
|
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
913
|
+
if (prefetch == ZSTD_p_prefetch) {
|
914
|
+
size_t const pos = seqState->pos + seq.litLength;
|
915
|
+
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
916
|
+
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
917
|
+
* No consequence though : no memory access will occur, offset is only used for prefetching */
|
918
|
+
seqState->pos = pos + seq.matchLength;
|
919
|
+
}
|
920
|
+
|
921
|
+
/* ANS state update
|
922
|
+
* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
|
923
|
+
* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
|
924
|
+
* Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
|
925
|
+
* better option, so it is the default for other compilers. But, if you
|
926
|
+
* measure that it is worse, please put up a pull request.
|
927
|
+
*/
|
928
|
+
{
|
929
|
+
#if defined(__GNUC__) && !defined(__clang__)
|
930
|
+
const int kUseUpdateFseState = 1;
|
931
|
+
#else
|
932
|
+
const int kUseUpdateFseState = 0;
|
933
|
+
#endif
|
934
|
+
if (kUseUpdateFseState) {
|
935
|
+
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
936
|
+
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
937
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
938
|
+
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
939
|
+
} else {
|
940
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
|
941
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
|
942
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
943
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
|
944
|
+
}
|
945
|
+
}
|
891
946
|
|
892
947
|
return seq;
|
893
948
|
}
|
894
949
|
|
950
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
951
|
+
static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
|
952
|
+
{
|
953
|
+
size_t const windowSize = dctx->fParams.windowSize;
|
954
|
+
/* No dictionary used. */
|
955
|
+
if (dctx->dictContentEndForFuzzing == NULL) return 0;
|
956
|
+
/* Dictionary is our prefix. */
|
957
|
+
if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
|
958
|
+
/* Dictionary is not our ext-dict. */
|
959
|
+
if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
|
960
|
+
/* Dictionary is not within our window size. */
|
961
|
+
if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
|
962
|
+
/* Dictionary is active. */
|
963
|
+
return 1;
|
964
|
+
}
|
965
|
+
|
966
|
+
MEM_STATIC void ZSTD_assertValidSequence(
|
967
|
+
ZSTD_DCtx const* dctx,
|
968
|
+
BYTE const* op, BYTE const* oend,
|
969
|
+
seq_t const seq,
|
970
|
+
BYTE const* prefixStart, BYTE const* virtualStart)
|
971
|
+
{
|
972
|
+
size_t const windowSize = dctx->fParams.windowSize;
|
973
|
+
size_t const sequenceSize = seq.litLength + seq.matchLength;
|
974
|
+
BYTE const* const oLitEnd = op + seq.litLength;
|
975
|
+
DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
|
976
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
977
|
+
assert(op <= oend);
|
978
|
+
assert((size_t)(oend - op) >= sequenceSize);
|
979
|
+
assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
|
980
|
+
if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
|
981
|
+
size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
|
982
|
+
/* Offset must be within the dictionary. */
|
983
|
+
assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
|
984
|
+
assert(seq.offset <= windowSize + dictSize);
|
985
|
+
} else {
|
986
|
+
/* Offset must be within our window. */
|
987
|
+
assert(seq.offset <= windowSize);
|
988
|
+
}
|
989
|
+
}
|
990
|
+
#endif
|
991
|
+
|
992
|
+
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
895
993
|
FORCE_INLINE_TEMPLATE size_t
|
896
994
|
DONT_VECTORIZE
|
897
995
|
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
898
996
|
void* dst, size_t maxDstSize,
|
899
997
|
const void* seqStart, size_t seqSize, int nbSeq,
|
900
|
-
const ZSTD_longOffset_e isLongOffset
|
998
|
+
const ZSTD_longOffset_e isLongOffset,
|
999
|
+
const int frame)
|
901
1000
|
{
|
902
1001
|
const BYTE* ip = (const BYTE*)seqStart;
|
903
1002
|
const BYTE* const iend = ip + seqSize;
|
@@ -910,46 +1009,104 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
910
1009
|
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
|
911
1010
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
912
1011
|
DEBUGLOG(5, "ZSTD_decompressSequences_body");
|
1012
|
+
(void)frame;
|
913
1013
|
|
914
1014
|
/* Regen sequences */
|
915
1015
|
if (nbSeq) {
|
916
1016
|
seqState_t seqState;
|
1017
|
+
size_t error = 0;
|
917
1018
|
dctx->fseEntropy = 1;
|
918
1019
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
919
1020
|
RETURN_ERROR_IF(
|
920
1021
|
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
921
|
-
corruption_detected);
|
1022
|
+
corruption_detected, "");
|
922
1023
|
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
923
1024
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
924
1025
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
1026
|
+
assert(dst != NULL);
|
925
1027
|
|
926
1028
|
ZSTD_STATIC_ASSERT(
|
927
1029
|
BIT_DStream_unfinished < BIT_DStream_completed &&
|
928
1030
|
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
929
1031
|
BIT_DStream_completed < BIT_DStream_overflow);
|
930
1032
|
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
938
|
-
|
1033
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
1034
|
+
/* Align the decompression loop to 32 + 16 bytes.
|
1035
|
+
*
|
1036
|
+
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
|
1037
|
+
* speed swings based on the alignment of the decompression loop. This
|
1038
|
+
* performance swing is caused by parts of the decompression loop falling
|
1039
|
+
* out of the DSB. The entire decompression loop should fit in the DSB,
|
1040
|
+
* when it can't we get much worse performance. You can measure if you've
|
1041
|
+
* hit the good case or the bad case with this perf command for some
|
1042
|
+
* compressed file test.zst:
|
1043
|
+
*
|
1044
|
+
* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
|
1045
|
+
* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
|
1046
|
+
*
|
1047
|
+
* If you see most cycles served out of the MITE you've hit the bad case.
|
1048
|
+
* If you see most cycles served out of the DSB you've hit the good case.
|
1049
|
+
* If it is pretty even then you may be in an okay case.
|
1050
|
+
*
|
1051
|
+
* I've been able to reproduce this issue on the following CPUs:
|
1052
|
+
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
1053
|
+
* Use Instruments->Counters to get DSB/MITE cycles.
|
1054
|
+
* I never got performance swings, but I was able to
|
1055
|
+
* go from the good case of mostly DSB to half of the
|
1056
|
+
* cycles served from MITE.
|
1057
|
+
* - Coffeelake: Intel i9-9900k
|
1058
|
+
*
|
1059
|
+
* I haven't been able to reproduce the instability or DSB misses on any
|
1060
|
+
* of the following CPUs:
|
1061
|
+
* - Haswell
|
1062
|
+
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
|
1063
|
+
* - Skylake
|
1064
|
+
*
|
1065
|
+
* If you are seeing performance instability this script can help test.
|
1066
|
+
* It tests on 4 commits in zstd where I saw performance change.
|
1067
|
+
*
|
1068
|
+
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
1069
|
+
*/
|
1070
|
+
__asm__(".p2align 5");
|
1071
|
+
__asm__("nop");
|
1072
|
+
__asm__(".p2align 4");
|
1073
|
+
#endif
|
1074
|
+
for ( ; ; ) {
|
1075
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
|
1076
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
1077
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1078
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1079
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
1080
|
+
#endif
|
1081
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
1082
|
+
BIT_reloadDStream(&(seqState.DStream));
|
1083
|
+
/* gcc and clang both don't like early returns in this loop.
|
1084
|
+
* gcc doesn't like early breaks either.
|
1085
|
+
* Instead save an error and report it at the end.
|
1086
|
+
* When there is an error, don't increment op, so we don't
|
1087
|
+
* overwrite.
|
1088
|
+
*/
|
1089
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
|
1090
|
+
else op += oneSeqSize;
|
1091
|
+
if (UNLIKELY(!--nbSeq)) break;
|
1092
|
+
}
|
939
1093
|
|
940
1094
|
/* check if reached exact end */
|
941
1095
|
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
|
942
|
-
|
943
|
-
RETURN_ERROR_IF(
|
1096
|
+
if (ZSTD_isError(error)) return error;
|
1097
|
+
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
|
1098
|
+
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
|
944
1099
|
/* save reps for next block */
|
945
1100
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
946
1101
|
}
|
947
1102
|
|
948
1103
|
/* last literal segment */
|
949
1104
|
{ size_t const lastLLSize = litEnd - litPtr;
|
950
|
-
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
|
951
|
-
|
952
|
-
|
1105
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
1106
|
+
if (op != NULL) {
|
1107
|
+
memcpy(op, litPtr, lastLLSize);
|
1108
|
+
op += lastLLSize;
|
1109
|
+
}
|
953
1110
|
}
|
954
1111
|
|
955
1112
|
return op-ostart;
|
@@ -959,99 +1116,21 @@ static size_t
|
|
959
1116
|
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
|
960
1117
|
void* dst, size_t maxDstSize,
|
961
1118
|
const void* seqStart, size_t seqSize, int nbSeq,
|
962
|
-
const ZSTD_longOffset_e isLongOffset
|
1119
|
+
const ZSTD_longOffset_e isLongOffset,
|
1120
|
+
const int frame)
|
963
1121
|
{
|
964
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1122
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
965
1123
|
}
|
966
1124
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
967
1125
|
|
968
|
-
|
969
|
-
|
970
1126
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
971
|
-
FORCE_INLINE_TEMPLATE seq_t
|
972
|
-
ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
|
973
|
-
{
|
974
|
-
seq_t seq;
|
975
|
-
U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
|
976
|
-
U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
|
977
|
-
U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
|
978
|
-
U32 const totalBits = llBits+mlBits+ofBits;
|
979
|
-
U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
|
980
|
-
U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
|
981
|
-
U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
|
982
|
-
|
983
|
-
/* sequence */
|
984
|
-
{ size_t offset;
|
985
|
-
if (!ofBits)
|
986
|
-
offset = 0;
|
987
|
-
else {
|
988
|
-
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
989
|
-
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
990
|
-
assert(ofBits <= MaxOff);
|
991
|
-
if (MEM_32bits() && longOffsets) {
|
992
|
-
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
|
993
|
-
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
994
|
-
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
|
995
|
-
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
996
|
-
} else {
|
997
|
-
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
998
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
999
|
-
}
|
1000
|
-
}
|
1001
|
-
|
1002
|
-
if (ofBits <= 1) {
|
1003
|
-
offset += (llBase==0);
|
1004
|
-
if (offset) {
|
1005
|
-
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
1006
|
-
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
1007
|
-
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
1008
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
1009
|
-
seqState->prevOffset[0] = offset = temp;
|
1010
|
-
} else {
|
1011
|
-
offset = seqState->prevOffset[0];
|
1012
|
-
}
|
1013
|
-
} else {
|
1014
|
-
seqState->prevOffset[2] = seqState->prevOffset[1];
|
1015
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
1016
|
-
seqState->prevOffset[0] = offset;
|
1017
|
-
}
|
1018
|
-
seq.offset = offset;
|
1019
|
-
}
|
1020
|
-
|
1021
|
-
seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
|
1022
|
-
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
1023
|
-
BIT_reloadDStream(&seqState->DStream);
|
1024
|
-
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
1025
|
-
BIT_reloadDStream(&seqState->DStream);
|
1026
|
-
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
|
1027
|
-
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
1028
|
-
|
1029
|
-
seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
|
1030
|
-
if (MEM_32bits())
|
1031
|
-
BIT_reloadDStream(&seqState->DStream);
|
1032
|
-
|
1033
|
-
{ size_t const pos = seqState->pos + seq.litLength;
|
1034
|
-
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
1035
|
-
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
1036
|
-
* No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
|
1037
|
-
seqState->pos = pos + seq.matchLength;
|
1038
|
-
}
|
1039
|
-
|
1040
|
-
/* ANS state update */
|
1041
|
-
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
1042
|
-
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
1043
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
1044
|
-
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
1045
|
-
|
1046
|
-
return seq;
|
1047
|
-
}
|
1048
|
-
|
1049
1127
|
FORCE_INLINE_TEMPLATE size_t
|
1050
1128
|
ZSTD_decompressSequencesLong_body(
|
1051
1129
|
ZSTD_DCtx* dctx,
|
1052
1130
|
void* dst, size_t maxDstSize,
|
1053
1131
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1054
|
-
const ZSTD_longOffset_e isLongOffset
|
1132
|
+
const ZSTD_longOffset_e isLongOffset,
|
1133
|
+
const int frame)
|
1055
1134
|
{
|
1056
1135
|
const BYTE* ip = (const BYTE*)seqStart;
|
1057
1136
|
const BYTE* const iend = ip + seqSize;
|
@@ -1063,6 +1142,7 @@ ZSTD_decompressSequencesLong_body(
|
|
1063
1142
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
1064
1143
|
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
|
1065
1144
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
1145
|
+
(void)frame;
|
1066
1146
|
|
1067
1147
|
/* Regen sequences */
|
1068
1148
|
if (nbSeq) {
|
@@ -1078,36 +1158,45 @@ ZSTD_decompressSequencesLong_body(
|
|
1078
1158
|
seqState.prefixStart = prefixStart;
|
1079
1159
|
seqState.pos = (size_t)(op-prefixStart);
|
1080
1160
|
seqState.dictEnd = dictEnd;
|
1161
|
+
assert(dst != NULL);
|
1081
1162
|
assert(iend >= ip);
|
1082
1163
|
RETURN_ERROR_IF(
|
1083
1164
|
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
1084
|
-
corruption_detected);
|
1165
|
+
corruption_detected, "");
|
1085
1166
|
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
1086
1167
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
1087
1168
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
1088
1169
|
|
1089
1170
|
/* prepare in advance */
|
1090
1171
|
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
|
1091
|
-
sequences[seqNb] =
|
1172
|
+
sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
|
1092
1173
|
PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
1093
1174
|
}
|
1094
|
-
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
|
1175
|
+
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
|
1095
1176
|
|
1096
1177
|
/* decode and decompress */
|
1097
1178
|
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
|
1098
|
-
seq_t const sequence =
|
1179
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
|
1099
1180
|
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
1181
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1182
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1183
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
1184
|
+
#endif
|
1100
1185
|
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
1101
1186
|
PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
1102
1187
|
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
1103
1188
|
op += oneSeqSize;
|
1104
1189
|
}
|
1105
|
-
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
|
1190
|
+
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
|
1106
1191
|
|
1107
1192
|
/* finish queue */
|
1108
1193
|
seqNb -= seqAdvance;
|
1109
1194
|
for ( ; seqNb<nbSeq ; seqNb++) {
|
1110
1195
|
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
1196
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1197
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1198
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
1199
|
+
#endif
|
1111
1200
|
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
1112
1201
|
op += oneSeqSize;
|
1113
1202
|
}
|
@@ -1118,9 +1207,11 @@ ZSTD_decompressSequencesLong_body(
|
|
1118
1207
|
|
1119
1208
|
/* last literal segment */
|
1120
1209
|
{ size_t const lastLLSize = litEnd - litPtr;
|
1121
|
-
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
|
1122
|
-
|
1123
|
-
|
1210
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
1211
|
+
if (op != NULL) {
|
1212
|
+
memcpy(op, litPtr, lastLLSize);
|
1213
|
+
op += lastLLSize;
|
1214
|
+
}
|
1124
1215
|
}
|
1125
1216
|
|
1126
1217
|
return op-ostart;
|
@@ -1130,9 +1221,10 @@ static size_t
|
|
1130
1221
|
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
1131
1222
|
void* dst, size_t maxDstSize,
|
1132
1223
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1133
|
-
const ZSTD_longOffset_e isLongOffset
|
1224
|
+
const ZSTD_longOffset_e isLongOffset,
|
1225
|
+
const int frame)
|
1134
1226
|
{
|
1135
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1227
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1136
1228
|
}
|
1137
1229
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
1138
1230
|
|
@@ -1146,9 +1238,10 @@ DONT_VECTORIZE
|
|
1146
1238
|
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
1147
1239
|
void* dst, size_t maxDstSize,
|
1148
1240
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1149
|
-
const ZSTD_longOffset_e isLongOffset
|
1241
|
+
const ZSTD_longOffset_e isLongOffset,
|
1242
|
+
const int frame)
|
1150
1243
|
{
|
1151
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1244
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1152
1245
|
}
|
1153
1246
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
1154
1247
|
|
@@ -1157,9 +1250,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t
|
|
1157
1250
|
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
|
1158
1251
|
void* dst, size_t maxDstSize,
|
1159
1252
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1160
|
-
const ZSTD_longOffset_e isLongOffset
|
1253
|
+
const ZSTD_longOffset_e isLongOffset,
|
1254
|
+
const int frame)
|
1161
1255
|
{
|
1162
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1256
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1163
1257
|
}
|
1164
1258
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
1165
1259
|
|
@@ -1169,21 +1263,23 @@ typedef size_t (*ZSTD_decompressSequences_t)(
|
|
1169
1263
|
ZSTD_DCtx* dctx,
|
1170
1264
|
void* dst, size_t maxDstSize,
|
1171
1265
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1172
|
-
const ZSTD_longOffset_e isLongOffset
|
1266
|
+
const ZSTD_longOffset_e isLongOffset,
|
1267
|
+
const int frame);
|
1173
1268
|
|
1174
1269
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
1175
1270
|
static size_t
|
1176
1271
|
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
1177
1272
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1178
|
-
const ZSTD_longOffset_e isLongOffset
|
1273
|
+
const ZSTD_longOffset_e isLongOffset,
|
1274
|
+
const int frame)
|
1179
1275
|
{
|
1180
1276
|
DEBUGLOG(5, "ZSTD_decompressSequences");
|
1181
1277
|
#if DYNAMIC_BMI2
|
1182
1278
|
if (dctx->bmi2) {
|
1183
|
-
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1279
|
+
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1184
1280
|
}
|
1185
1281
|
#endif
|
1186
|
-
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1282
|
+
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1187
1283
|
}
|
1188
1284
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
1189
1285
|
|
@@ -1198,15 +1294,16 @@ static size_t
|
|
1198
1294
|
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
1199
1295
|
void* dst, size_t maxDstSize,
|
1200
1296
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1201
|
-
const ZSTD_longOffset_e isLongOffset
|
1297
|
+
const ZSTD_longOffset_e isLongOffset,
|
1298
|
+
const int frame)
|
1202
1299
|
{
|
1203
1300
|
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
|
1204
1301
|
#if DYNAMIC_BMI2
|
1205
1302
|
if (dctx->bmi2) {
|
1206
|
-
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1303
|
+
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1207
1304
|
}
|
1208
1305
|
#endif
|
1209
|
-
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1306
|
+
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
1210
1307
|
}
|
1211
1308
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
1212
1309
|
|
@@ -1240,7 +1337,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
|
|
1240
1337
|
}
|
1241
1338
|
#endif
|
1242
1339
|
|
1243
|
-
|
1244
1340
|
size_t
|
1245
1341
|
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
1246
1342
|
void* dst, size_t dstCapacity,
|
@@ -1256,7 +1352,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1256
1352
|
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
|
1257
1353
|
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
1258
1354
|
|
1259
|
-
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);
|
1355
|
+
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
|
1260
1356
|
|
1261
1357
|
/* Decode literals section */
|
1262
1358
|
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
@@ -1282,6 +1378,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1282
1378
|
ip += seqHSize;
|
1283
1379
|
srcSize -= seqHSize;
|
1284
1380
|
|
1381
|
+
RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
1382
|
+
|
1285
1383
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
1286
1384
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
1287
1385
|
if ( !usePrefetchDecoder
|
@@ -1300,17 +1398,28 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1300
1398
|
if (usePrefetchDecoder)
|
1301
1399
|
#endif
|
1302
1400
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
1303
|
-
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
1401
|
+
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
1304
1402
|
#endif
|
1305
1403
|
|
1306
1404
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
1307
1405
|
/* else */
|
1308
|
-
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
1406
|
+
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
1309
1407
|
#endif
|
1310
1408
|
}
|
1311
1409
|
}
|
1312
1410
|
|
1313
1411
|
|
1412
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
|
1413
|
+
{
|
1414
|
+
if (dst != dctx->previousDstEnd) { /* not contiguous */
|
1415
|
+
dctx->dictEnd = dctx->previousDstEnd;
|
1416
|
+
dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
|
1417
|
+
dctx->prefixStart = dst;
|
1418
|
+
dctx->previousDstEnd = dst;
|
1419
|
+
}
|
1420
|
+
}
|
1421
|
+
|
1422
|
+
|
1314
1423
|
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
|
1315
1424
|
void* dst, size_t dstCapacity,
|
1316
1425
|
const void* src, size_t srcSize)
|