zstd-ruby 1.3.8.0 → 1.4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +6 -5
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +133 -61
- data/ext/zstdruby/libzstd/README.md +51 -18
- data/ext/zstdruby/libzstd/common/bitstream.h +38 -39
- data/ext/zstdruby/libzstd/common/compiler.h +41 -6
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +11 -31
- data/ext/zstdruby/libzstd/common/entropy_common.c +13 -33
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/error_private.h +6 -2
- data/ext/zstdruby/libzstd/common/fse.h +13 -33
- data/ext/zstdruby/libzstd/common/fse_decompress.c +12 -35
- data/ext/zstdruby/libzstd/common/huf.h +15 -33
- data/ext/zstdruby/libzstd/common/mem.h +75 -2
- data/ext/zstdruby/libzstd/common/pool.c +8 -4
- data/ext/zstdruby/libzstd/common/pool.h +2 -2
- data/ext/zstdruby/libzstd/common/threading.c +52 -6
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +25 -37
- data/ext/zstdruby/libzstd/common/xxhash.h +11 -31
- data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +203 -22
- data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -42
- data/ext/zstdruby/libzstd/compress/hist.c +15 -35
- data/ext/zstdruby/libzstd/compress/hist.h +12 -32
- data/ext/zstdruby/libzstd/compress/huf_compress.c +92 -92
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1460 -1472
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +330 -65
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +419 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +845 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +525 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +65 -43
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +264 -159
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +74 -42
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +33 -11
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +7 -2
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +108 -125
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +129 -93
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +46 -28
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +76 -60
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +14 -10
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +471 -258
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +471 -346
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +25 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +220 -65
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +81 -7
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +85 -56
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +43 -19
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +73 -35
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +3 -2
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +49 -15
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +142 -117
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +13 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +54 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +13 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +55 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +13 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +62 -29
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +13 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +145 -109
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +14 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +56 -26
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +11 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +65 -28
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +11 -6
- data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
- data/ext/zstdruby/libzstd/zstd.h +921 -597
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +2 -2
- metadata +19 -14
- data/ext/zstdruby/libzstd/dll/libzstd.def +0 -87
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,14 +15,14 @@
|
|
|
15
15
|
* Dependencies
|
|
16
16
|
*********************************************************/
|
|
17
17
|
#include <string.h> /* memcpy, memmove, memset */
|
|
18
|
-
#include "compiler.h" /* prefetch */
|
|
19
|
-
#include "cpu.h" /* bmi2 */
|
|
20
|
-
#include "mem.h" /* low level memory routines */
|
|
18
|
+
#include "../common/compiler.h" /* prefetch */
|
|
19
|
+
#include "../common/cpu.h" /* bmi2 */
|
|
20
|
+
#include "../common/mem.h" /* low level memory routines */
|
|
21
21
|
#define FSE_STATIC_LINKING_ONLY
|
|
22
|
-
#include "fse.h"
|
|
22
|
+
#include "../common/fse.h"
|
|
23
23
|
#define HUF_STATIC_LINKING_ONLY
|
|
24
|
-
#include "huf.h"
|
|
25
|
-
#include "zstd_internal.h"
|
|
24
|
+
#include "../common/huf.h"
|
|
25
|
+
#include "../common/zstd_internal.h"
|
|
26
26
|
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
|
|
27
27
|
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
|
|
28
28
|
#include "zstd_decompress_block.h"
|
|
@@ -56,14 +56,15 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
|
|
|
56
56
|
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
57
57
|
blockProperties_t* bpPtr)
|
|
58
58
|
{
|
|
59
|
-
|
|
59
|
+
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
|
|
60
|
+
|
|
60
61
|
{ U32 const cBlockHeader = MEM_readLE24(src);
|
|
61
62
|
U32 const cSize = cBlockHeader >> 3;
|
|
62
63
|
bpPtr->lastBlock = cBlockHeader & 1;
|
|
63
64
|
bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
|
|
64
65
|
bpPtr->origSize = cSize; /* only useful for RLE */
|
|
65
66
|
if (bpPtr->blockType == bt_rle) return 1;
|
|
66
|
-
|
|
67
|
+
RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
|
|
67
68
|
return cSize;
|
|
68
69
|
}
|
|
69
70
|
}
|
|
@@ -78,7 +79,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
78
79
|
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
79
80
|
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
|
|
80
81
|
{
|
|
81
|
-
|
|
82
|
+
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
|
|
83
|
+
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
|
|
82
84
|
|
|
83
85
|
{ const BYTE* const istart = (const BYTE*) src;
|
|
84
86
|
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
|
|
@@ -86,11 +88,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
86
88
|
switch(litEncType)
|
|
87
89
|
{
|
|
88
90
|
case set_repeat:
|
|
89
|
-
|
|
91
|
+
DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
|
|
92
|
+
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
|
|
90
93
|
/* fall-through */
|
|
91
94
|
|
|
92
95
|
case set_compressed:
|
|
93
|
-
|
|
96
|
+
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
|
|
94
97
|
{ size_t lhSize, litSize, litCSize;
|
|
95
98
|
U32 singleStream=0;
|
|
96
99
|
U32 const lhlCode = (istart[0] >> 2) & 3;
|
|
@@ -115,11 +118,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
115
118
|
/* 2 - 2 - 18 - 18 */
|
|
116
119
|
lhSize = 5;
|
|
117
120
|
litSize = (lhc >> 4) & 0x3FFFF;
|
|
118
|
-
litCSize = (lhc >> 22) + (istart[4] << 10);
|
|
121
|
+
litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
|
|
119
122
|
break;
|
|
120
123
|
}
|
|
121
|
-
|
|
122
|
-
|
|
124
|
+
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
|
125
|
+
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
|
|
123
126
|
|
|
124
127
|
/* prefetch huffman table if cold */
|
|
125
128
|
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
|
@@ -157,7 +160,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
157
160
|
}
|
|
158
161
|
}
|
|
159
162
|
|
|
160
|
-
|
|
163
|
+
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
|
|
161
164
|
|
|
162
165
|
dctx->litPtr = dctx->litBuffer;
|
|
163
166
|
dctx->litSize = litSize;
|
|
@@ -187,7 +190,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
187
190
|
}
|
|
188
191
|
|
|
189
192
|
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
|
190
|
-
|
|
193
|
+
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
|
|
191
194
|
memcpy(dctx->litBuffer, istart+lhSize, litSize);
|
|
192
195
|
dctx->litPtr = dctx->litBuffer;
|
|
193
196
|
dctx->litSize = litSize;
|
|
@@ -216,17 +219,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
216
219
|
case 3:
|
|
217
220
|
lhSize = 3;
|
|
218
221
|
litSize = MEM_readLE24(istart) >> 4;
|
|
219
|
-
|
|
222
|
+
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
|
220
223
|
break;
|
|
221
224
|
}
|
|
222
|
-
|
|
225
|
+
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
|
223
226
|
memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
|
|
224
227
|
dctx->litPtr = dctx->litBuffer;
|
|
225
228
|
dctx->litSize = litSize;
|
|
226
229
|
return lhSize+1;
|
|
227
230
|
}
|
|
228
231
|
default:
|
|
229
|
-
|
|
232
|
+
RETURN_ERROR(corruption_detected, "impossible");
|
|
230
233
|
}
|
|
231
234
|
}
|
|
232
235
|
}
|
|
@@ -390,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
|
390
393
|
symbolNext[s] = 1;
|
|
391
394
|
} else {
|
|
392
395
|
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
|
|
393
|
-
|
|
396
|
+
assert(normalizedCounter[s]>=0);
|
|
397
|
+
symbolNext[s] = (U16)normalizedCounter[s];
|
|
394
398
|
} } }
|
|
395
399
|
memcpy(dt, &DTableH, sizeof(DTableH));
|
|
396
400
|
}
|
|
@@ -436,8 +440,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
436
440
|
switch(type)
|
|
437
441
|
{
|
|
438
442
|
case set_rle :
|
|
439
|
-
|
|
440
|
-
|
|
443
|
+
RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
|
|
444
|
+
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
|
|
441
445
|
{ U32 const symbol = *(const BYTE*)src;
|
|
442
446
|
U32 const baseline = baseValue[symbol];
|
|
443
447
|
U32 const nbBits = nbAdditionalBits[symbol];
|
|
@@ -449,7 +453,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
449
453
|
*DTablePtr = defaultTable;
|
|
450
454
|
return 0;
|
|
451
455
|
case set_repeat:
|
|
452
|
-
|
|
456
|
+
RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
|
|
453
457
|
/* prefetch FSE table if used */
|
|
454
458
|
if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
|
|
455
459
|
const void* const pStart = *DTablePtr;
|
|
@@ -461,15 +465,15 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
461
465
|
{ unsigned tableLog;
|
|
462
466
|
S16 norm[MaxSeq+1];
|
|
463
467
|
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
|
|
464
|
-
|
|
465
|
-
|
|
468
|
+
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
|
|
469
|
+
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
|
|
466
470
|
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
|
|
467
471
|
*DTablePtr = DTableSpace;
|
|
468
472
|
return headerSize;
|
|
469
473
|
}
|
|
470
|
-
default :
|
|
474
|
+
default :
|
|
471
475
|
assert(0);
|
|
472
|
-
|
|
476
|
+
RETURN_ERROR(GENERIC, "impossible");
|
|
473
477
|
}
|
|
474
478
|
}
|
|
475
479
|
|
|
@@ -483,28 +487,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
483
487
|
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
|
|
484
488
|
|
|
485
489
|
/* check */
|
|
486
|
-
|
|
490
|
+
RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
|
|
487
491
|
|
|
488
492
|
/* SeqHead */
|
|
489
493
|
nbSeq = *ip++;
|
|
490
494
|
if (!nbSeq) {
|
|
491
495
|
*nbSeqPtr=0;
|
|
492
|
-
|
|
496
|
+
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
|
|
493
497
|
return 1;
|
|
494
498
|
}
|
|
495
499
|
if (nbSeq > 0x7F) {
|
|
496
500
|
if (nbSeq == 0xFF) {
|
|
497
|
-
|
|
501
|
+
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
|
|
498
502
|
nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
|
|
499
503
|
} else {
|
|
500
|
-
|
|
504
|
+
RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
|
|
501
505
|
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
|
502
506
|
}
|
|
503
507
|
}
|
|
504
508
|
*nbSeqPtr = nbSeq;
|
|
505
509
|
|
|
506
510
|
/* FSE table descriptors */
|
|
507
|
-
|
|
511
|
+
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
|
|
508
512
|
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
|
|
509
513
|
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
|
|
510
514
|
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
|
|
@@ -517,7 +521,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
517
521
|
LL_base, LL_bits,
|
|
518
522
|
LL_defaultDTable, dctx->fseEntropy,
|
|
519
523
|
dctx->ddictIsCold, nbSeq);
|
|
520
|
-
|
|
524
|
+
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
521
525
|
ip += llhSize;
|
|
522
526
|
}
|
|
523
527
|
|
|
@@ -527,7 +531,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
527
531
|
OF_base, OF_bits,
|
|
528
532
|
OF_defaultDTable, dctx->fseEntropy,
|
|
529
533
|
dctx->ddictIsCold, nbSeq);
|
|
530
|
-
|
|
534
|
+
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
531
535
|
ip += ofhSize;
|
|
532
536
|
}
|
|
533
537
|
|
|
@@ -537,7 +541,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
537
541
|
ML_base, ML_bits,
|
|
538
542
|
ML_defaultDTable, dctx->fseEntropy,
|
|
539
543
|
dctx->ddictIsCold, nbSeq);
|
|
540
|
-
|
|
544
|
+
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
541
545
|
ip += mlhSize;
|
|
542
546
|
}
|
|
543
547
|
}
|
|
@@ -569,38 +573,118 @@ typedef struct {
|
|
|
569
573
|
size_t pos;
|
|
570
574
|
} seqState_t;
|
|
571
575
|
|
|
576
|
+
/*! ZSTD_overlapCopy8() :
|
|
577
|
+
* Copies 8 bytes from ip to op and updates op and ip where ip <= op.
|
|
578
|
+
* If the offset is < 8 then the offset is spread to at least 8 bytes.
|
|
579
|
+
*
|
|
580
|
+
* Precondition: *ip <= *op
|
|
581
|
+
* Postcondition: *op - *ip >= 8
|
|
582
|
+
*/
|
|
583
|
+
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
|
584
|
+
assert(*ip <= *op);
|
|
585
|
+
if (offset < 8) {
|
|
586
|
+
/* close range match, overlap */
|
|
587
|
+
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
|
588
|
+
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
|
589
|
+
int const sub2 = dec64table[offset];
|
|
590
|
+
(*op)[0] = (*ip)[0];
|
|
591
|
+
(*op)[1] = (*ip)[1];
|
|
592
|
+
(*op)[2] = (*ip)[2];
|
|
593
|
+
(*op)[3] = (*ip)[3];
|
|
594
|
+
*ip += dec32table[offset];
|
|
595
|
+
ZSTD_copy4(*op+4, *ip);
|
|
596
|
+
*ip -= sub2;
|
|
597
|
+
} else {
|
|
598
|
+
ZSTD_copy8(*op, *ip);
|
|
599
|
+
}
|
|
600
|
+
*ip += 8;
|
|
601
|
+
*op += 8;
|
|
602
|
+
assert(*op - *ip >= 8);
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
/*! ZSTD_safecopy() :
|
|
606
|
+
* Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
|
|
607
|
+
* and write up to 16 bytes past oend_w (op >= oend_w is allowed).
|
|
608
|
+
* This function is only called in the uncommon case where the sequence is near the end of the block. It
|
|
609
|
+
* should be fast for a single long sequence, but can be slow for several short sequences.
|
|
610
|
+
*
|
|
611
|
+
* @param ovtype controls the overlap detection
|
|
612
|
+
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
|
|
613
|
+
* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
|
|
614
|
+
* The src buffer must be before the dst buffer.
|
|
615
|
+
*/
|
|
616
|
+
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
|
617
|
+
ptrdiff_t const diff = op - ip;
|
|
618
|
+
BYTE* const oend = op + length;
|
|
619
|
+
|
|
620
|
+
assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
|
|
621
|
+
(ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
|
|
622
|
+
|
|
623
|
+
if (length < 8) {
|
|
624
|
+
/* Handle short lengths. */
|
|
625
|
+
while (op < oend) *op++ = *ip++;
|
|
626
|
+
return;
|
|
627
|
+
}
|
|
628
|
+
if (ovtype == ZSTD_overlap_src_before_dst) {
|
|
629
|
+
/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
|
|
630
|
+
assert(length >= 8);
|
|
631
|
+
ZSTD_overlapCopy8(&op, &ip, diff);
|
|
632
|
+
assert(op - ip >= 8);
|
|
633
|
+
assert(op <= oend);
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
if (oend <= oend_w) {
|
|
637
|
+
/* No risk of overwrite. */
|
|
638
|
+
ZSTD_wildcopy(op, ip, length, ovtype);
|
|
639
|
+
return;
|
|
640
|
+
}
|
|
641
|
+
if (op <= oend_w) {
|
|
642
|
+
/* Wildcopy until we get close to the end. */
|
|
643
|
+
assert(oend > oend_w);
|
|
644
|
+
ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
|
|
645
|
+
ip += oend_w - op;
|
|
646
|
+
op = oend_w;
|
|
647
|
+
}
|
|
648
|
+
/* Handle the leftovers. */
|
|
649
|
+
while (op < oend) *op++ = *ip++;
|
|
650
|
+
}
|
|
572
651
|
|
|
573
|
-
/*
|
|
574
|
-
*
|
|
575
|
-
*
|
|
576
|
-
*
|
|
577
|
-
*
|
|
578
|
-
*
|
|
579
|
-
*
|
|
652
|
+
/* ZSTD_execSequenceEnd():
|
|
653
|
+
* This version handles cases that are near the end of the output buffer. It requires
|
|
654
|
+
* more careful checks to make sure there is no overflow. By separating out these hard
|
|
655
|
+
* and unlikely cases, we can speed up the common cases.
|
|
656
|
+
*
|
|
657
|
+
* NOTE: This function needs to be fast for a single long sequence, but doesn't need
|
|
658
|
+
* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
|
|
659
|
+
*/
|
|
580
660
|
FORCE_NOINLINE
|
|
581
|
-
size_t
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
661
|
+
size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
662
|
+
BYTE* const oend, seq_t sequence,
|
|
663
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
664
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
585
665
|
{
|
|
586
666
|
BYTE* const oLitEnd = op + sequence.litLength;
|
|
587
667
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
588
|
-
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
589
668
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
590
669
|
const BYTE* match = oLitEnd - sequence.offset;
|
|
670
|
+
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
|
591
671
|
|
|
592
|
-
/*
|
|
593
|
-
|
|
594
|
-
|
|
672
|
+
/* bounds checks : careful of address space overflow in 32-bit mode */
|
|
673
|
+
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
|
|
674
|
+
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
|
|
675
|
+
assert(op < op + sequenceLength);
|
|
676
|
+
assert(oLitEnd < op + sequenceLength);
|
|
595
677
|
|
|
596
678
|
/* copy literals */
|
|
597
|
-
|
|
679
|
+
ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
|
|
680
|
+
op = oLitEnd;
|
|
681
|
+
*litPtr = iLitEnd;
|
|
598
682
|
|
|
599
683
|
/* copy Match */
|
|
600
|
-
if (sequence.offset > (size_t)(oLitEnd -
|
|
684
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
601
685
|
/* offset beyond prefix */
|
|
602
|
-
|
|
603
|
-
match = dictEnd - (
|
|
686
|
+
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
|
687
|
+
match = dictEnd - (prefixStart-match);
|
|
604
688
|
if (match + sequence.matchLength <= dictEnd) {
|
|
605
689
|
memmove(oLitEnd, match, sequence.matchLength);
|
|
606
690
|
return sequenceLength;
|
|
@@ -610,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
|
|
|
610
694
|
memmove(oLitEnd, match, length1);
|
|
611
695
|
op = oLitEnd + length1;
|
|
612
696
|
sequence.matchLength -= length1;
|
|
613
|
-
match =
|
|
697
|
+
match = prefixStart;
|
|
614
698
|
} }
|
|
615
|
-
|
|
699
|
+
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
|
616
700
|
return sequenceLength;
|
|
617
701
|
}
|
|
618
702
|
|
|
619
|
-
|
|
620
703
|
HINT_INLINE
|
|
621
704
|
size_t ZSTD_execSequence(BYTE* op,
|
|
622
705
|
BYTE* const oend, seq_t sequence,
|
|
@@ -626,27 +709,47 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
626
709
|
BYTE* const oLitEnd = op + sequence.litLength;
|
|
627
710
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
628
711
|
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
629
|
-
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
|
712
|
+
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
|
|
630
713
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
631
714
|
const BYTE* match = oLitEnd - sequence.offset;
|
|
632
715
|
|
|
633
|
-
/*
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
if (
|
|
641
|
-
|
|
716
|
+
assert(op != NULL /* Precondition */);
|
|
717
|
+
assert(oend_w < oend /* No underflow */);
|
|
718
|
+
/* Handle edge cases in a slow path:
|
|
719
|
+
* - Read beyond end of literals
|
|
720
|
+
* - Match end is within WILDCOPY_OVERLENGTH of oend
|
|
721
|
+
* - 32-bit mode and the match length overflows
|
|
722
|
+
*/
|
|
723
|
+
if (UNLIKELY(
|
|
724
|
+
iLitEnd > litLimit ||
|
|
725
|
+
oMatchEnd > oend_w ||
|
|
726
|
+
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
|
727
|
+
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
|
728
|
+
|
|
729
|
+
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
|
730
|
+
assert(op <= oLitEnd /* No overflow */);
|
|
731
|
+
assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
|
|
732
|
+
assert(oMatchEnd <= oend /* No underflow */);
|
|
733
|
+
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
|
|
734
|
+
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
|
|
735
|
+
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
|
|
736
|
+
|
|
737
|
+
/* Copy Literals:
|
|
738
|
+
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
|
|
739
|
+
* We likely don't need the full 32-byte wildcopy.
|
|
740
|
+
*/
|
|
741
|
+
assert(WILDCOPY_OVERLENGTH >= 16);
|
|
742
|
+
ZSTD_copy16(op, (*litPtr));
|
|
743
|
+
if (UNLIKELY(sequence.litLength > 16)) {
|
|
744
|
+
ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
|
|
745
|
+
}
|
|
642
746
|
op = oLitEnd;
|
|
643
747
|
*litPtr = iLitEnd; /* update for next sequence */
|
|
644
748
|
|
|
645
|
-
/*
|
|
749
|
+
/* Copy Match */
|
|
646
750
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
647
751
|
/* offset beyond prefix -> go into extDict */
|
|
648
|
-
|
|
649
|
-
return ERROR(corruption_detected);
|
|
752
|
+
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
|
650
753
|
match = dictEnd + (match - prefixStart);
|
|
651
754
|
if (match + sequence.matchLength <= dictEnd) {
|
|
652
755
|
memmove(oLitEnd, match, sequence.matchLength);
|
|
@@ -658,121 +761,33 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
658
761
|
op = oLitEnd + length1;
|
|
659
762
|
sequence.matchLength -= length1;
|
|
660
763
|
match = prefixStart;
|
|
661
|
-
if (op > oend_w || sequence.matchLength < MINMATCH) {
|
|
662
|
-
U32 i;
|
|
663
|
-
for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
|
|
664
|
-
return sequenceLength;
|
|
665
|
-
}
|
|
666
764
|
} }
|
|
667
|
-
/*
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
match
|
|
682
|
-
|
|
683
|
-
ZSTD_copy8(op, match);
|
|
684
|
-
}
|
|
685
|
-
op += 8; match += 8;
|
|
686
|
-
|
|
687
|
-
if (oMatchEnd > oend-(16-MINMATCH)) {
|
|
688
|
-
if (op < oend_w) {
|
|
689
|
-
ZSTD_wildcopy(op, match, oend_w - op);
|
|
690
|
-
match += oend_w - op;
|
|
691
|
-
op = oend_w;
|
|
692
|
-
}
|
|
693
|
-
while (op < oMatchEnd) *op++ = *match++;
|
|
694
|
-
} else {
|
|
695
|
-
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
|
|
765
|
+
/* Match within prefix of 1 or more bytes */
|
|
766
|
+
assert(op <= oMatchEnd);
|
|
767
|
+
assert(oMatchEnd <= oend_w);
|
|
768
|
+
assert(match >= prefixStart);
|
|
769
|
+
assert(sequence.matchLength >= 1);
|
|
770
|
+
|
|
771
|
+
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
|
772
|
+
* without overlap checking.
|
|
773
|
+
*/
|
|
774
|
+
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
|
775
|
+
/* We bet on a full wildcopy for matches, since we expect matches to be
|
|
776
|
+
* longer than literals (in general). In silesia, ~10% of matches are longer
|
|
777
|
+
* than 16 bytes.
|
|
778
|
+
*/
|
|
779
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
|
|
780
|
+
return sequenceLength;
|
|
696
781
|
}
|
|
697
|
-
|
|
698
|
-
}
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
HINT_INLINE
|
|
702
|
-
size_t ZSTD_execSequenceLong(BYTE* op,
|
|
703
|
-
BYTE* const oend, seq_t sequence,
|
|
704
|
-
const BYTE** litPtr, const BYTE* const litLimit,
|
|
705
|
-
const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
|
|
706
|
-
{
|
|
707
|
-
BYTE* const oLitEnd = op + sequence.litLength;
|
|
708
|
-
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
709
|
-
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
710
|
-
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
|
711
|
-
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
712
|
-
const BYTE* match = sequence.match;
|
|
782
|
+
assert(sequence.offset < WILDCOPY_VECLEN);
|
|
713
783
|
|
|
714
|
-
/*
|
|
715
|
-
|
|
716
|
-
if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
|
|
717
|
-
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
|
|
718
|
-
|
|
719
|
-
/* copy Literals */
|
|
720
|
-
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
|
|
721
|
-
if (sequence.litLength > 8)
|
|
722
|
-
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
|
723
|
-
op = oLitEnd;
|
|
724
|
-
*litPtr = iLitEnd; /* update for next sequence */
|
|
784
|
+
/* Copy 8 bytes and spread the offset to be >= 8. */
|
|
785
|
+
ZSTD_overlapCopy8(&op, &match, sequence.offset);
|
|
725
786
|
|
|
726
|
-
/*
|
|
727
|
-
if (sequence.
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
if (match + sequence.matchLength <= dictEnd) {
|
|
731
|
-
memmove(oLitEnd, match, sequence.matchLength);
|
|
732
|
-
return sequenceLength;
|
|
733
|
-
}
|
|
734
|
-
/* span extDict & currentPrefixSegment */
|
|
735
|
-
{ size_t const length1 = dictEnd - match;
|
|
736
|
-
memmove(oLitEnd, match, length1);
|
|
737
|
-
op = oLitEnd + length1;
|
|
738
|
-
sequence.matchLength -= length1;
|
|
739
|
-
match = prefixStart;
|
|
740
|
-
if (op > oend_w || sequence.matchLength < MINMATCH) {
|
|
741
|
-
U32 i;
|
|
742
|
-
for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
|
|
743
|
-
return sequenceLength;
|
|
744
|
-
}
|
|
745
|
-
} }
|
|
746
|
-
assert(op <= oend_w);
|
|
747
|
-
assert(sequence.matchLength >= MINMATCH);
|
|
748
|
-
|
|
749
|
-
/* match within prefix */
|
|
750
|
-
if (sequence.offset < 8) {
|
|
751
|
-
/* close range match, overlap */
|
|
752
|
-
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
|
753
|
-
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
|
754
|
-
int const sub2 = dec64table[sequence.offset];
|
|
755
|
-
op[0] = match[0];
|
|
756
|
-
op[1] = match[1];
|
|
757
|
-
op[2] = match[2];
|
|
758
|
-
op[3] = match[3];
|
|
759
|
-
match += dec32table[sequence.offset];
|
|
760
|
-
ZSTD_copy4(op+4, match);
|
|
761
|
-
match -= sub2;
|
|
762
|
-
} else {
|
|
763
|
-
ZSTD_copy8(op, match);
|
|
764
|
-
}
|
|
765
|
-
op += 8; match += 8;
|
|
766
|
-
|
|
767
|
-
if (oMatchEnd > oend-(16-MINMATCH)) {
|
|
768
|
-
if (op < oend_w) {
|
|
769
|
-
ZSTD_wildcopy(op, match, oend_w - op);
|
|
770
|
-
match += oend_w - op;
|
|
771
|
-
op = oend_w;
|
|
772
|
-
}
|
|
773
|
-
while (op < oMatchEnd) *op++ = *match++;
|
|
774
|
-
} else {
|
|
775
|
-
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
|
|
787
|
+
/* If the match length is > 8 bytes, then continue with the wildcopy. */
|
|
788
|
+
if (sequence.matchLength > 8) {
|
|
789
|
+
assert(op < oMatchEnd);
|
|
790
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
|
|
776
791
|
}
|
|
777
792
|
return sequenceLength;
|
|
778
793
|
}
|
|
@@ -798,10 +813,18 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
|
|
798
813
|
DStatePtr->state = DInfo.nextState + lowBits;
|
|
799
814
|
}
|
|
800
815
|
|
|
816
|
+
FORCE_INLINE_TEMPLATE void
|
|
817
|
+
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
|
|
818
|
+
{
|
|
819
|
+
U32 const nbBits = DInfo.nbBits;
|
|
820
|
+
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
|
821
|
+
DStatePtr->state = DInfo.nextState + lowBits;
|
|
822
|
+
}
|
|
823
|
+
|
|
801
824
|
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
|
802
825
|
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
|
|
803
826
|
* bits before reloading. This value is the maximum number of bytes we read
|
|
804
|
-
* after reloading when we are decoding long
|
|
827
|
+
* after reloading when we are decoding long offsets.
|
|
805
828
|
*/
|
|
806
829
|
#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
|
|
807
830
|
(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
|
|
@@ -809,25 +832,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
|
|
809
832
|
: 0)
|
|
810
833
|
|
|
811
834
|
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
|
|
835
|
+
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
|
|
812
836
|
|
|
813
|
-
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
814
837
|
FORCE_INLINE_TEMPLATE seq_t
|
|
815
|
-
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
|
838
|
+
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
|
|
816
839
|
{
|
|
817
840
|
seq_t seq;
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
U32 const
|
|
822
|
-
U32 const
|
|
823
|
-
U32 const
|
|
824
|
-
|
|
841
|
+
ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
|
|
842
|
+
ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
|
|
843
|
+
ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
|
|
844
|
+
U32 const llBase = llDInfo.baseValue;
|
|
845
|
+
U32 const mlBase = mlDInfo.baseValue;
|
|
846
|
+
U32 const ofBase = ofDInfo.baseValue;
|
|
847
|
+
BYTE const llBits = llDInfo.nbAdditionalBits;
|
|
848
|
+
BYTE const mlBits = mlDInfo.nbAdditionalBits;
|
|
849
|
+
BYTE const ofBits = ofDInfo.nbAdditionalBits;
|
|
850
|
+
BYTE const totalBits = llBits+mlBits+ofBits;
|
|
825
851
|
|
|
826
852
|
/* sequence */
|
|
827
853
|
{ size_t offset;
|
|
828
|
-
if (
|
|
829
|
-
offset = 0;
|
|
830
|
-
else {
|
|
854
|
+
if (ofBits > 1) {
|
|
831
855
|
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
|
832
856
|
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
|
833
857
|
assert(ofBits <= MaxOff);
|
|
@@ -841,58 +865,138 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
|
|
841
865
|
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
|
842
866
|
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
|
843
867
|
}
|
|
844
|
-
}
|
|
845
|
-
|
|
846
|
-
if (ofBits <= 1) {
|
|
847
|
-
offset += (llBase==0);
|
|
848
|
-
if (offset) {
|
|
849
|
-
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
850
|
-
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
|
851
|
-
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
852
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
853
|
-
seqState->prevOffset[0] = offset = temp;
|
|
854
|
-
} else { /* offset == 0 */
|
|
855
|
-
offset = seqState->prevOffset[0];
|
|
856
|
-
}
|
|
857
|
-
} else {
|
|
858
868
|
seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
859
869
|
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
860
870
|
seqState->prevOffset[0] = offset;
|
|
861
|
-
}
|
|
871
|
+
} else {
|
|
872
|
+
U32 const ll0 = (llBase == 0);
|
|
873
|
+
if (LIKELY((ofBits == 0))) {
|
|
874
|
+
if (LIKELY(!ll0))
|
|
875
|
+
offset = seqState->prevOffset[0];
|
|
876
|
+
else {
|
|
877
|
+
offset = seqState->prevOffset[1];
|
|
878
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
879
|
+
seqState->prevOffset[0] = offset;
|
|
880
|
+
}
|
|
881
|
+
} else {
|
|
882
|
+
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
|
|
883
|
+
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
884
|
+
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
|
885
|
+
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
886
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
887
|
+
seqState->prevOffset[0] = offset = temp;
|
|
888
|
+
} } }
|
|
862
889
|
seq.offset = offset;
|
|
863
890
|
}
|
|
864
891
|
|
|
865
|
-
seq.matchLength = mlBase
|
|
866
|
-
|
|
892
|
+
seq.matchLength = mlBase;
|
|
893
|
+
if (mlBits > 0)
|
|
894
|
+
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
|
895
|
+
|
|
867
896
|
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
|
868
897
|
BIT_reloadDStream(&seqState->DStream);
|
|
869
|
-
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
|
898
|
+
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
|
870
899
|
BIT_reloadDStream(&seqState->DStream);
|
|
871
900
|
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
|
872
901
|
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
|
873
902
|
|
|
874
|
-
seq.litLength = llBase
|
|
875
|
-
|
|
903
|
+
seq.litLength = llBase;
|
|
904
|
+
if (llBits > 0)
|
|
905
|
+
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
|
906
|
+
|
|
876
907
|
if (MEM_32bits())
|
|
877
908
|
BIT_reloadDStream(&seqState->DStream);
|
|
878
909
|
|
|
879
910
|
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
|
880
911
|
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
|
881
912
|
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
913
|
+
if (prefetch == ZSTD_p_prefetch) {
|
|
914
|
+
size_t const pos = seqState->pos + seq.litLength;
|
|
915
|
+
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
|
916
|
+
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
|
917
|
+
* No consequence though : no memory access will occur, offset is only used for prefetching */
|
|
918
|
+
seqState->pos = pos + seq.matchLength;
|
|
919
|
+
}
|
|
920
|
+
|
|
921
|
+
/* ANS state update
|
|
922
|
+
* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
|
|
923
|
+
* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
|
|
924
|
+
* Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
|
|
925
|
+
* better option, so it is the default for other compilers. But, if you
|
|
926
|
+
* measure that it is worse, please put up a pull request.
|
|
927
|
+
*/
|
|
928
|
+
{
|
|
929
|
+
#if defined(__GNUC__) && !defined(__clang__)
|
|
930
|
+
const int kUseUpdateFseState = 1;
|
|
931
|
+
#else
|
|
932
|
+
const int kUseUpdateFseState = 0;
|
|
933
|
+
#endif
|
|
934
|
+
if (kUseUpdateFseState) {
|
|
935
|
+
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
|
936
|
+
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
|
937
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
938
|
+
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
|
939
|
+
} else {
|
|
940
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
|
|
941
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
|
|
942
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
943
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
|
|
944
|
+
}
|
|
945
|
+
}
|
|
887
946
|
|
|
888
947
|
return seq;
|
|
889
948
|
}
|
|
890
949
|
|
|
950
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
|
951
|
+
static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
|
|
952
|
+
{
|
|
953
|
+
size_t const windowSize = dctx->fParams.windowSize;
|
|
954
|
+
/* No dictionary used. */
|
|
955
|
+
if (dctx->dictContentEndForFuzzing == NULL) return 0;
|
|
956
|
+
/* Dictionary is our prefix. */
|
|
957
|
+
if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
|
|
958
|
+
/* Dictionary is not our ext-dict. */
|
|
959
|
+
if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
|
|
960
|
+
/* Dictionary is not within our window size. */
|
|
961
|
+
if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
|
|
962
|
+
/* Dictionary is active. */
|
|
963
|
+
return 1;
|
|
964
|
+
}
|
|
965
|
+
|
|
966
|
+
MEM_STATIC void ZSTD_assertValidSequence(
|
|
967
|
+
ZSTD_DCtx const* dctx,
|
|
968
|
+
BYTE const* op, BYTE const* oend,
|
|
969
|
+
seq_t const seq,
|
|
970
|
+
BYTE const* prefixStart, BYTE const* virtualStart)
|
|
971
|
+
{
|
|
972
|
+
size_t const windowSize = dctx->fParams.windowSize;
|
|
973
|
+
size_t const sequenceSize = seq.litLength + seq.matchLength;
|
|
974
|
+
BYTE const* const oLitEnd = op + seq.litLength;
|
|
975
|
+
DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
|
|
976
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
|
977
|
+
assert(op <= oend);
|
|
978
|
+
assert((size_t)(oend - op) >= sequenceSize);
|
|
979
|
+
assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
|
|
980
|
+
if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
|
|
981
|
+
size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
|
|
982
|
+
/* Offset must be within the dictionary. */
|
|
983
|
+
assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
|
|
984
|
+
assert(seq.offset <= windowSize + dictSize);
|
|
985
|
+
} else {
|
|
986
|
+
/* Offset must be within our window. */
|
|
987
|
+
assert(seq.offset <= windowSize);
|
|
988
|
+
}
|
|
989
|
+
}
|
|
990
|
+
#endif
|
|
991
|
+
|
|
992
|
+
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
891
993
|
FORCE_INLINE_TEMPLATE size_t
|
|
994
|
+
DONT_VECTORIZE
|
|
892
995
|
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
893
996
|
void* dst, size_t maxDstSize,
|
|
894
997
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
895
|
-
const ZSTD_longOffset_e isLongOffset
|
|
998
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
999
|
+
const int frame)
|
|
896
1000
|
{
|
|
897
1001
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
898
1002
|
const BYTE* const iend = ip + seqSize;
|
|
@@ -905,38 +1009,104 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
|
905
1009
|
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
|
|
906
1010
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
|
907
1011
|
DEBUGLOG(5, "ZSTD_decompressSequences_body");
|
|
1012
|
+
(void)frame;
|
|
908
1013
|
|
|
909
1014
|
/* Regen sequences */
|
|
910
1015
|
if (nbSeq) {
|
|
911
1016
|
seqState_t seqState;
|
|
1017
|
+
size_t error = 0;
|
|
912
1018
|
dctx->fseEntropy = 1;
|
|
913
1019
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
914
|
-
|
|
1020
|
+
RETURN_ERROR_IF(
|
|
1021
|
+
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
|
1022
|
+
corruption_detected, "");
|
|
915
1023
|
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
|
916
1024
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
|
917
1025
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
1026
|
+
assert(dst != NULL);
|
|
1027
|
+
|
|
1028
|
+
ZSTD_STATIC_ASSERT(
|
|
1029
|
+
BIT_DStream_unfinished < BIT_DStream_completed &&
|
|
1030
|
+
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
|
1031
|
+
BIT_DStream_completed < BIT_DStream_overflow);
|
|
1032
|
+
|
|
1033
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1034
|
+
/* Align the decompression loop to 32 + 16 bytes.
|
|
1035
|
+
*
|
|
1036
|
+
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
|
|
1037
|
+
* speed swings based on the alignment of the decompression loop. This
|
|
1038
|
+
* performance swing is caused by parts of the decompression loop falling
|
|
1039
|
+
* out of the DSB. The entire decompression loop should fit in the DSB,
|
|
1040
|
+
* when it can't we get much worse performance. You can measure if you've
|
|
1041
|
+
* hit the good case or the bad case with this perf command for some
|
|
1042
|
+
* compressed file test.zst:
|
|
1043
|
+
*
|
|
1044
|
+
* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
|
|
1045
|
+
* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
|
|
1046
|
+
*
|
|
1047
|
+
* If you see most cycles served out of the MITE you've hit the bad case.
|
|
1048
|
+
* If you see most cycles served out of the DSB you've hit the good case.
|
|
1049
|
+
* If it is pretty even then you may be in an okay case.
|
|
1050
|
+
*
|
|
1051
|
+
* I've been able to reproduce this issue on the following CPUs:
|
|
1052
|
+
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
|
1053
|
+
* Use Instruments->Counters to get DSB/MITE cycles.
|
|
1054
|
+
* I never got performance swings, but I was able to
|
|
1055
|
+
* go from the good case of mostly DSB to half of the
|
|
1056
|
+
* cycles served from MITE.
|
|
1057
|
+
* - Coffeelake: Intel i9-9900k
|
|
1058
|
+
*
|
|
1059
|
+
* I haven't been able to reproduce the instability or DSB misses on any
|
|
1060
|
+
* of the following CPUS:
|
|
1061
|
+
* - Haswell
|
|
1062
|
+
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
|
|
1063
|
+
* - Skylake
|
|
1064
|
+
*
|
|
1065
|
+
* If you are seeing performance stability this script can help test.
|
|
1066
|
+
* It tests on 4 commits in zstd where I saw performance change.
|
|
1067
|
+
*
|
|
1068
|
+
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
|
1069
|
+
*/
|
|
1070
|
+
__asm__(".p2align 5");
|
|
1071
|
+
__asm__("nop");
|
|
1072
|
+
__asm__(".p2align 4");
|
|
1073
|
+
#endif
|
|
1074
|
+
for ( ; ; ) {
|
|
1075
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
|
|
1076
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
|
1077
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1078
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1079
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1080
|
+
#endif
|
|
1081
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1082
|
+
BIT_reloadDStream(&(seqState.DStream));
|
|
1083
|
+
/* gcc and clang both don't like early returns in this loop.
|
|
1084
|
+
* gcc doesn't like early breaks either.
|
|
1085
|
+
* Instead save an error and report it at the end.
|
|
1086
|
+
* When there is an error, don't increment op, so we don't
|
|
1087
|
+
* overwrite.
|
|
1088
|
+
*/
|
|
1089
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
|
|
1090
|
+
else op += oneSeqSize;
|
|
1091
|
+
if (UNLIKELY(!--nbSeq)) break;
|
|
1092
|
+
}
|
|
927
1093
|
|
|
928
1094
|
/* check if reached exact end */
|
|
929
1095
|
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
|
|
930
|
-
if (
|
|
1096
|
+
if (ZSTD_isError(error)) return error;
|
|
1097
|
+
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
|
|
1098
|
+
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
|
|
931
1099
|
/* save reps for next block */
|
|
932
1100
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
|
933
1101
|
}
|
|
934
1102
|
|
|
935
1103
|
/* last literal segment */
|
|
936
1104
|
{ size_t const lastLLSize = litEnd - litPtr;
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
1105
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1106
|
+
if (op != NULL) {
|
|
1107
|
+
memcpy(op, litPtr, lastLLSize);
|
|
1108
|
+
op += lastLLSize;
|
|
1109
|
+
}
|
|
940
1110
|
}
|
|
941
1111
|
|
|
942
1112
|
return op-ostart;
|
|
@@ -946,99 +1116,21 @@ static size_t
|
|
|
946
1116
|
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
|
|
947
1117
|
void* dst, size_t maxDstSize,
|
|
948
1118
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
949
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1119
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1120
|
+
const int frame)
|
|
950
1121
|
{
|
|
951
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1122
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
952
1123
|
}
|
|
953
1124
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
954
1125
|
|
|
955
|
-
|
|
956
|
-
|
|
957
1126
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
958
|
-
FORCE_INLINE_TEMPLATE seq_t
|
|
959
|
-
ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
|
|
960
|
-
{
|
|
961
|
-
seq_t seq;
|
|
962
|
-
U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
|
|
963
|
-
U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
|
|
964
|
-
U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
|
|
965
|
-
U32 const totalBits = llBits+mlBits+ofBits;
|
|
966
|
-
U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
|
|
967
|
-
U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
|
|
968
|
-
U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
|
|
969
|
-
|
|
970
|
-
/* sequence */
|
|
971
|
-
{ size_t offset;
|
|
972
|
-
if (!ofBits)
|
|
973
|
-
offset = 0;
|
|
974
|
-
else {
|
|
975
|
-
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
|
976
|
-
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
|
977
|
-
assert(ofBits <= MaxOff);
|
|
978
|
-
if (MEM_32bits() && longOffsets) {
|
|
979
|
-
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
|
|
980
|
-
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
|
981
|
-
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
|
|
982
|
-
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
|
983
|
-
} else {
|
|
984
|
-
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
|
985
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
|
986
|
-
}
|
|
987
|
-
}
|
|
988
|
-
|
|
989
|
-
if (ofBits <= 1) {
|
|
990
|
-
offset += (llBase==0);
|
|
991
|
-
if (offset) {
|
|
992
|
-
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
993
|
-
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
|
994
|
-
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
995
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
996
|
-
seqState->prevOffset[0] = offset = temp;
|
|
997
|
-
} else {
|
|
998
|
-
offset = seqState->prevOffset[0];
|
|
999
|
-
}
|
|
1000
|
-
} else {
|
|
1001
|
-
seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
1002
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
1003
|
-
seqState->prevOffset[0] = offset;
|
|
1004
|
-
}
|
|
1005
|
-
seq.offset = offset;
|
|
1006
|
-
}
|
|
1007
|
-
|
|
1008
|
-
seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
|
|
1009
|
-
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
|
1010
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
1011
|
-
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
|
1012
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
1013
|
-
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
|
|
1014
|
-
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
|
1015
|
-
|
|
1016
|
-
seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
|
|
1017
|
-
if (MEM_32bits())
|
|
1018
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
1019
|
-
|
|
1020
|
-
{ size_t const pos = seqState->pos + seq.litLength;
|
|
1021
|
-
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
|
1022
|
-
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
|
1023
|
-
* No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
|
|
1024
|
-
seqState->pos = pos + seq.matchLength;
|
|
1025
|
-
}
|
|
1026
|
-
|
|
1027
|
-
/* ANS state update */
|
|
1028
|
-
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
|
1029
|
-
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
|
1030
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
1031
|
-
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
|
1032
|
-
|
|
1033
|
-
return seq;
|
|
1034
|
-
}
|
|
1035
|
-
|
|
1036
1127
|
FORCE_INLINE_TEMPLATE size_t
|
|
1037
1128
|
ZSTD_decompressSequencesLong_body(
|
|
1038
1129
|
ZSTD_DCtx* dctx,
|
|
1039
1130
|
void* dst, size_t maxDstSize,
|
|
1040
1131
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1041
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1132
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1133
|
+
const int frame)
|
|
1042
1134
|
{
|
|
1043
1135
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
1044
1136
|
const BYTE* const iend = ip + seqSize;
|
|
@@ -1050,6 +1142,7 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1050
1142
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
|
1051
1143
|
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
|
|
1052
1144
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
|
1145
|
+
(void)frame;
|
|
1053
1146
|
|
|
1054
1147
|
/* Regen sequences */
|
|
1055
1148
|
if (nbSeq) {
|
|
@@ -1065,34 +1158,45 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1065
1158
|
seqState.prefixStart = prefixStart;
|
|
1066
1159
|
seqState.pos = (size_t)(op-prefixStart);
|
|
1067
1160
|
seqState.dictEnd = dictEnd;
|
|
1161
|
+
assert(dst != NULL);
|
|
1068
1162
|
assert(iend >= ip);
|
|
1069
|
-
|
|
1163
|
+
RETURN_ERROR_IF(
|
|
1164
|
+
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
|
1165
|
+
corruption_detected, "");
|
|
1070
1166
|
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
|
1071
1167
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
|
1072
1168
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
|
1073
1169
|
|
|
1074
1170
|
/* prepare in advance */
|
|
1075
1171
|
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
|
|
1076
|
-
sequences[seqNb] =
|
|
1172
|
+
sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
|
|
1077
1173
|
PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
|
1078
1174
|
}
|
|
1079
|
-
|
|
1175
|
+
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
|
|
1080
1176
|
|
|
1081
1177
|
/* decode and decompress */
|
|
1082
1178
|
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
|
|
1083
|
-
seq_t const sequence =
|
|
1084
|
-
size_t const oneSeqSize =
|
|
1179
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
|
|
1180
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
|
1181
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1182
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1183
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1184
|
+
#endif
|
|
1085
1185
|
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1086
1186
|
PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
|
1087
1187
|
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
|
1088
1188
|
op += oneSeqSize;
|
|
1089
1189
|
}
|
|
1090
|
-
|
|
1190
|
+
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
|
|
1091
1191
|
|
|
1092
1192
|
/* finish queue */
|
|
1093
1193
|
seqNb -= seqAdvance;
|
|
1094
1194
|
for ( ; seqNb<nbSeq ; seqNb++) {
|
|
1095
|
-
size_t const oneSeqSize =
|
|
1195
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
|
1196
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1197
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1198
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1199
|
+
#endif
|
|
1096
1200
|
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1097
1201
|
op += oneSeqSize;
|
|
1098
1202
|
}
|
|
@@ -1103,9 +1207,11 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1103
1207
|
|
|
1104
1208
|
/* last literal segment */
|
|
1105
1209
|
{ size_t const lastLLSize = litEnd - litPtr;
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1210
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1211
|
+
if (op != NULL) {
|
|
1212
|
+
memcpy(op, litPtr, lastLLSize);
|
|
1213
|
+
op += lastLLSize;
|
|
1214
|
+
}
|
|
1109
1215
|
}
|
|
1110
1216
|
|
|
1111
1217
|
return op-ostart;
|
|
@@ -1115,9 +1221,10 @@ static size_t
|
|
|
1115
1221
|
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
1116
1222
|
void* dst, size_t maxDstSize,
|
|
1117
1223
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1118
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1224
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1225
|
+
const int frame)
|
|
1119
1226
|
{
|
|
1120
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1227
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1121
1228
|
}
|
|
1122
1229
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1123
1230
|
|
|
@@ -1127,12 +1234,14 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
|
1127
1234
|
|
|
1128
1235
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1129
1236
|
static TARGET_ATTRIBUTE("bmi2") size_t
|
|
1237
|
+
DONT_VECTORIZE
|
|
1130
1238
|
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
|
1131
1239
|
void* dst, size_t maxDstSize,
|
|
1132
1240
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1133
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1241
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1242
|
+
const int frame)
|
|
1134
1243
|
{
|
|
1135
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1244
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1136
1245
|
}
|
|
1137
1246
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1138
1247
|
|
|
@@ -1141,9 +1250,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t
|
|
|
1141
1250
|
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
|
|
1142
1251
|
void* dst, size_t maxDstSize,
|
|
1143
1252
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1144
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1253
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1254
|
+
const int frame)
|
|
1145
1255
|
{
|
|
1146
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1256
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1147
1257
|
}
|
|
1148
1258
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1149
1259
|
|
|
@@ -1153,21 +1263,23 @@ typedef size_t (*ZSTD_decompressSequences_t)(
|
|
|
1153
1263
|
ZSTD_DCtx* dctx,
|
|
1154
1264
|
void* dst, size_t maxDstSize,
|
|
1155
1265
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1156
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1266
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1267
|
+
const int frame);
|
|
1157
1268
|
|
|
1158
1269
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1159
1270
|
static size_t
|
|
1160
1271
|
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|
1161
1272
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1162
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1273
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1274
|
+
const int frame)
|
|
1163
1275
|
{
|
|
1164
1276
|
DEBUGLOG(5, "ZSTD_decompressSequences");
|
|
1165
1277
|
#if DYNAMIC_BMI2
|
|
1166
1278
|
if (dctx->bmi2) {
|
|
1167
|
-
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1279
|
+
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1168
1280
|
}
|
|
1169
1281
|
#endif
|
|
1170
|
-
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1282
|
+
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1171
1283
|
}
|
|
1172
1284
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1173
1285
|
|
|
@@ -1176,21 +1288,22 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|
|
1176
1288
|
/* ZSTD_decompressSequencesLong() :
|
|
1177
1289
|
* decompression function triggered when a minimum share of offsets is considered "long",
|
|
1178
1290
|
* aka out of cache.
|
|
1179
|
-
* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes
|
|
1291
|
+
* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
|
|
1180
1292
|
* This function will try to mitigate main memory latency through the use of prefetching */
|
|
1181
1293
|
static size_t
|
|
1182
1294
|
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|
1183
1295
|
void* dst, size_t maxDstSize,
|
|
1184
1296
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1185
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1297
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1298
|
+
const int frame)
|
|
1186
1299
|
{
|
|
1187
1300
|
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
|
|
1188
1301
|
#if DYNAMIC_BMI2
|
|
1189
1302
|
if (dctx->bmi2) {
|
|
1190
|
-
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1303
|
+
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1191
1304
|
}
|
|
1192
1305
|
#endif
|
|
1193
|
-
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1306
|
+
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1194
1307
|
}
|
|
1195
1308
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1196
1309
|
|
|
@@ -1224,7 +1337,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
|
|
|
1224
1337
|
}
|
|
1225
1338
|
#endif
|
|
1226
1339
|
|
|
1227
|
-
|
|
1228
1340
|
size_t
|
|
1229
1341
|
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1230
1342
|
void* dst, size_t dstCapacity,
|
|
@@ -1240,7 +1352,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1240
1352
|
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
|
|
1241
1353
|
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
|
1242
1354
|
|
|
1243
|
-
|
|
1355
|
+
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
|
|
1244
1356
|
|
|
1245
1357
|
/* Decode literals section */
|
|
1246
1358
|
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
|
@@ -1266,6 +1378,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1266
1378
|
ip += seqHSize;
|
|
1267
1379
|
srcSize -= seqHSize;
|
|
1268
1380
|
|
|
1381
|
+
RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
|
1382
|
+
|
|
1269
1383
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
|
1270
1384
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
|
1271
1385
|
if ( !usePrefetchDecoder
|
|
@@ -1284,17 +1398,28 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1284
1398
|
if (usePrefetchDecoder)
|
|
1285
1399
|
#endif
|
|
1286
1400
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1287
|
-
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
|
1401
|
+
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
|
1288
1402
|
#endif
|
|
1289
1403
|
|
|
1290
1404
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1291
1405
|
/* else */
|
|
1292
|
-
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
|
1406
|
+
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
|
1293
1407
|
#endif
|
|
1294
1408
|
}
|
|
1295
1409
|
}
|
|
1296
1410
|
|
|
1297
1411
|
|
|
1412
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
|
|
1413
|
+
{
|
|
1414
|
+
if (dst != dctx->previousDstEnd) { /* not contiguous */
|
|
1415
|
+
dctx->dictEnd = dctx->previousDstEnd;
|
|
1416
|
+
dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
|
|
1417
|
+
dctx->prefixStart = dst;
|
|
1418
|
+
dctx->previousDstEnd = dst;
|
|
1419
|
+
}
|
|
1420
|
+
}
|
|
1421
|
+
|
|
1422
|
+
|
|
1298
1423
|
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
|
|
1299
1424
|
void* dst, size_t dstCapacity,
|
|
1300
1425
|
const void* src, size_t srcSize)
|