zstd-ruby 1.4.0.0 → 1.4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/Makefile +274 -107
- data/ext/zstdruby/libzstd/README.md +75 -16
- data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
- data/ext/zstdruby/libzstd/common/compiler.h +154 -5
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +7 -3
- data/ext/zstdruby/libzstd/common/fse.h +50 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
- data/ext/zstdruby/libzstd/common/huf.h +41 -38
- data/ext/zstdruby/libzstd/common/mem.h +68 -22
- data/ext/zstdruby/libzstd/common/pool.c +30 -20
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/threading.c +51 -4
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
- data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
- data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
- data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
- data/ext/zstdruby/libzstd/zstd.h +655 -118
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +20 -10
- data/.travis.yml +0 -14
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -14,15 +14,15 @@
|
|
|
14
14
|
/*-*******************************************************
|
|
15
15
|
* Dependencies
|
|
16
16
|
*********************************************************/
|
|
17
|
-
#include
|
|
18
|
-
#include "compiler.h" /* prefetch */
|
|
19
|
-
#include "cpu.h" /* bmi2 */
|
|
20
|
-
#include "mem.h" /* low level memory routines */
|
|
17
|
+
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
|
18
|
+
#include "../common/compiler.h" /* prefetch */
|
|
19
|
+
#include "../common/cpu.h" /* bmi2 */
|
|
20
|
+
#include "../common/mem.h" /* low level memory routines */
|
|
21
21
|
#define FSE_STATIC_LINKING_ONLY
|
|
22
|
-
#include "fse.h"
|
|
22
|
+
#include "../common/fse.h"
|
|
23
23
|
#define HUF_STATIC_LINKING_ONLY
|
|
24
|
-
#include "huf.h"
|
|
25
|
-
#include "zstd_internal.h"
|
|
24
|
+
#include "../common/huf.h"
|
|
25
|
+
#include "../common/zstd_internal.h"
|
|
26
26
|
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
|
|
27
27
|
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
|
|
28
28
|
#include "zstd_decompress_block.h"
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
/*_*******************************************************
|
|
45
45
|
* Memory operations
|
|
46
46
|
**********************************************************/
|
|
47
|
-
static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
|
|
47
|
+
static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
|
|
48
48
|
|
|
49
49
|
|
|
50
50
|
/*-*************************************************************
|
|
@@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
|
|
|
56
56
|
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
57
57
|
blockProperties_t* bpPtr)
|
|
58
58
|
{
|
|
59
|
-
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
|
|
59
|
+
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
|
|
60
60
|
|
|
61
61
|
{ U32 const cBlockHeader = MEM_readLE24(src);
|
|
62
62
|
U32 const cSize = cBlockHeader >> 3;
|
|
@@ -64,7 +64,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
|
64
64
|
bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
|
|
65
65
|
bpPtr->origSize = cSize; /* only useful for RLE */
|
|
66
66
|
if (bpPtr->blockType == bt_rle) return 1;
|
|
67
|
-
RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
|
|
67
|
+
RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
|
|
68
68
|
return cSize;
|
|
69
69
|
}
|
|
70
70
|
}
|
|
@@ -79,7 +79,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
79
79
|
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
80
80
|
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
|
|
81
81
|
{
|
|
82
|
-
|
|
82
|
+
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
|
|
83
|
+
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
|
|
83
84
|
|
|
84
85
|
{ const BYTE* const istart = (const BYTE*) src;
|
|
85
86
|
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
|
|
@@ -87,7 +88,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
87
88
|
switch(litEncType)
|
|
88
89
|
{
|
|
89
90
|
case set_repeat:
|
|
90
|
-
|
|
91
|
+
DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
|
|
92
|
+
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
|
|
91
93
|
/* fall-through */
|
|
92
94
|
|
|
93
95
|
case set_compressed:
|
|
@@ -116,11 +118,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
116
118
|
/* 2 - 2 - 18 - 18 */
|
|
117
119
|
lhSize = 5;
|
|
118
120
|
litSize = (lhc >> 4) & 0x3FFFF;
|
|
119
|
-
litCSize = (lhc >> 22) + (istart[4] << 10);
|
|
121
|
+
litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
|
|
120
122
|
break;
|
|
121
123
|
}
|
|
122
|
-
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
|
|
123
|
-
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
|
|
124
|
+
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
|
125
|
+
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
|
|
124
126
|
|
|
125
127
|
/* prefetch huffman table if cold */
|
|
126
128
|
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
|
@@ -158,13 +160,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
158
160
|
}
|
|
159
161
|
}
|
|
160
162
|
|
|
161
|
-
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
|
|
163
|
+
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
|
|
162
164
|
|
|
163
165
|
dctx->litPtr = dctx->litBuffer;
|
|
164
166
|
dctx->litSize = litSize;
|
|
165
167
|
dctx->litEntropy = 1;
|
|
166
168
|
if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
|
|
167
|
-
|
|
169
|
+
ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
|
168
170
|
return litCSize + lhSize;
|
|
169
171
|
}
|
|
170
172
|
|
|
@@ -188,11 +190,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
188
190
|
}
|
|
189
191
|
|
|
190
192
|
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
|
191
|
-
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
|
|
192
|
-
|
|
193
|
+
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
|
|
194
|
+
ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
|
|
193
195
|
dctx->litPtr = dctx->litBuffer;
|
|
194
196
|
dctx->litSize = litSize;
|
|
195
|
-
|
|
197
|
+
ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
|
196
198
|
return lhSize+litSize;
|
|
197
199
|
}
|
|
198
200
|
/* direct reference into compressed stream */
|
|
@@ -220,8 +222,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
220
222
|
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
|
221
223
|
break;
|
|
222
224
|
}
|
|
223
|
-
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
|
|
224
|
-
|
|
225
|
+
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
|
|
226
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
|
|
225
227
|
dctx->litPtr = dctx->litBuffer;
|
|
226
228
|
dctx->litSize = litSize;
|
|
227
229
|
return lhSize+1;
|
|
@@ -234,7 +236,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
|
234
236
|
|
|
235
237
|
/* Default FSE distribution tables.
|
|
236
238
|
* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
|
|
237
|
-
* https://github.com/facebook/zstd/blob/
|
|
239
|
+
* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
|
|
238
240
|
* They were generated programmatically with following method :
|
|
239
241
|
* - start from default distributions, present in /lib/common/zstd_internal.h
|
|
240
242
|
* - generate tables normally, using ZSTD_buildFSETable()
|
|
@@ -362,23 +364,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
|
362
364
|
* generate FSE decoding table for one symbol (ll, ml or off)
|
|
363
365
|
* cannot fail if input is valid =>
|
|
364
366
|
* all inputs are presumed validated at this stage */
|
|
365
|
-
|
|
366
|
-
|
|
367
|
+
FORCE_INLINE_TEMPLATE
|
|
368
|
+
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
|
367
369
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
368
370
|
const U32* baseValue, const U32* nbAdditionalBits,
|
|
369
|
-
unsigned tableLog)
|
|
371
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
370
372
|
{
|
|
371
373
|
ZSTD_seqSymbol* const tableDecode = dt+1;
|
|
372
|
-
U16 symbolNext[MaxSeq+1];
|
|
373
|
-
|
|
374
374
|
U32 const maxSV1 = maxSymbolValue + 1;
|
|
375
375
|
U32 const tableSize = 1 << tableLog;
|
|
376
|
-
|
|
376
|
+
|
|
377
|
+
U16* symbolNext = (U16*)wksp;
|
|
378
|
+
BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
|
|
379
|
+
U32 highThreshold = tableSize - 1;
|
|
380
|
+
|
|
377
381
|
|
|
378
382
|
/* Sanity Checks */
|
|
379
383
|
assert(maxSymbolValue <= MaxSeq);
|
|
380
384
|
assert(tableLog <= MaxFSELog);
|
|
381
|
-
|
|
385
|
+
assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
|
|
386
|
+
(void)wkspSize;
|
|
382
387
|
/* Init, lay down lowprob symbols */
|
|
383
388
|
{ ZSTD_seqSymbol_header DTableH;
|
|
384
389
|
DTableH.tableLog = tableLog;
|
|
@@ -391,18 +396,72 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
|
391
396
|
symbolNext[s] = 1;
|
|
392
397
|
} else {
|
|
393
398
|
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
|
|
394
|
-
|
|
399
|
+
assert(normalizedCounter[s]>=0);
|
|
400
|
+
symbolNext[s] = (U16)normalizedCounter[s];
|
|
395
401
|
} } }
|
|
396
|
-
|
|
402
|
+
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
|
|
397
403
|
}
|
|
398
404
|
|
|
399
405
|
/* Spread symbols */
|
|
400
|
-
|
|
406
|
+
assert(tableSize <= 512);
|
|
407
|
+
/* Specialized symbol spreading for the case when there are
|
|
408
|
+
* no low probability (-1 count) symbols. When compressing
|
|
409
|
+
* small blocks we avoid low probability symbols to hit this
|
|
410
|
+
* case, since header decoding speed matters more.
|
|
411
|
+
*/
|
|
412
|
+
if (highThreshold == tableSize - 1) {
|
|
413
|
+
size_t const tableMask = tableSize-1;
|
|
414
|
+
size_t const step = FSE_TABLESTEP(tableSize);
|
|
415
|
+
/* First lay down the symbols in order.
|
|
416
|
+
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
|
|
417
|
+
* misses since small blocks generally have small table logs, so nearly
|
|
418
|
+
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
|
|
419
|
+
* our buffer to handle the over-write.
|
|
420
|
+
*/
|
|
421
|
+
{
|
|
422
|
+
U64 const add = 0x0101010101010101ull;
|
|
423
|
+
size_t pos = 0;
|
|
424
|
+
U64 sv = 0;
|
|
425
|
+
U32 s;
|
|
426
|
+
for (s=0; s<maxSV1; ++s, sv += add) {
|
|
427
|
+
int i;
|
|
428
|
+
int const n = normalizedCounter[s];
|
|
429
|
+
MEM_write64(spread + pos, sv);
|
|
430
|
+
for (i = 8; i < n; i += 8) {
|
|
431
|
+
MEM_write64(spread + pos + i, sv);
|
|
432
|
+
}
|
|
433
|
+
pos += n;
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
/* Now we spread those positions across the table.
|
|
437
|
+
* The benefit of doing it in two stages is that we avoid the
|
|
438
|
+
* variable size inner loop, which caused lots of branch misses.
|
|
439
|
+
* Now we can run through all the positions without any branch misses.
|
|
440
|
+
* We unroll the loop twice, since that is what empirically worked best.
|
|
441
|
+
*/
|
|
442
|
+
{
|
|
443
|
+
size_t position = 0;
|
|
444
|
+
size_t s;
|
|
445
|
+
size_t const unroll = 2;
|
|
446
|
+
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
|
447
|
+
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
|
448
|
+
size_t u;
|
|
449
|
+
for (u = 0; u < unroll; ++u) {
|
|
450
|
+
size_t const uPosition = (position + (u * step)) & tableMask;
|
|
451
|
+
tableDecode[uPosition].baseValue = spread[s + u];
|
|
452
|
+
}
|
|
453
|
+
position = (position + (unroll * step)) & tableMask;
|
|
454
|
+
}
|
|
455
|
+
assert(position == 0);
|
|
456
|
+
}
|
|
457
|
+
} else {
|
|
458
|
+
U32 const tableMask = tableSize-1;
|
|
401
459
|
U32 const step = FSE_TABLESTEP(tableSize);
|
|
402
460
|
U32 s, position = 0;
|
|
403
461
|
for (s=0; s<maxSV1; s++) {
|
|
404
462
|
int i;
|
|
405
|
-
|
|
463
|
+
int const n = normalizedCounter[s];
|
|
464
|
+
for (i=0; i<n; i++) {
|
|
406
465
|
tableDecode[position].baseValue = s;
|
|
407
466
|
position = (position + step) & tableMask;
|
|
408
467
|
while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
|
|
@@ -411,7 +470,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
|
411
470
|
}
|
|
412
471
|
|
|
413
472
|
/* Build Decoding table */
|
|
414
|
-
{
|
|
473
|
+
{
|
|
474
|
+
U32 u;
|
|
415
475
|
for (u=0; u<tableSize; u++) {
|
|
416
476
|
U32 const symbol = tableDecode[u].baseValue;
|
|
417
477
|
U32 const nextState = symbolNext[symbol]++;
|
|
@@ -420,7 +480,46 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
|
420
480
|
assert(nbAdditionalBits[symbol] < 255);
|
|
421
481
|
tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
|
|
422
482
|
tableDecode[u].baseValue = baseValue[symbol];
|
|
423
|
-
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
/* Avoids the FORCE_INLINE of the _body() function. */
|
|
488
|
+
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
|
489
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
490
|
+
const U32* baseValue, const U32* nbAdditionalBits,
|
|
491
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
492
|
+
{
|
|
493
|
+
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
|
494
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
#if DYNAMIC_BMI2
|
|
498
|
+
TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
|
499
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
500
|
+
const U32* baseValue, const U32* nbAdditionalBits,
|
|
501
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
|
502
|
+
{
|
|
503
|
+
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
|
504
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
|
505
|
+
}
|
|
506
|
+
#endif
|
|
507
|
+
|
|
508
|
+
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
509
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
|
510
|
+
const U32* baseValue, const U32* nbAdditionalBits,
|
|
511
|
+
unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
|
|
512
|
+
{
|
|
513
|
+
#if DYNAMIC_BMI2
|
|
514
|
+
if (bmi2) {
|
|
515
|
+
ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
|
|
516
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
|
517
|
+
return;
|
|
518
|
+
}
|
|
519
|
+
#endif
|
|
520
|
+
(void)bmi2;
|
|
521
|
+
ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
|
|
522
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
|
424
523
|
}
|
|
425
524
|
|
|
426
525
|
|
|
@@ -432,13 +531,14 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
432
531
|
const void* src, size_t srcSize,
|
|
433
532
|
const U32* baseValue, const U32* nbAdditionalBits,
|
|
434
533
|
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
|
435
|
-
int ddictIsCold, int nbSeq
|
|
534
|
+
int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
|
|
535
|
+
int bmi2)
|
|
436
536
|
{
|
|
437
537
|
switch(type)
|
|
438
538
|
{
|
|
439
539
|
case set_rle :
|
|
440
|
-
RETURN_ERROR_IF(!srcSize, srcSize_wrong);
|
|
441
|
-
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
|
|
540
|
+
RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
|
|
541
|
+
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
|
|
442
542
|
{ U32 const symbol = *(const BYTE*)src;
|
|
443
543
|
U32 const baseline = baseValue[symbol];
|
|
444
544
|
U32 const nbBits = nbAdditionalBits[symbol];
|
|
@@ -450,7 +550,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
450
550
|
*DTablePtr = defaultTable;
|
|
451
551
|
return 0;
|
|
452
552
|
case set_repeat:
|
|
453
|
-
RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
|
|
553
|
+
RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
|
|
454
554
|
/* prefetch FSE table if used */
|
|
455
555
|
if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
|
|
456
556
|
const void* const pStart = *DTablePtr;
|
|
@@ -462,9 +562,9 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
462
562
|
{ unsigned tableLog;
|
|
463
563
|
S16 norm[MaxSeq+1];
|
|
464
564
|
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
|
|
465
|
-
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
|
|
466
|
-
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
|
|
467
|
-
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
|
|
565
|
+
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
|
|
566
|
+
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
|
|
567
|
+
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
|
|
468
568
|
*DTablePtr = DTableSpace;
|
|
469
569
|
return headerSize;
|
|
470
570
|
}
|
|
@@ -477,35 +577,36 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
|
477
577
|
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
478
578
|
const void* src, size_t srcSize)
|
|
479
579
|
{
|
|
480
|
-
const BYTE* const istart = (const BYTE*
|
|
580
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
481
581
|
const BYTE* const iend = istart + srcSize;
|
|
482
582
|
const BYTE* ip = istart;
|
|
483
583
|
int nbSeq;
|
|
484
584
|
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
|
|
485
585
|
|
|
486
586
|
/* check */
|
|
487
|
-
RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
|
|
587
|
+
RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
|
|
488
588
|
|
|
489
589
|
/* SeqHead */
|
|
490
590
|
nbSeq = *ip++;
|
|
491
591
|
if (!nbSeq) {
|
|
492
592
|
*nbSeqPtr=0;
|
|
493
|
-
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
|
|
593
|
+
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
|
|
494
594
|
return 1;
|
|
495
595
|
}
|
|
496
596
|
if (nbSeq > 0x7F) {
|
|
497
597
|
if (nbSeq == 0xFF) {
|
|
498
|
-
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
|
|
499
|
-
nbSeq = MEM_readLE16(ip) + LONGNBSEQ
|
|
598
|
+
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
|
|
599
|
+
nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
|
|
600
|
+
ip+=2;
|
|
500
601
|
} else {
|
|
501
|
-
RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
|
|
602
|
+
RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
|
|
502
603
|
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
|
503
604
|
}
|
|
504
605
|
}
|
|
505
606
|
*nbSeqPtr = nbSeq;
|
|
506
607
|
|
|
507
608
|
/* FSE table descriptors */
|
|
508
|
-
RETURN_ERROR_IF(ip+
|
|
609
|
+
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
|
|
509
610
|
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
|
|
510
611
|
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
|
|
511
612
|
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
|
|
@@ -517,8 +618,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
517
618
|
ip, iend-ip,
|
|
518
619
|
LL_base, LL_bits,
|
|
519
620
|
LL_defaultDTable, dctx->fseEntropy,
|
|
520
|
-
dctx->ddictIsCold, nbSeq
|
|
521
|
-
|
|
621
|
+
dctx->ddictIsCold, nbSeq,
|
|
622
|
+
dctx->workspace, sizeof(dctx->workspace),
|
|
623
|
+
dctx->bmi2);
|
|
624
|
+
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
522
625
|
ip += llhSize;
|
|
523
626
|
}
|
|
524
627
|
|
|
@@ -527,8 +630,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
527
630
|
ip, iend-ip,
|
|
528
631
|
OF_base, OF_bits,
|
|
529
632
|
OF_defaultDTable, dctx->fseEntropy,
|
|
530
|
-
dctx->ddictIsCold, nbSeq
|
|
531
|
-
|
|
633
|
+
dctx->ddictIsCold, nbSeq,
|
|
634
|
+
dctx->workspace, sizeof(dctx->workspace),
|
|
635
|
+
dctx->bmi2);
|
|
636
|
+
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
532
637
|
ip += ofhSize;
|
|
533
638
|
}
|
|
534
639
|
|
|
@@ -537,8 +642,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
|
537
642
|
ip, iend-ip,
|
|
538
643
|
ML_base, ML_bits,
|
|
539
644
|
ML_defaultDTable, dctx->fseEntropy,
|
|
540
|
-
dctx->ddictIsCold, nbSeq
|
|
541
|
-
|
|
645
|
+
dctx->ddictIsCold, nbSeq,
|
|
646
|
+
dctx->workspace, sizeof(dctx->workspace),
|
|
647
|
+
dctx->bmi2);
|
|
648
|
+
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
|
542
649
|
ip += mlhSize;
|
|
543
650
|
}
|
|
544
651
|
}
|
|
@@ -570,54 +677,133 @@ typedef struct {
|
|
|
570
677
|
size_t pos;
|
|
571
678
|
} seqState_t;
|
|
572
679
|
|
|
680
|
+
/*! ZSTD_overlapCopy8() :
|
|
681
|
+
* Copies 8 bytes from ip to op and updates op and ip where ip <= op.
|
|
682
|
+
* If the offset is < 8 then the offset is spread to at least 8 bytes.
|
|
683
|
+
*
|
|
684
|
+
* Precondition: *ip <= *op
|
|
685
|
+
* Postcondition: *op - *ip >= 8
|
|
686
|
+
*/
|
|
687
|
+
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
|
688
|
+
assert(*ip <= *op);
|
|
689
|
+
if (offset < 8) {
|
|
690
|
+
/* close range match, overlap */
|
|
691
|
+
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
|
692
|
+
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
|
693
|
+
int const sub2 = dec64table[offset];
|
|
694
|
+
(*op)[0] = (*ip)[0];
|
|
695
|
+
(*op)[1] = (*ip)[1];
|
|
696
|
+
(*op)[2] = (*ip)[2];
|
|
697
|
+
(*op)[3] = (*ip)[3];
|
|
698
|
+
*ip += dec32table[offset];
|
|
699
|
+
ZSTD_copy4(*op+4, *ip);
|
|
700
|
+
*ip -= sub2;
|
|
701
|
+
} else {
|
|
702
|
+
ZSTD_copy8(*op, *ip);
|
|
703
|
+
}
|
|
704
|
+
*ip += 8;
|
|
705
|
+
*op += 8;
|
|
706
|
+
assert(*op - *ip >= 8);
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
/*! ZSTD_safecopy() :
|
|
710
|
+
* Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
|
|
711
|
+
* and write up to 16 bytes past oend_w (op >= oend_w is allowed).
|
|
712
|
+
* This function is only called in the uncommon case where the sequence is near the end of the block. It
|
|
713
|
+
* should be fast for a single long sequence, but can be slow for several short sequences.
|
|
714
|
+
*
|
|
715
|
+
* @param ovtype controls the overlap detection
|
|
716
|
+
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
|
|
717
|
+
* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
|
|
718
|
+
* The src buffer must be before the dst buffer.
|
|
719
|
+
*/
|
|
720
|
+
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
|
721
|
+
ptrdiff_t const diff = op - ip;
|
|
722
|
+
BYTE* const oend = op + length;
|
|
723
|
+
|
|
724
|
+
assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
|
|
725
|
+
(ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
|
|
573
726
|
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
727
|
+
if (length < 8) {
|
|
728
|
+
/* Handle short lengths. */
|
|
729
|
+
while (op < oend) *op++ = *ip++;
|
|
730
|
+
return;
|
|
731
|
+
}
|
|
732
|
+
if (ovtype == ZSTD_overlap_src_before_dst) {
|
|
733
|
+
/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
|
|
734
|
+
assert(length >= 8);
|
|
735
|
+
ZSTD_overlapCopy8(&op, &ip, diff);
|
|
736
|
+
assert(op - ip >= 8);
|
|
737
|
+
assert(op <= oend);
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
if (oend <= oend_w) {
|
|
741
|
+
/* No risk of overwrite. */
|
|
742
|
+
ZSTD_wildcopy(op, ip, length, ovtype);
|
|
743
|
+
return;
|
|
744
|
+
}
|
|
745
|
+
if (op <= oend_w) {
|
|
746
|
+
/* Wildcopy until we get close to the end. */
|
|
747
|
+
assert(oend > oend_w);
|
|
748
|
+
ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
|
|
749
|
+
ip += oend_w - op;
|
|
750
|
+
op = oend_w;
|
|
751
|
+
}
|
|
752
|
+
/* Handle the leftovers. */
|
|
753
|
+
while (op < oend) *op++ = *ip++;
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
/* ZSTD_execSequenceEnd():
|
|
757
|
+
* This version handles cases that are near the end of the output buffer. It requires
|
|
758
|
+
* more careful checks to make sure there is no overflow. By separating out these hard
|
|
759
|
+
* and unlikely cases, we can speed up the common cases.
|
|
760
|
+
*
|
|
761
|
+
* NOTE: This function needs to be fast for a single long sequence, but doesn't need
|
|
762
|
+
* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
|
|
763
|
+
*/
|
|
581
764
|
FORCE_NOINLINE
|
|
582
|
-
size_t
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
765
|
+
size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
766
|
+
BYTE* const oend, seq_t sequence,
|
|
767
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
|
768
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
|
586
769
|
{
|
|
587
770
|
BYTE* const oLitEnd = op + sequence.litLength;
|
|
588
771
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
589
|
-
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
590
772
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
591
773
|
const BYTE* match = oLitEnd - sequence.offset;
|
|
774
|
+
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
|
592
775
|
|
|
593
|
-
/*
|
|
594
|
-
RETURN_ERROR_IF(
|
|
595
|
-
RETURN_ERROR_IF(
|
|
776
|
+
/* bounds checks : careful of address space overflow in 32-bit mode */
|
|
777
|
+
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
|
|
778
|
+
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
|
|
779
|
+
assert(op < op + sequenceLength);
|
|
780
|
+
assert(oLitEnd < op + sequenceLength);
|
|
596
781
|
|
|
597
782
|
/* copy literals */
|
|
598
|
-
|
|
783
|
+
ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
|
|
784
|
+
op = oLitEnd;
|
|
785
|
+
*litPtr = iLitEnd;
|
|
599
786
|
|
|
600
787
|
/* copy Match */
|
|
601
|
-
if (sequence.offset > (size_t)(oLitEnd -
|
|
788
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
602
789
|
/* offset beyond prefix */
|
|
603
|
-
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd -
|
|
604
|
-
match = dictEnd - (
|
|
790
|
+
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
|
791
|
+
match = dictEnd - (prefixStart-match);
|
|
605
792
|
if (match + sequence.matchLength <= dictEnd) {
|
|
606
|
-
|
|
793
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
607
794
|
return sequenceLength;
|
|
608
795
|
}
|
|
609
796
|
/* span extDict & currentPrefixSegment */
|
|
610
797
|
{ size_t const length1 = dictEnd - match;
|
|
611
|
-
|
|
798
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
612
799
|
op = oLitEnd + length1;
|
|
613
800
|
sequence.matchLength -= length1;
|
|
614
|
-
match =
|
|
801
|
+
match = prefixStart;
|
|
615
802
|
} }
|
|
616
|
-
|
|
803
|
+
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
|
617
804
|
return sequenceLength;
|
|
618
805
|
}
|
|
619
806
|
|
|
620
|
-
|
|
621
807
|
HINT_INLINE
|
|
622
808
|
size_t ZSTD_execSequence(BYTE* op,
|
|
623
809
|
BYTE* const oend, seq_t sequence,
|
|
@@ -627,152 +813,85 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
|
627
813
|
BYTE* const oLitEnd = op + sequence.litLength;
|
|
628
814
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
629
815
|
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
630
|
-
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
|
816
|
+
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
|
|
631
817
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
632
818
|
const BYTE* match = oLitEnd - sequence.offset;
|
|
633
819
|
|
|
634
|
-
/*
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
if (
|
|
642
|
-
|
|
820
|
+
assert(op != NULL /* Precondition */);
|
|
821
|
+
assert(oend_w < oend /* No underflow */);
|
|
822
|
+
/* Handle edge cases in a slow path:
|
|
823
|
+
* - Read beyond end of literals
|
|
824
|
+
* - Match end is within WILDCOPY_OVERLENGTH of oend
|
|
825
|
+
* - 32-bit mode and the match length overflows
|
|
826
|
+
*/
|
|
827
|
+
if (UNLIKELY(
|
|
828
|
+
iLitEnd > litLimit ||
|
|
829
|
+
oMatchEnd > oend_w ||
|
|
830
|
+
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
|
831
|
+
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
|
832
|
+
|
|
833
|
+
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
|
834
|
+
assert(op <= oLitEnd /* No overflow */);
|
|
835
|
+
assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
|
|
836
|
+
assert(oMatchEnd <= oend /* No underflow */);
|
|
837
|
+
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
|
|
838
|
+
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
|
|
839
|
+
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
|
|
840
|
+
|
|
841
|
+
/* Copy Literals:
|
|
842
|
+
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
|
|
843
|
+
* We likely don't need the full 32-byte wildcopy.
|
|
844
|
+
*/
|
|
845
|
+
assert(WILDCOPY_OVERLENGTH >= 16);
|
|
846
|
+
ZSTD_copy16(op, (*litPtr));
|
|
847
|
+
if (UNLIKELY(sequence.litLength > 16)) {
|
|
848
|
+
ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
|
|
849
|
+
}
|
|
643
850
|
op = oLitEnd;
|
|
644
851
|
*litPtr = iLitEnd; /* update for next sequence */
|
|
645
852
|
|
|
646
|
-
/*
|
|
853
|
+
/* Copy Match */
|
|
647
854
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
648
855
|
/* offset beyond prefix -> go into extDict */
|
|
649
|
-
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
|
|
856
|
+
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
|
650
857
|
match = dictEnd + (match - prefixStart);
|
|
651
858
|
if (match + sequence.matchLength <= dictEnd) {
|
|
652
|
-
|
|
859
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
|
653
860
|
return sequenceLength;
|
|
654
861
|
}
|
|
655
862
|
/* span extDict & currentPrefixSegment */
|
|
656
863
|
{ size_t const length1 = dictEnd - match;
|
|
657
|
-
|
|
864
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
|
658
865
|
op = oLitEnd + length1;
|
|
659
866
|
sequence.matchLength -= length1;
|
|
660
867
|
match = prefixStart;
|
|
661
|
-
if (op > oend_w || sequence.matchLength < MINMATCH) {
|
|
662
|
-
U32 i;
|
|
663
|
-
for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
|
|
664
|
-
return sequenceLength;
|
|
665
|
-
}
|
|
666
868
|
} }
|
|
667
|
-
/*
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
match
|
|
682
|
-
|
|
683
|
-
ZSTD_copy8(op, match);
|
|
684
|
-
}
|
|
685
|
-
op += 8; match += 8;
|
|
686
|
-
|
|
687
|
-
if (oMatchEnd > oend-(16-MINMATCH)) {
|
|
688
|
-
if (op < oend_w) {
|
|
689
|
-
ZSTD_wildcopy(op, match, oend_w - op);
|
|
690
|
-
match += oend_w - op;
|
|
691
|
-
op = oend_w;
|
|
692
|
-
}
|
|
693
|
-
while (op < oMatchEnd) *op++ = *match++;
|
|
694
|
-
} else {
|
|
695
|
-
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
|
|
869
|
+
/* Match within prefix of 1 or more bytes */
|
|
870
|
+
assert(op <= oMatchEnd);
|
|
871
|
+
assert(oMatchEnd <= oend_w);
|
|
872
|
+
assert(match >= prefixStart);
|
|
873
|
+
assert(sequence.matchLength >= 1);
|
|
874
|
+
|
|
875
|
+
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
|
876
|
+
* without overlap checking.
|
|
877
|
+
*/
|
|
878
|
+
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
|
879
|
+
/* We bet on a full wildcopy for matches, since we expect matches to be
|
|
880
|
+
* longer than literals (in general). In silesia, ~10% of matches are longer
|
|
881
|
+
* than 16 bytes.
|
|
882
|
+
*/
|
|
883
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
|
|
884
|
+
return sequenceLength;
|
|
696
885
|
}
|
|
697
|
-
|
|
698
|
-
}
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
HINT_INLINE
|
|
702
|
-
size_t ZSTD_execSequenceLong(BYTE* op,
|
|
703
|
-
BYTE* const oend, seq_t sequence,
|
|
704
|
-
const BYTE** litPtr, const BYTE* const litLimit,
|
|
705
|
-
const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
|
|
706
|
-
{
|
|
707
|
-
BYTE* const oLitEnd = op + sequence.litLength;
|
|
708
|
-
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
|
709
|
-
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
|
710
|
-
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
|
711
|
-
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
|
712
|
-
const BYTE* match = sequence.match;
|
|
713
|
-
|
|
714
|
-
/* check */
|
|
715
|
-
RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
|
|
716
|
-
RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
|
|
717
|
-
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
|
|
718
|
-
|
|
719
|
-
/* copy Literals */
|
|
720
|
-
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
|
|
721
|
-
if (sequence.litLength > 8)
|
|
722
|
-
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
|
723
|
-
op = oLitEnd;
|
|
724
|
-
*litPtr = iLitEnd; /* update for next sequence */
|
|
725
|
-
|
|
726
|
-
/* copy Match */
|
|
727
|
-
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
|
728
|
-
/* offset beyond prefix */
|
|
729
|
-
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
|
|
730
|
-
if (match + sequence.matchLength <= dictEnd) {
|
|
731
|
-
memmove(oLitEnd, match, sequence.matchLength);
|
|
732
|
-
return sequenceLength;
|
|
733
|
-
}
|
|
734
|
-
/* span extDict & currentPrefixSegment */
|
|
735
|
-
{ size_t const length1 = dictEnd - match;
|
|
736
|
-
memmove(oLitEnd, match, length1);
|
|
737
|
-
op = oLitEnd + length1;
|
|
738
|
-
sequence.matchLength -= length1;
|
|
739
|
-
match = prefixStart;
|
|
740
|
-
if (op > oend_w || sequence.matchLength < MINMATCH) {
|
|
741
|
-
U32 i;
|
|
742
|
-
for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
|
|
743
|
-
return sequenceLength;
|
|
744
|
-
}
|
|
745
|
-
} }
|
|
746
|
-
assert(op <= oend_w);
|
|
747
|
-
assert(sequence.matchLength >= MINMATCH);
|
|
886
|
+
assert(sequence.offset < WILDCOPY_VECLEN);
|
|
748
887
|
|
|
749
|
-
/*
|
|
750
|
-
|
|
751
|
-
/* close range match, overlap */
|
|
752
|
-
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
|
|
753
|
-
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
|
|
754
|
-
int const sub2 = dec64table[sequence.offset];
|
|
755
|
-
op[0] = match[0];
|
|
756
|
-
op[1] = match[1];
|
|
757
|
-
op[2] = match[2];
|
|
758
|
-
op[3] = match[3];
|
|
759
|
-
match += dec32table[sequence.offset];
|
|
760
|
-
ZSTD_copy4(op+4, match);
|
|
761
|
-
match -= sub2;
|
|
762
|
-
} else {
|
|
763
|
-
ZSTD_copy8(op, match);
|
|
764
|
-
}
|
|
765
|
-
op += 8; match += 8;
|
|
888
|
+
/* Copy 8 bytes and spread the offset to be >= 8. */
|
|
889
|
+
ZSTD_overlapCopy8(&op, &match, sequence.offset);
|
|
766
890
|
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
op = oend_w;
|
|
772
|
-
}
|
|
773
|
-
while (op < oMatchEnd) *op++ = *match++;
|
|
774
|
-
} else {
|
|
775
|
-
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
|
|
891
|
+
/* If the match length is > 8 bytes, then continue with the wildcopy. */
|
|
892
|
+
if (sequence.matchLength > 8) {
|
|
893
|
+
assert(op < oMatchEnd);
|
|
894
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
|
|
776
895
|
}
|
|
777
896
|
return sequenceLength;
|
|
778
897
|
}
|
|
@@ -798,6 +917,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
|
|
798
917
|
DStatePtr->state = DInfo.nextState + lowBits;
|
|
799
918
|
}
|
|
800
919
|
|
|
920
|
+
FORCE_INLINE_TEMPLATE void
|
|
921
|
+
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
|
|
922
|
+
{
|
|
923
|
+
U32 const nbBits = DInfo.nbBits;
|
|
924
|
+
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
|
925
|
+
DStatePtr->state = DInfo.nextState + lowBits;
|
|
926
|
+
}
|
|
927
|
+
|
|
801
928
|
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
|
802
929
|
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
|
|
803
930
|
* bits before reloading. This value is the maximum number of bytes we read
|
|
@@ -809,25 +936,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
|
|
809
936
|
: 0)
|
|
810
937
|
|
|
811
938
|
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
|
|
939
|
+
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
|
|
812
940
|
|
|
813
|
-
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
814
941
|
FORCE_INLINE_TEMPLATE seq_t
|
|
815
|
-
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
|
942
|
+
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
|
|
816
943
|
{
|
|
817
944
|
seq_t seq;
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
U32 const
|
|
822
|
-
U32 const
|
|
823
|
-
U32 const
|
|
824
|
-
|
|
945
|
+
ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
|
|
946
|
+
ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
|
|
947
|
+
ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
|
|
948
|
+
U32 const llBase = llDInfo.baseValue;
|
|
949
|
+
U32 const mlBase = mlDInfo.baseValue;
|
|
950
|
+
U32 const ofBase = ofDInfo.baseValue;
|
|
951
|
+
BYTE const llBits = llDInfo.nbAdditionalBits;
|
|
952
|
+
BYTE const mlBits = mlDInfo.nbAdditionalBits;
|
|
953
|
+
BYTE const ofBits = ofDInfo.nbAdditionalBits;
|
|
954
|
+
BYTE const totalBits = llBits+mlBits+ofBits;
|
|
825
955
|
|
|
826
956
|
/* sequence */
|
|
827
957
|
{ size_t offset;
|
|
828
|
-
if (
|
|
829
|
-
offset = 0;
|
|
830
|
-
else {
|
|
958
|
+
if (ofBits > 1) {
|
|
831
959
|
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
|
832
960
|
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
|
833
961
|
assert(ofBits <= MaxOff);
|
|
@@ -841,62 +969,146 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
|
|
841
969
|
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
|
842
970
|
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
|
843
971
|
}
|
|
844
|
-
}
|
|
845
|
-
|
|
846
|
-
if (ofBits <= 1) {
|
|
847
|
-
offset += (llBase==0);
|
|
848
|
-
if (offset) {
|
|
849
|
-
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
850
|
-
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
|
851
|
-
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
852
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
853
|
-
seqState->prevOffset[0] = offset = temp;
|
|
854
|
-
} else { /* offset == 0 */
|
|
855
|
-
offset = seqState->prevOffset[0];
|
|
856
|
-
}
|
|
857
|
-
} else {
|
|
858
972
|
seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
859
973
|
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
860
974
|
seqState->prevOffset[0] = offset;
|
|
861
|
-
}
|
|
975
|
+
} else {
|
|
976
|
+
U32 const ll0 = (llBase == 0);
|
|
977
|
+
if (LIKELY((ofBits == 0))) {
|
|
978
|
+
if (LIKELY(!ll0))
|
|
979
|
+
offset = seqState->prevOffset[0];
|
|
980
|
+
else {
|
|
981
|
+
offset = seqState->prevOffset[1];
|
|
982
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
983
|
+
seqState->prevOffset[0] = offset;
|
|
984
|
+
}
|
|
985
|
+
} else {
|
|
986
|
+
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
|
|
987
|
+
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
988
|
+
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
|
989
|
+
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
990
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
991
|
+
seqState->prevOffset[0] = offset = temp;
|
|
992
|
+
} } }
|
|
862
993
|
seq.offset = offset;
|
|
863
994
|
}
|
|
864
995
|
|
|
865
|
-
seq.matchLength = mlBase
|
|
866
|
-
|
|
996
|
+
seq.matchLength = mlBase;
|
|
997
|
+
if (mlBits > 0)
|
|
998
|
+
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
|
999
|
+
|
|
867
1000
|
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
|
868
1001
|
BIT_reloadDStream(&seqState->DStream);
|
|
869
|
-
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
|
1002
|
+
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
|
870
1003
|
BIT_reloadDStream(&seqState->DStream);
|
|
871
1004
|
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
|
872
1005
|
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
|
873
1006
|
|
|
874
|
-
seq.litLength = llBase
|
|
875
|
-
|
|
1007
|
+
seq.litLength = llBase;
|
|
1008
|
+
if (llBits > 0)
|
|
1009
|
+
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
|
1010
|
+
|
|
876
1011
|
if (MEM_32bits())
|
|
877
1012
|
BIT_reloadDStream(&seqState->DStream);
|
|
878
1013
|
|
|
879
1014
|
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
|
880
1015
|
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
|
881
1016
|
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
1017
|
+
if (prefetch == ZSTD_p_prefetch) {
|
|
1018
|
+
size_t const pos = seqState->pos + seq.litLength;
|
|
1019
|
+
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
|
1020
|
+
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
|
1021
|
+
* No consequence though : no memory access will occur, offset is only used for prefetching */
|
|
1022
|
+
seqState->pos = pos + seq.matchLength;
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
/* ANS state update
|
|
1026
|
+
* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
|
|
1027
|
+
* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
|
|
1028
|
+
* Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
|
|
1029
|
+
* better option, so it is the default for other compilers. But, if you
|
|
1030
|
+
* measure that it is worse, please put up a pull request.
|
|
1031
|
+
*/
|
|
1032
|
+
{
|
|
1033
|
+
#if defined(__GNUC__) && !defined(__clang__)
|
|
1034
|
+
const int kUseUpdateFseState = 1;
|
|
1035
|
+
#else
|
|
1036
|
+
const int kUseUpdateFseState = 0;
|
|
1037
|
+
#endif
|
|
1038
|
+
if (kUseUpdateFseState) {
|
|
1039
|
+
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
|
1040
|
+
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
|
1041
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
1042
|
+
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
|
1043
|
+
} else {
|
|
1044
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
|
|
1045
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
|
|
1046
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
1047
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
|
|
1048
|
+
}
|
|
1049
|
+
}
|
|
887
1050
|
|
|
888
1051
|
return seq;
|
|
889
1052
|
}
|
|
890
1053
|
|
|
1054
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
|
1055
|
+
MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
|
|
1056
|
+
{
|
|
1057
|
+
size_t const windowSize = dctx->fParams.windowSize;
|
|
1058
|
+
/* No dictionary used. */
|
|
1059
|
+
if (dctx->dictContentEndForFuzzing == NULL) return 0;
|
|
1060
|
+
/* Dictionary is our prefix. */
|
|
1061
|
+
if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
|
|
1062
|
+
/* Dictionary is not our ext-dict. */
|
|
1063
|
+
if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
|
|
1064
|
+
/* Dictionary is not within our window size. */
|
|
1065
|
+
if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
|
|
1066
|
+
/* Dictionary is active. */
|
|
1067
|
+
return 1;
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1070
|
+
MEM_STATIC void ZSTD_assertValidSequence(
|
|
1071
|
+
ZSTD_DCtx const* dctx,
|
|
1072
|
+
BYTE const* op, BYTE const* oend,
|
|
1073
|
+
seq_t const seq,
|
|
1074
|
+
BYTE const* prefixStart, BYTE const* virtualStart)
|
|
1075
|
+
{
|
|
1076
|
+
#if DEBUGLEVEL >= 1
|
|
1077
|
+
size_t const windowSize = dctx->fParams.windowSize;
|
|
1078
|
+
size_t const sequenceSize = seq.litLength + seq.matchLength;
|
|
1079
|
+
BYTE const* const oLitEnd = op + seq.litLength;
|
|
1080
|
+
DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
|
|
1081
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
|
1082
|
+
assert(op <= oend);
|
|
1083
|
+
assert((size_t)(oend - op) >= sequenceSize);
|
|
1084
|
+
assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
|
|
1085
|
+
if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
|
|
1086
|
+
size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
|
|
1087
|
+
/* Offset must be within the dictionary. */
|
|
1088
|
+
assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
|
|
1089
|
+
assert(seq.offset <= windowSize + dictSize);
|
|
1090
|
+
} else {
|
|
1091
|
+
/* Offset must be within our window. */
|
|
1092
|
+
assert(seq.offset <= windowSize);
|
|
1093
|
+
}
|
|
1094
|
+
#else
|
|
1095
|
+
(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
|
|
1096
|
+
#endif
|
|
1097
|
+
}
|
|
1098
|
+
#endif
|
|
1099
|
+
|
|
1100
|
+
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
891
1101
|
FORCE_INLINE_TEMPLATE size_t
|
|
1102
|
+
DONT_VECTORIZE
|
|
892
1103
|
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
893
1104
|
void* dst, size_t maxDstSize,
|
|
894
1105
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
895
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1106
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1107
|
+
const int frame)
|
|
896
1108
|
{
|
|
897
1109
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
898
1110
|
const BYTE* const iend = ip + seqSize;
|
|
899
|
-
BYTE* const ostart = (BYTE*
|
|
1111
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
900
1112
|
BYTE* const oend = ostart + maxDstSize;
|
|
901
1113
|
BYTE* op = ostart;
|
|
902
1114
|
const BYTE* litPtr = dctx->litPtr;
|
|
@@ -905,40 +1117,104 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
|
905
1117
|
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
|
|
906
1118
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
|
907
1119
|
DEBUGLOG(5, "ZSTD_decompressSequences_body");
|
|
1120
|
+
(void)frame;
|
|
908
1121
|
|
|
909
1122
|
/* Regen sequences */
|
|
910
1123
|
if (nbSeq) {
|
|
911
1124
|
seqState_t seqState;
|
|
1125
|
+
size_t error = 0;
|
|
912
1126
|
dctx->fseEntropy = 1;
|
|
913
1127
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
|
914
1128
|
RETURN_ERROR_IF(
|
|
915
1129
|
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
|
916
|
-
corruption_detected);
|
|
1130
|
+
corruption_detected, "");
|
|
917
1131
|
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
|
918
1132
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
|
919
1133
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
1134
|
+
assert(dst != NULL);
|
|
1135
|
+
|
|
1136
|
+
ZSTD_STATIC_ASSERT(
|
|
1137
|
+
BIT_DStream_unfinished < BIT_DStream_completed &&
|
|
1138
|
+
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
|
1139
|
+
BIT_DStream_completed < BIT_DStream_overflow);
|
|
1140
|
+
|
|
1141
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
1142
|
+
/* Align the decompression loop to 32 + 16 bytes.
|
|
1143
|
+
*
|
|
1144
|
+
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
|
|
1145
|
+
* speed swings based on the alignment of the decompression loop. This
|
|
1146
|
+
* performance swing is caused by parts of the decompression loop falling
|
|
1147
|
+
* out of the DSB. The entire decompression loop should fit in the DSB,
|
|
1148
|
+
* when it can't we get much worse performance. You can measure if you've
|
|
1149
|
+
* hit the good case or the bad case with this perf command for some
|
|
1150
|
+
* compressed file test.zst:
|
|
1151
|
+
*
|
|
1152
|
+
* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
|
|
1153
|
+
* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
|
|
1154
|
+
*
|
|
1155
|
+
* If you see most cycles served out of the MITE you've hit the bad case.
|
|
1156
|
+
* If you see most cycles served out of the DSB you've hit the good case.
|
|
1157
|
+
* If it is pretty even then you may be in an okay case.
|
|
1158
|
+
*
|
|
1159
|
+
* I've been able to reproduce this issue on the following CPUs:
|
|
1160
|
+
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
|
1161
|
+
* Use Instruments->Counters to get DSB/MITE cycles.
|
|
1162
|
+
* I never got performance swings, but I was able to
|
|
1163
|
+
* go from the good case of mostly DSB to half of the
|
|
1164
|
+
* cycles served from MITE.
|
|
1165
|
+
* - Coffeelake: Intel i9-9900k
|
|
1166
|
+
*
|
|
1167
|
+
* I haven't been able to reproduce the instability or DSB misses on any
|
|
1168
|
+
* of the following CPUs:
|
|
1169
|
+
* - Haswell
|
|
1170
|
+
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
|
|
1171
|
+
* - Skylake
|
|
1172
|
+
*
|
|
1173
|
+
* If you are seeing performance instability this script can help test.
|
|
1174
|
+
* It tests on 4 commits in zstd where I saw performance change.
|
|
1175
|
+
*
|
|
1176
|
+
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
|
1177
|
+
*/
|
|
1178
|
+
__asm__(".p2align 5");
|
|
1179
|
+
__asm__("nop");
|
|
1180
|
+
__asm__(".p2align 4");
|
|
1181
|
+
#endif
|
|
1182
|
+
for ( ; ; ) {
|
|
1183
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
|
|
1184
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
|
1185
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1186
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1187
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
|
1188
|
+
#endif
|
|
1189
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
|
1190
|
+
BIT_reloadDStream(&(seqState.DStream));
|
|
1191
|
+
op += oneSeqSize;
|
|
1192
|
+
/* gcc and clang both don't like early returns in this loop.
|
|
1193
|
+
* Instead break and check for an error at the end of the loop.
|
|
1194
|
+
*/
|
|
1195
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
|
|
1196
|
+
error = oneSeqSize;
|
|
1197
|
+
break;
|
|
1198
|
+
}
|
|
1199
|
+
if (UNLIKELY(!--nbSeq)) break;
|
|
1200
|
+
}
|
|
929
1201
|
|
|
930
1202
|
/* check if reached exact end */
|
|
931
1203
|
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
|
|
932
|
-
|
|
1204
|
+
if (ZSTD_isError(error)) return error;
|
|
1205
|
+
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
|
|
1206
|
+
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
|
|
933
1207
|
/* save reps for next block */
|
|
934
1208
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
|
935
1209
|
}
|
|
936
1210
|
|
|
937
1211
|
/* last literal segment */
|
|
938
1212
|
{ size_t const lastLLSize = litEnd - litPtr;
|
|
939
|
-
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
|
|
940
|
-
|
|
941
|
-
|
|
1213
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1214
|
+
if (op != NULL) {
|
|
1215
|
+
ZSTD_memcpy(op, litPtr, lastLLSize);
|
|
1216
|
+
op += lastLLSize;
|
|
1217
|
+
}
|
|
942
1218
|
}
|
|
943
1219
|
|
|
944
1220
|
return op-ostart;
|
|
@@ -948,103 +1224,25 @@ static size_t
|
|
|
948
1224
|
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
|
|
949
1225
|
void* dst, size_t maxDstSize,
|
|
950
1226
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
951
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1227
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1228
|
+
const int frame)
|
|
952
1229
|
{
|
|
953
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1230
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
954
1231
|
}
|
|
955
1232
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
956
1233
|
|
|
957
|
-
|
|
958
|
-
|
|
959
1234
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
960
|
-
FORCE_INLINE_TEMPLATE seq_t
|
|
961
|
-
ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
|
|
962
|
-
{
|
|
963
|
-
seq_t seq;
|
|
964
|
-
U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
|
|
965
|
-
U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
|
|
966
|
-
U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
|
|
967
|
-
U32 const totalBits = llBits+mlBits+ofBits;
|
|
968
|
-
U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
|
|
969
|
-
U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
|
|
970
|
-
U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
|
|
971
|
-
|
|
972
|
-
/* sequence */
|
|
973
|
-
{ size_t offset;
|
|
974
|
-
if (!ofBits)
|
|
975
|
-
offset = 0;
|
|
976
|
-
else {
|
|
977
|
-
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
|
978
|
-
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
|
979
|
-
assert(ofBits <= MaxOff);
|
|
980
|
-
if (MEM_32bits() && longOffsets) {
|
|
981
|
-
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
|
|
982
|
-
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
|
983
|
-
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
|
|
984
|
-
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
|
985
|
-
} else {
|
|
986
|
-
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
|
987
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
|
988
|
-
}
|
|
989
|
-
}
|
|
990
|
-
|
|
991
|
-
if (ofBits <= 1) {
|
|
992
|
-
offset += (llBase==0);
|
|
993
|
-
if (offset) {
|
|
994
|
-
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
|
995
|
-
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
|
996
|
-
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
997
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
998
|
-
seqState->prevOffset[0] = offset = temp;
|
|
999
|
-
} else {
|
|
1000
|
-
offset = seqState->prevOffset[0];
|
|
1001
|
-
}
|
|
1002
|
-
} else {
|
|
1003
|
-
seqState->prevOffset[2] = seqState->prevOffset[1];
|
|
1004
|
-
seqState->prevOffset[1] = seqState->prevOffset[0];
|
|
1005
|
-
seqState->prevOffset[0] = offset;
|
|
1006
|
-
}
|
|
1007
|
-
seq.offset = offset;
|
|
1008
|
-
}
|
|
1009
|
-
|
|
1010
|
-
seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
|
|
1011
|
-
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
|
1012
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
1013
|
-
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
|
1014
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
1015
|
-
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
|
|
1016
|
-
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
|
1017
|
-
|
|
1018
|
-
seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
|
|
1019
|
-
if (MEM_32bits())
|
|
1020
|
-
BIT_reloadDStream(&seqState->DStream);
|
|
1021
|
-
|
|
1022
|
-
{ size_t const pos = seqState->pos + seq.litLength;
|
|
1023
|
-
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
|
1024
|
-
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
|
1025
|
-
* No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
|
|
1026
|
-
seqState->pos = pos + seq.matchLength;
|
|
1027
|
-
}
|
|
1028
|
-
|
|
1029
|
-
/* ANS state update */
|
|
1030
|
-
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
|
1031
|
-
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
|
1032
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
|
1033
|
-
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
|
1034
|
-
|
|
1035
|
-
return seq;
|
|
1036
|
-
}
|
|
1037
|
-
|
|
1038
1235
|
FORCE_INLINE_TEMPLATE size_t
|
|
1039
1236
|
ZSTD_decompressSequencesLong_body(
|
|
1040
1237
|
ZSTD_DCtx* dctx,
|
|
1041
1238
|
void* dst, size_t maxDstSize,
|
|
1042
1239
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1043
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1240
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1241
|
+
const int frame)
|
|
1044
1242
|
{
|
|
1045
1243
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
1046
1244
|
const BYTE* const iend = ip + seqSize;
|
|
1047
|
-
BYTE* const ostart = (BYTE*
|
|
1245
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
1048
1246
|
BYTE* const oend = ostart + maxDstSize;
|
|
1049
1247
|
BYTE* op = ostart;
|
|
1050
1248
|
const BYTE* litPtr = dctx->litPtr;
|
|
@@ -1052,6 +1250,7 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1052
1250
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
|
1053
1251
|
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
|
|
1054
1252
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
|
1253
|
+
(void)frame;
|
|
1055
1254
|
|
|
1056
1255
|
/* Regen sequences */
|
|
1057
1256
|
if (nbSeq) {
|
|
@@ -1067,36 +1266,45 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1067
1266
|
seqState.prefixStart = prefixStart;
|
|
1068
1267
|
seqState.pos = (size_t)(op-prefixStart);
|
|
1069
1268
|
seqState.dictEnd = dictEnd;
|
|
1269
|
+
assert(dst != NULL);
|
|
1070
1270
|
assert(iend >= ip);
|
|
1071
1271
|
RETURN_ERROR_IF(
|
|
1072
1272
|
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
|
1073
|
-
corruption_detected);
|
|
1273
|
+
corruption_detected, "");
|
|
1074
1274
|
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
|
1075
1275
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
|
1076
1276
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
|
1077
1277
|
|
|
1078
1278
|
/* prepare in advance */
|
|
1079
1279
|
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
|
|
1080
|
-
sequences[seqNb] =
|
|
1280
|
+
sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
|
|
1081
1281
|
PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
|
1082
1282
|
}
|
|
1083
|
-
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
|
|
1283
|
+
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
|
|
1084
1284
|
|
|
1085
1285
|
/* decode and decompress */
|
|
1086
1286
|
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
|
|
1087
|
-
seq_t const sequence =
|
|
1088
|
-
size_t const oneSeqSize =
|
|
1287
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
|
|
1288
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
|
1289
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1290
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1291
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1292
|
+
#endif
|
|
1089
1293
|
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1090
1294
|
PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
|
1091
1295
|
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
|
1092
1296
|
op += oneSeqSize;
|
|
1093
1297
|
}
|
|
1094
|
-
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
|
|
1298
|
+
RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
|
|
1095
1299
|
|
|
1096
1300
|
/* finish queue */
|
|
1097
1301
|
seqNb -= seqAdvance;
|
|
1098
1302
|
for ( ; seqNb<nbSeq ; seqNb++) {
|
|
1099
|
-
size_t const oneSeqSize =
|
|
1303
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
|
1304
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
|
1305
|
+
assert(!ZSTD_isError(oneSeqSize));
|
|
1306
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
|
1307
|
+
#endif
|
|
1100
1308
|
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
|
1101
1309
|
op += oneSeqSize;
|
|
1102
1310
|
}
|
|
@@ -1107,9 +1315,11 @@ ZSTD_decompressSequencesLong_body(
|
|
|
1107
1315
|
|
|
1108
1316
|
/* last literal segment */
|
|
1109
1317
|
{ size_t const lastLLSize = litEnd - litPtr;
|
|
1110
|
-
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
|
|
1111
|
-
|
|
1112
|
-
|
|
1318
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
|
1319
|
+
if (op != NULL) {
|
|
1320
|
+
ZSTD_memcpy(op, litPtr, lastLLSize);
|
|
1321
|
+
op += lastLLSize;
|
|
1322
|
+
}
|
|
1113
1323
|
}
|
|
1114
1324
|
|
|
1115
1325
|
return op-ostart;
|
|
@@ -1119,9 +1329,10 @@ static size_t
|
|
|
1119
1329
|
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
1120
1330
|
void* dst, size_t maxDstSize,
|
|
1121
1331
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1122
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1332
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1333
|
+
const int frame)
|
|
1123
1334
|
{
|
|
1124
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1335
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1125
1336
|
}
|
|
1126
1337
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1127
1338
|
|
|
@@ -1131,12 +1342,14 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
|
1131
1342
|
|
|
1132
1343
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1133
1344
|
static TARGET_ATTRIBUTE("bmi2") size_t
|
|
1345
|
+
DONT_VECTORIZE
|
|
1134
1346
|
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
|
1135
1347
|
void* dst, size_t maxDstSize,
|
|
1136
1348
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1137
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1349
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1350
|
+
const int frame)
|
|
1138
1351
|
{
|
|
1139
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1352
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1140
1353
|
}
|
|
1141
1354
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1142
1355
|
|
|
@@ -1145,9 +1358,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t
|
|
|
1145
1358
|
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
|
|
1146
1359
|
void* dst, size_t maxDstSize,
|
|
1147
1360
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1148
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1361
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1362
|
+
const int frame)
|
|
1149
1363
|
{
|
|
1150
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1364
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1151
1365
|
}
|
|
1152
1366
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1153
1367
|
|
|
@@ -1157,21 +1371,23 @@ typedef size_t (*ZSTD_decompressSequences_t)(
|
|
|
1157
1371
|
ZSTD_DCtx* dctx,
|
|
1158
1372
|
void* dst, size_t maxDstSize,
|
|
1159
1373
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1160
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1374
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1375
|
+
const int frame);
|
|
1161
1376
|
|
|
1162
1377
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1163
1378
|
static size_t
|
|
1164
1379
|
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|
1165
1380
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1166
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1381
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1382
|
+
const int frame)
|
|
1167
1383
|
{
|
|
1168
1384
|
DEBUGLOG(5, "ZSTD_decompressSequences");
|
|
1169
1385
|
#if DYNAMIC_BMI2
|
|
1170
1386
|
if (dctx->bmi2) {
|
|
1171
|
-
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1387
|
+
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1172
1388
|
}
|
|
1173
1389
|
#endif
|
|
1174
|
-
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1390
|
+
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1175
1391
|
}
|
|
1176
1392
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
|
1177
1393
|
|
|
@@ -1186,15 +1402,16 @@ static size_t
|
|
|
1186
1402
|
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|
1187
1403
|
void* dst, size_t maxDstSize,
|
|
1188
1404
|
const void* seqStart, size_t seqSize, int nbSeq,
|
|
1189
|
-
const ZSTD_longOffset_e isLongOffset
|
|
1405
|
+
const ZSTD_longOffset_e isLongOffset,
|
|
1406
|
+
const int frame)
|
|
1190
1407
|
{
|
|
1191
1408
|
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
|
|
1192
1409
|
#if DYNAMIC_BMI2
|
|
1193
1410
|
if (dctx->bmi2) {
|
|
1194
|
-
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1411
|
+
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1195
1412
|
}
|
|
1196
1413
|
#endif
|
|
1197
|
-
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
|
1414
|
+
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
|
1198
1415
|
}
|
|
1199
1416
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
|
1200
1417
|
|
|
@@ -1228,7 +1445,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
|
|
|
1228
1445
|
}
|
|
1229
1446
|
#endif
|
|
1230
1447
|
|
|
1231
|
-
|
|
1232
1448
|
size_t
|
|
1233
1449
|
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1234
1450
|
void* dst, size_t dstCapacity,
|
|
@@ -1244,7 +1460,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1244
1460
|
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
|
|
1245
1461
|
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
|
1246
1462
|
|
|
1247
|
-
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);
|
|
1463
|
+
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
|
|
1248
1464
|
|
|
1249
1465
|
/* Decode literals section */
|
|
1250
1466
|
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
|
@@ -1270,6 +1486,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1270
1486
|
ip += seqHSize;
|
|
1271
1487
|
srcSize -= seqHSize;
|
|
1272
1488
|
|
|
1489
|
+
RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
|
1490
|
+
|
|
1273
1491
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
|
1274
1492
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
|
1275
1493
|
if ( !usePrefetchDecoder
|
|
@@ -1288,23 +1506,34 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
|
1288
1506
|
if (usePrefetchDecoder)
|
|
1289
1507
|
#endif
|
|
1290
1508
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
|
1291
|
-
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
|
1509
|
+
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
|
1292
1510
|
#endif
|
|
1293
1511
|
|
|
1294
1512
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
|
1295
1513
|
/* else */
|
|
1296
|
-
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
|
1514
|
+
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
|
|
1297
1515
|
#endif
|
|
1298
1516
|
}
|
|
1299
1517
|
}
|
|
1300
1518
|
|
|
1301
1519
|
|
|
1520
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
|
|
1521
|
+
{
|
|
1522
|
+
if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
|
|
1523
|
+
dctx->dictEnd = dctx->previousDstEnd;
|
|
1524
|
+
dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
|
|
1525
|
+
dctx->prefixStart = dst;
|
|
1526
|
+
dctx->previousDstEnd = dst;
|
|
1527
|
+
}
|
|
1528
|
+
}
|
|
1529
|
+
|
|
1530
|
+
|
|
1302
1531
|
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
|
|
1303
1532
|
void* dst, size_t dstCapacity,
|
|
1304
1533
|
const void* src, size_t srcSize)
|
|
1305
1534
|
{
|
|
1306
1535
|
size_t dSize;
|
|
1307
|
-
ZSTD_checkContinuity(dctx, dst);
|
|
1536
|
+
ZSTD_checkContinuity(dctx, dst, dstCapacity);
|
|
1308
1537
|
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
|
|
1309
1538
|
dctx->previousDstEnd = (char*)dst + dSize;
|
|
1310
1539
|
return dSize;
|