zstdlib 0.3.0-x64-mingw32 → 0.8.0-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +30 -1
- data/README.md +2 -2
- data/Rakefile +1 -1
- data/ext/zstdlib/extconf.rb +3 -3
- data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
- data/ext/zstdlib/ruby/zlib-3.0/zstdlib.c +4994 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/bitstream.h +59 -51
- data/ext/zstdlib/zstd-1.5.0/lib/common/compiler.h +289 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/cpu.h +1 -3
- data/ext/zstdlib/zstd-1.5.0/lib/common/debug.c +24 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/debug.h +22 -49
- data/ext/zstdlib/zstd-1.5.0/lib/common/entropy_common.c +362 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/error_private.c +3 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/error_private.h +8 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/fse.h +50 -42
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/fse_decompress.c +149 -55
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/huf.h +43 -39
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/mem.h +69 -25
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/pool.c +30 -20
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/pool.h +3 -3
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/threading.c +51 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/threading.h +36 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/xxhash.c +40 -92
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/xxhash.h +12 -32
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/zstd_common.c +10 -10
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_deps.h +111 -0
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_internal.h +490 -0
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_trace.h +154 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/fse_compress.c +47 -63
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/hist.c +41 -63
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/hist.h +13 -33
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/huf_compress.c +332 -193
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress.c +6393 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_internal.h +522 -86
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_literals.c +25 -16
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_literals.h +2 -2
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.c +50 -24
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.h +11 -4
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress_superblock.c +572 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_cwksp.h +662 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_double_fast.c +43 -41
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_double_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_fast.c +85 -80
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_fast.h +2 -2
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.c +2184 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.h +125 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_ldm.c +333 -208
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_ldm.h +15 -3
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_opt.c +228 -129
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstdmt_compress.c +151 -440
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstdmt_compress.h +110 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/huf_decompress.c +395 -276
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_ddict.c +20 -16
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_ddict.h +3 -3
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress.c +628 -231
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.c +606 -380
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_internal.h +39 -9
- data/ext/zstdlib/zstd-1.5.0/lib/zdict.h +452 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/zstd.h +740 -153
- data/ext/zstdlib/{zstd-1.4.2/lib/common → zstd-1.5.0/lib}/zstd_errors.h +3 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzguts.h +0 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzlib.c +9 -9
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzread.c +16 -8
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzwrite.c +8 -8
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.c +131 -45
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
- metadata +76 -67
- data/ext/zstdlib/zstd-1.4.2/lib/common/compiler.h +0 -147
- data/ext/zstdlib/zstd-1.4.2/lib/common/debug.c +0 -44
- data/ext/zstdlib/zstd-1.4.2/lib/common/entropy_common.c +0 -236
- data/ext/zstdlib/zstd-1.4.2/lib/common/zstd_internal.h +0 -371
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress.c +0 -3904
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_lazy.c +0 -1111
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_lazy.h +0 -67
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstdmt_compress.h +0 -192
data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,15 +14,15 @@
 /*-*******************************************************
 *  Dependencies
 *********************************************************/
-#include <string.h>      /* memcpy, memmove, memset */
-#include "compiler.h"    /* prefetch */
-#include "cpu.h"         /* bmi2 */
-#include "mem.h"         /* low level memory routines */
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/compiler.h"    /* prefetch */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
 #define FSE_STATIC_LINKING_ONLY
-#include "fse.h"
+#include "../common/fse.h"
 #define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
-#include "zstd_internal.h"
+#include "../common/huf.h"
+#include "../common/zstd_internal.h"
 #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
 #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
 #include "zstd_decompress_block.h"
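Note on the include changes above: zstd 1.5.0 routes all libc memory primitives through the new lib/common/zstd_deps.h, so freestanding or kernel builds can substitute their own implementations. A minimal sketch of that indirection follows — illustrative only; the real header is more elaborate (it prefers compiler builtins where available), so treat the macro bodies here as an assumption:

/* Sketch of the ZSTD_memcpy/ZSTD_memmove/ZSTD_memset indirection
 * introduced by zstd_deps.h (assumption: simplified; see
 * lib/common/zstd_deps.h in zstd 1.5.0 for the real definitions). */
#ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON
#include <string.h>   /* memcpy, memmove, memset */
#define ZSTD_memcpy(d, s, n)  memcpy((d), (s), (n))
#define ZSTD_memmove(d, s, n) memmove((d), (s), (n))
#define ZSTD_memset(p, c, n)  memset((p), (c), (n))
#endif /* ZSTD_DEPS_COMMON */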
@@ -44,7 +44,7 @@
 /*_*******************************************************
 *   Memory operations
 **********************************************************/
-static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
 
 
 /*-*************************************************************
@@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                           blockProperties_t* bpPtr)
 {
-    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
 
     {   U32 const cBlockHeader = MEM_readLE24(src);
         U32 const cSize = cBlockHeader >> 3;
@@ -64,7 +64,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
         bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
         bpPtr->origSize = cSize;   /* only useful for RLE */
         if (bpPtr->blockType == bt_rle) return 1;
-        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
+        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
         return cSize;
     }
 }
@@ -79,7 +79,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                           const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
-    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
+    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
+    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
 
     {   const BYTE* const istart = (const BYTE*) src;
         symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
@@ -87,7 +88,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         switch(litEncType)
         {
         case set_repeat:
-            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
+            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
+            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
             /* fall-through */
 
         case set_compressed:
@@ -116,11 +118,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     /* 2 - 2 - 18 - 18 */
                     lhSize = 5;
                     litSize  = (lhc >> 4) & 0x3FFFF;
-                    litCSize = (lhc >> 22) + (istart[4] << 10);
+                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                     break;
                 }
-                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
-                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
 
                 /* prefetch huffman table if cold */
                 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
@@ -158,13 +160,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 }
             }
 
-            RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
+            RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
 
             dctx->litPtr = dctx->litBuffer;
             dctx->litSize = litSize;
             dctx->litEntropy = 1;
             if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
-            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+            ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
             return litCSize + lhSize;
         }
 
@@ -188,11 +190,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             }
 
             if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
-                RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
-                memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
+                ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
                 dctx->litPtr = dctx->litBuffer;
                 dctx->litSize = litSize;
-                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                 return lhSize+litSize;
             }
             /* direct reference into compressed stream */
@@ -220,8 +222,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                 break;
             }
-            RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
-            memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+            RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+            ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
             dctx->litPtr = dctx->litBuffer;
             dctx->litSize = litSize;
             return lhSize+1;
@@ -234,7 +236,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 
 /* Default FSE distribution tables.
  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
  * They were generated programmatically with following method :
  * - start from default distributions, present in /lib/common/zstd_internal.h
  * - generate tables normally, using ZSTD_buildFSETable()
@@ -362,23 +364,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
  * generate FSE decoding table for one symbol (ll, ml or off)
  * cannot fail if input is valid =>
  * all inputs are presumed validated at this stage */
-void
-ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+FORCE_INLINE_TEMPLATE
+void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
             const short* normalizedCounter, unsigned maxSymbolValue,
             const U32* baseValue, const U32* nbAdditionalBits,
-            unsigned tableLog)
+            unsigned tableLog, void* wksp, size_t wkspSize)
 {
     ZSTD_seqSymbol* const tableDecode = dt+1;
-    U16 symbolNext[MaxSeq+1];
-
     U32 const maxSV1 = maxSymbolValue + 1;
     U32 const tableSize = 1 << tableLog;
-    U32 highThreshold = tableSize-1;
+
+    U16* symbolNext = (U16*)wksp;
+    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
+    U32 highThreshold = tableSize - 1;
+
 
     /* Sanity Checks */
     assert(maxSymbolValue <= MaxSeq);
     assert(tableLog <= MaxFSELog);
-
+    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
+    (void)wkspSize;
     /* Init, lay down lowprob symbols */
     {   ZSTD_seqSymbol_header DTableH;
         DTableH.tableLog = tableLog;
@@ -391,18 +396,72 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
                 symbolNext[s] = 1;
             } else {
                 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
-                symbolNext[s] = normalizedCounter[s];
+                assert(normalizedCounter[s]>=0);
+                symbolNext[s] = (U16)normalizedCounter[s];
         }   }   }
-        memcpy(dt, &DTableH, sizeof(DTableH));
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
     }
 
     /* Spread symbols */
-    {   U32 const tableMask = tableSize-1;
+    assert(tableSize <= 512);
+    /* Specialized symbol spreading for the case when there are
+     * no low probability (-1 count) symbols. When compressing
+     * small blocks we avoid low probability symbols to hit this
+     * case, since header decoding speed matters more.
+     */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what emperically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].baseValue = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
         U32 const step = FSE_TABLESTEP(tableSize);
         U32 s, position = 0;
         for (s=0; s<maxSV1; s++) {
             int i;
-            for (i=0; i<normalizedCounter[s]; i++) {
+            int const n = normalizedCounter[s];
+            for (i=0; i<n; i++) {
                 tableDecode[position].baseValue = s;
                 position = (position + step) & tableMask;
                 while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
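The fast path added above spreads symbols in two stages: stage one lays every symbol into a linear spread buffer with 8-byte stores; stage two scatters that buffer across the table at a fixed stride. This removes the data-dependent inner loop of the old spreading code, which was a major source of branch misses on small blocks. A standalone sketch of the same two-stage idea, simplified under stated assumptions (no low-probability symbols, counts sum exactly to tableSize, symbols fit in a byte — function and variable names are illustrative, not zstd's):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Two-stage FSE-style symbol spread (simplified sketch, not zstd's code).
 * counts[0..maxSymbol] must sum to tableSize, a power of two >= 32;
 * spread must have room for tableSize + 8 bytes (8-byte store overrun). */
static void spread_two_stage(uint8_t* table, size_t tableSize,
                             const int* counts, unsigned maxSymbol,
                             uint8_t* spread)
{
    size_t const mask = tableSize - 1;
    size_t const step = (tableSize >> 1) + (tableSize >> 3) + 3;  /* FSE_TABLESTEP */
    /* Stage 1: lay symbols down linearly, 8 bytes per store. */
    {   uint64_t sv = 0;                       /* 8 byte-copies of symbol s */
        uint64_t const add = 0x0101010101010101ull;
        size_t pos = 0;
        unsigned s;
        for (s = 0; s <= maxSymbol; ++s, sv += add) {
            int i;
            memcpy(spread + pos, &sv, 8);      /* covers counts <= 8 in one store */
            for (i = 8; i < counts[s]; i += 8)
                memcpy(spread + pos + i, &sv, 8);
            pos += counts[s];
        }
        assert(pos == tableSize);
    }
    /* Stage 2: scatter the linear layout across the table at a fixed stride.
     * step is odd, hence coprime with the power-of-two tableSize, so the
     * walk visits every slot exactly once. */
    {   size_t position = 0, i;
        for (i = 0; i < tableSize; ++i) {
            table[position] = spread[i];
            position = (position + step) & mask;
        }
        assert(position == 0);
    }
}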
@@ -411,7 +470,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
         }
     }
 
     /* Build Decoding table */
-    {   U32 u;
+    {
+        U32 u;
         for (u=0; u<tableSize; u++) {
             U32 const symbol = tableDecode[u].baseValue;
             U32 const nextState = symbolNext[symbol]++;
@@ -420,7 +480,46 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
             assert(nbAdditionalBits[symbol] < 255);
             tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
             tableDecode[u].baseValue = baseValue[symbol];
-    }   }
+        }
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+#endif
+
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
+                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+        return;
+    }
+#endif
+    (void)bmi2;
+    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
 }
 
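The wrapper pair added above is zstd's standard DYNAMIC_BMI2 dispatch pattern: the force-inlined _body function is instantiated twice, once under TARGET_ATTRIBUTE("bmi2"), and a runtime flag (derived from CPUID when the context is created) selects the variant, so a single translation unit serves both CPU generations. A reduced sketch of the pattern with illustrative names (the hash constant and function names here are assumptions for demonstration):

/* Sketch of the DYNAMIC_BMI2-style dispatch used above (illustrative names). */
#if defined(__GNUC__) && defined(__x86_64__)
#  define HAS_TARGET_ATTR 1
#else
#  define HAS_TARGET_ATTR 0
#endif

static inline unsigned work_body(unsigned x) { return x * 2654435761u; }  /* shared body */

static unsigned work_default(unsigned x) { return work_body(x); }

#if HAS_TARGET_ATTR
__attribute__((target("bmi2"))) static unsigned work_bmi2(unsigned x)
{
    return work_body(x);   /* same body, compiled with BMI2 code generation enabled */
}
#endif

unsigned work(unsigned x, int bmi2)
{
#if HAS_TARGET_ATTR
    if (bmi2) return work_bmi2(x);   /* caller established CPU support at startup */
#endif
    (void)bmi2;
    return work_default(x);
}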
@@ -432,13 +531,14 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
                          const void* src, size_t srcSize,
                          const U32* baseValue, const U32* nbAdditionalBits,
                          const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
-                         int ddictIsCold, int nbSeq)
+                         int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
+                         int bmi2)
 {
     switch(type)
     {
     case set_rle :
-        RETURN_ERROR_IF(!srcSize, srcSize_wrong);
-        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
+        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
+        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
         {   U32 const symbol = *(const BYTE*)src;
             U32 const baseline = baseValue[symbol];
             U32 const nbBits = nbAdditionalBits[symbol];
@@ -450,7 +550,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
         *DTablePtr = defaultTable;
         return 0;
     case set_repeat:
-        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
+        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
         /* prefetch FSE table if used */
         if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
             const void* const pStart = *DTablePtr;
@@ -462,9 +562,9 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
         {   unsigned tableLog;
             S16 norm[MaxSeq+1];
             size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
-            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
-            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
-            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
+            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
+            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
+            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
             *DTablePtr = DTableSpace;
             return headerSize;
         }
@@ -477,35 +577,36 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                              const void* src, size_t srcSize)
 {
-    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const istart = (const BYTE*)src;
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip = istart;
     int nbSeq;
     DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
 
     /* check */
-    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
 
     /* SeqHead */
     nbSeq = *ip++;
     if (!nbSeq) {
         *nbSeqPtr=0;
-        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
+        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
         return 1;
     }
     if (nbSeq > 0x7F) {
         if (nbSeq == 0xFF) {
-            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
-            nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
+            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
+            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
+            ip+=2;
         } else {
-            RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
+            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
             nbSeq = ((nbSeq-0x80)<<8) + *ip++;
         }
     }
     *nbSeqPtr = nbSeq;
 
     /* FSE table descriptors */
-    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
+    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -517,8 +618,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       LL_base, LL_bits,
                                                       LL_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
-            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += llhSize;
         }
 
@@ -527,8 +630,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       OF_base, OF_bits,
                                                       OF_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
-            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += ofhSize;
         }
 
@@ -537,8 +642,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       ML_base, ML_bits,
                                                       ML_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
-            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += mlhSize;
         }
     }
@@ -551,7 +658,6 @@ typedef struct {
     size_t litLength;
     size_t matchLength;
    size_t offset;
-    const BYTE* match;
 } seq_t;
 
 typedef struct {
@@ -565,59 +671,135 @@ typedef struct {
     ZSTD_fseState stateOffb;
     ZSTD_fseState stateML;
     size_t prevOffset[ZSTD_REP_NUM];
-    const BYTE* prefixStart;
-    const BYTE* dictEnd;
-    size_t pos;
 } seqState_t;
 
+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *op >= 8
+ */
+HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[offset];
+        (*op)[0] = (*ip)[0];
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;
+
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+    if (length < 8) {
+        /* Handle short lengths. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+    if (ovtype == ZSTD_overlap_src_before_dst) {
+        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+        assert(length >= 8);
+        ZSTD_overlapCopy8(&op, &ip, diff);
+        assert(op - ip >= 8);
+        assert(op <= oend);
+    }
 
-/* ZSTD_execSequenceLast7():
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
- * requires more careful checks, to ensure there is no overflow.
- * performance does not matter though.
- * note : this case is supposed to be never generated "naturally" by reference encoder,
- *        since in most cases it needs at least 8 bytes to look for a match.
- *        but it's allowed by the specification. */
+    if (oend <= oend_w) {
+        /* No risk of overwrite. */
+        ZSTD_wildcopy(op, ip, length, ovtype);
+        return;
+    }
+    if (op <= oend_w) {
+        /* Wildcopy until we get close to the end. */
+        assert(oend > oend_w);
+        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+        ip += oend_w - op;
+        op = oend_w;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
 FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE* op,
-                              BYTE* const oend, seq_t sequence,
-                              const BYTE** litPtr, const BYTE* const litLimit,
-                              const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+size_t ZSTD_execSequenceEnd(BYTE* op,
+                            BYTE* const oend, seq_t sequence,
+                            const BYTE** litPtr, const BYTE* const litLimit,
+                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
 
-    /* check */
-    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
+    /* bounds checks : careful of address space overflow in 32-bit mode */
+    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+    assert(op < op + sequenceLength);
+    assert(oLitEnd < op + sequenceLength);
 
     /* copy literals */
-    while (op < oLitEnd) *op++ = *(*litPtr)++;
+    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+    op = oLitEnd;
+    *litPtr = iLitEnd;
 
     /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - base)) {
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase), corruption_detected);
-        match = dictEnd - (base-match);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+        match = dictEnd - (prefixStart-match);
         if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
         }
         /* span extDict & currentPrefixSegment */
         {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
+            ZSTD_memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
-            match = base;
+            match = prefixStart;
     }   }
-    while (op < oMatchEnd) *op++ = *match++;
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
     return sequenceLength;
 }
 
-
 HINT_INLINE
 size_t ZSTD_execSequence(BYTE* op,
                          BYTE* const oend, seq_t sequence,
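ZSTD_overlapCopy8 above handles matches whose offset is under 8 bytes: the first 4 bytes are copied one at a time (so the reads observe bytes the writes just produced, replicating the pattern), then dec32table/dec64table realign the source so wider copies work and the effective offset becomes at least 8. A plain-C restatement with slightly reordered pointer bookkeeping — this is a sketch for illustration, not the zstd source — plus a tiny RLE-style usage example:

#include <stdio.h>
#include <string.h>

/* Demo of the "spread a short offset to >= 8 bytes" idea behind ZSTD_overlapCopy8. */
static void overlap_copy8(unsigned char** op, const unsigned char** ip, size_t offset)
{
    static const unsigned dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added to ip */
    static const int      dec64table[] = { 8, 8, 8, 7, 8, 9, 10, 11 }; /* net subtraction */
    if (offset < 8) {
        (*op)[0] = (*ip)[0];                /* byte-wise: reads see freshly written bytes */
        (*op)[1] = (*ip)[1];
        (*op)[2] = (*ip)[2];
        (*op)[3] = (*ip)[3];
        *ip += dec32table[offset];          /* realign so a 4-byte copy lands correctly */
        memcpy(*op + 4, *ip, 4);
        *ip += 8 - dec64table[offset];      /* net effect: offset is now >= 8 */
    } else {
        memcpy(*op, *ip, 8);
        *ip += 8;
    }
    *op += 8;
}

int main(void)
{
    /* RLE-style match: offset 1 repeats the byte just before the write head. */
    unsigned char buf[32] = "A";
    unsigned char* op = buf + 1;
    const unsigned char* ip = buf;          /* ip = op - offset */
    overlap_copy8(&op, &ip, 1);
    printf("%.9s\n", (const char*)buf);     /* prints "AAAAAAAAA" */
    return 0;
}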
@@ -627,155 +809,85 @@ size_t ZSTD_execSequence(BYTE* op,
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
 
-    /* check */
-    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
-    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
-
-    /* copy Literals */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    else
-        ZSTD_copy8(op, *litPtr);
+    assert(op != NULL /* Precondition */);
+    assert(oend_w < oend /* No underflow */);
+    /* Handle edge cases in a slow path:
+     *   - Read beyond end of literals
+     *   - Match end is within WILDCOPY_OVERLIMIT of oend
+     *   - 32-bit mode and the match length overflows
+     */
+    if (UNLIKELY(
+            iLitEnd > litLimit ||
+            oMatchEnd > oend_w ||
+            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(op <= oLitEnd /* No overflow */);
+    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+    assert(oMatchEnd <= oend /* No underflow */);
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (UNLIKELY(sequence.litLength > 16)) {
+        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+    }
     op = oLitEnd;
     *litPtr = iLitEnd;   /* update for next sequence */
 
-    /* copy Match */
+    /* Copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix -> go into extDict */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
+        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
         match = dictEnd + (match - prefixStart);
         if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
         }
         /* span extDict & currentPrefixSegment */
         {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
+            ZSTD_memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-              U32 i;
-              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-              return sequenceLength;
-            }
     }   }
-    /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
-    }
-    op += 8; match += 8;
-
-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
-    }
-    return sequenceLength;
-}
-
-
-HINT_INLINE
-size_t ZSTD_execSequenceLong(BYTE* op,
-                             BYTE* const oend, seq_t sequence,
-                             const BYTE** litPtr, const BYTE* const litLimit,
-                             const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
-{
-    BYTE* const oLitEnd = op + sequence.litLength;
-    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
-    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
-    const BYTE* match = sequence.match;
-
-    /* check */
-    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
-    if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
-    /* copy Literals */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    else
-        ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
-
-    op = oLitEnd;
-    *litPtr = iLitEnd;   /* update for next sequence */
-
-    /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
-        /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
-        if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
-            return sequenceLength;
-        }
-        /* span extDict & currentPrefixSegment */
-        {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
-            op = oLitEnd + length1;
-            sequence.matchLength -= length1;
-            match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-              U32 i;
-              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-              return sequenceLength;
-            }
-    }   }
-    assert(op <= oend_w);
-    assert(sequence.matchLength >= MINMATCH);
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);
 
-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
-    }
-    op += 8; match += 8;
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
+    }
+    assert(sequence.offset < WILDCOPY_VECLEN);
 
-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);
+
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
     }
     return sequenceLength;
 }
@@ -801,6 +913,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
     DStatePtr->state = DInfo.nextState + lowBits;
 }
 
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
+{
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.nextState + lowBits;
+}
+
 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
  * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
  * bits before reloading. This value is the maximum number of bytes we read
@@ -813,24 +933,24 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
 
 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
 
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
 FORCE_INLINE_TEMPLATE seq_t
 ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 {
     seq_t seq;
-    U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
-    U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
-    U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
-    U32 const totalBits = llBits+mlBits+ofBits;
-    U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
-    U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
-    U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
+    ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
+    ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
+    ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
+    U32 const llBase = llDInfo.baseValue;
+    U32 const mlBase = mlDInfo.baseValue;
+    U32 const ofBase = ofDInfo.baseValue;
+    BYTE const llBits = llDInfo.nbAdditionalBits;
+    BYTE const mlBits = mlDInfo.nbAdditionalBits;
+    BYTE const ofBits = ofDInfo.nbAdditionalBits;
+    BYTE const totalBits = llBits+mlBits+ofBits;
 
     /* sequence */
     {   size_t offset;
-        if (!ofBits)
-            offset = 0;
-        else {
+        if (ofBits > 1) {
             ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
             ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
             assert(ofBits <= MaxOff);
@@ -844,63 +964,138 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
                 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
                 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
             }
-        }
-
-        if (ofBits <= 1) {
-            offset += (llBase==0);
-            if (offset) {
-                size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-                temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
-                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
-                seqState->prevOffset[1] = seqState->prevOffset[0];
-                seqState->prevOffset[0] = offset = temp;
-            } else {  /* offset == 0 */
-                offset = seqState->prevOffset[0];
-            }
-        } else {
             seqState->prevOffset[2] = seqState->prevOffset[1];
             seqState->prevOffset[1] = seqState->prevOffset[0];
             seqState->prevOffset[0] = offset;
-        }
+        } else {
+            U32 const ll0 = (llBase == 0);
+            if (LIKELY((ofBits == 0))) {
+                if (LIKELY(!ll0))
+                    offset = seqState->prevOffset[0];
+                else {
+                    offset = seqState->prevOffset[1];
+                    seqState->prevOffset[1] = seqState->prevOffset[0];
+                    seqState->prevOffset[0] = offset;
+                }
+            } else {
+                offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+                {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                    temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                    if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                    seqState->prevOffset[1] = seqState->prevOffset[0];
+                    seqState->prevOffset[0] = offset = temp;
+        }   }   }
         seq.offset = offset;
     }
 
-    seq.matchLength = mlBase
-                    + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0);  /* <=  16 bits */
+    seq.matchLength = mlBase;
+    if (mlBits > 0)
+        seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
     if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
         BIT_reloadDStream(&seqState->DStream);
-    if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+    if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
         BIT_reloadDStream(&seqState->DStream);
     /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
     ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
 
-    seq.litLength = llBase
-                  + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0);    /* <=  16 bits */
+    seq.litLength = llBase;
+    if (llBits > 0)
+        seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
     if (MEM_32bits())
         BIT_reloadDStream(&seqState->DStream);
 
     DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
 
-    /* ANS state update */
-    ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
-    ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
-    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
-    ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
+    /* ANS state update
+     * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
+     * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
+     * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
+     * better option, so it is the default for other compilers. But, if you
+     * measure that it is worse, please put up a pull request.
+     */
+    {
+#if defined(__GNUC__) && !defined(__clang__)
+        const int kUseUpdateFseState = 1;
+#else
+        const int kUseUpdateFseState = 0;
+#endif
+        if (kUseUpdateFseState) {
+            ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
+            ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
+        } else {
+            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo);    /* <=  9 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo);  /* <=  8 bits */
+        }
+    }
 
     return seq;
 }
 
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+{
+    size_t const windowSize = dctx->fParams.windowSize;
+    /* No dictionary used. */
+    if (dctx->dictContentEndForFuzzing == NULL) return 0;
+    /* Dictionary is our prefix. */
+    if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
+    /* Dictionary is not our ext-dict. */
+    if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
+    /* Dictionary is not within our window size. */
+    if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
+    /* Dictionary is active. */
+    return 1;
+}
+
+MEM_STATIC void ZSTD_assertValidSequence(
+        ZSTD_DCtx const* dctx,
+        BYTE const* op, BYTE const* oend,
+        seq_t const seq,
+        BYTE const* prefixStart, BYTE const* virtualStart)
+{
+#if DEBUGLEVEL >= 1
+    size_t const windowSize = dctx->fParams.windowSize;
+    size_t const sequenceSize = seq.litLength + seq.matchLength;
+    BYTE const* const oLitEnd = op + seq.litLength;
+    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+    assert(op <= oend);
+    assert((size_t)(oend - op) >= sequenceSize);
+    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
+    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+        /* Offset must be within the dictionary. */
+        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+        assert(seq.offset <= windowSize + dictSize);
+    } else {
+        /* Offset must be within our window. */
+        assert(seq.offset <= windowSize);
+    }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
 FORCE_INLINE_TEMPLATE size_t
 DONT_VECTORIZE
 ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
                                void* dst, size_t maxDstSize,
                          const void* seqStart, size_t seqSize, int nbSeq,
-                         const ZSTD_longOffset_e isLongOffset)
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
|
|
909
1104
|
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
|
910
1105
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
911
1106
|
DEBUGLOG(5, "ZSTD_decompressSequences_body");
|
1107
|
+
(void)frame;
|
912
1108
|
|
913
1109
|
/* Regen sequences */
|
914
1110
|
if (nbSeq) {
|
@@ -917,38 +1113,97 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
917
1113
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
918
1114
|
RETURN_ERROR_IF(
|
919
1115
|
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
|
920
|
-
corruption_detected);
|
1116
|
+
corruption_detected, "");
|
921
1117
|
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
922
1118
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
923
1119
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
1120
|
+
assert(dst != NULL);
|
924
1121
|
|
925
1122
|
ZSTD_STATIC_ASSERT(
|
926
1123
|
BIT_DStream_unfinished < BIT_DStream_completed &&
|
927
1124
|
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
928
1125
|
BIT_DStream_completed < BIT_DStream_overflow);
|
929
1126
|
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
1127
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
1128
|
+
/* Align the decompression loop to 32 + 16 bytes.
|
1129
|
+
*
|
1130
|
+
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
|
1131
|
+
* speed swings based on the alignment of the decompression loop. This
|
1132
|
+
* performance swing is caused by parts of the decompression loop falling
|
1133
|
+
* out of the DSB. The entire decompression loop should fit in the DSB,
|
1134
|
+
* when it can't we get much worse performance. You can measure if you've
|
1135
|
+
* hit the good case or the bad case with this perf command for some
|
1136
|
+
* compressed file test.zst:
|
1137
|
+
*
|
1138
|
+
* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
|
1139
|
+
* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
|
1140
|
+
*
|
1141
|
+
* If you see most cycles served out of the MITE you've hit the bad case.
|
1142
|
+
* If you see most cycles served out of the DSB you've hit the good case.
|
1143
|
+
* If it is pretty even then you may be in an okay case.
|
1144
|
+
*
|
1145
|
+
* This issue has been reproduced on the following CPUs:
|
1146
|
+
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
1147
|
+
* Use Instruments->Counters to get DSB/MITE cycles.
|
1148
|
+
* I never got performance swings, but I was able to
|
1149
|
+
* go from the good case of mostly DSB to half of the
|
1150
|
+
* cycles served from MITE.
|
1151
|
+
* - Coffeelake: Intel i9-9900k
|
1152
|
+
* - Coffeelake: Intel i7-9700k
|
1153
|
+
*
|
1154
|
+
* I haven't been able to reproduce the instability or DSB misses on any
|
1155
|
+
* of the following CPUS:
|
1156
|
+
* - Haswell
|
1157
|
+
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
|
1158
|
+
* - Skylake
|
1159
|
+
*
|
1160
|
+
* If you are seeing performance stability this script can help test.
|
1161
|
+
* It tests on 4 commits in zstd where I saw performance change.
|
1162
|
+
*
|
1163
|
+
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
1164
|
+
*/
|
1165
|
+
__asm__(".p2align 6");
|
1166
|
+
__asm__("nop");
|
1167
|
+
__asm__(".p2align 5");
|
1168
|
+
__asm__("nop");
|
1169
|
+
# if __GNUC__ >= 9
|
1170
|
+
/* better for gcc-9 and gcc-10, worse for clang and gcc-8 */
|
1171
|
+
__asm__(".p2align 3");
|
1172
|
+
# else
|
1173
|
+
__asm__(".p2align 4");
|
1174
|
+
# endif
|
1175
|
+
#endif
|
1176
|
+
for ( ; ; ) {
|
1177
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
1178
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
1179
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1180
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1181
|
+
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
1182
|
+
#endif
|
1183
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
1184
|
+
return oneSeqSize;
|
1185
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
1186
|
+
op += oneSeqSize;
|
1187
|
+
if (UNLIKELY(!--nbSeq))
|
1188
|
+
break;
|
1189
|
+
BIT_reloadDStream(&(seqState.DStream));
|
1190
|
+
}
 
     /* check if reached exact end */
     DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-    RETURN_ERROR_IF(nbSeq, corruption_detected);
-    RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
+    RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+    RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
     /* save reps for next block */
     { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
     }
 
     /* last literal segment */
     {   size_t const lastLLSize = litEnd - litPtr;
-        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
-        memcpy(op, litPtr, lastLLSize);
-        op += lastLLSize;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
     }
 
     return op-ostart;
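A recurring mechanical change throughout this file is that `RETURN_ERROR_IF` now takes a third, printf-style message argument (here just `""`). A simplified sketch of the pattern, assuming the real macro's behavior of returning a negated `ZSTD_error_*` code; the actual definition lives in zstd's internal error headers and also logs the message, file, and line in debug builds:

    /* Simplified sketch only. Error codes are returned as negated
     * size_t values, which callers detect with ZSTD_isError(). */
    #define RETURN_ERROR_IF(cond, name, msg) \
        do { if (cond) return (size_t)-ZSTD_error_##name; } while (0)

So `RETURN_ERROR_IF(nbSeq, corruption_detected, "")` returns `(size_t)-ZSTD_error_corruption_detected` when sequences remain undecoded at end of block.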
@@ -958,103 +1213,43 @@ static size_t
 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                                  const void* seqStart, size_t seqSize, int nbSeq,
-                                 const ZSTD_longOffset_e isLongOffset)
+                                 const ZSTD_longOffset_e isLongOffset,
+                                 const int frame)
 {
-    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
-
-
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
-{
-    seq_t seq;
-    U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
-    U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
-    U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
-    U32 const totalBits = llBits+mlBits+ofBits;
-    U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
-    U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
-    U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
-
-    /* sequence */
-    {   size_t offset;
-        if (!ofBits)
-            offset = 0;
-        else {
-            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
-            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
-            assert(ofBits <= MaxOff);
-            if (MEM_32bits() && longOffsets) {
-                U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
-                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
-                if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
-                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
-            } else {
-                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
-                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
-            }
-        }
 
-        if (ofBits <= 1) {
-            offset += (llBase==0);
-            if (offset) {
-                size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-                temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
-                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
-                seqState->prevOffset[1] = seqState->prevOffset[0];
-                seqState->prevOffset[0] = offset = temp;
-            } else {
-                offset = seqState->prevOffset[0];
-            }
-        } else {
-            seqState->prevOffset[2] = seqState->prevOffset[1];
-            seqState->prevOffset[1] = seqState->prevOffset[0];
-            seqState->prevOffset[0] = offset;
-        }
-        seq.offset = offset;
-    }
-
-    seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
-    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
-        BIT_reloadDStream(&seqState->DStream);
-    if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
-        BIT_reloadDStream(&seqState->DStream);
-    /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
-    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
-
-    seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
-    if (MEM_32bits())
-        BIT_reloadDStream(&seqState->DStream);
-
-    {   size_t const pos = seqState->pos + seq.litLength;
-        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
-        seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-                                                   * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
-        seqState->pos = pos + seq.matchLength;
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                   const BYTE* const prefixStart, const BYTE* const dictEnd)
+{
+    prefetchPos += sequence.litLength;
+    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
     }
-
-    /* ANS state update */
-    ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
-    ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
-    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
-    ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
-
-    return seq;
+    return prefetchPos + sequence.matchLength;
 }
 
+/* This decoding function employs prefetching
+ * to reduce latency impact of cache misses.
+ * It's generally employed when block contains a significant portion of long-distance matches
+ * or when coupled with a "cold" dictionary */
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_decompressSequencesLong_body(
                ZSTD_DCtx* dctx,
                void* dst, size_t maxDstSize,
                const void* seqStart, size_t seqSize, int nbSeq,
-               const ZSTD_longOffset_e isLongOffset)
+               const ZSTD_longOffset_e isLongOffset,
+               const int frame)
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
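`ZSTD_prefetchMatch` above decouples "compute the match address" from "copy the match": the address is derived as soon as a sequence is decoded, `PREFETCH_L1` warms the cache, and the copy happens several sequences later. A minimal sketch of the same software-pipelining idea; all names here are hypothetical, and `__builtin_prefetch` (a GCC/Clang builtin) stands in for zstd's `PREFETCH_L1` wrapper:

    #include <stddef.h>
    #include <string.h>

    #define QUEUE 8                 /* power of two, like STORED_SEQS */
    #define QUEUE_MASK (QUEUE-1)

    typedef struct { const char* src; size_t len; } job_t;

    /* Prefetch each job's source QUEUE iterations before copying it,
     * so the copy hits a warm cache line instead of stalling on DRAM. */
    static void copy_all(char* dst, const job_t* jobs, size_t nbJobs)
    {
        job_t queue[QUEUE];
        size_t i;
        for (i = 0; i < nbJobs; i++) {
            if (i >= QUEUE) {       /* drain the slot before reusing it */
                job_t const j = queue[i & QUEUE_MASK];
                memcpy(dst, j.src, j.len); dst += j.len;
            }
            __builtin_prefetch(jobs[i].src);   /* issue the load early */
            queue[i & QUEUE_MASK] = jobs[i];
        }
        /* drain the remaining queued jobs in order */
        for (i = (nbJobs < QUEUE) ? 0 : nbJobs - QUEUE; i < nbJobs; i++) {
            job_t const j = queue[i & QUEUE_MASK];
            memcpy(dst, j.src, j.len); dst += j.len;
        }
    }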
@@ -1062,51 +1257,62 @@ ZSTD_decompressSequencesLong_body(
     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    (void)frame;
 
     /* Regen sequences */
     if (nbSeq) {
-#define STORED_SEQS 4
+#define STORED_SEQS 8
 #define STORED_SEQS_MASK (STORED_SEQS-1)
-#define ADVANCED_SEQS 4
+#define ADVANCED_SEQS STORED_SEQS
         seq_t sequences[STORED_SEQS];
         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
         seqState_t seqState;
         int seqNb;
+        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
         dctx->fseEntropy = 1;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-
-        seqState.pos = (size_t)(op-prefixStart);
-        seqState.dictEnd = dictEnd;
+        assert(dst != NULL);
         assert(iend >= ip);
         RETURN_ERROR_IF(
             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
-            corruption_detected);
+            corruption_detected, "");
         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
 
         /* prepare in advance */
         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
-            sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
-            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb] = sequence;
         }
-        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
 
         /* decode and decompress */
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
-            seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
             sequences[seqNb & STORED_SEQS_MASK] = sequence;
             op += oneSeqSize;
         }
-        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
 
         /* finish queue */
         seqNb -= seqAdvance;
         for ( ; seqNb<nbSeq ; seqNb++) {
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
            op += oneSeqSize;
        }
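STORED_SEQS grows from 4 to 8 here (the removed values are reconstructed from zstd 1.4.2) and must remain a power of two, because `seqNb & STORED_SEQS_MASK` relies on the identity that masking by N-1 equals modulo N only for power-of-two N. A quick self-contained check of that identity:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned const N = 8;                /* must be a power of two */
        unsigned x;
        for (x = 0; x < 100; x++)
            assert((x & (N-1)) == (x % N));  /* mask == modulo here */
        puts("ring-buffer masking ok");
        return 0;
    }

This is why the diff also rewrites ADVANCED_SEQS as STORED_SEQS rather than an independent constant: the two must stay in lockstep for the ring indexing to be valid.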
@@ -1117,9 +1323,11 @@ ZSTD_decompressSequencesLong_body(
 
     /* last literal segment */
     {   size_t const lastLLSize = litEnd - litPtr;
-        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
-        memcpy(op, litPtr, lastLLSize);
-        op += lastLLSize;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
     }
 
     return op-ostart;
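The new `if (op != NULL)` guard (here and in the short-offset variant earlier) matters even when `lastLLSize` is zero: passing NULL to `memcpy` is undefined behavior regardless of the length, and optimizers are entitled to exploit it. A compact illustration of the defensive pattern, assuming nothing beyond standard C:

    #include <string.h>

    /* memcpy(NULL, src, 0) is formally undefined behavior; guarding the
     * pointer, as the diff does, keeps the zero-length (empty output)
     * case safe. */
    static void safe_append(char* dst, const char* src, size_t n)
    {
        if (dst != NULL) {
            memcpy(dst, src, n);
        }
    }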
@@ -1129,9 +1337,10 @@ static size_t
 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                                  const void* seqStart, size_t seqSize, int nbSeq,
-                                 const ZSTD_longOffset_e isLongOffset)
+                                 const ZSTD_longOffset_e isLongOffset,
+                                 const int frame)
 {
-    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
 
@@ -1145,9 +1354,10 @@ DONT_VECTORIZE
 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                                  const void* seqStart, size_t seqSize, int nbSeq,
-                                 const ZSTD_longOffset_e isLongOffset)
+                                 const ZSTD_longOffset_e isLongOffset,
+                                 const int frame)
 {
-    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
@@ -1156,9 +1366,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t
 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                                  const void* seqStart, size_t seqSize, int nbSeq,
-                                 const ZSTD_longOffset_e isLongOffset)
+                                 const ZSTD_longOffset_e isLongOffset,
+                                 const int frame)
 {
-    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
 
@@ -1168,21 +1379,23 @@ typedef size_t (*ZSTD_decompressSequences_t)(
                             ZSTD_DCtx* dctx,
                             void* dst, size_t maxDstSize,
                             const void* seqStart, size_t seqSize, int nbSeq,
-                            const ZSTD_longOffset_e isLongOffset);
+                            const ZSTD_longOffset_e isLongOffset,
+                            const int frame);
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
 static size_t
 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
                          const void* seqStart, size_t seqSize, int nbSeq,
-                         const ZSTD_longOffset_e isLongOffset)
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
 {
     DEBUGLOG(5, "ZSTD_decompressSequences");
 #if DYNAMIC_BMI2
     if (dctx->bmi2) {
-        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
     }
 #endif
-    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
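`ZSTD_decompressSequences` shows zstd's function-multiversioning pattern: the same body is compiled twice, once under `TARGET_ATTRIBUTE("bmi2")`, and a flag cached in the context picks the variant at runtime. A minimal sketch of the idea under GCC or Clang on x86-64; `accumulate` is a hypothetical kernel, and the real code caches the CPU check in `dctx->bmi2` rather than querying it per call:

    /* Compile the same kernel twice; dispatch on a runtime CPU check. */
    static int accumulate_default(const int* v, int n)
    {
        int s = 0, i;
        for (i = 0; i < n; i++) s += v[i];
        return s;
    }

    __attribute__((target("bmi2")))
    static int accumulate_bmi2(const int* v, int n)
    {
        int s = 0, i;
        for (i = 0; i < n; i++) s += v[i];  /* same source, BMI2 codegen allowed */
        return s;
    }

    static int accumulate(const int* v, int n)
    {
        if (__builtin_cpu_supports("bmi2"))  /* zstd checks this once per context */
            return accumulate_bmi2(v, n);
        return accumulate_default(v, n);
    }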
@@ -1197,15 +1410,16 @@ static size_t
 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
                   void* dst, size_t maxDstSize,
                   const void* seqStart, size_t seqSize, int nbSeq,
-                  const ZSTD_longOffset_e isLongOffset)
+                  const ZSTD_longOffset_e isLongOffset,
+                  const int frame)
 {
     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
 #if DYNAMIC_BMI2
     if (dctx->bmi2) {
-        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
     }
 #endif
-    return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+    return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
 
@@ -1239,7 +1453,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
 }
 #endif
 
-
 size_t
 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                            void* dst, size_t dstCapacity,
@@ -1255,7 +1468,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
     ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
     DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
 
-    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
 
     /* Decode literals section */
     { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
@@ -1281,6 +1494,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
         ip += seqHSize;
         srcSize -= seqHSize;
 
+        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+
 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
         if ( !usePrefetchDecoder
@@ -1299,23 +1514,34 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
         if (usePrefetchDecoder)
 #endif
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
 #endif
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
         /* else */
-        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
 #endif
     }
 }
 
 
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
+{
+    if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
+        dctx->dictEnd = dctx->previousDstEnd;
+        dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+        dctx->prefixStart = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                       void* dst, size_t dstCapacity,
                       const void* src, size_t srcSize)
 {
     size_t dSize;
-    ZSTD_checkContinuity(dctx, dst);
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
     dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
     dctx->previousDstEnd = (char*)dst + dSize;
     return dSize;
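`ZSTD_checkContinuity` gaining a `dstSize` parameter means a zero-sized destination no longer resets the window tracking. For callers of the block-level API the contract is unchanged: blocks must be decoded in order, and earlier output must stay addressable because it serves as the match window for later blocks. A hedged usage sketch; error handling is trimmed, `decode_blocks` and its inputs are hypothetical, and the block-level entry points sit behind `ZSTD_STATIC_LINKING_ONLY`:

    #define ZSTD_STATIC_LINKING_ONLY   /* block-level API is experimental */
    #include <zstd.h>

    /* Decode a series of raw zstd blocks into one contiguous buffer. */
    size_t decode_blocks(void* dst, size_t dstCapacity,
                         const void* const* blocks, const size_t* sizes, int n)
    {
        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
        char* op = (char*)dst;
        int i;
        ZSTD_decompressBegin(dctx);              /* reset window tracking */
        for (i = 0; i < n; i++) {
            size_t const r = ZSTD_decompressBlock(dctx, op,
                                 dstCapacity - (size_t)(op - (char*)dst),
                                 blocks[i], sizes[i]);
            if (ZSTD_isError(r)) { ZSTD_freeDCtx(dctx); return r; }
            op += r;                             /* keep output contiguous */
        }
        ZSTD_freeDCtx(dctx);
        return (size_t)(op - (char*)dst);
    }

If the caller cannot keep the output contiguous, `ZSTD_checkContinuity` is what converts the previously written region into a virtual dictionary, which is why `ZSTD_decompressBlock` now forwards `dstCapacity` to it.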