zstdlib 0.13.0-x86-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (129) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +6 -0
  3. data/CHANGES.md +107 -0
  4. data/Gemfile +3 -0
  5. data/README.md +107 -0
  6. data/Rakefile +59 -0
  7. data/ext/zstdlib_c/extconf.rb +59 -0
  8. data/ext/zstdlib_c/ruby/zlib-2.2/zstdlib.c +4675 -0
  9. data/ext/zstdlib_c/ruby/zlib-2.3/zstdlib.c +4702 -0
  10. data/ext/zstdlib_c/ruby/zlib-2.4/zstdlib.c +4859 -0
  11. data/ext/zstdlib_c/ruby/zlib-2.5/zstdlib.c +4864 -0
  12. data/ext/zstdlib_c/ruby/zlib-2.6/zstdlib.c +4906 -0
  13. data/ext/zstdlib_c/ruby/zlib-2.7/zstdlib.c +4895 -0
  14. data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
  15. data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
  16. data/ext/zstdlib_c/ruby/zlib-3.2/zstdlib.c +5090 -0
  17. data/ext/zstdlib_c/ruby/zlib-3.3/zstdlib.c +5090 -0
  18. data/ext/zstdlib_c/zlib-1.3.1/adler32.c +164 -0
  19. data/ext/zstdlib_c/zlib-1.3.1/compress.c +75 -0
  20. data/ext/zstdlib_c/zlib-1.3.1/crc32.c +1049 -0
  21. data/ext/zstdlib_c/zlib-1.3.1/crc32.h +9446 -0
  22. data/ext/zstdlib_c/zlib-1.3.1/deflate.c +2139 -0
  23. data/ext/zstdlib_c/zlib-1.3.1/deflate.h +377 -0
  24. data/ext/zstdlib_c/zlib-1.3.1/gzclose.c +23 -0
  25. data/ext/zstdlib_c/zlib-1.3.1/gzguts.h +214 -0
  26. data/ext/zstdlib_c/zlib-1.3.1/gzlib.c +582 -0
  27. data/ext/zstdlib_c/zlib-1.3.1/gzread.c +602 -0
  28. data/ext/zstdlib_c/zlib-1.3.1/gzwrite.c +631 -0
  29. data/ext/zstdlib_c/zlib-1.3.1/infback.c +628 -0
  30. data/ext/zstdlib_c/zlib-1.3.1/inffast.c +320 -0
  31. data/ext/zstdlib_c/zlib-1.3.1/inffast.h +11 -0
  32. data/ext/zstdlib_c/zlib-1.3.1/inffixed.h +94 -0
  33. data/ext/zstdlib_c/zlib-1.3.1/inflate.c +1526 -0
  34. data/ext/zstdlib_c/zlib-1.3.1/inflate.h +126 -0
  35. data/ext/zstdlib_c/zlib-1.3.1/inftrees.c +299 -0
  36. data/ext/zstdlib_c/zlib-1.3.1/inftrees.h +62 -0
  37. data/ext/zstdlib_c/zlib-1.3.1/trees.c +1117 -0
  38. data/ext/zstdlib_c/zlib-1.3.1/trees.h +128 -0
  39. data/ext/zstdlib_c/zlib-1.3.1/uncompr.c +85 -0
  40. data/ext/zstdlib_c/zlib-1.3.1/zconf.h +543 -0
  41. data/ext/zstdlib_c/zlib-1.3.1/zlib.h +1938 -0
  42. data/ext/zstdlib_c/zlib-1.3.1/zutil.c +299 -0
  43. data/ext/zstdlib_c/zlib-1.3.1/zutil.h +254 -0
  44. data/ext/zstdlib_c/zlib.mk +14 -0
  45. data/ext/zstdlib_c/zlibwrapper/zlibwrapper.c +10 -0
  46. data/ext/zstdlib_c/zlibwrapper.mk +14 -0
  47. data/ext/zstdlib_c/zstd-1.5.6/lib/common/allocations.h +55 -0
  48. data/ext/zstdlib_c/zstd-1.5.6/lib/common/bits.h +200 -0
  49. data/ext/zstdlib_c/zstd-1.5.6/lib/common/bitstream.h +457 -0
  50. data/ext/zstdlib_c/zstd-1.5.6/lib/common/compiler.h +450 -0
  51. data/ext/zstdlib_c/zstd-1.5.6/lib/common/cpu.h +249 -0
  52. data/ext/zstdlib_c/zstd-1.5.6/lib/common/debug.c +30 -0
  53. data/ext/zstdlib_c/zstd-1.5.6/lib/common/debug.h +116 -0
  54. data/ext/zstdlib_c/zstd-1.5.6/lib/common/entropy_common.c +340 -0
  55. data/ext/zstdlib_c/zstd-1.5.6/lib/common/error_private.c +63 -0
  56. data/ext/zstdlib_c/zstd-1.5.6/lib/common/error_private.h +168 -0
  57. data/ext/zstdlib_c/zstd-1.5.6/lib/common/fse.h +640 -0
  58. data/ext/zstdlib_c/zstd-1.5.6/lib/common/fse_decompress.c +313 -0
  59. data/ext/zstdlib_c/zstd-1.5.6/lib/common/huf.h +286 -0
  60. data/ext/zstdlib_c/zstd-1.5.6/lib/common/mem.h +426 -0
  61. data/ext/zstdlib_c/zstd-1.5.6/lib/common/pool.c +371 -0
  62. data/ext/zstdlib_c/zstd-1.5.6/lib/common/pool.h +90 -0
  63. data/ext/zstdlib_c/zstd-1.5.6/lib/common/portability_macros.h +158 -0
  64. data/ext/zstdlib_c/zstd-1.5.6/lib/common/threading.c +182 -0
  65. data/ext/zstdlib_c/zstd-1.5.6/lib/common/threading.h +150 -0
  66. data/ext/zstdlib_c/zstd-1.5.6/lib/common/xxhash.c +18 -0
  67. data/ext/zstdlib_c/zstd-1.5.6/lib/common/xxhash.h +7020 -0
  68. data/ext/zstdlib_c/zstd-1.5.6/lib/common/zstd_common.c +48 -0
  69. data/ext/zstdlib_c/zstd-1.5.6/lib/common/zstd_deps.h +111 -0
  70. data/ext/zstdlib_c/zstd-1.5.6/lib/common/zstd_internal.h +392 -0
  71. data/ext/zstdlib_c/zstd-1.5.6/lib/common/zstd_trace.h +163 -0
  72. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/clevels.h +134 -0
  73. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/fse_compress.c +625 -0
  74. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/hist.c +181 -0
  75. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/hist.h +75 -0
  76. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/huf_compress.c +1464 -0
  77. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress.c +7153 -0
  78. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_internal.h +1534 -0
  79. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_literals.c +235 -0
  80. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_literals.h +39 -0
  81. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_sequences.c +442 -0
  82. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_sequences.h +54 -0
  83. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_superblock.c +688 -0
  84. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_superblock.h +32 -0
  85. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_cwksp.h +748 -0
  86. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_double_fast.c +770 -0
  87. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_double_fast.h +50 -0
  88. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_fast.c +968 -0
  89. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_fast.h +38 -0
  90. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_lazy.c +2199 -0
  91. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_lazy.h +202 -0
  92. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_ldm.c +730 -0
  93. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_ldm.h +117 -0
  94. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_ldm_geartab.h +106 -0
  95. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_opt.c +1576 -0
  96. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_opt.h +80 -0
  97. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstdmt_compress.c +1882 -0
  98. data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstdmt_compress.h +113 -0
  99. data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/huf_decompress.c +1944 -0
  100. data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/huf_decompress_amd64.S +595 -0
  101. data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_ddict.c +244 -0
  102. data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_ddict.h +44 -0
  103. data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_decompress.c +2407 -0
  104. data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_decompress_block.c +2215 -0
  105. data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_decompress_block.h +73 -0
  106. data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_decompress_internal.h +240 -0
  107. data/ext/zstdlib_c/zstd-1.5.6/lib/zdict.h +474 -0
  108. data/ext/zstdlib_c/zstd-1.5.6/lib/zstd.h +3089 -0
  109. data/ext/zstdlib_c/zstd-1.5.6/lib/zstd_errors.h +114 -0
  110. data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzclose.c +26 -0
  111. data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzcompatibility.h +68 -0
  112. data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzguts.h +229 -0
  113. data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzlib.c +587 -0
  114. data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzread.c +637 -0
  115. data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzwrite.c +631 -0
  116. data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/zstd_zlibwrapper.c +1200 -0
  117. data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/zstd_zlibwrapper.h +91 -0
  118. data/ext/zstdlib_c/zstd.mk +15 -0
  119. data/lib/2.4/zstdlib_c.so +0 -0
  120. data/lib/2.5/zstdlib_c.so +0 -0
  121. data/lib/2.6/zstdlib_c.so +0 -0
  122. data/lib/2.7/zstdlib_c.so +0 -0
  123. data/lib/3.0/zstdlib_c.so +0 -0
  124. data/lib/3.1/zstdlib_c.so +0 -0
  125. data/lib/3.2/zstdlib_c.so +0 -0
  126. data/lib/3.3/zstdlib_c.so +0 -0
  127. data/lib/zstdlib.rb +6 -0
  128. data/test/zstdlib_test.rb +21 -0
  129. metadata +243 -0
@@ -0,0 +1,2215 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ /* zstd_decompress_block :
12
+ * this module takes care of decompressing _compressed_ block */
13
+
14
+ /*-*******************************************************
15
+ * Dependencies
16
+ *********************************************************/
17
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18
+ #include "../common/compiler.h" /* prefetch */
19
+ #include "../common/cpu.h" /* bmi2 */
20
+ #include "../common/mem.h" /* low level memory routines */
21
+ #define FSE_STATIC_LINKING_ONLY
22
+ #include "../common/fse.h"
23
+ #include "../common/huf.h"
24
+ #include "../common/zstd_internal.h"
25
+ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
26
+ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
27
+ #include "zstd_decompress_block.h"
28
+ #include "../common/bits.h" /* ZSTD_highbit32 */
29
+
30
+ /*_*******************************************************
31
+ * Macros
32
+ **********************************************************/
33
+
34
+ /* These two optional macros force the use one way or another of the two
35
+ * ZSTD_decompressSequences implementations. You can't force in both directions
36
+ * at the same time.
37
+ */
38
+ #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39
+ defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40
+ #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41
+ #endif
42
+
43
+
44
+ /*_*******************************************************
45
+ * Memory operations
46
+ **********************************************************/
47
+ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
48
+
49
+
50
+ /*-*************************************************************
51
+ * Block decoding
52
+ ***************************************************************/
53
+
54
+ static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
55
+ {
56
+ size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
57
+ assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
58
+ return blockSizeMax;
59
+ }
60
+
61
+ /*! ZSTD_getcBlockSize() :
62
+ * Provides the size of compressed block from block header `src` */
63
+ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
64
+ blockProperties_t* bpPtr)
65
+ {
66
+ RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
67
+
68
+ { U32 const cBlockHeader = MEM_readLE24(src);
69
+ U32 const cSize = cBlockHeader >> 3;
70
+ bpPtr->lastBlock = cBlockHeader & 1;
71
+ bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
72
+ bpPtr->origSize = cSize; /* only useful for RLE */
73
+ if (bpPtr->blockType == bt_rle) return 1;
74
+ RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
75
+ return cSize;
76
+ }
77
+ }
78
+
79
+ /* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
80
+ static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
81
+ const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
82
+ {
83
+ size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
84
+ assert(litSize <= blockSizeMax);
85
+ assert(dctx->isFrameDecompression || streaming == not_streaming);
86
+ assert(expectedWriteSize <= blockSizeMax);
87
+ if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
88
+ /* If we aren't streaming, we can just put the literals after the output
89
+ * of the current block. We don't need to worry about overwriting the
90
+ * extDict of our window, because it doesn't exist.
91
+ * So if we have space after the end of the block, just put it there.
92
+ */
93
+ dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
94
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
95
+ dctx->litBufferLocation = ZSTD_in_dst;
96
+ } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
97
+ /* Literals fit entirely within the extra buffer, put them there to avoid
98
+ * having to split the literals.
99
+ */
100
+ dctx->litBuffer = dctx->litExtraBuffer;
101
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
102
+ dctx->litBufferLocation = ZSTD_not_in_dst;
103
+ } else {
104
+ assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
105
+ /* Literals must be split between the output block and the extra lit
106
+ * buffer. We fill the extra lit buffer with the tail of the literals,
107
+ * and put the rest of the literals at the end of the block, with
108
+ * WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
109
+ * This MUST not write more than our maxBlockSize beyond dst, because in
110
+ * streaming mode, that could overwrite part of our extDict window.
111
+ */
112
+ if (splitImmediately) {
113
+ /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
114
+ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
115
+ dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
116
+ } else {
117
+ /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
118
+ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
119
+ dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
120
+ }
121
+ dctx->litBufferLocation = ZSTD_split;
122
+ assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
123
+ }
124
+ }
125
+
126
+ /*! ZSTD_decodeLiteralsBlock() :
127
+ * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
128
+ * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
129
+ * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
130
+ * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
131
+ *
132
+ * @return : nb of bytes read from src (< srcSize )
133
+ * note : symbol not declared but exposed for fullbench */
134
+ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
135
+ const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
136
+ void* dst, size_t dstCapacity, const streaming_operation streaming)
137
+ {
138
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
139
+ RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
140
+
141
+ { const BYTE* const istart = (const BYTE*) src;
142
+ symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
143
+ size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
144
+
145
+ switch(litEncType)
146
+ {
147
+ case set_repeat:
148
+ DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
149
+ RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
150
+ ZSTD_FALLTHROUGH;
151
+
152
+ case set_compressed:
153
+ RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
154
+ { size_t lhSize, litSize, litCSize;
155
+ U32 singleStream=0;
156
+ U32 const lhlCode = (istart[0] >> 2) & 3;
157
+ U32 const lhc = MEM_readLE32(istart);
158
+ size_t hufSuccess;
159
+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
160
+ int const flags = 0
161
+ | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
162
+ | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
163
+ switch(lhlCode)
164
+ {
165
+ case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
166
+ /* 2 - 2 - 10 - 10 */
167
+ singleStream = !lhlCode;
168
+ lhSize = 3;
169
+ litSize = (lhc >> 4) & 0x3FF;
170
+ litCSize = (lhc >> 14) & 0x3FF;
171
+ break;
172
+ case 2:
173
+ /* 2 - 2 - 14 - 14 */
174
+ lhSize = 4;
175
+ litSize = (lhc >> 4) & 0x3FFF;
176
+ litCSize = lhc >> 18;
177
+ break;
178
+ case 3:
179
+ /* 2 - 2 - 18 - 18 */
180
+ lhSize = 5;
181
+ litSize = (lhc >> 4) & 0x3FFFF;
182
+ litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
183
+ break;
184
+ }
185
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
186
+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
187
+ if (!singleStream)
188
+ RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
189
+ "Not enough literals (%zu) for the 4-streams mode (min %u)",
190
+ litSize, MIN_LITERALS_FOR_4_STREAMS);
191
+ RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
192
+ RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
193
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
194
+
195
+ /* prefetch huffman table if cold */
196
+ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
197
+ PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
198
+ }
199
+
200
+ if (litEncType==set_repeat) {
201
+ if (singleStream) {
202
+ hufSuccess = HUF_decompress1X_usingDTable(
203
+ dctx->litBuffer, litSize, istart+lhSize, litCSize,
204
+ dctx->HUFptr, flags);
205
+ } else {
206
+ assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
207
+ hufSuccess = HUF_decompress4X_usingDTable(
208
+ dctx->litBuffer, litSize, istart+lhSize, litCSize,
209
+ dctx->HUFptr, flags);
210
+ }
211
+ } else {
212
+ if (singleStream) {
213
+ #if defined(HUF_FORCE_DECOMPRESS_X2)
214
+ hufSuccess = HUF_decompress1X_DCtx_wksp(
215
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
216
+ istart+lhSize, litCSize, dctx->workspace,
217
+ sizeof(dctx->workspace), flags);
218
+ #else
219
+ hufSuccess = HUF_decompress1X1_DCtx_wksp(
220
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
221
+ istart+lhSize, litCSize, dctx->workspace,
222
+ sizeof(dctx->workspace), flags);
223
+ #endif
224
+ } else {
225
+ hufSuccess = HUF_decompress4X_hufOnly_wksp(
226
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
227
+ istart+lhSize, litCSize, dctx->workspace,
228
+ sizeof(dctx->workspace), flags);
229
+ }
230
+ }
231
+ if (dctx->litBufferLocation == ZSTD_split)
232
+ {
233
+ assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
234
+ ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
235
+ ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
236
+ dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
237
+ dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
238
+ assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
239
+ }
240
+
241
+ RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
242
+
243
+ dctx->litPtr = dctx->litBuffer;
244
+ dctx->litSize = litSize;
245
+ dctx->litEntropy = 1;
246
+ if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
247
+ return litCSize + lhSize;
248
+ }
249
+
250
+ case set_basic:
251
+ { size_t litSize, lhSize;
252
+ U32 const lhlCode = ((istart[0]) >> 2) & 3;
253
+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
254
+ switch(lhlCode)
255
+ {
256
+ case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
257
+ lhSize = 1;
258
+ litSize = istart[0] >> 3;
259
+ break;
260
+ case 1:
261
+ lhSize = 2;
262
+ litSize = MEM_readLE16(istart) >> 4;
263
+ break;
264
+ case 3:
265
+ lhSize = 3;
266
+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
267
+ litSize = MEM_readLE24(istart) >> 4;
268
+ break;
269
+ }
270
+
271
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
272
+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
273
+ RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
274
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
275
+ if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
276
+ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
277
+ if (dctx->litBufferLocation == ZSTD_split)
278
+ {
279
+ ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
280
+ ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
281
+ }
282
+ else
283
+ {
284
+ ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
285
+ }
286
+ dctx->litPtr = dctx->litBuffer;
287
+ dctx->litSize = litSize;
288
+ return lhSize+litSize;
289
+ }
290
+ /* direct reference into compressed stream */
291
+ dctx->litPtr = istart+lhSize;
292
+ dctx->litSize = litSize;
293
+ dctx->litBufferEnd = dctx->litPtr + litSize;
294
+ dctx->litBufferLocation = ZSTD_not_in_dst;
295
+ return lhSize+litSize;
296
+ }
297
+
298
+ case set_rle:
299
+ { U32 const lhlCode = ((istart[0]) >> 2) & 3;
300
+ size_t litSize, lhSize;
301
+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
302
+ switch(lhlCode)
303
+ {
304
+ case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
305
+ lhSize = 1;
306
+ litSize = istart[0] >> 3;
307
+ break;
308
+ case 1:
309
+ lhSize = 2;
310
+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
311
+ litSize = MEM_readLE16(istart) >> 4;
312
+ break;
313
+ case 3:
314
+ lhSize = 3;
315
+ RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
316
+ litSize = MEM_readLE24(istart) >> 4;
317
+ break;
318
+ }
319
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
320
+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
321
+ RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
322
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
323
+ if (dctx->litBufferLocation == ZSTD_split)
324
+ {
325
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
326
+ ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
327
+ }
328
+ else
329
+ {
330
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
331
+ }
332
+ dctx->litPtr = dctx->litBuffer;
333
+ dctx->litSize = litSize;
334
+ return lhSize+1;
335
+ }
336
+ default:
337
+ RETURN_ERROR(corruption_detected, "impossible");
338
+ }
339
+ }
340
+ }
341
+
342
+ /* Hidden declaration for fullbench */
343
+ size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
344
+ const void* src, size_t srcSize,
345
+ void* dst, size_t dstCapacity);
346
+ size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
347
+ const void* src, size_t srcSize,
348
+ void* dst, size_t dstCapacity)
349
+ {
350
+ dctx->isFrameDecompression = 0;
351
+ return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
352
+ }
353
+
354
+ /* Default FSE distribution tables.
355
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
356
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
357
+ * They were generated programmatically with following method :
358
+ * - start from default distributions, present in /lib/common/zstd_internal.h
359
+ * - generate tables normally, using ZSTD_buildFSETable()
360
+ * - printout the content of tables
361
+ * - pretify output, report below, test with fuzzer to ensure it's correct */
362
+
363
+ /* Default FSE distribution table for Literal Lengths */
364
+ static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
365
+ { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
366
+ /* nextState, nbAddBits, nbBits, baseVal */
367
+ { 0, 0, 4, 0}, { 16, 0, 4, 0},
368
+ { 32, 0, 5, 1}, { 0, 0, 5, 3},
369
+ { 0, 0, 5, 4}, { 0, 0, 5, 6},
370
+ { 0, 0, 5, 7}, { 0, 0, 5, 9},
371
+ { 0, 0, 5, 10}, { 0, 0, 5, 12},
372
+ { 0, 0, 6, 14}, { 0, 1, 5, 16},
373
+ { 0, 1, 5, 20}, { 0, 1, 5, 22},
374
+ { 0, 2, 5, 28}, { 0, 3, 5, 32},
375
+ { 0, 4, 5, 48}, { 32, 6, 5, 64},
376
+ { 0, 7, 5, 128}, { 0, 8, 6, 256},
377
+ { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
378
+ { 32, 0, 4, 0}, { 0, 0, 4, 1},
379
+ { 0, 0, 5, 2}, { 32, 0, 5, 4},
380
+ { 0, 0, 5, 5}, { 32, 0, 5, 7},
381
+ { 0, 0, 5, 8}, { 32, 0, 5, 10},
382
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
383
+ { 32, 1, 5, 16}, { 0, 1, 5, 18},
384
+ { 32, 1, 5, 22}, { 0, 2, 5, 24},
385
+ { 32, 3, 5, 32}, { 0, 3, 5, 40},
386
+ { 0, 6, 4, 64}, { 16, 6, 4, 64},
387
+ { 32, 7, 5, 128}, { 0, 9, 6, 512},
388
+ { 0, 11, 6, 2048}, { 48, 0, 4, 0},
389
+ { 16, 0, 4, 1}, { 32, 0, 5, 2},
390
+ { 32, 0, 5, 3}, { 32, 0, 5, 5},
391
+ { 32, 0, 5, 6}, { 32, 0, 5, 8},
392
+ { 32, 0, 5, 9}, { 32, 0, 5, 11},
393
+ { 32, 0, 5, 12}, { 0, 0, 6, 15},
394
+ { 32, 1, 5, 18}, { 32, 1, 5, 20},
395
+ { 32, 2, 5, 24}, { 32, 2, 5, 28},
396
+ { 32, 3, 5, 40}, { 32, 4, 5, 48},
397
+ { 0, 16, 6,65536}, { 0, 15, 6,32768},
398
+ { 0, 14, 6,16384}, { 0, 13, 6, 8192},
399
+ }; /* LL_defaultDTable */
400
+
401
+ /* Default FSE distribution table for Offset Codes */
402
+ static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
403
+ { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
404
+ /* nextState, nbAddBits, nbBits, baseVal */
405
+ { 0, 0, 5, 0}, { 0, 6, 4, 61},
406
+ { 0, 9, 5, 509}, { 0, 15, 5,32765},
407
+ { 0, 21, 5,2097149}, { 0, 3, 5, 5},
408
+ { 0, 7, 4, 125}, { 0, 12, 5, 4093},
409
+ { 0, 18, 5,262141}, { 0, 23, 5,8388605},
410
+ { 0, 5, 5, 29}, { 0, 8, 4, 253},
411
+ { 0, 14, 5,16381}, { 0, 20, 5,1048573},
412
+ { 0, 2, 5, 1}, { 16, 7, 4, 125},
413
+ { 0, 11, 5, 2045}, { 0, 17, 5,131069},
414
+ { 0, 22, 5,4194301}, { 0, 4, 5, 13},
415
+ { 16, 8, 4, 253}, { 0, 13, 5, 8189},
416
+ { 0, 19, 5,524285}, { 0, 1, 5, 1},
417
+ { 16, 6, 4, 61}, { 0, 10, 5, 1021},
418
+ { 0, 16, 5,65533}, { 0, 28, 5,268435453},
419
+ { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
420
+ { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
421
+ }; /* OF_defaultDTable */
422
+
423
+
424
+ /* Default FSE distribution table for Match Lengths */
425
+ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
426
+ { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
427
+ /* nextState, nbAddBits, nbBits, baseVal */
428
+ { 0, 0, 6, 3}, { 0, 0, 4, 4},
429
+ { 32, 0, 5, 5}, { 0, 0, 5, 6},
430
+ { 0, 0, 5, 8}, { 0, 0, 5, 9},
431
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
432
+ { 0, 0, 6, 16}, { 0, 0, 6, 19},
433
+ { 0, 0, 6, 22}, { 0, 0, 6, 25},
434
+ { 0, 0, 6, 28}, { 0, 0, 6, 31},
435
+ { 0, 0, 6, 34}, { 0, 1, 6, 37},
436
+ { 0, 1, 6, 41}, { 0, 2, 6, 47},
437
+ { 0, 3, 6, 59}, { 0, 4, 6, 83},
438
+ { 0, 7, 6, 131}, { 0, 9, 6, 515},
439
+ { 16, 0, 4, 4}, { 0, 0, 4, 5},
440
+ { 32, 0, 5, 6}, { 0, 0, 5, 7},
441
+ { 32, 0, 5, 9}, { 0, 0, 5, 10},
442
+ { 0, 0, 6, 12}, { 0, 0, 6, 15},
443
+ { 0, 0, 6, 18}, { 0, 0, 6, 21},
444
+ { 0, 0, 6, 24}, { 0, 0, 6, 27},
445
+ { 0, 0, 6, 30}, { 0, 0, 6, 33},
446
+ { 0, 1, 6, 35}, { 0, 1, 6, 39},
447
+ { 0, 2, 6, 43}, { 0, 3, 6, 51},
448
+ { 0, 4, 6, 67}, { 0, 5, 6, 99},
449
+ { 0, 8, 6, 259}, { 32, 0, 4, 4},
450
+ { 48, 0, 4, 4}, { 16, 0, 4, 5},
451
+ { 32, 0, 5, 7}, { 32, 0, 5, 8},
452
+ { 32, 0, 5, 10}, { 32, 0, 5, 11},
453
+ { 0, 0, 6, 14}, { 0, 0, 6, 17},
454
+ { 0, 0, 6, 20}, { 0, 0, 6, 23},
455
+ { 0, 0, 6, 26}, { 0, 0, 6, 29},
456
+ { 0, 0, 6, 32}, { 0, 16, 6,65539},
457
+ { 0, 15, 6,32771}, { 0, 14, 6,16387},
458
+ { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
459
+ { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
460
+ }; /* ML_defaultDTable */
461
+
462
+
463
+ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
464
+ {
465
+ void* ptr = dt;
466
+ ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
467
+ ZSTD_seqSymbol* const cell = dt + 1;
468
+
469
+ DTableH->tableLog = 0;
470
+ DTableH->fastMode = 0;
471
+
472
+ cell->nbBits = 0;
473
+ cell->nextState = 0;
474
+ assert(nbAddBits < 255);
475
+ cell->nbAdditionalBits = nbAddBits;
476
+ cell->baseValue = baseValue;
477
+ }
478
+
479
+
480
+ /* ZSTD_buildFSETable() :
481
+ * generate FSE decoding table for one symbol (ll, ml or off)
482
+ * cannot fail if input is valid =>
483
+ * all inputs are presumed validated at this stage */
484
+ FORCE_INLINE_TEMPLATE
485
+ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
486
+ const short* normalizedCounter, unsigned maxSymbolValue,
487
+ const U32* baseValue, const U8* nbAdditionalBits,
488
+ unsigned tableLog, void* wksp, size_t wkspSize)
489
+ {
490
+ ZSTD_seqSymbol* const tableDecode = dt+1;
491
+ U32 const maxSV1 = maxSymbolValue + 1;
492
+ U32 const tableSize = 1 << tableLog;
493
+
494
+ U16* symbolNext = (U16*)wksp;
495
+ BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
496
+ U32 highThreshold = tableSize - 1;
497
+
498
+
499
+ /* Sanity Checks */
500
+ assert(maxSymbolValue <= MaxSeq);
501
+ assert(tableLog <= MaxFSELog);
502
+ assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
503
+ (void)wkspSize;
504
+ /* Init, lay down lowprob symbols */
505
+ { ZSTD_seqSymbol_header DTableH;
506
+ DTableH.tableLog = tableLog;
507
+ DTableH.fastMode = 1;
508
+ { S16 const largeLimit= (S16)(1 << (tableLog-1));
509
+ U32 s;
510
+ for (s=0; s<maxSV1; s++) {
511
+ if (normalizedCounter[s]==-1) {
512
+ tableDecode[highThreshold--].baseValue = s;
513
+ symbolNext[s] = 1;
514
+ } else {
515
+ if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
516
+ assert(normalizedCounter[s]>=0);
517
+ symbolNext[s] = (U16)normalizedCounter[s];
518
+ } } }
519
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
520
+ }
521
+
522
+ /* Spread symbols */
523
+ assert(tableSize <= 512);
524
+ /* Specialized symbol spreading for the case when there are
525
+ * no low probability (-1 count) symbols. When compressing
526
+ * small blocks we avoid low probability symbols to hit this
527
+ * case, since header decoding speed matters more.
528
+ */
529
+ if (highThreshold == tableSize - 1) {
530
+ size_t const tableMask = tableSize-1;
531
+ size_t const step = FSE_TABLESTEP(tableSize);
532
+ /* First lay down the symbols in order.
533
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
534
+ * misses since small blocks generally have small table logs, so nearly
535
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
536
+ * our buffer to handle the over-write.
537
+ */
538
+ {
539
+ U64 const add = 0x0101010101010101ull;
540
+ size_t pos = 0;
541
+ U64 sv = 0;
542
+ U32 s;
543
+ for (s=0; s<maxSV1; ++s, sv += add) {
544
+ int i;
545
+ int const n = normalizedCounter[s];
546
+ MEM_write64(spread + pos, sv);
547
+ for (i = 8; i < n; i += 8) {
548
+ MEM_write64(spread + pos + i, sv);
549
+ }
550
+ assert(n>=0);
551
+ pos += (size_t)n;
552
+ }
553
+ }
554
+ /* Now we spread those positions across the table.
555
+ * The benefit of doing it in two stages is that we avoid the
556
+ * variable size inner loop, which caused lots of branch misses.
557
+ * Now we can run through all the positions without any branch misses.
558
+ * We unroll the loop twice, since that is what empirically worked best.
559
+ */
560
+ {
561
+ size_t position = 0;
562
+ size_t s;
563
+ size_t const unroll = 2;
564
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
565
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
566
+ size_t u;
567
+ for (u = 0; u < unroll; ++u) {
568
+ size_t const uPosition = (position + (u * step)) & tableMask;
569
+ tableDecode[uPosition].baseValue = spread[s + u];
570
+ }
571
+ position = (position + (unroll * step)) & tableMask;
572
+ }
573
+ assert(position == 0);
574
+ }
575
+ } else {
576
+ U32 const tableMask = tableSize-1;
577
+ U32 const step = FSE_TABLESTEP(tableSize);
578
+ U32 s, position = 0;
579
+ for (s=0; s<maxSV1; s++) {
580
+ int i;
581
+ int const n = normalizedCounter[s];
582
+ for (i=0; i<n; i++) {
583
+ tableDecode[position].baseValue = s;
584
+ position = (position + step) & tableMask;
585
+ while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
586
+ } }
587
+ assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
588
+ }
589
+
590
+ /* Build Decoding table */
591
+ {
592
+ U32 u;
593
+ for (u=0; u<tableSize; u++) {
594
+ U32 const symbol = tableDecode[u].baseValue;
595
+ U32 const nextState = symbolNext[symbol]++;
596
+ tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
597
+ tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
598
+ assert(nbAdditionalBits[symbol] < 255);
599
+ tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
600
+ tableDecode[u].baseValue = baseValue[symbol];
601
+ }
602
+ }
603
+ }
604
+
605
+ /* Avoids the FORCE_INLINE of the _body() function. */
606
+ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
607
+ const short* normalizedCounter, unsigned maxSymbolValue,
608
+ const U32* baseValue, const U8* nbAdditionalBits,
609
+ unsigned tableLog, void* wksp, size_t wkspSize)
610
+ {
611
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
612
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
613
+ }
614
+
615
+ #if DYNAMIC_BMI2
616
+ BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
617
+ const short* normalizedCounter, unsigned maxSymbolValue,
618
+ const U32* baseValue, const U8* nbAdditionalBits,
619
+ unsigned tableLog, void* wksp, size_t wkspSize)
620
+ {
621
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
622
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
623
+ }
624
+ #endif
625
+
626
+ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
627
+ const short* normalizedCounter, unsigned maxSymbolValue,
628
+ const U32* baseValue, const U8* nbAdditionalBits,
629
+ unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
630
+ {
631
+ #if DYNAMIC_BMI2
632
+ if (bmi2) {
633
+ ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
634
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
635
+ return;
636
+ }
637
+ #endif
638
+ (void)bmi2;
639
+ ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
640
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
641
+ }
642
+
643
+
644
+ /*! ZSTD_buildSeqTable() :
645
+ * @return : nb bytes read from src,
646
+ * or an error code if it fails */
647
+ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
648
+ symbolEncodingType_e type, unsigned max, U32 maxLog,
649
+ const void* src, size_t srcSize,
650
+ const U32* baseValue, const U8* nbAdditionalBits,
651
+ const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
652
+ int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
653
+ int bmi2)
654
+ {
655
+ switch(type)
656
+ {
657
+ case set_rle :
658
+ RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
659
+ RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
660
+ { U32 const symbol = *(const BYTE*)src;
661
+ U32 const baseline = baseValue[symbol];
662
+ U8 const nbBits = nbAdditionalBits[symbol];
663
+ ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
664
+ }
665
+ *DTablePtr = DTableSpace;
666
+ return 1;
667
+ case set_basic :
668
+ *DTablePtr = defaultTable;
669
+ return 0;
670
+ case set_repeat:
671
+ RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
672
+ /* prefetch FSE table if used */
673
+ if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
674
+ const void* const pStart = *DTablePtr;
675
+ size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
676
+ PREFETCH_AREA(pStart, pSize);
677
+ }
678
+ return 0;
679
+ case set_compressed :
680
+ { unsigned tableLog;
681
+ S16 norm[MaxSeq+1];
682
+ size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
683
+ RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
684
+ RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
685
+ ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
686
+ *DTablePtr = DTableSpace;
687
+ return headerSize;
688
+ }
689
+ default :
690
+ assert(0);
691
+ RETURN_ERROR(GENERIC, "impossible");
692
+ }
693
+ }
694
+
695
+ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
696
+ const void* src, size_t srcSize)
697
+ {
698
+ const BYTE* const istart = (const BYTE*)src;
699
+ const BYTE* const iend = istart + srcSize;
700
+ const BYTE* ip = istart;
701
+ int nbSeq;
702
+ DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
703
+
704
+ /* check */
705
+ RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
706
+
707
+ /* SeqHead */
708
+ nbSeq = *ip++;
709
+ if (nbSeq > 0x7F) {
710
+ if (nbSeq == 0xFF) {
711
+ RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
712
+ nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
713
+ ip+=2;
714
+ } else {
715
+ RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
716
+ nbSeq = ((nbSeq-0x80)<<8) + *ip++;
717
+ }
718
+ }
719
+ *nbSeqPtr = nbSeq;
720
+
721
+ if (nbSeq == 0) {
722
+ /* No sequence : section ends immediately */
723
+ RETURN_ERROR_IF(ip != iend, corruption_detected,
724
+ "extraneous data present in the Sequences section");
725
+ return (size_t)(ip - istart);
726
+ }
727
+
728
+ /* FSE table descriptors */
729
+ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
730
+ RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
731
+ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
732
+ symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
733
+ symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
734
+ ip++;
735
+
736
+ /* Build DTables */
737
+ { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
738
+ LLtype, MaxLL, LLFSELog,
739
+ ip, iend-ip,
740
+ LL_base, LL_bits,
741
+ LL_defaultDTable, dctx->fseEntropy,
742
+ dctx->ddictIsCold, nbSeq,
743
+ dctx->workspace, sizeof(dctx->workspace),
744
+ ZSTD_DCtx_get_bmi2(dctx));
745
+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
746
+ ip += llhSize;
747
+ }
748
+
749
+ { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
750
+ OFtype, MaxOff, OffFSELog,
751
+ ip, iend-ip,
752
+ OF_base, OF_bits,
753
+ OF_defaultDTable, dctx->fseEntropy,
754
+ dctx->ddictIsCold, nbSeq,
755
+ dctx->workspace, sizeof(dctx->workspace),
756
+ ZSTD_DCtx_get_bmi2(dctx));
757
+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
758
+ ip += ofhSize;
759
+ }
760
+
761
+ { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
762
+ MLtype, MaxML, MLFSELog,
763
+ ip, iend-ip,
764
+ ML_base, ML_bits,
765
+ ML_defaultDTable, dctx->fseEntropy,
766
+ dctx->ddictIsCold, nbSeq,
767
+ dctx->workspace, sizeof(dctx->workspace),
768
+ ZSTD_DCtx_get_bmi2(dctx));
769
+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
770
+ ip += mlhSize;
771
+ }
772
+ }
773
+
774
+ return ip-istart;
775
+ }
776
+
777
+
778
+ typedef struct {
779
+ size_t litLength;
780
+ size_t matchLength;
781
+ size_t offset;
782
+ } seq_t;
783
+
784
+ typedef struct {
785
+ size_t state;
786
+ const ZSTD_seqSymbol* table;
787
+ } ZSTD_fseState;
788
+
789
+ typedef struct {
790
+ BIT_DStream_t DStream;
791
+ ZSTD_fseState stateLL;
792
+ ZSTD_fseState stateOffb;
793
+ ZSTD_fseState stateML;
794
+ size_t prevOffset[ZSTD_REP_NUM];
795
+ } seqState_t;
796
+
797
+ /*! ZSTD_overlapCopy8() :
798
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
799
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
800
+ *
801
+ * Precondition: *ip <= *op
802
+ * Postcondition: *op - *op >= 8
803
+ */
804
+ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
805
+ assert(*ip <= *op);
806
+ if (offset < 8) {
807
+ /* close range match, overlap */
808
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
809
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
810
+ int const sub2 = dec64table[offset];
811
+ (*op)[0] = (*ip)[0];
812
+ (*op)[1] = (*ip)[1];
813
+ (*op)[2] = (*ip)[2];
814
+ (*op)[3] = (*ip)[3];
815
+ *ip += dec32table[offset];
816
+ ZSTD_copy4(*op+4, *ip);
817
+ *ip -= sub2;
818
+ } else {
819
+ ZSTD_copy8(*op, *ip);
820
+ }
821
+ *ip += 8;
822
+ *op += 8;
823
+ assert(*op - *ip >= 8);
824
+ }
825
+
826
+ /*! ZSTD_safecopy() :
827
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
828
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
829
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
830
+ * should be fast for a single long sequence, but can be slow for several short sequences.
831
+ *
832
+ * @param ovtype controls the overlap detection
833
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
834
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
835
+ * The src buffer must be before the dst buffer.
836
+ */
837
+ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
838
+ ptrdiff_t const diff = op - ip;
839
+ BYTE* const oend = op + length;
840
+
841
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
842
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
843
+
844
+ if (length < 8) {
845
+ /* Handle short lengths. */
846
+ while (op < oend) *op++ = *ip++;
847
+ return;
848
+ }
849
+ if (ovtype == ZSTD_overlap_src_before_dst) {
850
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
851
+ assert(length >= 8);
852
+ ZSTD_overlapCopy8(&op, &ip, diff);
853
+ length -= 8;
854
+ assert(op - ip >= 8);
855
+ assert(op <= oend);
856
+ }
857
+
858
+ if (oend <= oend_w) {
859
+ /* No risk of overwrite. */
860
+ ZSTD_wildcopy(op, ip, length, ovtype);
861
+ return;
862
+ }
863
+ if (op <= oend_w) {
864
+ /* Wildcopy until we get close to the end. */
865
+ assert(oend > oend_w);
866
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
867
+ ip += oend_w - op;
868
+ op += oend_w - op;
869
+ }
870
+ /* Handle the leftovers. */
871
+ while (op < oend) *op++ = *ip++;
872
+ }
873
+
874
+ /* ZSTD_safecopyDstBeforeSrc():
875
+ * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
876
+ * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
877
+ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
878
+ ptrdiff_t const diff = op - ip;
879
+ BYTE* const oend = op + length;
880
+
881
+ if (length < 8 || diff > -8) {
882
+ /* Handle short lengths, close overlaps, and dst not before src. */
883
+ while (op < oend) *op++ = *ip++;
884
+ return;
885
+ }
886
+
887
+ if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
888
+ ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
889
+ ip += oend - WILDCOPY_OVERLENGTH - op;
890
+ op += oend - WILDCOPY_OVERLENGTH - op;
891
+ }
892
+
893
+ /* Handle the leftovers. */
894
+ while (op < oend) *op++ = *ip++;
895
+ }
896
+
897
+ /* ZSTD_execSequenceEnd():
898
+ * This version handles cases that are near the end of the output buffer. It requires
899
+ * more careful checks to make sure there is no overflow. By separating out these hard
900
+ * and unlikely cases, we can speed up the common cases.
901
+ *
902
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
903
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
904
+ */
905
+ FORCE_NOINLINE
906
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
907
+ size_t ZSTD_execSequenceEnd(BYTE* op,
908
+ BYTE* const oend, seq_t sequence,
909
+ const BYTE** litPtr, const BYTE* const litLimit,
910
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
911
+ {
912
+ BYTE* const oLitEnd = op + sequence.litLength;
913
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
914
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
915
+ const BYTE* match = oLitEnd - sequence.offset;
916
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
917
+
918
+ /* bounds checks : careful of address space overflow in 32-bit mode */
919
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
920
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
921
+ assert(op < op + sequenceLength);
922
+ assert(oLitEnd < op + sequenceLength);
923
+
924
+ /* copy literals */
925
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
926
+ op = oLitEnd;
927
+ *litPtr = iLitEnd;
928
+
929
+ /* copy Match */
930
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
931
+ /* offset beyond prefix */
932
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
933
+ match = dictEnd - (prefixStart - match);
934
+ if (match + sequence.matchLength <= dictEnd) {
935
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
936
+ return sequenceLength;
937
+ }
938
+ /* span extDict & currentPrefixSegment */
939
+ { size_t const length1 = dictEnd - match;
940
+ ZSTD_memmove(oLitEnd, match, length1);
941
+ op = oLitEnd + length1;
942
+ sequence.matchLength -= length1;
943
+ match = prefixStart;
944
+ }
945
+ }
946
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
947
+ return sequenceLength;
948
+ }
949
+
950
+ /* ZSTD_execSequenceEndSplitLitBuffer():
951
+ * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
952
+ */
953
+ FORCE_NOINLINE
954
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
955
+ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
956
+ BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
957
+ const BYTE** litPtr, const BYTE* const litLimit,
958
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
959
+ {
960
+ BYTE* const oLitEnd = op + sequence.litLength;
961
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
962
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
963
+ const BYTE* match = oLitEnd - sequence.offset;
964
+
965
+
966
+ /* bounds checks : careful of address space overflow in 32-bit mode */
967
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
968
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
969
+ assert(op < op + sequenceLength);
970
+ assert(oLitEnd < op + sequenceLength);
971
+
972
+ /* copy literals */
973
+ RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
974
+ ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
975
+ op = oLitEnd;
976
+ *litPtr = iLitEnd;
977
+
978
+ /* copy Match */
979
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
980
+ /* offset beyond prefix */
981
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
982
+ match = dictEnd - (prefixStart - match);
983
+ if (match + sequence.matchLength <= dictEnd) {
984
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
985
+ return sequenceLength;
986
+ }
987
+ /* span extDict & currentPrefixSegment */
988
+ { size_t const length1 = dictEnd - match;
989
+ ZSTD_memmove(oLitEnd, match, length1);
990
+ op = oLitEnd + length1;
991
+ sequence.matchLength -= length1;
992
+ match = prefixStart;
993
+ }
994
+ }
995
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
996
+ return sequenceLength;
997
+ }
998
+
999
+ HINT_INLINE
1000
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1001
+ size_t ZSTD_execSequence(BYTE* op,
1002
+ BYTE* const oend, seq_t sequence,
1003
+ const BYTE** litPtr, const BYTE* const litLimit,
1004
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1005
+ {
1006
+ BYTE* const oLitEnd = op + sequence.litLength;
1007
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1008
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1009
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
1010
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1011
+ const BYTE* match = oLitEnd - sequence.offset;
1012
+
1013
+ assert(op != NULL /* Precondition */);
1014
+ assert(oend_w < oend /* No underflow */);
1015
+
1016
+ #if defined(__aarch64__)
1017
+ /* prefetch sequence starting from match that will be used for copy later */
1018
+ PREFETCH_L1(match);
1019
+ #endif
1020
+ /* Handle edge cases in a slow path:
1021
+ * - Read beyond end of literals
1022
+ * - Match end is within WILDCOPY_OVERLIMIT of oend
1023
+ * - 32-bit mode and the match length overflows
1024
+ */
1025
+ if (UNLIKELY(
1026
+ iLitEnd > litLimit ||
1027
+ oMatchEnd > oend_w ||
1028
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1029
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1030
+
1031
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
1032
+ assert(op <= oLitEnd /* No overflow */);
1033
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
1034
+ assert(oMatchEnd <= oend /* No underflow */);
1035
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
1036
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
1037
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
1038
+
1039
+ /* Copy Literals:
1040
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1041
+ * We likely don't need the full 32-byte wildcopy.
1042
+ */
1043
+ assert(WILDCOPY_OVERLENGTH >= 16);
1044
+ ZSTD_copy16(op, (*litPtr));
1045
+ if (UNLIKELY(sequence.litLength > 16)) {
1046
+ ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
1047
+ }
1048
+ op = oLitEnd;
1049
+ *litPtr = iLitEnd; /* update for next sequence */
1050
+
1051
+ /* Copy Match */
1052
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1053
+ /* offset beyond prefix -> go into extDict */
1054
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1055
+ match = dictEnd + (match - prefixStart);
1056
+ if (match + sequence.matchLength <= dictEnd) {
1057
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1058
+ return sequenceLength;
1059
+ }
1060
+ /* span extDict & currentPrefixSegment */
1061
+ { size_t const length1 = dictEnd - match;
1062
+ ZSTD_memmove(oLitEnd, match, length1);
1063
+ op = oLitEnd + length1;
1064
+ sequence.matchLength -= length1;
1065
+ match = prefixStart;
1066
+ }
1067
+ }
1068
+ /* Match within prefix of 1 or more bytes */
1069
+ assert(op <= oMatchEnd);
1070
+ assert(oMatchEnd <= oend_w);
1071
+ assert(match >= prefixStart);
1072
+ assert(sequence.matchLength >= 1);
1073
+
1074
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1075
+ * without overlap checking.
1076
+ */
1077
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1078
+ /* We bet on a full wildcopy for matches, since we expect matches to be
1079
+ * longer than literals (in general). In silesia, ~10% of matches are longer
1080
+ * than 16 bytes.
1081
+ */
1082
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1083
+ return sequenceLength;
1084
+ }
1085
+ assert(sequence.offset < WILDCOPY_VECLEN);
1086
+
1087
+ /* Copy 8 bytes and spread the offset to be >= 8. */
1088
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
1089
+
1090
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
1091
+ if (sequence.matchLength > 8) {
1092
+ assert(op < oMatchEnd);
1093
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
1094
+ }
1095
+ return sequenceLength;
1096
+ }
1097
+
1098
+ HINT_INLINE
1099
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1100
+ size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
1101
+ BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
1102
+ const BYTE** litPtr, const BYTE* const litLimit,
1103
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1104
+ {
1105
+ BYTE* const oLitEnd = op + sequence.litLength;
1106
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1107
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1108
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1109
+ const BYTE* match = oLitEnd - sequence.offset;
1110
+
1111
+ assert(op != NULL /* Precondition */);
1112
+ assert(oend_w < oend /* No underflow */);
1113
+ /* Handle edge cases in a slow path:
1114
+ * - Read beyond end of literals
1115
+ * - Match end is within WILDCOPY_OVERLIMIT of oend
1116
+ * - 32-bit mode and the match length overflows
1117
+ */
1118
+ if (UNLIKELY(
1119
+ iLitEnd > litLimit ||
1120
+ oMatchEnd > oend_w ||
1121
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1122
+ return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1123
+
1124
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
1125
+ assert(op <= oLitEnd /* No overflow */);
1126
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
1127
+ assert(oMatchEnd <= oend /* No underflow */);
1128
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
1129
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
1130
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
1131
+
1132
+ /* Copy Literals:
1133
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1134
+ * We likely don't need the full 32-byte wildcopy.
1135
+ */
1136
+ assert(WILDCOPY_OVERLENGTH >= 16);
1137
+ ZSTD_copy16(op, (*litPtr));
1138
+ if (UNLIKELY(sequence.litLength > 16)) {
1139
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
1140
+ }
1141
+ op = oLitEnd;
1142
+ *litPtr = iLitEnd; /* update for next sequence */
1143
+
1144
+ /* Copy Match */
1145
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1146
+ /* offset beyond prefix -> go into extDict */
1147
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1148
+ match = dictEnd + (match - prefixStart);
1149
+ if (match + sequence.matchLength <= dictEnd) {
1150
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1151
+ return sequenceLength;
1152
+ }
1153
+ /* span extDict & currentPrefixSegment */
1154
+ { size_t const length1 = dictEnd - match;
1155
+ ZSTD_memmove(oLitEnd, match, length1);
1156
+ op = oLitEnd + length1;
1157
+ sequence.matchLength -= length1;
1158
+ match = prefixStart;
1159
+ } }
1160
+ /* Match within prefix of 1 or more bytes */
1161
+ assert(op <= oMatchEnd);
1162
+ assert(oMatchEnd <= oend_w);
1163
+ assert(match >= prefixStart);
1164
+ assert(sequence.matchLength >= 1);
1165
+
1166
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1167
+ * without overlap checking.
1168
+ */
1169
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1170
+ /* We bet on a full wildcopy for matches, since we expect matches to be
1171
+ * longer than literals (in general). In silesia, ~10% of matches are longer
1172
+ * than 16 bytes.
1173
+ */
1174
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1175
+ return sequenceLength;
1176
+ }
1177
+ assert(sequence.offset < WILDCOPY_VECLEN);
1178
+
1179
+ /* Copy 8 bytes and spread the offset to be >= 8. */
1180
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
1181
+
1182
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
1183
+ if (sequence.matchLength > 8) {
1184
+ assert(op < oMatchEnd);
1185
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
1186
+ }
1187
+ return sequenceLength;
1188
+ }
1189
+
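For orientation, the fast path above ultimately does the same job as this minimal sketch: copy the sequence's literals, then copy matchLength bytes starting offset bytes behind the current write position, byte by byte so that overlapping matches (offset < matchLength) replicate correctly. This is illustrative only; demo_seq and demo_execSequence are not part of libzstd, and every bounds check, wildcopy and extDict case handled above is deliberately omitted.

#include <stddef.h>
#include <string.h>

/* simplified stand-in for seq_t, for illustration only */
typedef struct { size_t litLength; size_t matchLength; size_t offset; } demo_seq;

/* Applies one decoded sequence to the output: literals first, then the match. */
static size_t demo_execSequence(unsigned char* op, const unsigned char** litPtr, demo_seq seq)
{
    memcpy(op, *litPtr, seq.litLength);            /* literals come straight from the literal buffer */
    *litPtr += seq.litLength;
    op += seq.litLength;
    {   const unsigned char* const match = op - seq.offset;  /* match points into already-decoded output */
        size_t i;
        for (i = 0; i < seq.matchLength; i++)      /* byte loop keeps overlapping copies correct */
            op[i] = match[i];
    }
    return seq.litLength + seq.matchLength;        /* bytes produced, mirroring ZSTD_execSequence's return */
}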
1190
+
1191
+ static void
1192
+ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1193
+ {
1194
+ const void* ptr = dt;
1195
+ const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1196
+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1197
+ DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
1198
+ (U32)DStatePtr->state, DTableH->tableLog);
1199
+ BIT_reloadDStream(bitD);
1200
+ DStatePtr->table = dt + 1;
1201
+ }
1202
+
1203
+ FORCE_INLINE_TEMPLATE void
1204
+ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
1205
+ {
1206
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
1207
+ DStatePtr->state = nextState + lowBits;
1208
+ }
1209
+
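Put differently, each FSE state transition above reads nbBits fresh bits from the bitstream and adds them to the nextState base taken from the current table cell: with nextState = 32 and nbBits = 5, for instance, reading the bit pattern 01101 (13) moves the decoder to state 32 + 13 = 45. (The concrete numbers are only an illustration; the real values come from the decoding tables built earlier in this file.)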
1210
+ /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
1211
+ * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
1212
+ * bits before reloading. This value is the maximum number of bits we read
1213
+ * after reloading when we are decoding long offsets.
1214
+ */
1215
+ #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
1216
+ (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
1217
+ ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
1218
+ : 0)
1219
+
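With the constants used in this release (ZSTD_WINDOWLOG_MAX_32 = 30 and STREAM_ACCUMULATOR_MIN_32 = 25; the static assert further down checks that the result is 5), the macro works out to 30 - 25 = 5. Concretely, when a 32-bit build decodes a 30-bit offset on the long-offset path below, it first reads the top 30 - 5 = 25 bits, shifts them left by 5, reloads the bit container, and then reads the remaining 5 low bits.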
1220
+ typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
1221
+
1222
+ /**
1223
+ * ZSTD_decodeSequence():
1224
+ * @p longOffsets : tells the decoder to reload more bits while decoding large offsets
1225
+ * only used in 32-bit mode
1226
+ * @return : Sequence (litL + matchL + offset)
1227
+ */
1228
+ FORCE_INLINE_TEMPLATE seq_t
1229
+ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
1230
+ {
1231
+ seq_t seq;
1232
+ /*
1233
+ * ZSTD_seqSymbol is a 64 bits wide structure.
1234
+ * It can be loaded in one operation
1235
+ * and its fields extracted by simply shifting or bit-extracting on aarch64.
1236
+ * GCC doesn't recognize this and generates unnecessary extra ldr/ldrb/ldrh
1237
+ * operations that cause performance drop. This can be avoided by using this
1238
+ * ZSTD_memcpy hack.
1239
+ */
1240
+ #if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
1241
+ ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
1242
+ ZSTD_seqSymbol* const llDInfo = &llDInfoS;
1243
+ ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
1244
+ ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
1245
+ ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
1246
+ ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
1247
+ ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
1248
+ #else
1249
+ const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1250
+ const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1251
+ const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1252
+ #endif
1253
+ seq.matchLength = mlDInfo->baseValue;
1254
+ seq.litLength = llDInfo->baseValue;
1255
+ { U32 const ofBase = ofDInfo->baseValue;
1256
+ BYTE const llBits = llDInfo->nbAdditionalBits;
1257
+ BYTE const mlBits = mlDInfo->nbAdditionalBits;
1258
+ BYTE const ofBits = ofDInfo->nbAdditionalBits;
1259
+ BYTE const totalBits = llBits+mlBits+ofBits;
1260
+
1261
+ U16 const llNext = llDInfo->nextState;
1262
+ U16 const mlNext = mlDInfo->nextState;
1263
+ U16 const ofNext = ofDInfo->nextState;
1264
+ U32 const llnbBits = llDInfo->nbBits;
1265
+ U32 const mlnbBits = mlDInfo->nbBits;
1266
+ U32 const ofnbBits = ofDInfo->nbBits;
1267
+
1268
+ assert(llBits <= MaxLLBits);
1269
+ assert(mlBits <= MaxMLBits);
1270
+ assert(ofBits <= MaxOff);
1271
+ /*
1272
+ * gcc has better branch and block analyzers, so the likeliness hints are
1273
+ * sometimes only valuable for clang, where they give around 3-4% of
1274
+ * performance.
1275
+ */
1276
+
1277
+ /* sequence */
1278
+ { size_t offset;
1279
+ if (ofBits > 1) {
1280
+ ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1281
+ ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1282
+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
1283
+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
1284
+ if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1285
+ /* Always read extra bits, this keeps the logic simple,
1286
+ * avoids branches, and avoids accidentally reading 0 bits.
1287
+ */
1288
+ U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
1289
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1290
+ BIT_reloadDStream(&seqState->DStream);
1291
+ offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1292
+ } else {
1293
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1294
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1295
+ }
1296
+ seqState->prevOffset[2] = seqState->prevOffset[1];
1297
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1298
+ seqState->prevOffset[0] = offset;
1299
+ } else {
1300
+ U32 const ll0 = (llDInfo->baseValue == 0);
1301
+ if (LIKELY((ofBits == 0))) {
1302
+ offset = seqState->prevOffset[ll0];
1303
+ seqState->prevOffset[1] = seqState->prevOffset[!ll0];
1304
+ seqState->prevOffset[0] = offset;
1305
+ } else {
1306
+ offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
1307
+ { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1308
+ temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
1309
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1310
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1311
+ seqState->prevOffset[0] = offset = temp;
1312
+ } } }
1313
+ seq.offset = offset;
1314
+ }
1315
+
1316
+ if (mlBits > 0)
1317
+ seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
1318
+
1319
+ if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1320
+ BIT_reloadDStream(&seqState->DStream);
1321
+ if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1322
+ BIT_reloadDStream(&seqState->DStream);
1323
+ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1324
+ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1325
+
1326
+ if (llBits > 0)
1327
+ seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
1328
+
1329
+ if (MEM_32bits())
1330
+ BIT_reloadDStream(&seqState->DStream);
1331
+
1332
+ DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1333
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1334
+
1335
+ if (!isLastSeq) {
1336
+ /* don't update FSE state for last Sequence */
1337
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
1338
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
1339
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1340
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
1341
+ BIT_reloadDStream(&seqState->DStream);
1342
+ }
1343
+ }
1344
+
1345
+ return seq;
1346
+ }
1347
+
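A note on the repeat-offset branch above: prevOffset[] holds the three most recently used offsets, and an offset symbol with zero extra bits simply reuses one of them, the choice depending on whether the sequence carries literals. For example, if prevOffset = {8, 4, 2}, a zero-extra-bit offset code yields 8 when litLength > 0 (ll0 == 0) and 4 when litLength == 0 (ll0 == 1), exactly as offset = prevOffset[ll0] computes; the array is then rotated so the chosen offset becomes prevOffset[0] for the next sequence.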
1348
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1349
+ #if DEBUGLEVEL >= 1
1350
+ static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
1351
+ {
1352
+ size_t const windowSize = dctx->fParams.windowSize;
1353
+ /* No dictionary used. */
1354
+ if (dctx->dictContentEndForFuzzing == NULL) return 0;
1355
+ /* Dictionary is our prefix. */
1356
+ if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
1357
+ /* Dictionary is not our ext-dict. */
1358
+ if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
1359
+ /* Dictionary is not within our window size. */
1360
+ if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
1361
+ /* Dictionary is active. */
1362
+ return 1;
1363
+ }
1364
+ #endif
1365
+
1366
+ static void ZSTD_assertValidSequence(
1367
+ ZSTD_DCtx const* dctx,
1368
+ BYTE const* op, BYTE const* oend,
1369
+ seq_t const seq,
1370
+ BYTE const* prefixStart, BYTE const* virtualStart)
1371
+ {
1372
+ #if DEBUGLEVEL >= 1
1373
+ if (dctx->isFrameDecompression) {
1374
+ size_t const windowSize = dctx->fParams.windowSize;
1375
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
1376
+ BYTE const* const oLitEnd = op + seq.litLength;
1377
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
1378
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1379
+ assert(op <= oend);
1380
+ assert((size_t)(oend - op) >= sequenceSize);
1381
+ assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
1382
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1383
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
1384
+ /* Offset must be within the dictionary. */
1385
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1386
+ assert(seq.offset <= windowSize + dictSize);
1387
+ } else {
1388
+ /* Offset must be within our window. */
1389
+ assert(seq.offset <= windowSize);
1390
+ }
1391
+ }
1392
+ #else
1393
+ (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1394
+ #endif
1395
+ }
1396
+ #endif
1397
+
1398
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1399
+
1400
+
1401
+ FORCE_INLINE_TEMPLATE size_t
1402
+ DONT_VECTORIZE
1403
+ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
1404
+ void* dst, size_t maxDstSize,
1405
+ const void* seqStart, size_t seqSize, int nbSeq,
1406
+ const ZSTD_longOffset_e isLongOffset)
1407
+ {
1408
+ const BYTE* ip = (const BYTE*)seqStart;
1409
+ const BYTE* const iend = ip + seqSize;
1410
+ BYTE* const ostart = (BYTE*)dst;
1411
+ BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
1412
+ BYTE* op = ostart;
1413
+ const BYTE* litPtr = dctx->litPtr;
1414
+ const BYTE* litBufferEnd = dctx->litBufferEnd;
1415
+ const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1416
+ const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1417
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1418
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
1419
+
1420
+ /* Literals are split between internal buffer & output buffer */
1421
+ if (nbSeq) {
1422
+ seqState_t seqState;
1423
+ dctx->fseEntropy = 1;
1424
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1425
+ RETURN_ERROR_IF(
1426
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1427
+ corruption_detected, "");
1428
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1429
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1430
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1431
+ assert(dst != NULL);
1432
+
1433
+ ZSTD_STATIC_ASSERT(
1434
+ BIT_DStream_unfinished < BIT_DStream_completed &&
1435
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1436
+ BIT_DStream_completed < BIT_DStream_overflow);
1437
+
1438
+ /* decompress without overrunning litPtr begins */
1439
+ { seq_t sequence = {0,0,0}; /* some static analyzers believe that @sequence is not initialized (it necessarily is, since the for() loop below has at least one iteration) */
1440
+ /* Align the decompression loop to 32 + 16 bytes.
1441
+ *
1442
+ * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1443
+ * speed swings based on the alignment of the decompression loop. This
1444
+ * performance swing is caused by parts of the decompression loop falling
1445
+ * out of the DSB. The entire decompression loop should fit in the DSB,
1446
+ * when it can't we get much worse performance. You can measure if you've
1447
+ * hit the good case or the bad case with this perf command for some
1448
+ * compressed file test.zst:
1449
+ *
1450
+ * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1451
+ * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1452
+ *
1453
+ * If you see most cycles served out of the MITE you've hit the bad case.
1454
+ * If you see most cycles served out of the DSB you've hit the good case.
1455
+ * If it is pretty even then you may be in an okay case.
1456
+ *
1457
+ * This issue has been reproduced on the following CPUs:
1458
+ * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1459
+ * Use Instruments->Counters to get DSB/MITE cycles.
1460
+ * I never got performance swings, but I was able to
1461
+ * go from the good case of mostly DSB to half of the
1462
+ * cycles served from MITE.
1463
+ * - Coffeelake: Intel i9-9900k
1464
+ * - Coffeelake: Intel i7-9700k
1465
+ *
1466
+ * I haven't been able to reproduce the instability or DSB misses on any
1467
+ * of the following CPUs:
1468
+ * - Haswell
1469
+ * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
1470
+ * - Skylake
1471
+ *
1472
+ * Alignment is done for each of the three major decompression loops:
1473
+ * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1474
+ * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1475
+ * - ZSTD_decompressSequences_body
1476
+ * Alignment choices are made to minimize large swings on bad cases and influence on performance
1477
+ * from changes external to this code, rather than to overoptimize on the current commit.
1478
+ *
1479
+ * If you are seeing performance instability, this script can help test.
1480
+ * It tests on 4 commits in zstd where I saw performance change.
1481
+ *
1482
+ * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1483
+ */
1484
+ #if defined(__GNUC__) && defined(__x86_64__)
1485
+ __asm__(".p2align 6");
1486
+ # if __GNUC__ >= 7
1487
+ /* good for gcc-7, gcc-9, and gcc-11 */
1488
+ __asm__("nop");
1489
+ __asm__(".p2align 5");
1490
+ __asm__("nop");
1491
+ __asm__(".p2align 4");
1492
+ # if __GNUC__ == 8 || __GNUC__ == 10
1493
+ /* good for gcc-8 and gcc-10 */
1494
+ __asm__("nop");
1495
+ __asm__(".p2align 3");
1496
+ # endif
1497
+ # endif
1498
+ #endif
1499
+
1500
+ /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
1501
+ for ( ; nbSeq; nbSeq--) {
1502
+ sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
1503
+ if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
1504
+ { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1505
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1506
+ assert(!ZSTD_isError(oneSeqSize));
1507
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1508
+ #endif
1509
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1510
+ return oneSeqSize;
1511
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1512
+ op += oneSeqSize;
1513
+ } }
1514
+ DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
1515
+
1516
+ /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
1517
+ if (nbSeq > 0) {
1518
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1519
+ DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
1520
+ if (leftoverLit) {
1521
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1522
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1523
+ sequence.litLength -= leftoverLit;
1524
+ op += leftoverLit;
1525
+ }
1526
+ litPtr = dctx->litExtraBuffer;
1527
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1528
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1529
+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1530
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1531
+ assert(!ZSTD_isError(oneSeqSize));
1532
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1533
+ #endif
1534
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1535
+ return oneSeqSize;
1536
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1537
+ op += oneSeqSize;
1538
+ }
1539
+ nbSeq--;
1540
+ }
1541
+ }
1542
+
1543
+ if (nbSeq > 0) {
1544
+ /* there is remaining lit from extra buffer */
1545
+
1546
+ #if defined(__GNUC__) && defined(__x86_64__)
1547
+ __asm__(".p2align 6");
1548
+ __asm__("nop");
1549
+ # if __GNUC__ != 7
1550
+ /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
1551
+ __asm__(".p2align 4");
1552
+ __asm__("nop");
1553
+ __asm__(".p2align 3");
1554
+ # elif __GNUC__ >= 11
1555
+ __asm__(".p2align 3");
1556
+ # else
1557
+ __asm__(".p2align 5");
1558
+ __asm__("nop");
1559
+ __asm__(".p2align 3");
1560
+ # endif
1561
+ #endif
1562
+
1563
+ for ( ; nbSeq ; nbSeq--) {
1564
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
1565
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1566
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1567
+ assert(!ZSTD_isError(oneSeqSize));
1568
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1569
+ #endif
1570
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1571
+ return oneSeqSize;
1572
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1573
+ op += oneSeqSize;
1574
+ }
1575
+ }
1576
+
1577
+ /* check if reached exact end */
1578
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1579
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1580
+ DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
1581
+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1582
+ /* save reps for next block */
1583
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1584
+ }
1585
+
1586
+ /* last literal segment */
1587
+ if (dctx->litBufferLocation == ZSTD_split) {
1588
+ /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
1589
+ size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
1590
+ DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
1591
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1592
+ if (op != NULL) {
1593
+ ZSTD_memmove(op, litPtr, lastLLSize);
1594
+ op += lastLLSize;
1595
+ }
1596
+ litPtr = dctx->litExtraBuffer;
1597
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1598
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1599
+ }
1600
+ /* copy last literals from internal buffer */
1601
+ { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
1602
+ DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
1603
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1604
+ if (op != NULL) {
1605
+ ZSTD_memcpy(op, litPtr, lastLLSize);
1606
+ op += lastLLSize;
1607
+ } }
1608
+
1609
+ DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
1610
+ return (size_t)(op - ostart);
1611
+ }
1612
+
1613
+ FORCE_INLINE_TEMPLATE size_t
1614
+ DONT_VECTORIZE
1615
+ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1616
+ void* dst, size_t maxDstSize,
1617
+ const void* seqStart, size_t seqSize, int nbSeq,
1618
+ const ZSTD_longOffset_e isLongOffset)
1619
+ {
1620
+ const BYTE* ip = (const BYTE*)seqStart;
1621
+ const BYTE* const iend = ip + seqSize;
1622
+ BYTE* const ostart = (BYTE*)dst;
1623
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
1624
+ BYTE* op = ostart;
1625
+ const BYTE* litPtr = dctx->litPtr;
1626
+ const BYTE* const litEnd = litPtr + dctx->litSize;
1627
+ const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1628
+ const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1629
+ const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1630
+ DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
1631
+
1632
+ /* Regen sequences */
1633
+ if (nbSeq) {
1634
+ seqState_t seqState;
1635
+ dctx->fseEntropy = 1;
1636
+ { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1637
+ RETURN_ERROR_IF(
1638
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1639
+ corruption_detected, "");
1640
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1641
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1642
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1643
+ assert(dst != NULL);
1644
+
1645
+ #if defined(__GNUC__) && defined(__x86_64__)
1646
+ __asm__(".p2align 6");
1647
+ __asm__("nop");
1648
+ # if __GNUC__ >= 7
1649
+ __asm__(".p2align 5");
1650
+ __asm__("nop");
1651
+ __asm__(".p2align 3");
1652
+ # else
1653
+ __asm__(".p2align 4");
1654
+ __asm__("nop");
1655
+ __asm__(".p2align 3");
1656
+ # endif
1657
+ #endif
1658
+
1659
+ for ( ; nbSeq ; nbSeq--) {
1660
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
1661
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1662
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1663
+ assert(!ZSTD_isError(oneSeqSize));
1664
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1665
+ #endif
1666
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1667
+ return oneSeqSize;
1668
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1669
+ op += oneSeqSize;
1670
+ }
1671
+
1672
+ /* check if reached exact end */
1673
+ assert(nbSeq == 0);
1674
+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1675
+ /* save reps for next block */
1676
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1677
+ }
1678
+
1679
+ /* last literal segment */
1680
+ { size_t const lastLLSize = (size_t)(litEnd - litPtr);
1681
+ DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
1682
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1683
+ if (op != NULL) {
1684
+ ZSTD_memcpy(op, litPtr, lastLLSize);
1685
+ op += lastLLSize;
1686
+ } }
1687
+
1688
+ DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
1689
+ return (size_t)(op - ostart);
1690
+ }
1691
+
1692
+ static size_t
1693
+ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1694
+ void* dst, size_t maxDstSize,
1695
+ const void* seqStart, size_t seqSize, int nbSeq,
1696
+ const ZSTD_longOffset_e isLongOffset)
1697
+ {
1698
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1699
+ }
1700
+
1701
+ static size_t
1702
+ ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1703
+ void* dst, size_t maxDstSize,
1704
+ const void* seqStart, size_t seqSize, int nbSeq,
1705
+ const ZSTD_longOffset_e isLongOffset)
1706
+ {
1707
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1708
+ }
1709
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1710
+
1711
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1712
+
1713
+ FORCE_INLINE_TEMPLATE
1714
+
1715
+ size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1716
+ const BYTE* const prefixStart, const BYTE* const dictEnd)
1717
+ {
1718
+ prefetchPos += sequence.litLength;
1719
+ { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1720
+ /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1721
+ * No consequence though : memory address is only used for prefetching, not for dereferencing */
1722
+ const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
1723
+ PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1724
+ }
1725
+ return prefetchPos + sequence.matchLength;
1726
+ }
1727
+
1728
+ /* This decoding function employs prefetching
1729
+ * to reduce latency impact of cache misses.
1730
+ * It's generally employed when block contains a significant portion of long-distance matches
1731
+ * or when coupled with a "cold" dictionary */
1732
+ FORCE_INLINE_TEMPLATE size_t
1733
+ ZSTD_decompressSequencesLong_body(
1734
+ ZSTD_DCtx* dctx,
1735
+ void* dst, size_t maxDstSize,
1736
+ const void* seqStart, size_t seqSize, int nbSeq,
1737
+ const ZSTD_longOffset_e isLongOffset)
1738
+ {
1739
+ const BYTE* ip = (const BYTE*)seqStart;
1740
+ const BYTE* const iend = ip + seqSize;
1741
+ BYTE* const ostart = (BYTE*)dst;
1742
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
1743
+ BYTE* op = ostart;
1744
+ const BYTE* litPtr = dctx->litPtr;
1745
+ const BYTE* litBufferEnd = dctx->litBufferEnd;
1746
+ const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1747
+ const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1748
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1749
+
1750
+ /* Regen sequences */
1751
+ if (nbSeq) {
1752
+ #define STORED_SEQS 8
1753
+ #define STORED_SEQS_MASK (STORED_SEQS-1)
1754
+ #define ADVANCED_SEQS STORED_SEQS
1755
+ seq_t sequences[STORED_SEQS];
1756
+ int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1757
+ seqState_t seqState;
1758
+ int seqNb;
1759
+ size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
1760
+
1761
+ dctx->fseEntropy = 1;
1762
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1763
+ assert(dst != NULL);
1764
+ assert(iend >= ip);
1765
+ RETURN_ERROR_IF(
1766
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1767
+ corruption_detected, "");
1768
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1769
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1770
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1771
+
1772
+ /* prepare in advance */
1773
+ for (seqNb=0; seqNb<seqAdvance; seqNb++) {
1774
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
1775
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1776
+ sequences[seqNb] = sequence;
1777
+ }
1778
+
1779
+ /* decompress without stomping litBuffer */
1780
+ for (; seqNb < nbSeq; seqNb++) {
1781
+ seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
1782
+
1783
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
1784
+ /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
1785
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1786
+ if (leftoverLit)
1787
+ {
1788
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1789
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1790
+ sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1791
+ op += leftoverLit;
1792
+ }
1793
+ litPtr = dctx->litExtraBuffer;
1794
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1795
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1796
+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1797
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1798
+ assert(!ZSTD_isError(oneSeqSize));
1799
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1800
+ #endif
1801
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1802
+
1803
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1804
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
1805
+ op += oneSeqSize;
1806
+ } }
1807
+ else
1808
+ {
1809
+ /* lit buffer is either wholly contained in the first or second split, or not split at all */
1810
+ size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1811
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1812
+ ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1813
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1814
+ assert(!ZSTD_isError(oneSeqSize));
1815
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1816
+ #endif
1817
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1818
+
1819
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1820
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
1821
+ op += oneSeqSize;
1822
+ }
1823
+ }
1824
+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1825
+
1826
+ /* finish queue */
1827
+ seqNb -= seqAdvance;
1828
+ for ( ; seqNb<nbSeq ; seqNb++) {
1829
+ seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1830
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
1831
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1832
+ if (leftoverLit) {
1833
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1834
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1835
+ sequence->litLength -= leftoverLit;
1836
+ op += leftoverLit;
1837
+ }
1838
+ litPtr = dctx->litExtraBuffer;
1839
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1840
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1841
+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1842
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1843
+ assert(!ZSTD_isError(oneSeqSize));
1844
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1845
+ #endif
1846
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1847
+ op += oneSeqSize;
1848
+ }
1849
+ }
1850
+ else
1851
+ {
1852
+ size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1853
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1854
+ ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1855
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1856
+ assert(!ZSTD_isError(oneSeqSize));
1857
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1858
+ #endif
1859
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1860
+ op += oneSeqSize;
1861
+ }
1862
+ }
1863
+
1864
+ /* save reps for next block */
1865
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1866
+ }
1867
+
1868
+ /* last literal segment */
1869
+ if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
1870
+ size_t const lastLLSize = litBufferEnd - litPtr;
1871
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1872
+ if (op != NULL) {
1873
+ ZSTD_memmove(op, litPtr, lastLLSize);
1874
+ op += lastLLSize;
1875
+ }
1876
+ litPtr = dctx->litExtraBuffer;
1877
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1878
+ }
1879
+ { size_t const lastLLSize = litBufferEnd - litPtr;
1880
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1881
+ if (op != NULL) {
1882
+ ZSTD_memmove(op, litPtr, lastLLSize);
1883
+ op += lastLLSize;
1884
+ }
1885
+ }
1886
+
1887
+ return (size_t)(op - ostart);
1888
+ }
1889
+
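The long-offset body above pipelines decoding and execution through a small ring of STORED_SEQS = 8 pre-decoded sequences: each newly decoded sequence has its match prefetched and is stored at sequences[seqNb & STORED_SEQS_MASK], while the sequence decoded ADVANCED_SEQS = 8 iterations earlier, at index (seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK, is executed, so its match data has had time to reach the cache. Because STORED_SEQS is a power of two the masked index simply wraps; seqNb = 9, for instance, maps to slot 9 & 7 = 1.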
1890
+ static size_t
1891
+ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1892
+ void* dst, size_t maxDstSize,
1893
+ const void* seqStart, size_t seqSize, int nbSeq,
1894
+ const ZSTD_longOffset_e isLongOffset)
1895
+ {
1896
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1897
+ }
1898
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1899
+
1900
+
1901
+
1902
+ #if DYNAMIC_BMI2
1903
+
1904
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1905
+ static BMI2_TARGET_ATTRIBUTE size_t
1906
+ DONT_VECTORIZE
1907
+ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1908
+ void* dst, size_t maxDstSize,
1909
+ const void* seqStart, size_t seqSize, int nbSeq,
1910
+ const ZSTD_longOffset_e isLongOffset)
1911
+ {
1912
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1913
+ }
1914
+ static BMI2_TARGET_ATTRIBUTE size_t
1915
+ DONT_VECTORIZE
1916
+ ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1917
+ void* dst, size_t maxDstSize,
1918
+ const void* seqStart, size_t seqSize, int nbSeq,
1919
+ const ZSTD_longOffset_e isLongOffset)
1920
+ {
1921
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1922
+ }
1923
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1924
+
1925
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1926
+ static BMI2_TARGET_ATTRIBUTE size_t
1927
+ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1928
+ void* dst, size_t maxDstSize,
1929
+ const void* seqStart, size_t seqSize, int nbSeq,
1930
+ const ZSTD_longOffset_e isLongOffset)
1931
+ {
1932
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1933
+ }
1934
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1935
+
1936
+ #endif /* DYNAMIC_BMI2 */
1937
+
1938
+ typedef size_t (*ZSTD_decompressSequences_t)(
1939
+ ZSTD_DCtx* dctx,
1940
+ void* dst, size_t maxDstSize,
1941
+ const void* seqStart, size_t seqSize, int nbSeq,
1942
+ const ZSTD_longOffset_e isLongOffset);
1943
+
1944
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1945
+ static size_t
1946
+ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1947
+ const void* seqStart, size_t seqSize, int nbSeq,
1948
+ const ZSTD_longOffset_e isLongOffset)
1949
+ {
1950
+ DEBUGLOG(5, "ZSTD_decompressSequences");
1951
+ #if DYNAMIC_BMI2
1952
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1953
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1954
+ }
1955
+ #endif
1956
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1957
+ }
1958
+ static size_t
1959
+ ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1960
+ const void* seqStart, size_t seqSize, int nbSeq,
1961
+ const ZSTD_longOffset_e isLongOffset)
1962
+ {
1963
+ DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
1964
+ #if DYNAMIC_BMI2
1965
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1966
+ return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1967
+ }
1968
+ #endif
1969
+ return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1970
+ }
1971
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1972
+
1973
+
1974
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1975
+ /* ZSTD_decompressSequencesLong() :
1976
+ * decompression function triggered when a minimum share of offsets is considered "long",
1977
+ * aka out of cache.
1978
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1979
+ * This function will try to mitigate main memory latency through the use of prefetching */
1980
+ static size_t
1981
+ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1982
+ void* dst, size_t maxDstSize,
1983
+ const void* seqStart, size_t seqSize, int nbSeq,
1984
+ const ZSTD_longOffset_e isLongOffset)
1985
+ {
1986
+ DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1987
+ #if DYNAMIC_BMI2
1988
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1989
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1990
+ }
1991
+ #endif
1992
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1993
+ }
1994
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1995
+
1996
+
1997
+ /**
1998
+ * @returns The total size of the history referenceable by zstd, including
1999
+ * both the prefix and the extDict. At @p op any offset larger than this
2000
+ * is invalid.
2001
+ */
2002
+ static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
2003
+ {
2004
+ return (size_t)(op - virtualStart);
2005
+ }
2006
+
2007
+ typedef struct {
2008
+ unsigned longOffsetShare;
2009
+ unsigned maxNbAdditionalBits;
2010
+ } ZSTD_OffsetInfo;
2011
+
2012
+ /* ZSTD_getOffsetInfo() :
2013
+ * condition : offTable must be valid
2014
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
2015
+ * compared to maximum possible of (1<<OffFSELog),
2016
+ * as well as the maximum number of additional bits required.
2017
+ */
2018
+ static ZSTD_OffsetInfo
2019
+ ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
2020
+ {
2021
+ ZSTD_OffsetInfo info = {0, 0};
2022
+ /* If nbSeq == 0, then the offTable is uninitialized, but we have
2023
+ * no sequences, so both values should be 0.
2024
+ */
2025
+ if (nbSeq != 0) {
2026
+ const void* ptr = offTable;
2027
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
2028
+ const ZSTD_seqSymbol* table = offTable + 1;
2029
+ U32 const max = 1 << tableLog;
2030
+ U32 u;
2031
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
2032
+
2033
+ assert(max <= (1 << OffFSELog)); /* max not too large */
2034
+ for (u=0; u<max; u++) {
2035
+ info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
2036
+ if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
2037
+ }
2038
+
2039
+ assert(tableLog <= OffFSELog);
2040
+ info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
2041
+ }
2042
+
2043
+ return info;
2044
+ }
2045
+
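As a worked example of the scaling above (illustrative numbers only): with OffFSELog = 8, if the offset table uses tableLog = 5 and 2 of its 32 cells need more than 22 additional bits, then longOffsetShare = 2 << (8 - 5) = 16 out of a possible 256, i.e. 6.25%. That share is later compared against the minShare thresholds used in ZSTD_decompressBlock_internal() below, 7 (≈2.73%) on 64-bit and 20 (≈7.81%) on 32-bit, when deciding whether to switch to the prefetching decoder.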
2046
+ /**
2047
+ * @returns The maximum offset we can decode in one read of our bitstream, without
2048
+ * reloading more bits in the middle of the offset bits read. Any offsets larger
2049
+ * than this must use the long offset decoder.
2050
+ */
2051
+ static size_t ZSTD_maxShortOffset(void)
2052
+ {
2053
+ if (MEM_64bits()) {
2054
+ /* We can decode any offset without reloading bits.
2055
+ * This might change if the max window size grows.
2056
+ */
2057
+ ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2058
+ return (size_t)-1;
2059
+ } else {
2060
+ /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
2061
+ * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
2062
+ * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
2063
+ */
2064
+ size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
2065
+ size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
2066
+ assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
2067
+ return maxOffset;
2068
+ }
2069
+ }
2070
+
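Plugging in the 32-bit constants (STREAM_ACCUMULATOR_MIN = 25 and ZSTD_REP_NUM = 3 in this release; values to double-check against the vendored headers rather than take from this note): maxOffbase = (1 << 26) - 1 = 67,108,863, so the largest offset decodable without a mid-read reload is 67,108,863 - 3 = 67,108,860, roughly 64 MiB. Any block whose available history can exceed this is a candidate for the long-offset path selected in ZSTD_decompressBlock_internal() below.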
2071
+ size_t
2072
+ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2073
+ void* dst, size_t dstCapacity,
2074
+ const void* src, size_t srcSize, const streaming_operation streaming)
2075
+ { /* blockType == blockCompressed */
2076
+ const BYTE* ip = (const BYTE*)src;
2077
+ DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
2078
+
2079
+ /* Note : the wording of the specification
2080
+ * allows a compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
2081
+ * This generally does not happen, as it makes little sense,
2082
+ * since an uncompressed block would have the same size and no decompression cost.
2083
+ * Also, note that decoders from reference libzstd before v1.5.4
2084
+ * would consider this edge case as an error.
2085
+ * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
2086
+ * for broader compatibility with the deployed ecosystem of zstd decoders */
2087
+ RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
2088
+
2089
+ /* Decode literals section */
2090
+ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
2091
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
2092
+ if (ZSTD_isError(litCSize)) return litCSize;
2093
+ ip += litCSize;
2094
+ srcSize -= litCSize;
2095
+ }
2096
+
2097
+ /* Build Decoding Tables */
2098
+ {
2099
+ /* Compute the maximum block size, which must also work when !frame and fParams are unset.
2100
+ * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
2101
+ */
2102
+ size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
2103
+ size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
2104
+ /* isLongOffset must be true if there are long offsets.
2105
+ * Offsets are long if they are larger than ZSTD_maxShortOffset().
2106
+ * We don't expect that to be the case in 64-bit mode.
2107
+ *
2108
+ * We check here to see if our history is large enough to allow long offsets.
2109
+ * If it isn't, then we can't possible have (valid) long offsets. If the offset
2110
+ * is invalid, then it is okay to read it incorrectly.
2111
+ *
2112
+ * If isLongOffset is true, then we will later check our decoding table to see
2113
+ * if it is even possible to generate long offsets.
2114
+ */
2115
+ ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
2116
+ /* These macros control at build-time which decompressor implementation
2117
+ * we use. If neither is defined, we do some inspection and dispatch at
2118
+ * runtime.
2119
+ */
2120
+ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2121
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2122
+ int usePrefetchDecoder = dctx->ddictIsCold;
2123
+ #else
2124
+ /* Set to 1 to avoid computing offset info if we don't need to.
2125
+ * Otherwise this value is ignored.
2126
+ */
2127
+ int usePrefetchDecoder = 1;
2128
+ #endif
2129
+ int nbSeq;
2130
+ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
2131
+ if (ZSTD_isError(seqHSize)) return seqHSize;
2132
+ ip += seqHSize;
2133
+ srcSize -= seqHSize;
2134
+
2135
+ RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2136
+ RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
2137
+ "invalid dst");
2138
+
2139
+ /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
2140
+ * compute information about the share of long offsets, and the maximum nbAdditionalBits.
2141
+ * NOTE: could probably use a larger nbSeq limit
2142
+ */
2143
+ if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
2144
+ ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
2145
+ if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
2146
+ /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
2147
+ * enough, then we know it is impossible to have too long an offset in this block, so we can
2148
+ * use the regular offset decoder.
2149
+ */
2150
+ isLongOffset = ZSTD_lo_isRegularOffset;
2151
+ }
2152
+ if (!usePrefetchDecoder) {
2153
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2154
+ usePrefetchDecoder = (info.longOffsetShare >= minShare);
2155
+ }
2156
+ }
2157
+
2158
+ dctx->ddictIsCold = 0;
2159
+
2160
+ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2161
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2162
+ if (usePrefetchDecoder) {
2163
+ #else
2164
+ (void)usePrefetchDecoder;
2165
+ {
2166
+ #endif
2167
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
2168
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
2169
+ #endif
2170
+ }
2171
+
2172
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2173
+ /* else */
2174
+ if (dctx->litBufferLocation == ZSTD_split)
2175
+ return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
2176
+ else
2177
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
2178
+ #endif
2179
+ }
2180
+ }
2181
+
2182
+
2183
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
2184
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2185
+ {
2186
+ if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
2187
+ dctx->dictEnd = dctx->previousDstEnd;
2188
+ dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
2189
+ dctx->prefixStart = dst;
2190
+ dctx->previousDstEnd = dst;
2191
+ }
2192
+ }
2193
+
2194
+
2195
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
2196
+ void* dst, size_t dstCapacity,
2197
+ const void* src, size_t srcSize)
2198
+ {
2199
+ size_t dSize;
2200
+ dctx->isFrameDecompression = 0;
2201
+ ZSTD_checkContinuity(dctx, dst, dstCapacity);
2202
+ dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
2203
+ FORWARD_IF_ERROR(dSize, "");
2204
+ dctx->previousDstEnd = (char*)dst + dSize;
2205
+ return dSize;
2206
+ }
2207
+
2208
+
2209
+ /* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
2210
+ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2211
+ void* dst, size_t dstCapacity,
2212
+ const void* src, size_t srcSize)
2213
+ {
2214
+ return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
2215
+ }
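For context on how the entry point above is reached from application code, here is a minimal, hedged sketch of the raw block API as exposed in zstd.h's static-linking-only section; prerequisites such as defining ZSTD_STATIC_LINKING_ONLY and keeping each block within ZSTD_BLOCKSIZE_MAX should be verified against the header vendored in this gem.

#define ZSTD_STATIC_LINKING_ONLY   /* the block-level API lives in the experimental section of zstd.h */
#include <zstd.h>
#include <stdio.h>

/* Decompresses a single raw block previously produced by the block-level compression API. */
static size_t demo_decompress_one_block(const void* src, size_t srcSize,
                                        void* dst, size_t dstCapacity)
{
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    size_t dSize = 0;
    if (dctx == NULL) return 0;
    if (!ZSTD_isError(ZSTD_decompressBegin(dctx)))   /* reset the context before block decoding */
        dSize = ZSTD_decompressBlock(dctx, dst, dstCapacity, src, srcSize);
    if (ZSTD_isError(dSize)) {
        fprintf(stderr, "block decoding failed: %s\n", ZSTD_getErrorName(dSize));
        dSize = 0;
    }
    ZSTD_freeDCtx(dctx);
    return dSize;    /* number of regenerated bytes, or 0 on error */
}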