zstdlib 0.6.0-x64-mingw32 → 0.9.0-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (140) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +20 -0
  3. data/README.md +7 -1
  4. data/Rakefile +38 -8
  5. data/ext/{zstdlib → zstdlib_c}/extconf.rb +10 -5
  6. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
  7. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
  8. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
  9. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
  10. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
  11. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
  12. data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
  13. data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
  14. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/adler32.c +0 -0
  15. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/compress.c +0 -0
  16. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.c +0 -0
  17. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.h +0 -0
  18. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.c +0 -0
  19. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.h +0 -0
  20. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzclose.c +0 -0
  21. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzguts.h +0 -0
  22. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzlib.c +0 -0
  23. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzread.c +0 -0
  24. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzwrite.c +0 -0
  25. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/infback.c +0 -0
  26. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.c +0 -0
  27. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.h +0 -0
  28. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffixed.h +0 -0
  29. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.c +0 -0
  30. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.h +0 -0
  31. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.c +0 -0
  32. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.h +0 -0
  33. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.c +0 -0
  34. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.h +0 -0
  35. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/uncompr.c +0 -0
  36. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zconf.h +0 -0
  37. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zlib.h +0 -0
  38. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.c +0 -0
  39. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.h +0 -0
  40. data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
  41. data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
  42. data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
  43. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +75 -57
  44. data/ext/zstdlib_c/zstd-1.5.2/lib/common/compiler.h +335 -0
  45. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +1 -3
  46. data/ext/zstdlib_c/zstd-1.5.2/lib/common/debug.c +24 -0
  47. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +22 -49
  48. data/ext/zstdlib_c/zstd-1.5.2/lib/common/entropy_common.c +368 -0
  49. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +3 -1
  50. data/ext/zstdlib_c/zstd-1.5.2/lib/common/error_private.h +159 -0
  51. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +51 -42
  52. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +149 -57
  53. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +60 -54
  54. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +87 -98
  55. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +34 -23
  56. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +5 -5
  57. data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
  58. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +10 -8
  59. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +4 -3
  60. data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
  61. data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
  62. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +10 -10
  63. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_deps.h +111 -0
  64. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_internal.h +493 -0
  65. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_trace.h +163 -0
  66. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
  67. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +105 -85
  68. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +41 -63
  69. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +13 -33
  70. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/huf_compress.c +1370 -0
  71. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress.c +6327 -0
  72. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +537 -82
  73. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +21 -16
  74. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +4 -2
  75. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +61 -34
  76. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +10 -3
  77. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_superblock.c +573 -0
  78. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_superblock.h +32 -0
  79. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +236 -95
  80. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +321 -143
  81. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +2 -2
  82. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +328 -137
  83. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +2 -2
  84. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.c +2104 -0
  85. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.h +125 -0
  86. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +336 -209
  87. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +15 -3
  88. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_ldm_geartab.h +106 -0
  89. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +439 -239
  90. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +1 -1
  91. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +205 -462
  92. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstdmt_compress.h +113 -0
  93. data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress.c +1889 -0
  94. data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
  95. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +20 -16
  96. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +3 -3
  97. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +691 -230
  98. data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/zstd_decompress_block.c +2072 -0
  99. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +16 -7
  100. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +71 -10
  101. data/ext/zstdlib_c/zstd-1.5.2/lib/zdict.h +452 -0
  102. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +760 -234
  103. data/ext/{zstdlib/zstd-1.4.4/lib/common → zstdlib_c/zstd-1.5.2/lib}/zstd_errors.h +3 -1
  104. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
  105. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +1 -1
  106. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
  107. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
  108. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
  109. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
  110. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +133 -44
  111. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  112. data/ext/zstdlib_c/zstd.mk +15 -0
  113. data/lib/2.4/zstdlib_c.so +0 -0
  114. data/lib/2.5/zstdlib_c.so +0 -0
  115. data/lib/2.6/zstdlib_c.so +0 -0
  116. data/lib/2.7/zstdlib_c.so +0 -0
  117. data/lib/3.0/zstdlib_c.so +0 -0
  118. data/lib/zstdlib.rb +2 -2
  119. metadata +124 -114
  120. data/ext/zstdlib/zstd-1.4.4/lib/common/compiler.h +0 -159
  121. data/ext/zstdlib/zstd-1.4.4/lib/common/debug.c +0 -44
  122. data/ext/zstdlib/zstd-1.4.4/lib/common/entropy_common.c +0 -236
  123. data/ext/zstdlib/zstd-1.4.4/lib/common/error_private.h +0 -76
  124. data/ext/zstdlib/zstd-1.4.4/lib/common/xxhash.c +0 -882
  125. data/ext/zstdlib/zstd-1.4.4/lib/common/xxhash.h +0 -305
  126. data/ext/zstdlib/zstd-1.4.4/lib/common/zstd_internal.h +0 -350
  127. data/ext/zstdlib/zstd-1.4.4/lib/compress/huf_compress.c +0 -798
  128. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_compress.c +0 -4103
  129. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_lazy.c +0 -1115
  130. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_lazy.h +0 -67
  131. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstdmt_compress.h +0 -192
  132. data/ext/zstdlib/zstd-1.4.4/lib/decompress/huf_decompress.c +0 -1234
  133. data/ext/zstdlib/zstd-1.4.4/lib/decompress/zstd_decompress_block.c +0 -1323
  134. data/ext/zstdlib/zstd.mk +0 -14
  135. data/lib/2.2/zstdlib.so +0 -0
  136. data/lib/2.3/zstdlib.so +0 -0
  137. data/lib/2.4/zstdlib.so +0 -0
  138. data/lib/2.5/zstdlib.so +0 -0
  139. data/lib/2.6/zstdlib.so +0 -0
  140. data/lib/2.7/zstdlib.so +0 -0
@@ -0,0 +1,2072 @@
1
+ /*
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ /* zstd_decompress_block :
12
+ * this module takes care of decompressing _compressed_ block */
13
+
14
+ /*-*******************************************************
15
+ * Dependencies
16
+ *********************************************************/
17
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18
+ #include "../common/compiler.h" /* prefetch */
19
+ #include "../common/cpu.h" /* bmi2 */
20
+ #include "../common/mem.h" /* low level memory routines */
21
+ #define FSE_STATIC_LINKING_ONLY
22
+ #include "../common/fse.h"
23
+ #define HUF_STATIC_LINKING_ONLY
24
+ #include "../common/huf.h"
25
+ #include "../common/zstd_internal.h"
26
+ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27
+ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28
+ #include "zstd_decompress_block.h"
29
+
30
+ /*_*******************************************************
31
+ * Macros
32
+ **********************************************************/
33
+
34
+ /* These two optional macros force the use one way or another of the two
35
+ * ZSTD_decompressSequences implementations. You can't force in both directions
36
+ * at the same time.
37
+ */
38
+ #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39
+ defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40
+ #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41
+ #endif
42
+
43
+
44
+ /*_*******************************************************
45
+ * Memory operations
46
+ **********************************************************/
47
/* ZSTD_copy4() :
 * copies exactly 4 bytes from `src` to `dst` through ZSTD_memcpy(). */
static void ZSTD_copy4(void* dst, const void* src)
{
    ZSTD_memcpy(dst, src, 4);
}
48
+
49
+
50
+ /*-*************************************************************
51
+ * Block decoding
52
+ ***************************************************************/
53
+
54
+ /*! ZSTD_getcBlockSize() :
55
+ * Provides the size of compressed block from block header `src` */
56
+ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57
+ blockProperties_t* bpPtr)
58
+ {
59
+ RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
60
+
61
+ { U32 const cBlockHeader = MEM_readLE24(src);
62
+ U32 const cSize = cBlockHeader >> 3;
63
+ bpPtr->lastBlock = cBlockHeader & 1;
64
+ bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
65
+ bpPtr->origSize = cSize; /* only useful for RLE */
66
+ if (bpPtr->blockType == bt_rle) return 1;
67
+ RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
68
+ return cSize;
69
+ }
70
+ }
71
+
72
+ /* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
73
+ static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
74
+ const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
75
+ {
76
+ if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
77
+ {
78
+ /* room for litbuffer to fit without read faulting */
79
+ dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
80
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
81
+ dctx->litBufferLocation = ZSTD_in_dst;
82
+ }
83
+ else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
84
+ {
85
+ /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
86
+ if (splitImmediately) {
87
+ /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
88
+ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
89
+ dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
90
+ }
91
+ else {
92
+ /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
93
+ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
94
+ dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
95
+ }
96
+ dctx->litBufferLocation = ZSTD_split;
97
+ }
98
+ else
99
+ {
100
+ /* fits entirely within litExtraBuffer, so no split is necessary */
101
+ dctx->litBuffer = dctx->litExtraBuffer;
102
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
103
+ dctx->litBufferLocation = ZSTD_not_in_dst;
104
+ }
105
+ }
106
+
107
+ /* Hidden declaration for fullbench */
108
+ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
109
+ const void* src, size_t srcSize,
110
+ void* dst, size_t dstCapacity, const streaming_operation streaming);
111
+ /*! ZSTD_decodeLiteralsBlock() :
112
+ * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
113
+ * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
114
+ * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
115
+ * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
116
+ *
117
+ * @return : nb of bytes read from src (< srcSize )
118
+ * note : symbol not declared but exposed for fullbench */
119
+ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
120
+ const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
121
+ void* dst, size_t dstCapacity, const streaming_operation streaming)
122
+ {
123
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
124
+ RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
125
+
126
+ { const BYTE* const istart = (const BYTE*) src;
127
+ symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
128
+
129
+ switch(litEncType)
130
+ {
131
+ case set_repeat:
132
+ DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
133
+ RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
134
+ ZSTD_FALLTHROUGH;
135
+
136
+ case set_compressed:
137
+ RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
138
+ { size_t lhSize, litSize, litCSize;
139
+ U32 singleStream=0;
140
+ U32 const lhlCode = (istart[0] >> 2) & 3;
141
+ U32 const lhc = MEM_readLE32(istart);
142
+ size_t hufSuccess;
143
+ size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
144
+ switch(lhlCode)
145
+ {
146
+ case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
147
+ /* 2 - 2 - 10 - 10 */
148
+ singleStream = !lhlCode;
149
+ lhSize = 3;
150
+ litSize = (lhc >> 4) & 0x3FF;
151
+ litCSize = (lhc >> 14) & 0x3FF;
152
+ break;
153
+ case 2:
154
+ /* 2 - 2 - 14 - 14 */
155
+ lhSize = 4;
156
+ litSize = (lhc >> 4) & 0x3FFF;
157
+ litCSize = lhc >> 18;
158
+ break;
159
+ case 3:
160
+ /* 2 - 2 - 18 - 18 */
161
+ lhSize = 5;
162
+ litSize = (lhc >> 4) & 0x3FFFF;
163
+ litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
164
+ break;
165
+ }
166
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
167
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
168
+ RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
169
+ RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
170
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
171
+
172
+ /* prefetch huffman table if cold */
173
+ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
174
+ PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
175
+ }
176
+
177
+ if (litEncType==set_repeat) {
178
+ if (singleStream) {
179
+ hufSuccess = HUF_decompress1X_usingDTable_bmi2(
180
+ dctx->litBuffer, litSize, istart+lhSize, litCSize,
181
+ dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
182
+ } else {
183
+ hufSuccess = HUF_decompress4X_usingDTable_bmi2(
184
+ dctx->litBuffer, litSize, istart+lhSize, litCSize,
185
+ dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
186
+ }
187
+ } else {
188
+ if (singleStream) {
189
+ #if defined(HUF_FORCE_DECOMPRESS_X2)
190
+ hufSuccess = HUF_decompress1X_DCtx_wksp(
191
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
192
+ istart+lhSize, litCSize, dctx->workspace,
193
+ sizeof(dctx->workspace));
194
+ #else
195
+ hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
196
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
197
+ istart+lhSize, litCSize, dctx->workspace,
198
+ sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
199
+ #endif
200
+ } else {
201
+ hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
202
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
203
+ istart+lhSize, litCSize, dctx->workspace,
204
+ sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
205
+ }
206
+ }
207
+ if (dctx->litBufferLocation == ZSTD_split)
208
+ {
209
+ ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
210
+ ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
211
+ dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
212
+ dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
213
+ }
214
+
215
+ RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
216
+
217
+ dctx->litPtr = dctx->litBuffer;
218
+ dctx->litSize = litSize;
219
+ dctx->litEntropy = 1;
220
+ if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
221
+ return litCSize + lhSize;
222
+ }
223
+
224
+ case set_basic:
225
+ { size_t litSize, lhSize;
226
+ U32 const lhlCode = ((istart[0]) >> 2) & 3;
227
+ size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
228
+ switch(lhlCode)
229
+ {
230
+ case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
231
+ lhSize = 1;
232
+ litSize = istart[0] >> 3;
233
+ break;
234
+ case 1:
235
+ lhSize = 2;
236
+ litSize = MEM_readLE16(istart) >> 4;
237
+ break;
238
+ case 3:
239
+ lhSize = 3;
240
+ litSize = MEM_readLE24(istart) >> 4;
241
+ break;
242
+ }
243
+
244
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
245
+ RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
246
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
247
+ if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
248
+ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
249
+ if (dctx->litBufferLocation == ZSTD_split)
250
+ {
251
+ ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
252
+ ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
253
+ }
254
+ else
255
+ {
256
+ ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
257
+ }
258
+ dctx->litPtr = dctx->litBuffer;
259
+ dctx->litSize = litSize;
260
+ return lhSize+litSize;
261
+ }
262
+ /* direct reference into compressed stream */
263
+ dctx->litPtr = istart+lhSize;
264
+ dctx->litSize = litSize;
265
+ dctx->litBufferEnd = dctx->litPtr + litSize;
266
+ dctx->litBufferLocation = ZSTD_not_in_dst;
267
+ return lhSize+litSize;
268
+ }
269
+
270
+ case set_rle:
271
+ { U32 const lhlCode = ((istart[0]) >> 2) & 3;
272
+ size_t litSize, lhSize;
273
+ size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
274
+ switch(lhlCode)
275
+ {
276
+ case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
277
+ lhSize = 1;
278
+ litSize = istart[0] >> 3;
279
+ break;
280
+ case 1:
281
+ lhSize = 2;
282
+ litSize = MEM_readLE16(istart) >> 4;
283
+ break;
284
+ case 3:
285
+ lhSize = 3;
286
+ litSize = MEM_readLE24(istart) >> 4;
287
+ RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
288
+ break;
289
+ }
290
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
291
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
292
+ RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
293
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
294
+ if (dctx->litBufferLocation == ZSTD_split)
295
+ {
296
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
297
+ ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
298
+ }
299
+ else
300
+ {
301
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
302
+ }
303
+ dctx->litPtr = dctx->litBuffer;
304
+ dctx->litSize = litSize;
305
+ return lhSize+1;
306
+ }
307
+ default:
308
+ RETURN_ERROR(corruption_detected, "impossible");
309
+ }
310
+ }
311
+ }
312
+
313
+ /* Default FSE distribution tables.
314
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
315
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
316
+ * They were generated programmatically with following method :
317
+ * - start from default distributions, present in /lib/common/zstd_internal.h
318
+ * - generate tables normally, using ZSTD_buildFSETable()
319
+ * - printout the content of tables
320
+ * - prettify output, report below, test with fuzzer to ensure it's correct */
321
+
322
+ /* Default FSE distribution table for Literal Lengths.
+  * Pre-calculated decoding table of (1<<LL_DEFAULTNORMLOG)+1 entries :
+  * entry 0 is the header slot (fastMode, tableLog),
+  * the remaining entries are decoding cells, each one being
+  * { nextState, nbAddBits, nbBits, baseVal }.
+  * Values are generated, not hand-written : regenerate them with the method
+  * described above these tables rather than editing individual cells. */
323
+ static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
324
+ { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
325
+ /* nextState, nbAddBits, nbBits, baseVal */
326
+ { 0, 0, 4, 0}, { 16, 0, 4, 0},
327
+ { 32, 0, 5, 1}, { 0, 0, 5, 3},
328
+ { 0, 0, 5, 4}, { 0, 0, 5, 6},
329
+ { 0, 0, 5, 7}, { 0, 0, 5, 9},
330
+ { 0, 0, 5, 10}, { 0, 0, 5, 12},
331
+ { 0, 0, 6, 14}, { 0, 1, 5, 16},
332
+ { 0, 1, 5, 20}, { 0, 1, 5, 22},
333
+ { 0, 2, 5, 28}, { 0, 3, 5, 32},
334
+ { 0, 4, 5, 48}, { 32, 6, 5, 64},
335
+ { 0, 7, 5, 128}, { 0, 8, 6, 256},
336
+ { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
337
+ { 32, 0, 4, 0}, { 0, 0, 4, 1},
338
+ { 0, 0, 5, 2}, { 32, 0, 5, 4},
339
+ { 0, 0, 5, 5}, { 32, 0, 5, 7},
340
+ { 0, 0, 5, 8}, { 32, 0, 5, 10},
341
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
342
+ { 32, 1, 5, 16}, { 0, 1, 5, 18},
343
+ { 32, 1, 5, 22}, { 0, 2, 5, 24},
344
+ { 32, 3, 5, 32}, { 0, 3, 5, 40},
345
+ { 0, 6, 4, 64}, { 16, 6, 4, 64},
346
+ { 32, 7, 5, 128}, { 0, 9, 6, 512},
347
+ { 0, 11, 6, 2048}, { 48, 0, 4, 0},
348
+ { 16, 0, 4, 1}, { 32, 0, 5, 2},
349
+ { 32, 0, 5, 3}, { 32, 0, 5, 5},
350
+ { 32, 0, 5, 6}, { 32, 0, 5, 8},
351
+ { 32, 0, 5, 9}, { 32, 0, 5, 11},
352
+ { 32, 0, 5, 12}, { 0, 0, 6, 15},
353
+ { 32, 1, 5, 18}, { 32, 1, 5, 20},
354
+ { 32, 2, 5, 24}, { 32, 2, 5, 28},
355
+ { 32, 3, 5, 40}, { 32, 4, 5, 48},
356
+ { 0, 16, 6,65536}, { 0, 15, 6,32768},
357
+ { 0, 14, 6,16384}, { 0, 13, 6, 8192},
358
+ }; /* LL_defaultDTable */
359
+
360
+ /* Default FSE distribution table for Offset Codes.
+  * Pre-calculated decoding table of (1<<OF_DEFAULTNORMLOG)+1 entries :
+  * entry 0 is the header slot (fastMode, tableLog),
+  * the remaining entries are decoding cells, each one being
+  * { nextState, nbAddBits, nbBits, baseVal }.
+  * Values are generated, not hand-written : regenerate them with the method
+  * described above these tables rather than editing individual cells. */
361
+ static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
362
+ { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
363
+ /* nextState, nbAddBits, nbBits, baseVal */
364
+ { 0, 0, 5, 0}, { 0, 6, 4, 61},
365
+ { 0, 9, 5, 509}, { 0, 15, 5,32765},
366
+ { 0, 21, 5,2097149}, { 0, 3, 5, 5},
367
+ { 0, 7, 4, 125}, { 0, 12, 5, 4093},
368
+ { 0, 18, 5,262141}, { 0, 23, 5,8388605},
369
+ { 0, 5, 5, 29}, { 0, 8, 4, 253},
370
+ { 0, 14, 5,16381}, { 0, 20, 5,1048573},
371
+ { 0, 2, 5, 1}, { 16, 7, 4, 125},
372
+ { 0, 11, 5, 2045}, { 0, 17, 5,131069},
373
+ { 0, 22, 5,4194301}, { 0, 4, 5, 13},
374
+ { 16, 8, 4, 253}, { 0, 13, 5, 8189},
375
+ { 0, 19, 5,524285}, { 0, 1, 5, 1},
376
+ { 16, 6, 4, 61}, { 0, 10, 5, 1021},
377
+ { 0, 16, 5,65533}, { 0, 28, 5,268435453},
378
+ { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
379
+ { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
380
+ }; /* OF_defaultDTable */
381
+
382
+
383
+ /* Default FSE distribution table for Match Lengths.
+  * Pre-calculated decoding table of (1<<ML_DEFAULTNORMLOG)+1 entries :
+  * entry 0 is the header slot (fastMode, tableLog),
+  * the remaining entries are decoding cells, each one being
+  * { nextState, nbAddBits, nbBits, baseVal }.
+  * Values are generated, not hand-written : regenerate them with the method
+  * described above these tables rather than editing individual cells. */
384
+ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
385
+ { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
386
+ /* nextState, nbAddBits, nbBits, baseVal */
387
+ { 0, 0, 6, 3}, { 0, 0, 4, 4},
388
+ { 32, 0, 5, 5}, { 0, 0, 5, 6},
389
+ { 0, 0, 5, 8}, { 0, 0, 5, 9},
390
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
391
+ { 0, 0, 6, 16}, { 0, 0, 6, 19},
392
+ { 0, 0, 6, 22}, { 0, 0, 6, 25},
393
+ { 0, 0, 6, 28}, { 0, 0, 6, 31},
394
+ { 0, 0, 6, 34}, { 0, 1, 6, 37},
395
+ { 0, 1, 6, 41}, { 0, 2, 6, 47},
396
+ { 0, 3, 6, 59}, { 0, 4, 6, 83},
397
+ { 0, 7, 6, 131}, { 0, 9, 6, 515},
398
+ { 16, 0, 4, 4}, { 0, 0, 4, 5},
399
+ { 32, 0, 5, 6}, { 0, 0, 5, 7},
400
+ { 32, 0, 5, 9}, { 0, 0, 5, 10},
401
+ { 0, 0, 6, 12}, { 0, 0, 6, 15},
402
+ { 0, 0, 6, 18}, { 0, 0, 6, 21},
403
+ { 0, 0, 6, 24}, { 0, 0, 6, 27},
404
+ { 0, 0, 6, 30}, { 0, 0, 6, 33},
405
+ { 0, 1, 6, 35}, { 0, 1, 6, 39},
406
+ { 0, 2, 6, 43}, { 0, 3, 6, 51},
407
+ { 0, 4, 6, 67}, { 0, 5, 6, 99},
408
+ { 0, 8, 6, 259}, { 32, 0, 4, 4},
409
+ { 48, 0, 4, 4}, { 16, 0, 4, 5},
410
+ { 32, 0, 5, 7}, { 32, 0, 5, 8},
411
+ { 32, 0, 5, 10}, { 32, 0, 5, 11},
412
+ { 0, 0, 6, 14}, { 0, 0, 6, 17},
413
+ { 0, 0, 6, 20}, { 0, 0, 6, 23},
414
+ { 0, 0, 6, 26}, { 0, 0, 6, 29},
415
+ { 0, 0, 6, 32}, { 0, 16, 6,65539},
416
+ { 0, 15, 6,32771}, { 0, 14, 6,16387},
417
+ { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
418
+ { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
419
+ }; /* ML_defaultDTable */
420
+
421
+
422
+ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
423
+ {
424
+ void* ptr = dt;
425
+ ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
426
+ ZSTD_seqSymbol* const cell = dt + 1;
427
+
428
+ DTableH->tableLog = 0;
429
+ DTableH->fastMode = 0;
430
+
431
+ cell->nbBits = 0;
432
+ cell->nextState = 0;
433
+ assert(nbAddBits < 255);
434
+ cell->nbAdditionalBits = nbAddBits;
435
+ cell->baseValue = baseValue;
436
+ }
437
+
438
+
439
+ /* ZSTD_buildFSETable() :
440
+ * generate FSE decoding table for one symbol (ll, ml or off)
441
+ * cannot fail if input is valid =>
442
+ * all inputs are presumed validated at this stage */
443
+ FORCE_INLINE_TEMPLATE
444
+ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
445
+ const short* normalizedCounter, unsigned maxSymbolValue,
446
+ const U32* baseValue, const U8* nbAdditionalBits,
447
+ unsigned tableLog, void* wksp, size_t wkspSize)
448
+ {
449
+ ZSTD_seqSymbol* const tableDecode = dt+1;
450
+ U32 const maxSV1 = maxSymbolValue + 1;
451
+ U32 const tableSize = 1 << tableLog;
452
+
453
+ U16* symbolNext = (U16*)wksp;
454
+ BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
455
+ U32 highThreshold = tableSize - 1;
456
+
457
+
458
+ /* Sanity Checks */
459
+ assert(maxSymbolValue <= MaxSeq);
460
+ assert(tableLog <= MaxFSELog);
461
+ assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
462
+ (void)wkspSize;
463
+ /* Init, lay down lowprob symbols */
464
+ { ZSTD_seqSymbol_header DTableH;
465
+ DTableH.tableLog = tableLog;
466
+ DTableH.fastMode = 1;
467
+ { S16 const largeLimit= (S16)(1 << (tableLog-1));
468
+ U32 s;
469
+ for (s=0; s<maxSV1; s++) {
470
+ if (normalizedCounter[s]==-1) {
471
+ tableDecode[highThreshold--].baseValue = s;
472
+ symbolNext[s] = 1;
473
+ } else {
474
+ if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
475
+ assert(normalizedCounter[s]>=0);
476
+ symbolNext[s] = (U16)normalizedCounter[s];
477
+ } } }
478
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
479
+ }
480
+
481
+ /* Spread symbols */
482
+ assert(tableSize <= 512);
483
+ /* Specialized symbol spreading for the case when there are
484
+ * no low probability (-1 count) symbols. When compressing
485
+ * small blocks we avoid low probability symbols to hit this
486
+ * case, since header decoding speed matters more.
487
+ */
488
+ if (highThreshold == tableSize - 1) {
489
+ size_t const tableMask = tableSize-1;
490
+ size_t const step = FSE_TABLESTEP(tableSize);
491
+ /* First lay down the symbols in order.
492
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
493
+ * misses since small blocks generally have small table logs, so nearly
494
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
495
+ * our buffer to handle the over-write.
496
+ */
497
+ {
498
+ U64 const add = 0x0101010101010101ull;
499
+ size_t pos = 0;
500
+ U64 sv = 0;
501
+ U32 s;
502
+ for (s=0; s<maxSV1; ++s, sv += add) {
503
+ int i;
504
+ int const n = normalizedCounter[s];
505
+ MEM_write64(spread + pos, sv);
506
+ for (i = 8; i < n; i += 8) {
507
+ MEM_write64(spread + pos + i, sv);
508
+ }
509
+ pos += n;
510
+ }
511
+ }
512
+ /* Now we spread those positions across the table.
513
+ * The benefit of doing it in two stages is that we avoid the the
514
+ * variable size inner loop, which caused lots of branch misses.
515
+ * Now we can run through all the positions without any branch misses.
516
+ * We unroll the loop twice, since that is what emperically worked best.
517
+ */
518
+ {
519
+ size_t position = 0;
520
+ size_t s;
521
+ size_t const unroll = 2;
522
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
523
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
524
+ size_t u;
525
+ for (u = 0; u < unroll; ++u) {
526
+ size_t const uPosition = (position + (u * step)) & tableMask;
527
+ tableDecode[uPosition].baseValue = spread[s + u];
528
+ }
529
+ position = (position + (unroll * step)) & tableMask;
530
+ }
531
+ assert(position == 0);
532
+ }
533
+ } else {
534
+ U32 const tableMask = tableSize-1;
535
+ U32 const step = FSE_TABLESTEP(tableSize);
536
+ U32 s, position = 0;
537
+ for (s=0; s<maxSV1; s++) {
538
+ int i;
539
+ int const n = normalizedCounter[s];
540
+ for (i=0; i<n; i++) {
541
+ tableDecode[position].baseValue = s;
542
+ position = (position + step) & tableMask;
543
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
544
+ } }
545
+ assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
546
+ }
547
+
548
+ /* Build Decoding table */
549
+ {
550
+ U32 u;
551
+ for (u=0; u<tableSize; u++) {
552
+ U32 const symbol = tableDecode[u].baseValue;
553
+ U32 const nextState = symbolNext[symbol]++;
554
+ tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
555
+ tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
556
+ assert(nbAdditionalBits[symbol] < 255);
557
+ tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
558
+ tableDecode[u].baseValue = baseValue[symbol];
559
+ }
560
+ }
561
+ }
562
+
563
+ /* Avoids the FORCE_INLINE of the _body() function. */
564
+ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
565
+ const short* normalizedCounter, unsigned maxSymbolValue,
566
+ const U32* baseValue, const U8* nbAdditionalBits,
567
+ unsigned tableLog, void* wksp, size_t wkspSize)
568
+ {
569
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
570
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
571
+ }
572
+
573
+ #if DYNAMIC_BMI2
574
+ BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
575
+ const short* normalizedCounter, unsigned maxSymbolValue,
576
+ const U32* baseValue, const U8* nbAdditionalBits,
577
+ unsigned tableLog, void* wksp, size_t wkspSize)
578
+ {
579
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
580
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
581
+ }
582
+ #endif
583
+
584
+ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
585
+ const short* normalizedCounter, unsigned maxSymbolValue,
586
+ const U32* baseValue, const U8* nbAdditionalBits,
587
+ unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
588
+ {
589
+ #if DYNAMIC_BMI2
590
+ if (bmi2) {
591
+ ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
592
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
593
+ return;
594
+ }
595
+ #endif
596
+ (void)bmi2;
597
+ ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
598
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
599
+ }
600
+
601
+
602
+ /*! ZSTD_buildSeqTable() :
603
+ * @return : nb bytes read from src,
604
+ * or an error code if it fails */
605
+ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
606
+ symbolEncodingType_e type, unsigned max, U32 maxLog,
607
+ const void* src, size_t srcSize,
608
+ const U32* baseValue, const U8* nbAdditionalBits,
609
+ const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
610
+ int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
611
+ int bmi2)
612
+ {
613
+ switch(type)
614
+ {
615
+ case set_rle :
616
+ RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
617
+ RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
618
+ { U32 const symbol = *(const BYTE*)src;
619
+ U32 const baseline = baseValue[symbol];
620
+ U8 const nbBits = nbAdditionalBits[symbol];
621
+ ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
622
+ }
623
+ *DTablePtr = DTableSpace;
624
+ return 1;
625
+ case set_basic :
626
+ *DTablePtr = defaultTable;
627
+ return 0;
628
+ case set_repeat:
629
+ RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
630
+ /* prefetch FSE table if used */
631
+ if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
632
+ const void* const pStart = *DTablePtr;
633
+ size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
634
+ PREFETCH_AREA(pStart, pSize);
635
+ }
636
+ return 0;
637
+ case set_compressed :
638
+ { unsigned tableLog;
639
+ S16 norm[MaxSeq+1];
640
+ size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
641
+ RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
642
+ RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
643
+ ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
644
+ *DTablePtr = DTableSpace;
645
+ return headerSize;
646
+ }
647
+ default :
648
+ assert(0);
649
+ RETURN_ERROR(GENERIC, "impossible");
650
+ }
651
+ }
652
+
653
+ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
654
+ const void* src, size_t srcSize)
655
+ {
656
+ const BYTE* const istart = (const BYTE*)src;
657
+ const BYTE* const iend = istart + srcSize;
658
+ const BYTE* ip = istart;
659
+ int nbSeq;
660
+ DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
661
+
662
+ /* check */
663
+ RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
664
+
665
+ /* SeqHead */
666
+ nbSeq = *ip++;
667
+ if (!nbSeq) {
668
+ *nbSeqPtr=0;
669
+ RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
670
+ return 1;
671
+ }
672
+ if (nbSeq > 0x7F) {
673
+ if (nbSeq == 0xFF) {
674
+ RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
675
+ nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
676
+ ip+=2;
677
+ } else {
678
+ RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
679
+ nbSeq = ((nbSeq-0x80)<<8) + *ip++;
680
+ }
681
+ }
682
+ *nbSeqPtr = nbSeq;
683
+
684
+ /* FSE table descriptors */
685
+ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
686
+ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
687
+ symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
688
+ symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
689
+ ip++;
690
+
691
+ /* Build DTables */
692
+ { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
693
+ LLtype, MaxLL, LLFSELog,
694
+ ip, iend-ip,
695
+ LL_base, LL_bits,
696
+ LL_defaultDTable, dctx->fseEntropy,
697
+ dctx->ddictIsCold, nbSeq,
698
+ dctx->workspace, sizeof(dctx->workspace),
699
+ ZSTD_DCtx_get_bmi2(dctx));
700
+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
701
+ ip += llhSize;
702
+ }
703
+
704
+ { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
705
+ OFtype, MaxOff, OffFSELog,
706
+ ip, iend-ip,
707
+ OF_base, OF_bits,
708
+ OF_defaultDTable, dctx->fseEntropy,
709
+ dctx->ddictIsCold, nbSeq,
710
+ dctx->workspace, sizeof(dctx->workspace),
711
+ ZSTD_DCtx_get_bmi2(dctx));
712
+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
713
+ ip += ofhSize;
714
+ }
715
+
716
+ { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
717
+ MLtype, MaxML, MLFSELog,
718
+ ip, iend-ip,
719
+ ML_base, ML_bits,
720
+ ML_defaultDTable, dctx->fseEntropy,
721
+ dctx->ddictIsCold, nbSeq,
722
+ dctx->workspace, sizeof(dctx->workspace),
723
+ ZSTD_DCtx_get_bmi2(dctx));
724
+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
725
+ ip += mlhSize;
726
+ }
727
+ }
728
+
729
+ return ip-istart;
730
+ }
731
+
732
+
733
+ typedef struct {
734
+ size_t litLength;
735
+ size_t matchLength;
736
+ size_t offset;
737
+ } seq_t;
738
+
739
+ typedef struct {
740
+ size_t state;
741
+ const ZSTD_seqSymbol* table;
742
+ } ZSTD_fseState;
743
+
744
+ typedef struct {
745
+ BIT_DStream_t DStream;
746
+ ZSTD_fseState stateLL;
747
+ ZSTD_fseState stateOffb;
748
+ ZSTD_fseState stateML;
749
+ size_t prevOffset[ZSTD_REP_NUM];
750
+ } seqState_t;
751
+
752
+ /*! ZSTD_overlapCopy8() :
753
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
754
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
755
+ *
756
+ * Precondition: *ip <= *op
757
+ * Postcondition: *op - *ip >= 8
758
+ */
759
+ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
760
+ assert(*ip <= *op);
761
+ if (offset < 8) {
762
+ /* close range match, overlap */
763
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
764
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
765
+ int const sub2 = dec64table[offset];
766
+ (*op)[0] = (*ip)[0];
767
+ (*op)[1] = (*ip)[1];
768
+ (*op)[2] = (*ip)[2];
769
+ (*op)[3] = (*ip)[3];
770
+ *ip += dec32table[offset];
771
+ ZSTD_copy4(*op+4, *ip);
772
+ *ip -= sub2;
773
+ } else {
774
+ ZSTD_copy8(*op, *ip);
775
+ }
776
+ *ip += 8;
777
+ *op += 8;
778
+ assert(*op - *ip >= 8);
779
+ }
780
+
781
+ /*! ZSTD_safecopy() :
782
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
783
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
784
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
785
+ * should be fast for a single long sequence, but can be slow for several short sequences.
786
+ *
787
+ * @param ovtype controls the overlap detection
788
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
789
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
790
+ * The src buffer must be before the dst buffer.
791
+ */
792
+ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
793
+ ptrdiff_t const diff = op - ip;
794
+ BYTE* const oend = op + length;
795
+
796
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
797
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
798
+
799
+ if (length < 8) {
800
+ /* Handle short lengths. */
801
+ while (op < oend) *op++ = *ip++;
802
+ return;
803
+ }
804
+ if (ovtype == ZSTD_overlap_src_before_dst) {
805
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
806
+ assert(length >= 8);
807
+ ZSTD_overlapCopy8(&op, &ip, diff);
808
+ length -= 8;
809
+ assert(op - ip >= 8);
810
+ assert(op <= oend);
811
+ }
812
+
813
+ if (oend <= oend_w) {
814
+ /* No risk of overwrite. */
815
+ ZSTD_wildcopy(op, ip, length, ovtype);
816
+ return;
817
+ }
818
+ if (op <= oend_w) {
819
+ /* Wildcopy until we get close to the end. */
820
+ assert(oend > oend_w);
821
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
822
+ ip += oend_w - op;
823
+ op += oend_w - op;
824
+ }
825
+ /* Handle the leftovers. */
826
+ while (op < oend) *op++ = *ip++;
827
+ }
828
+
829
+ /* ZSTD_safecopyDstBeforeSrc():
830
+ * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
831
+ * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
832
+ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
833
+ ptrdiff_t const diff = op - ip;
834
+ BYTE* const oend = op + length;
835
+
836
+ if (length < 8 || diff > -8) {
837
+ /* Handle short lengths, close overlaps, and dst not before src. */
838
+ while (op < oend) *op++ = *ip++;
839
+ return;
840
+ }
841
+
842
+ if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
843
+ ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
844
+ ip += oend - WILDCOPY_OVERLENGTH - op;
845
+ op += oend - WILDCOPY_OVERLENGTH - op;
846
+ }
847
+
848
+ /* Handle the leftovers. */
849
+ while (op < oend) *op++ = *ip++;
850
+ }
851
+
852
+ /* ZSTD_execSequenceEnd():
853
+ * This version handles cases that are near the end of the output buffer. It requires
854
+ * more careful checks to make sure there is no overflow. By separating out these hard
855
+ * and unlikely cases, we can speed up the common cases.
856
+ *
857
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
858
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
859
+ */
860
+ FORCE_NOINLINE
861
+ size_t ZSTD_execSequenceEnd(BYTE* op,
862
+ BYTE* const oend, seq_t sequence,
863
+ const BYTE** litPtr, const BYTE* const litLimit,
864
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
865
+ {
866
+ BYTE* const oLitEnd = op + sequence.litLength;
867
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
868
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
869
+ const BYTE* match = oLitEnd - sequence.offset;
870
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
871
+
872
+ /* bounds checks : careful of address space overflow in 32-bit mode */
873
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
874
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
875
+ assert(op < op + sequenceLength);
876
+ assert(oLitEnd < op + sequenceLength);
877
+
878
+ /* copy literals */
879
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
880
+ op = oLitEnd;
881
+ *litPtr = iLitEnd;
882
+
883
+ /* copy Match */
884
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
885
+ /* offset beyond prefix */
886
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
887
+ match = dictEnd - (prefixStart - match);
888
+ if (match + sequence.matchLength <= dictEnd) {
889
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
890
+ return sequenceLength;
891
+ }
892
+ /* span extDict & currentPrefixSegment */
893
+ { size_t const length1 = dictEnd - match;
894
+ ZSTD_memmove(oLitEnd, match, length1);
895
+ op = oLitEnd + length1;
896
+ sequence.matchLength -= length1;
897
+ match = prefixStart;
898
+ }
899
+ }
900
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
901
+ return sequenceLength;
902
+ }
903
+
904
+ /* ZSTD_execSequenceEndSplitLitBuffer():
905
+ * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
906
+ */
907
+ FORCE_NOINLINE
908
+ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
909
+ BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
910
+ const BYTE** litPtr, const BYTE* const litLimit,
911
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
912
+ {
913
+ BYTE* const oLitEnd = op + sequence.litLength;
914
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
915
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
916
+ const BYTE* match = oLitEnd - sequence.offset;
917
+
918
+
919
+ /* bounds checks : careful of address space overflow in 32-bit mode */
920
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
921
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
922
+ assert(op < op + sequenceLength);
923
+ assert(oLitEnd < op + sequenceLength);
924
+
925
+ /* copy literals */
926
+ RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
927
+ ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
928
+ op = oLitEnd;
929
+ *litPtr = iLitEnd;
930
+
931
+ /* copy Match */
932
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
933
+ /* offset beyond prefix */
934
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
935
+ match = dictEnd - (prefixStart - match);
936
+ if (match + sequence.matchLength <= dictEnd) {
937
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
938
+ return sequenceLength;
939
+ }
940
+ /* span extDict & currentPrefixSegment */
941
+ { size_t const length1 = dictEnd - match;
942
+ ZSTD_memmove(oLitEnd, match, length1);
943
+ op = oLitEnd + length1;
944
+ sequence.matchLength -= length1;
945
+ match = prefixStart;
946
+ }
947
+ }
948
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
949
+ return sequenceLength;
950
+ }
951
+
952
+ HINT_INLINE
953
+ size_t ZSTD_execSequence(BYTE* op,
954
+ BYTE* const oend, seq_t sequence,
955
+ const BYTE** litPtr, const BYTE* const litLimit,
956
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
957
+ {
958
+ BYTE* const oLitEnd = op + sequence.litLength;
959
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
960
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
961
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
962
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
963
+ const BYTE* match = oLitEnd - sequence.offset;
964
+
965
+ assert(op != NULL /* Precondition */);
966
+ assert(oend_w < oend /* No underflow */);
967
+ /* Handle edge cases in a slow path:
968
+ * - Read beyond end of literals
969
+ * - Match end is within WILDCOPY_OVERLENGTH of oend
970
+ * - 32-bit mode and the match length overflows
971
+ */
972
+ if (UNLIKELY(
973
+ iLitEnd > litLimit ||
974
+ oMatchEnd > oend_w ||
975
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
976
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
977
+
978
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
979
+ assert(op <= oLitEnd /* No overflow */);
980
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
981
+ assert(oMatchEnd <= oend /* No underflow */);
982
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
983
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
984
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
985
+
986
+ /* Copy Literals:
987
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
988
+ * We likely don't need the full 32-byte wildcopy.
989
+ */
990
+ assert(WILDCOPY_OVERLENGTH >= 16);
991
+ ZSTD_copy16(op, (*litPtr));
992
+ if (UNLIKELY(sequence.litLength > 16)) {
993
+ ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
994
+ }
995
+ op = oLitEnd;
996
+ *litPtr = iLitEnd; /* update for next sequence */
997
+
998
+ /* Copy Match */
999
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1000
+ /* offset beyond prefix -> go into extDict */
1001
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1002
+ match = dictEnd + (match - prefixStart);
1003
+ if (match + sequence.matchLength <= dictEnd) {
1004
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1005
+ return sequenceLength;
1006
+ }
1007
+ /* span extDict & currentPrefixSegment */
1008
+ { size_t const length1 = dictEnd - match;
1009
+ ZSTD_memmove(oLitEnd, match, length1);
1010
+ op = oLitEnd + length1;
1011
+ sequence.matchLength -= length1;
1012
+ match = prefixStart;
1013
+ }
1014
+ }
1015
+ /* Match within prefix of 1 or more bytes */
1016
+ assert(op <= oMatchEnd);
1017
+ assert(oMatchEnd <= oend_w);
1018
+ assert(match >= prefixStart);
1019
+ assert(sequence.matchLength >= 1);
1020
+
1021
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1022
+ * without overlap checking.
1023
+ */
1024
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1025
+ /* We bet on a full wildcopy for matches, since we expect matches to be
1026
+ * longer than literals (in general). In silesia, ~10% of matches are longer
1027
+ * than 16 bytes.
1028
+ */
1029
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1030
+ return sequenceLength;
1031
+ }
1032
+ assert(sequence.offset < WILDCOPY_VECLEN);
1033
+
1034
+ /* Copy 8 bytes and spread the offset to be >= 8. */
1035
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
1036
+
1037
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
1038
+ if (sequence.matchLength > 8) {
1039
+ assert(op < oMatchEnd);
1040
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
1041
+ }
1042
+ return sequenceLength;
1043
+ }
1044
+
1045
+ HINT_INLINE
1046
+ size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
1047
+ BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
1048
+ const BYTE** litPtr, const BYTE* const litLimit,
1049
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1050
+ {
1051
+ BYTE* const oLitEnd = op + sequence.litLength;
1052
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1053
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1054
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1055
+ const BYTE* match = oLitEnd - sequence.offset;
1056
+
1057
+ assert(op != NULL /* Precondition */);
1058
+ assert(oend_w < oend /* No underflow */);
1059
+ /* Handle edge cases in a slow path:
1060
+ * - Read beyond end of literals
1061
+ * - Match end is within WILDCOPY_OVERLENGTH of oend
1062
+ * - 32-bit mode and the match length overflows
1063
+ */
1064
+ if (UNLIKELY(
1065
+ iLitEnd > litLimit ||
1066
+ oMatchEnd > oend_w ||
1067
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1068
+ return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1069
+
1070
+ /* Assumptions (everything else goes into ZSTD_execSequenceEndSplitLitBuffer()) */
1071
+ assert(op <= oLitEnd /* No overflow */);
1072
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
1073
+ assert(oMatchEnd <= oend /* No underflow */);
1074
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
1075
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
1076
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
1077
+
1078
+ /* Copy Literals:
1079
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1080
+ * We likely don't need the full 32-byte wildcopy.
1081
+ */
1082
+ assert(WILDCOPY_OVERLENGTH >= 16);
1083
+ ZSTD_copy16(op, (*litPtr));
1084
+ if (UNLIKELY(sequence.litLength > 16)) {
1085
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
1086
+ }
1087
+ op = oLitEnd;
1088
+ *litPtr = iLitEnd; /* update for next sequence */
1089
+
1090
+ /* Copy Match */
1091
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1092
+ /* offset beyond prefix -> go into extDict */
1093
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1094
+ match = dictEnd + (match - prefixStart);
1095
+ if (match + sequence.matchLength <= dictEnd) {
1096
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1097
+ return sequenceLength;
1098
+ }
1099
+ /* span extDict & currentPrefixSegment */
1100
+ { size_t const length1 = dictEnd - match;
1101
+ ZSTD_memmove(oLitEnd, match, length1);
1102
+ op = oLitEnd + length1;
1103
+ sequence.matchLength -= length1;
1104
+ match = prefixStart;
1105
+ } }
1106
+ /* Match within prefix of 1 or more bytes */
1107
+ assert(op <= oMatchEnd);
1108
+ assert(oMatchEnd <= oend_w);
1109
+ assert(match >= prefixStart);
1110
+ assert(sequence.matchLength >= 1);
1111
+
1112
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1113
+ * without overlap checking.
1114
+ */
1115
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1116
+ /* We bet on a full wildcopy for matches, since we expect matches to be
1117
+ * longer than literals (in general). In silesia, ~10% of matches are longer
1118
+ * than 16 bytes.
1119
+ */
1120
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1121
+ return sequenceLength;
1122
+ }
1123
+ assert(sequence.offset < WILDCOPY_VECLEN);
1124
+
1125
+ /* Copy 8 bytes and spread the offset to be >= 8. */
1126
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
1127
+
1128
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
1129
+ if (sequence.matchLength > 8) {
1130
+ assert(op < oMatchEnd);
1131
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
1132
+ }
1133
+ return sequenceLength;
1134
+ }
1135
+
1136
+
1137
+ static void
1138
+ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1139
+ {
1140
+ const void* ptr = dt;
1141
+ const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1142
+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1143
+ DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
1144
+ (U32)DStatePtr->state, DTableH->tableLog);
1145
+ BIT_reloadDStream(bitD);
1146
+ DStatePtr->table = dt + 1;
1147
+ }
1148
+
1149
+ FORCE_INLINE_TEMPLATE void
1150
+ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
1151
+ {
1152
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
1153
+ DStatePtr->state = nextState + lowBits;
1154
+ }
1155
+
1156
+ /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
1157
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
1158
+ * bits before reloading. This value is the maximum number of bits we read
1159
+ * after reloading when we are decoding long offsets.
1160
+ */
1161
+ #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
1162
+ (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
1163
+ ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
1164
+ : 0)
1165
+
1166
+ typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
1167
+
1168
+ FORCE_INLINE_TEMPLATE seq_t
1169
+ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1170
+ {
1171
+ seq_t seq;
1172
+ const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1173
+ const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1174
+ const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1175
+ seq.matchLength = mlDInfo->baseValue;
1176
+ seq.litLength = llDInfo->baseValue;
1177
+ { U32 const ofBase = ofDInfo->baseValue;
1178
+ BYTE const llBits = llDInfo->nbAdditionalBits;
1179
+ BYTE const mlBits = mlDInfo->nbAdditionalBits;
1180
+ BYTE const ofBits = ofDInfo->nbAdditionalBits;
1181
+ BYTE const totalBits = llBits+mlBits+ofBits;
1182
+
1183
+ U16 const llNext = llDInfo->nextState;
1184
+ U16 const mlNext = mlDInfo->nextState;
1185
+ U16 const ofNext = ofDInfo->nextState;
1186
+ U32 const llnbBits = llDInfo->nbBits;
1187
+ U32 const mlnbBits = mlDInfo->nbBits;
1188
+ U32 const ofnbBits = ofDInfo->nbBits;
1189
+ /*
1190
+ * As gcc has better branch and block analyzers, sometimes it is only
1191
+ * valuable to mark likeliness for clang, it gives around 3-4% of
1192
+ * performance.
1193
+ */
1194
+
1195
+ /* sequence */
1196
+ { size_t offset;
1197
+ #if defined(__clang__)
1198
+ if (LIKELY(ofBits > 1)) {
1199
+ #else
1200
+ if (ofBits > 1) {
1201
+ #endif
1202
+ ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1203
+ ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1204
+ assert(ofBits <= MaxOff);
1205
+ if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1206
+ U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
1207
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1208
+ BIT_reloadDStream(&seqState->DStream);
1209
+ if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1210
+ assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
1211
+ } else {
1212
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1213
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1214
+ }
1215
+ seqState->prevOffset[2] = seqState->prevOffset[1];
1216
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1217
+ seqState->prevOffset[0] = offset;
1218
+ } else {
1219
+ U32 const ll0 = (llDInfo->baseValue == 0);
1220
+ if (LIKELY((ofBits == 0))) {
1221
+ offset = seqState->prevOffset[ll0];
1222
+ seqState->prevOffset[1] = seqState->prevOffset[!ll0];
1223
+ seqState->prevOffset[0] = offset;
1224
+ } else {
1225
+ offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
1226
+ { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1227
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
1228
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1229
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1230
+ seqState->prevOffset[0] = offset = temp;
1231
+ } } }
1232
+ seq.offset = offset;
1233
+ }
1234
+
1235
+ #if defined(__clang__)
1236
+ if (UNLIKELY(mlBits > 0))
1237
+ #else
1238
+ if (mlBits > 0)
1239
+ #endif
1240
+ seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
1241
+
1242
+ if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1243
+ BIT_reloadDStream(&seqState->DStream);
1244
+ if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1245
+ BIT_reloadDStream(&seqState->DStream);
1246
+ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1247
+ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1248
+
1249
+ #if defined(__clang__)
1250
+ if (UNLIKELY(llBits > 0))
1251
+ #else
1252
+ if (llBits > 0)
1253
+ #endif
1254
+ seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
1255
+
1256
+ if (MEM_32bits())
1257
+ BIT_reloadDStream(&seqState->DStream);
1258
+
1259
+ DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1260
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1261
+
1262
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
1263
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
1264
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1265
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
1266
+ }
1267
+
1268
+ return seq;
1269
+ }
1270
+
1271
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1272
+ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
1273
+ {
1274
+ size_t const windowSize = dctx->fParams.windowSize;
1275
+ /* No dictionary used. */
1276
+ if (dctx->dictContentEndForFuzzing == NULL) return 0;
1277
+ /* Dictionary is our prefix. */
1278
+ if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
1279
+ /* Dictionary is not our ext-dict. */
1280
+ if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
1281
+ /* Dictionary is not within our window size. */
1282
+ if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
1283
+ /* Dictionary is active. */
1284
+ return 1;
1285
+ }
1286
+
1287
+ MEM_STATIC void ZSTD_assertValidSequence(
1288
+ ZSTD_DCtx const* dctx,
1289
+ BYTE const* op, BYTE const* oend,
1290
+ seq_t const seq,
1291
+ BYTE const* prefixStart, BYTE const* virtualStart)
1292
+ { /* Asserts that one decoded sequence is plausible for the output position `op`; every check is compiled out (arguments merely marked as used) unless DEBUGLEVEL >= 1. */
1293
+ #if DEBUGLEVEL >= 1
1294
+ size_t const windowSize = dctx->fParams.windowSize;
1295
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
1296
+ BYTE const* const oLitEnd = op + seq.litLength;
1297
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
1298
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1299
+ assert(op <= oend);
1300
+ assert((size_t)(oend - op) >= sequenceSize); /* literals + match must fit in the remaining output space */
1301
+ assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
1302
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1303
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
1304
+ /* Offset must be within the dictionary : no further back than virtualStart, and at most window size + dictionary size. */
1305
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1306
+ assert(seq.offset <= windowSize + dictSize);
1307
+ } else {
1308
+ /* Offset must be within our window. */
1309
+ assert(seq.offset <= windowSize);
1310
+ }
1311
+ #else
1312
+ (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1313
+ #endif
1314
+ }
1315
+ #endif
1316
+
1317
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1318
+
1319
+
1320
+ FORCE_INLINE_TEMPLATE size_t
1321
+ DONT_VECTORIZE
1322
+ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
1323
+ void* dst, size_t maxDstSize,
1324
+ const void* seqStart, size_t seqSize, int nbSeq,
1325
+ const ZSTD_longOffset_e isLongOffset,
1326
+ const int frame)
1327
+ {
1328
+ const BYTE* ip = (const BYTE*)seqStart;
1329
+ const BYTE* const iend = ip + seqSize;
1330
+ BYTE* const ostart = (BYTE*)dst;
1331
+ BYTE* const oend = ostart + maxDstSize;
1332
+ BYTE* op = ostart;
1333
+ const BYTE* litPtr = dctx->litPtr;
1334
+ const BYTE* litBufferEnd = dctx->litBufferEnd;
1335
+ const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1336
+ const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1337
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1338
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
1339
+ (void)frame;
1340
+
1341
+ /* Regen sequences */
1342
+ if (nbSeq) {
1343
+ seqState_t seqState;
1344
+ dctx->fseEntropy = 1;
1345
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1346
+ RETURN_ERROR_IF(
1347
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1348
+ corruption_detected, "");
1349
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1350
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1351
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1352
+ assert(dst != NULL);
1353
+
1354
+ ZSTD_STATIC_ASSERT(
1355
+ BIT_DStream_unfinished < BIT_DStream_completed &&
1356
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1357
+ BIT_DStream_completed < BIT_DStream_overflow);
1358
+
1359
+ /* decompress sequences while their literals still fit before dctx->litBufferEnd (first segment of the split literal buffer) */
1360
+ {
1361
+ seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1362
+ /* Align the decompression loop to 32 + 16 bytes.
1363
+ *
1364
+ * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1365
+ * speed swings based on the alignment of the decompression loop. This
1366
+ * performance swing is caused by parts of the decompression loop falling
1367
+ * out of the DSB. The entire decompression loop should fit in the DSB,
1368
+ * when it can't we get much worse performance. You can measure if you've
1369
+ * hit the good case or the bad case with this perf command for some
1370
+ * compressed file test.zst:
1371
+ *
1372
+ * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1373
+ * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1374
+ *
1375
+ * If you see most cycles served out of the MITE you've hit the bad case.
1376
+ * If you see most cycles served out of the DSB you've hit the good case.
1377
+ * If it is pretty even then you may be in an okay case.
1378
+ *
1379
+ * This issue has been reproduced on the following CPUs:
1380
+ * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1381
+ * Use Instruments->Counters to get DSB/MITE cycles.
1382
+ * I never got performance swings, but I was able to
1383
+ * go from the good case of mostly DSB to half of the
1384
+ * cycles served from MITE.
1385
+ * - Coffeelake: Intel i9-9900k
1386
+ * - Coffeelake: Intel i7-9700k
1387
+ *
1388
+ * I haven't been able to reproduce the instability or DSB misses on any
1389
+ * of the following CPUs:
1390
+ * - Haswell
1391
+ * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
1392
+ * - Skylake
1393
+ *
1394
+ * Alignment is done for each of the three major decompression loops:
1395
+ * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1396
+ * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1397
+ * - ZSTD_decompressSequences_body
1398
+ * Alignment choices are made to minimize large swings on bad cases and influence on performance
1399
+ * from changes external to this code, rather than to overoptimize on the current commit.
1400
+ *
1401
+ * If you are seeing performance instability, this script can help test.
1402
+ * It tests on 4 commits in zstd where I saw performance change.
1403
+ *
1404
+ * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1405
+ */
1406
+ #if defined(__GNUC__) && defined(__x86_64__)
1407
+ __asm__(".p2align 6");
1408
+ # if __GNUC__ >= 7
1409
+ /* good for gcc-7, gcc-9, and gcc-11 */
1410
+ __asm__("nop");
1411
+ __asm__(".p2align 5");
1412
+ __asm__("nop");
1413
+ __asm__(".p2align 4");
1414
+ # if __GNUC__ == 8 || __GNUC__ == 10
1415
+ /* good for gcc-8 and gcc-10 */
1416
+ __asm__("nop");
1417
+ __asm__(".p2align 3");
1418
+ # endif
1419
+ # endif
1420
+ #endif
1421
+
1422
+ /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
1423
+ for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
1424
+ size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1425
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1426
+ assert(!ZSTD_isError(oneSeqSize));
1427
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1428
+ #endif
1429
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1430
+ return oneSeqSize;
1431
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1432
+ op += oneSeqSize;
1433
+ if (UNLIKELY(!--nbSeq))
1434
+ break;
1435
+ BIT_reloadDStream(&(seqState.DStream));
1436
+ sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1437
+ }
1438
+
1439
+ /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litBufferEnd */
1440
+ if (nbSeq > 0) {
1441
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1442
+ if (leftoverLit)
1443
+ {
1444
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1445
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1446
+ sequence.litLength -= leftoverLit;
1447
+ op += leftoverLit;
1448
+ }
1449
+ litPtr = dctx->litExtraBuffer;
1450
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1451
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1452
+ {
1453
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1454
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1455
+ assert(!ZSTD_isError(oneSeqSize));
1456
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1457
+ #endif
1458
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1459
+ return oneSeqSize;
1460
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1461
+ op += oneSeqSize;
1462
+ if (--nbSeq)
1463
+ BIT_reloadDStream(&(seqState.DStream));
1464
+ }
1465
+ }
1466
+ }
1467
+
1468
+ if (nbSeq > 0) /* there is remaining lit from extra buffer */
1469
+ {
1470
+
1471
+ #if defined(__GNUC__) && defined(__x86_64__)
1472
+ __asm__(".p2align 6");
1473
+ __asm__("nop");
1474
+ # if __GNUC__ != 7
1475
+ /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
1476
+ __asm__(".p2align 4");
1477
+ __asm__("nop");
1478
+ __asm__(".p2align 3");
1479
+ # elif __GNUC__ >= 11
1480
+ __asm__(".p2align 3");
1481
+ # else
1482
+ __asm__(".p2align 5");
1483
+ __asm__("nop");
1484
+ __asm__(".p2align 3");
1485
+ # endif
1486
+ #endif
1487
+
1488
+ for (; ; ) {
1489
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1490
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1491
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1492
+ assert(!ZSTD_isError(oneSeqSize));
1493
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1494
+ #endif
1495
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1496
+ return oneSeqSize;
1497
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1498
+ op += oneSeqSize;
1499
+ if (UNLIKELY(!--nbSeq))
1500
+ break;
1501
+ BIT_reloadDStream(&(seqState.DStream));
1502
+ }
1503
+ }
1504
+
1505
+ /* check if reached exact end : error out if sequences remain or the bitstream has not reached BIT_DStream_completed */
1506
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1507
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1508
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1509
+ /* save reps for next block */
1510
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1511
+ }
1512
+
1513
+ /* last literal segment */
1514
+ if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
1515
+ {
1516
+ size_t const lastLLSize = litBufferEnd - litPtr;
1517
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1518
+ if (op != NULL) {
1519
+ ZSTD_memmove(op, litPtr, lastLLSize);
1520
+ op += lastLLSize;
1521
+ }
1522
+ litPtr = dctx->litExtraBuffer;
1523
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1524
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1525
+ }
1526
+ { size_t const lastLLSize = litBufferEnd - litPtr;
1527
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1528
+ if (op != NULL) {
1529
+ ZSTD_memcpy(op, litPtr, lastLLSize);
1530
+ op += lastLLSize;
1531
+ }
1532
+ }
1533
+
1534
+ return op-ostart;
1535
+ }
1536
+
1537
+ FORCE_INLINE_TEMPLATE size_t
1538
+ DONT_VECTORIZE
1539
+ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1540
+ void* dst, size_t maxDstSize,
1541
+ const void* seqStart, size_t seqSize, int nbSeq,
1542
+ const ZSTD_longOffset_e isLongOffset,
1543
+ const int frame)
1544
+ {
1545
+ const BYTE* ip = (const BYTE*)seqStart;
1546
+ const BYTE* const iend = ip + seqSize;
1547
+ BYTE* const ostart = (BYTE*)dst;
1548
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
1549
+ BYTE* op = ostart;
1550
+ const BYTE* litPtr = dctx->litPtr;
1551
+ const BYTE* const litEnd = litPtr + dctx->litSize;
1552
+ const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1553
+ const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1554
+ const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1555
+ DEBUGLOG(5, "ZSTD_decompressSequences_body");
1556
+ (void)frame;
1557
+
1558
+ /* Regen sequences */
1559
+ if (nbSeq) {
1560
+ seqState_t seqState;
1561
+ dctx->fseEntropy = 1;
1562
+ { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1563
+ RETURN_ERROR_IF(
1564
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1565
+ corruption_detected, "");
1566
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1567
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1568
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1569
+ assert(dst != NULL);
1570
+
1571
+ ZSTD_STATIC_ASSERT(
1572
+ BIT_DStream_unfinished < BIT_DStream_completed &&
1573
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1574
+ BIT_DStream_completed < BIT_DStream_overflow);
1575
+
1576
+ #if defined(__GNUC__) && defined(__x86_64__)
1577
+ __asm__(".p2align 6");
1578
+ __asm__("nop");
1579
+ # if __GNUC__ >= 7
1580
+ __asm__(".p2align 5");
1581
+ __asm__("nop");
1582
+ __asm__(".p2align 3");
1583
+ # else
1584
+ __asm__(".p2align 4");
1585
+ __asm__("nop");
1586
+ __asm__(".p2align 3");
1587
+ # endif
1588
+ #endif
1589
+
1590
+ for ( ; ; ) {
1591
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1592
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1593
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1594
+ assert(!ZSTD_isError(oneSeqSize));
1595
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1596
+ #endif
1597
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1598
+ return oneSeqSize;
1599
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1600
+ op += oneSeqSize;
1601
+ if (UNLIKELY(!--nbSeq))
1602
+ break;
1603
+ BIT_reloadDStream(&(seqState.DStream));
1604
+ }
1605
+
1606
+ /* check if reached exact end : error out if sequences remain or the bitstream has not reached BIT_DStream_completed */
1607
+ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
1608
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1609
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1610
+ /* save reps for next block */
1611
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1612
+ }
1613
+
1614
+ /* last literal segment */
1615
+ { size_t const lastLLSize = litEnd - litPtr;
1616
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1617
+ if (op != NULL) {
1618
+ ZSTD_memcpy(op, litPtr, lastLLSize);
1619
+ op += lastLLSize;
1620
+ }
1621
+ }
1622
+
1623
+ return op-ostart;
1624
+ }
1625
+
1626
+ static size_t
1626
+ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1627
+ void* dst, size_t maxDstSize,
1628
+ const void* seqStart, size_t seqSize, int nbSeq,
1629
+ const ZSTD_longOffset_e isLongOffset,
1630
+ const int frame)
1631
+ { /* Default instantiation of the force-inlined ZSTD_decompressSequences_body() template : forwards every argument unchanged and returns its result. */
1632
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1633
+ }
1635
+
1636
+ static size_t
1636
+ ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1637
+ void* dst, size_t maxDstSize,
1638
+ const void* seqStart, size_t seqSize, int nbSeq,
1639
+ const ZSTD_longOffset_e isLongOffset,
1640
+ const int frame)
1641
+ { /* Default instantiation of the force-inlined ZSTD_decompressSequences_bodySplitLitBuffer() template : forwards every argument unchanged and returns its result. */
1642
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1643
+ }
1645
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1646
+
1647
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1648
+
1649
+ FORCE_INLINE_TEMPLATE size_t
1649
+ ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1650
+ const BYTE* const prefixStart, const BYTE* const dictEnd)
1651
+ { /* Prefetches the match source of `sequence` and returns the position reached after it (prefetchPos + litLength + matchLength); the caller tracks prefetchPos relative to prefixStart. */
1652
+ prefetchPos += sequence.litLength;
1653
+ { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; /* an offset reaching back before the prefix is resolved from dictEnd */
1654
+ const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1655
+ * No consequence though : memory address is only used for prefetching, not for dereferencing */
1656
+ PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1657
+ }
1658
+ return prefetchPos + sequence.matchLength;
1659
+ }
1661
+
1662
+ /* This decoding function employs prefetching
1663
+ * to reduce latency impact of cache misses.
1664
+ * It's generally employed when block contains a significant portion of long-distance matches
1665
+ * or when coupled with a "cold" dictionary */
1666
+ FORCE_INLINE_TEMPLATE size_t
1667
+ ZSTD_decompressSequencesLong_body(
1668
+ ZSTD_DCtx* dctx,
1669
+ void* dst, size_t maxDstSize,
1670
+ const void* seqStart, size_t seqSize, int nbSeq,
1671
+ const ZSTD_longOffset_e isLongOffset,
1672
+ const int frame)
1673
+ {
1674
+ const BYTE* ip = (const BYTE*)seqStart;
1675
+ const BYTE* const iend = ip + seqSize;
1676
+ BYTE* const ostart = (BYTE*)dst;
1677
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
1678
+ BYTE* op = ostart;
1679
+ const BYTE* litPtr = dctx->litPtr;
1680
+ const BYTE* litBufferEnd = dctx->litBufferEnd;
1681
+ const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1682
+ const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1683
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1684
+ (void)frame;
1685
+
1686
+ /* Regen sequences */
1687
+ if (nbSeq) {
1688
+ #define STORED_SEQS 8
1689
+ #define STORED_SEQS_MASK (STORED_SEQS-1)
1690
+ #define ADVANCED_SEQS STORED_SEQS
1691
+ seq_t sequences[STORED_SEQS];
1692
+ int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1693
+ seqState_t seqState;
1694
+ int seqNb;
1695
+ size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
1696
+
1697
+ dctx->fseEntropy = 1;
1698
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1699
+ assert(dst != NULL);
1700
+ assert(iend >= ip);
1701
+ RETURN_ERROR_IF(
1702
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1703
+ corruption_detected, "");
1704
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1705
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1706
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1707
+
1708
+ /* prepare in advance */
1709
+ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1710
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1711
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1712
+ sequences[seqNb] = sequence;
1713
+ }
1714
+ RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1715
+
1716
+ /* decompress without stomping litBuffer */
1717
+ for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
1718
+ seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1719
+ size_t oneSeqSize;
1720
+
1721
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
1722
+ {
1723
+ /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
1724
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1725
+ if (leftoverLit)
1726
+ {
1727
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1728
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1729
+ sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1730
+ op += leftoverLit;
1731
+ }
1732
+ litPtr = dctx->litExtraBuffer;
1733
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1734
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1735
+ oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1736
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1737
+ assert(!ZSTD_isError(oneSeqSize));
1738
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1739
+ #endif
1740
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1741
+
1742
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1743
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
1744
+ op += oneSeqSize;
1745
+ }
1746
+ else
1747
+ {
1748
+ /* lit buffer is either wholly contained in first or second split, or not split at all */
1749
+ oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1750
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1751
+ ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1752
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1753
+ assert(!ZSTD_isError(oneSeqSize));
1754
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1755
+ #endif
1756
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1757
+
1758
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1759
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
1760
+ op += oneSeqSize;
1761
+ }
1762
+ }
1763
+ RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1764
+
1765
+ /* finish queue */
1766
+ seqNb -= seqAdvance;
1767
+ for ( ; seqNb<nbSeq ; seqNb++) {
1768
+ seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1769
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
1770
+ {
1771
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1772
+ if (leftoverLit)
1773
+ {
1774
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1775
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1776
+ sequence->litLength -= leftoverLit;
1777
+ op += leftoverLit;
1778
+ }
1779
+ litPtr = dctx->litExtraBuffer;
1780
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1781
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1782
+ {
1783
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1784
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1785
+ assert(!ZSTD_isError(oneSeqSize));
1786
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1787
+ #endif
1788
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1789
+ op += oneSeqSize;
1790
+ }
1791
+ }
1792
+ else
1793
+ {
1794
+ size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1795
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1796
+ ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1797
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1798
+ assert(!ZSTD_isError(oneSeqSize));
1799
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1800
+ #endif
1801
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1802
+ op += oneSeqSize;
1803
+ }
1804
+ }
1805
+
1806
+ /* save reps for next block */
1807
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1808
+ }
1809
+
1810
+ /* last literal segment */
1811
+ if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
1812
+ {
1813
+ size_t const lastLLSize = litBufferEnd - litPtr;
1814
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1815
+ if (op != NULL) {
1816
+ ZSTD_memmove(op, litPtr, lastLLSize);
1817
+ op += lastLLSize;
1818
+ }
1819
+ litPtr = dctx->litExtraBuffer;
1820
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1821
+ }
1822
+ { size_t const lastLLSize = litBufferEnd - litPtr;
1823
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1824
+ if (op != NULL) {
1825
+ ZSTD_memmove(op, litPtr, lastLLSize);
1826
+ op += lastLLSize;
1827
+ }
1828
+ }
1829
+
1830
+ return op-ostart;
1831
+ }
1832
+
1833
+ static size_t
1833
+ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1834
+ void* dst, size_t maxDstSize,
1835
+ const void* seqStart, size_t seqSize, int nbSeq,
1836
+ const ZSTD_longOffset_e isLongOffset,
1837
+ const int frame)
1838
+ { /* Default instantiation of the force-inlined ZSTD_decompressSequencesLong_body() template : forwards every argument unchanged and returns its result. */
1839
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1840
+ }
1842
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1843
+
1844
+
1845
+
1846
+ #if DYNAMIC_BMI2
1847
+
1848
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1849
+ static BMI2_TARGET_ATTRIBUTE size_t
1849
+ DONT_VECTORIZE
1850
+ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1851
+ void* dst, size_t maxDstSize,
1852
+ const void* seqStart, size_t seqSize, int nbSeq,
1853
+ const ZSTD_longOffset_e isLongOffset,
1854
+ const int frame)
1855
+ { /* Instantiation of ZSTD_decompressSequences_body() carrying BMI2_TARGET_ATTRIBUTE (presumably enables BMI2 code generation -- confirm against the macro definition); arguments are forwarded unchanged. */
1856
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1857
+ }
1859
+ static BMI2_TARGET_ATTRIBUTE size_t
1859
+ DONT_VECTORIZE
1860
+ ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1861
+ void* dst, size_t maxDstSize,
1862
+ const void* seqStart, size_t seqSize, int nbSeq,
1863
+ const ZSTD_longOffset_e isLongOffset,
1864
+ const int frame)
1865
+ { /* Instantiation of ZSTD_decompressSequences_bodySplitLitBuffer() carrying BMI2_TARGET_ATTRIBUTE (presumably enables BMI2 code generation -- confirm against the macro definition); arguments are forwarded unchanged. */
1866
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1867
+ }
1869
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1870
+
1871
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1872
+ static BMI2_TARGET_ATTRIBUTE size_t
1872
+ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1873
+ void* dst, size_t maxDstSize,
1874
+ const void* seqStart, size_t seqSize, int nbSeq,
1875
+ const ZSTD_longOffset_e isLongOffset,
1876
+ const int frame)
1877
+ { /* Instantiation of ZSTD_decompressSequencesLong_body() carrying BMI2_TARGET_ATTRIBUTE (presumably enables BMI2 code generation -- confirm against the macro definition); arguments are forwarded unchanged. */
1878
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1879
+ }
1881
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1882
+
1883
+ #endif /* DYNAMIC_BMI2 */
1884
+
1885
+ typedef size_t (*ZSTD_decompressSequences_t)( /* function-pointer type with the parameter list shared by the ZSTD_decompressSequences*() functions of this file */
1886
+ ZSTD_DCtx* dctx,
1887
+ void* dst, size_t maxDstSize,
1888
+ const void* seqStart, size_t seqSize, int nbSeq,
1889
+ const ZSTD_longOffset_e isLongOffset,
1890
+ const int frame);
1891
+
1892
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1893
+ static size_t
1893
+ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1894
+ const void* seqStart, size_t seqSize, int nbSeq,
1895
+ const ZSTD_longOffset_e isLongOffset,
1896
+ const int frame)
1897
+ { /* Runtime dispatch : uses the _bmi2 instantiation when DYNAMIC_BMI2 is compiled in and ZSTD_DCtx_get_bmi2(dctx) is non-zero, the _default instantiation otherwise. */
1898
+ DEBUGLOG(5, "ZSTD_decompressSequences");
1899
+ #if DYNAMIC_BMI2
1900
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1901
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1902
+ }
1903
+ #endif
1904
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1905
+ }
1907
+ static size_t
1907
+ ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1908
+ const void* seqStart, size_t seqSize, int nbSeq,
1909
+ const ZSTD_longOffset_e isLongOffset,
1910
+ const int frame)
1911
+ { /* Runtime dispatch for the split-literal-buffer decoder : uses the _bmi2 instantiation when DYNAMIC_BMI2 is compiled in and ZSTD_DCtx_get_bmi2(dctx) is non-zero, the _default instantiation otherwise. */
1912
+ DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
1913
+ #if DYNAMIC_BMI2
1914
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1915
+ return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1916
+ }
1917
+ #endif
1918
+ return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1919
+ }
1921
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1922
+
1923
+
1924
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1925
+ /* ZSTD_decompressSequencesLong() :
1925
+ * decompression function triggered when a minimum share of offsets is considered "long",
1926
+ * aka out of cache, or when the dictionary is flagged cold (see usePrefetchDecoder in ZSTD_decompressBlock_internal()).
1927
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1928
+ * This function will try to mitigate main memory latency through the use of prefetching.
1929
+ * It only dispatches to ZSTD_decompressSequencesLong_bmi2() or ZSTD_decompressSequencesLong_default(). */
1930
+ static size_t
1931
+ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1932
+ void* dst, size_t maxDstSize,
1933
+ const void* seqStart, size_t seqSize, int nbSeq,
1934
+ const ZSTD_longOffset_e isLongOffset,
1935
+ const int frame)
1936
+ {
1937
+ DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1938
+ #if DYNAMIC_BMI2
1939
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1940
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1941
+ }
1942
+ #endif
1943
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1944
+ }
1945
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1946
+
1947
+
1948
+
1949
+ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1950
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1951
+ /* ZSTD_getLongOffsetsShare() :
1951
+ * condition : offTable must be valid (its first cell is read as a ZSTD_seqSymbol_header, the symbols follow it)
1952
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23)), counted as table cells with nbAdditionalBits > 22,
1953
+ * compared to maximum possible of (1<<OffFSELog) : the count is scaled up when tableLog < OffFSELog */
1954
+ static unsigned
1955
+ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1956
+ {
1957
+ const void* ptr = offTable;
1958
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1959
+ const ZSTD_seqSymbol* table = offTable + 1; /* skip the header cell */
1960
+ U32 const max = 1 << tableLog; /* number of cells to scan */
1961
+ U32 u, total = 0;
1962
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1963
+
1964
+ assert(max <= (1 << OffFSELog)); /* max not too large */
1965
+ for (u=0; u<max; u++) {
1966
+ if (table[u].nbAdditionalBits > 22) total += 1; /* this cell counts as a long offset */
1967
+ }
1968
+
1969
+ assert(tableLog <= OffFSELog);
1970
+ total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
1971
+
1972
+ return total;
1973
+ }
1975
+ #endif
1976
+
1977
+ size_t
1978
+ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1979
+ void* dst, size_t dstCapacity,
1980
+ const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
1981
+ { /* blockType == blockCompressed */
1982
+ const BYTE* ip = (const BYTE*)src;
1983
+ /* isLongOffset must be true if there are long offsets.
1984
+ * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1985
+ * We don't expect that to be the case in 64-bit mode.
1986
+ * In block mode, window size is not known, so we have to be conservative.
1987
+ * (note: but it could be evaluated from current-lowLimit)
1988
+ */
1989
+ ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1990
+ DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1991
+
1992
+ RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
1993
+
1994
+ /* Decode literals section */
1995
+ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
1996
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
1997
+ if (ZSTD_isError(litCSize)) return litCSize;
1998
+ ip += litCSize;
1999
+ srcSize -= litCSize;
2000
+ }
2001
+
2002
+ /* Build Decoding Tables */
2003
+ {
2004
+ /* These macros control at build-time which decompressor implementation
2005
+ * we use. If neither is defined, we do some inspection and dispatch at
2006
+ * runtime.
2007
+ */
2008
+ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2009
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2010
+ int usePrefetchDecoder = dctx->ddictIsCold;
2011
+ #endif
2012
+ int nbSeq;
2013
+ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
2014
+ if (ZSTD_isError(seqHSize)) return seqHSize;
2015
+ ip += seqHSize;
2016
+ srcSize -= seqHSize;
2017
+
2018
+ RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2019
+
2020
+ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2021
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2022
+ if ( !usePrefetchDecoder
2023
+ && (!frame || (dctx->fParams.windowSize > (1<<24)))
2024
+ && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
2025
+ U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
2026
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2027
+ usePrefetchDecoder = (shareLongOffsets >= minShare);
2028
+ }
2029
+ #endif
2030
+
2031
+ dctx->ddictIsCold = 0;
2032
+
2033
+ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2034
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2035
+ if (usePrefetchDecoder)
2036
+ #endif
2037
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
2038
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2039
+ #endif
2040
+
2041
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2042
+ /* else */
2043
+ if (dctx->litBufferLocation == ZSTD_split)
2044
+ return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2045
+ else
2046
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2047
+ #endif
2048
+ }
2049
+ }
2050
+
2051
+
2052
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2053
+ {
2054
+ if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
2055
+ dctx->dictEnd = dctx->previousDstEnd;
2056
+ dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
2057
+ dctx->prefixStart = dst;
2058
+ dctx->previousDstEnd = dst;
2059
+ }
2060
+ }
2061
+
2062
+
2063
+ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2064
+ void* dst, size_t dstCapacity,
2065
+ const void* src, size_t srcSize)
2066
+ {
2067
+ size_t dSize;
2068
+ ZSTD_checkContinuity(dctx, dst, dstCapacity);
2069
+ dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
2070
+ dctx->previousDstEnd = (char*)dst + dSize;
2071
+ return dSize;
2072
+ }