zstdlib 0.10.0-x64-mingw32 → 0.11.0-x64-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +8 -0
  3. data/ext/zstdlib_c/extconf.rb +2 -2
  4. data/ext/zstdlib_c/ruby/zlib-3.2/zstdlib.c +5090 -0
  5. data/ext/zstdlib_c/zstd-1.5.5/lib/common/allocations.h +55 -0
  6. data/ext/zstdlib_c/zstd-1.5.5/lib/common/bits.h +200 -0
  7. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/bitstream.h +19 -60
  8. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/compiler.h +26 -3
  9. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/cpu.h +1 -1
  10. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.c +1 -1
  11. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.h +1 -1
  12. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/entropy_common.c +12 -40
  13. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.c +9 -2
  14. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.h +1 -1
  15. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse.h +5 -83
  16. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse_decompress.c +7 -99
  17. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/huf.h +65 -156
  18. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/mem.h +39 -46
  19. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.c +26 -10
  20. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.h +7 -1
  21. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/portability_macros.h +22 -3
  22. data/ext/zstdlib_c/zstd-1.5.5/lib/common/threading.c +176 -0
  23. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/threading.h +5 -10
  24. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.c +2 -2
  25. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.h +8 -8
  26. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_common.c +1 -36
  27. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_deps.h +1 -1
  28. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_internal.h +17 -118
  29. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_trace.h +3 -3
  30. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/clevels.h +1 -1
  31. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/fse_compress.c +7 -124
  32. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.c +1 -1
  33. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.h +1 -1
  34. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/huf_compress.c +234 -169
  35. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress.c +1243 -538
  36. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_internal.h +225 -151
  37. data/ext/zstdlib_c/zstd-1.5.5/lib/compress/zstd_compress_literals.c +235 -0
  38. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_literals.h +16 -8
  39. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.c +3 -3
  40. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.h +1 -1
  41. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.c +25 -21
  42. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.h +1 -1
  43. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_cwksp.h +128 -62
  44. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.c +95 -33
  45. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.h +3 -2
  46. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.c +433 -148
  47. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.h +3 -2
  48. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.c +398 -345
  49. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.h +4 -2
  50. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.c +5 -5
  51. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.h +1 -1
  52. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm_geartab.h +1 -1
  53. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.c +106 -80
  54. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.h +1 -1
  55. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.c +17 -9
  56. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.h +1 -1
  57. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress.c +434 -441
  58. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress_amd64.S +30 -39
  59. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.c +4 -4
  60. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.h +1 -1
  61. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress.c +205 -80
  62. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.c +201 -81
  63. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.h +6 -1
  64. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_internal.h +4 -2
  65. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zdict.h +53 -31
  66. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd.h +580 -135
  67. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd_errors.h +27 -8
  68. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzclose.c +1 -1
  69. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzcompatibility.h +8 -8
  70. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzguts.h +10 -10
  71. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzlib.c +3 -3
  72. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzread.c +10 -10
  73. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzwrite.c +5 -5
  74. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.c +46 -44
  75. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.h +4 -1
  76. data/lib/2.4/zstdlib_c.so +0 -0
  77. data/lib/2.5/zstdlib_c.so +0 -0
  78. data/lib/2.6/zstdlib_c.so +0 -0
  79. data/lib/2.7/zstdlib_c.so +0 -0
  80. data/lib/3.0/zstdlib_c.so +0 -0
  81. metadata +80 -77
  82. data/ext/zstdlib_c/zstd-1.5.2/lib/common/threading.c +0 -122
  83. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_literals.c +0 -159
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * huff0 huffman decoder,
3
3
  * part of Finite State Entropy library
4
- * Copyright (c) Yann Collet, Facebook, Inc.
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -19,10 +19,10 @@
19
19
  #include "../common/compiler.h"
20
20
  #include "../common/bitstream.h" /* BIT_* */
21
21
  #include "../common/fse.h" /* to compress headers */
22
- #define HUF_STATIC_LINKING_ONLY
23
22
  #include "../common/huf.h"
24
23
  #include "../common/error_private.h"
25
24
  #include "../common/zstd_internal.h"
25
+ #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */
26
26
 
27
27
  /* **************************************************************
28
28
  * Constants
@@ -43,10 +43,14 @@
43
43
  #error "Cannot force the use of the X1 and X2 decoders at the same time!"
44
44
  #endif
45
45
 
46
- #if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
47
- # define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
46
+ /* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is
47
+ * supported at runtime, so we can add the BMI2 target attribute.
48
+ * When it is disabled, we will still get BMI2 if it is enabled statically.
49
+ */
50
+ #if DYNAMIC_BMI2
51
+ # define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
48
52
  #else
49
- # define HUF_ASM_X86_64_BMI2_ATTRS
53
+ # define HUF_FAST_BMI2_ATTRS
50
54
  #endif
51
55
 
52
56
  #ifdef __cplusplus
@@ -56,18 +60,12 @@
56
60
  #endif
57
61
  #define HUF_ASM_DECL HUF_EXTERN_C
58
62
 
59
- #if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
63
+ #if DYNAMIC_BMI2
60
64
  # define HUF_NEED_BMI2_FUNCTION 1
61
65
  #else
62
66
  # define HUF_NEED_BMI2_FUNCTION 0
63
67
  #endif
64
68
 
65
- #if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
66
- # define HUF_NEED_DEFAULT_FUNCTION 1
67
- #else
68
- # define HUF_NEED_DEFAULT_FUNCTION 0
69
- #endif
70
-
71
69
  /* **************************************************************
72
70
  * Error Management
73
71
  ****************************************************************/
@@ -84,6 +82,11 @@
84
82
  /* **************************************************************
85
83
  * BMI2 Variant Wrappers
86
84
  ****************************************************************/
85
+ typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize,
86
+ const void *cSrc,
87
+ size_t cSrcSize,
88
+ const HUF_DTable *DTable);
89
+
87
90
  #if DYNAMIC_BMI2
88
91
 
89
92
  #define HUF_DGEN(fn) \
@@ -105,9 +108,9 @@
105
108
  } \
106
109
  \
107
110
  static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
108
- size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
111
+ size_t cSrcSize, HUF_DTable const* DTable, int flags) \
109
112
  { \
110
- if (bmi2) { \
113
+ if (flags & HUF_flags_bmi2) { \
111
114
  return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
112
115
  } \
113
116
  return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
@@ -117,9 +120,9 @@
117
120
 
118
121
  #define HUF_DGEN(fn) \
119
122
  static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
120
- size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
123
+ size_t cSrcSize, HUF_DTable const* DTable, int flags) \
121
124
  { \
122
- (void)bmi2; \
125
+ (void)flags; \
123
126
  return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
124
127
  }
125
128
 
@@ -138,15 +141,28 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
138
141
  return dtd;
139
142
  }
140
143
 
141
- #if ZSTD_ENABLE_ASM_X86_64_BMI2
142
-
143
- static size_t HUF_initDStream(BYTE const* ip) {
144
+ static size_t HUF_initFastDStream(BYTE const* ip) {
144
145
  BYTE const lastByte = ip[7];
145
- size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
146
+ size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
146
147
  size_t const value = MEM_readLEST(ip) | 1;
147
148
  assert(bitsConsumed <= 8);
149
+ assert(sizeof(size_t) == 8);
148
150
  return value << bitsConsumed;
149
151
  }
152
+
153
+
154
+ /**
155
+ * The input/output arguments to the Huffman fast decoding loop:
156
+ *
157
+ * ip [in/out] - The input pointers, must be updated to reflect what is consumed.
158
+ * op [in/out] - The output pointers, must be updated to reflect what is written.
159
+ * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
160
+ * dt [in] - The decoding table.
161
+ * ilimit [in] - The input limit, stop when any input pointer is below ilimit.
162
+ * oend [in] - The end of the output stream. op[3] must not cross oend.
163
+ * iend [in] - The end of each input stream. ip[i] may cross iend[i],
164
+ * as long as it is above ilimit, but that indicates corruption.
165
+ */
150
166
  typedef struct {
151
167
  BYTE const* ip[4];
152
168
  BYTE* op[4];
@@ -155,15 +171,17 @@ typedef struct {
155
171
  BYTE const* ilimit;
156
172
  BYTE* oend;
157
173
  BYTE const* iend[4];
158
- } HUF_DecompressAsmArgs;
174
+ } HUF_DecompressFastArgs;
175
+
176
+ typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
159
177
 
160
178
  /**
161
- * Initializes args for the asm decoding loop.
162
- * @returns 0 on success
163
- * 1 if the fallback implementation should be used.
179
+ * Initializes args for the fast decoding loop.
180
+ * @returns 1 on success
181
+ * 0 if the fallback implementation should be used.
164
182
  * Or an error code on failure.
165
183
  */
166
- static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
184
+ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
167
185
  {
168
186
  void const* dt = DTable + 1;
169
187
  U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
@@ -172,9 +190,11 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
172
190
 
173
191
  BYTE* const oend = (BYTE*)dst + dstSize;
174
192
 
175
- /* The following condition is false on x32 platform,
176
- * but HUF_asm is not compatible with this ABI */
177
- if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
193
+ /* The fast decoding loop assumes 64-bit little-endian.
194
+ * This condition is false on x32.
195
+ */
196
+ if (!MEM_isLittleEndian() || MEM_32bits())
197
+ return 0;
178
198
 
179
199
  /* strict minimum : jump table + 1 byte per stream */
180
200
  if (srcSize < 10)
@@ -185,7 +205,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
185
205
  * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
186
206
  */
187
207
  if (dtLog != HUF_DECODER_FAST_TABLELOG)
188
- return 1;
208
+ return 0;
189
209
 
190
210
  /* Read the jump table. */
191
211
  {
@@ -199,13 +219,13 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
199
219
  args->iend[2] = args->iend[1] + length2;
200
220
  args->iend[3] = args->iend[2] + length3;
201
221
 
202
- /* HUF_initDStream() requires this, and this small of an input
222
+ /* HUF_initFastDStream() requires this, and this small of an input
203
223
  * won't benefit from the ASM loop anyways.
204
224
  * length1 must be >= 16 so that ip[0] >= ilimit before the loop
205
225
  * starts.
206
226
  */
207
227
  if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
208
- return 1;
228
+ return 0;
209
229
  if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
210
230
  }
211
231
  /* ip[] contains the position that is currently loaded into bits[]. */
@@ -222,7 +242,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
222
242
 
223
243
  /* No point to call the ASM loop for tiny outputs. */
224
244
  if (args->op[3] >= oend)
225
- return 1;
245
+ return 0;
226
246
 
227
247
  /* bits[] is the bit container.
228
248
  * It is read from the MSB down to the LSB.
@@ -231,10 +251,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
231
251
  * set, so that CountTrailingZeros(bits[]) can be used
232
252
  * to count how many bits we've consumed.
233
253
  */
234
- args->bits[0] = HUF_initDStream(args->ip[0]);
235
- args->bits[1] = HUF_initDStream(args->ip[1]);
236
- args->bits[2] = HUF_initDStream(args->ip[2]);
237
- args->bits[3] = HUF_initDStream(args->ip[3]);
254
+ args->bits[0] = HUF_initFastDStream(args->ip[0]);
255
+ args->bits[1] = HUF_initFastDStream(args->ip[1]);
256
+ args->bits[2] = HUF_initFastDStream(args->ip[2]);
257
+ args->bits[3] = HUF_initFastDStream(args->ip[3]);
238
258
 
239
259
  /* If ip[] >= ilimit, it is guaranteed to be safe to
240
260
  * reload bits[]. It may be beyond its section, but is
@@ -245,10 +265,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
245
265
  args->oend = oend;
246
266
  args->dt = dt;
247
267
 
248
- return 0;
268
+ return 1;
249
269
  }
250
270
 
251
- static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
271
+ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd)
252
272
  {
253
273
  /* Validate that we haven't overwritten. */
254
274
  if (args->op[stream] > segmentEnd)
@@ -262,15 +282,15 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs
262
282
  return ERROR(corruption_detected);
263
283
 
264
284
  /* Construct the BIT_DStream_t. */
265
- bit->bitContainer = MEM_readLE64(args->ip[stream]);
266
- bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
285
+ assert(sizeof(size_t) == 8);
286
+ bit->bitContainer = MEM_readLEST(args->ip[stream]);
287
+ bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
267
288
  bit->start = (const char*)args->iend[0];
268
289
  bit->limitPtr = bit->start + sizeof(size_t);
269
290
  bit->ptr = (const char*)args->ip[stream];
270
291
 
271
292
  return 0;
272
293
  }
273
- #endif
274
294
 
275
295
 
276
296
  #ifndef HUF_FORCE_DECOMPRESS_X2
@@ -287,10 +307,11 @@ typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decodi
287
307
  static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
288
308
  U64 D4;
289
309
  if (MEM_isLittleEndian()) {
290
- D4 = (symbol << 8) + nbBits;
310
+ D4 = (U64)((symbol << 8) + nbBits);
291
311
  } else {
292
- D4 = symbol + (nbBits << 8);
312
+ D4 = (U64)(symbol + (nbBits << 8));
293
313
  }
314
+ assert(D4 < (1U << 16));
294
315
  D4 *= 0x0001000100010001ULL;
295
316
  return D4;
296
317
  }
@@ -333,13 +354,7 @@ typedef struct {
333
354
  BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
334
355
  } HUF_ReadDTableX1_Workspace;
335
356
 
336
-
337
- size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
338
- {
339
- return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
340
- }
341
-
342
- size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
357
+ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags)
343
358
  {
344
359
  U32 tableLog = 0;
345
360
  U32 nbSymbols = 0;
@@ -354,7 +369,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
354
369
  DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
355
370
  /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
356
371
 
357
- iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
372
+ iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags);
358
373
  if (HUF_isError(iSize)) return iSize;
359
374
 
360
375
 
@@ -381,9 +396,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
381
396
  * rankStart[0] is not filled because there are no entries in the table for
382
397
  * weight 0.
383
398
  */
384
- {
385
- int n;
386
- int nextRankStart = 0;
399
+ { int n;
400
+ U32 nextRankStart = 0;
387
401
  int const unroll = 4;
388
402
  int const nLimit = (int)nbSymbols - unroll + 1;
389
403
  for (n=0; n<(int)tableLog+1; n++) {
@@ -410,10 +424,9 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
410
424
  * We can switch based on the length to a different inner loop which is
411
425
  * optimized for that particular case.
412
426
  */
413
- {
414
- U32 w;
415
- int symbol=wksp->rankVal[0];
416
- int rankStart=0;
427
+ { U32 w;
428
+ int symbol = wksp->rankVal[0];
429
+ int rankStart = 0;
417
430
  for (w=1; w<tableLog+1; ++w) {
418
431
  int const symbolCount = wksp->rankVal[w];
419
432
  int const length = (1 << w) >> 1;
@@ -523,7 +536,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
523
536
  while (p < pEnd)
524
537
  HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
525
538
 
526
- return pEnd-pStart;
539
+ return (size_t)(pEnd-pStart);
527
540
  }
528
541
 
529
542
  FORCE_INLINE_TEMPLATE size_t
@@ -549,6 +562,10 @@ HUF_decompress1X1_usingDTable_internal_body(
549
562
  return dstSize;
550
563
  }
551
564
 
565
+ /* HUF_decompress4X1_usingDTable_internal_body():
566
+ * Conditions :
567
+ * @dstSize >= 6
568
+ */
552
569
  FORCE_INLINE_TEMPLATE size_t
553
570
  HUF_decompress4X1_usingDTable_internal_body(
554
571
  void* dst, size_t dstSize,
@@ -592,6 +609,7 @@ HUF_decompress4X1_usingDTable_internal_body(
592
609
 
593
610
  if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
594
611
  if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
612
+ if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
595
613
  CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
596
614
  CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
597
615
  CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
@@ -654,38 +672,142 @@ size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo
654
672
  }
655
673
  #endif
656
674
 
657
- #if HUF_NEED_DEFAULT_FUNCTION
658
675
  static
659
676
  size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
660
677
  size_t cSrcSize, HUF_DTable const* DTable) {
661
678
  return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
662
679
  }
663
- #endif
664
680
 
665
681
  #if ZSTD_ENABLE_ASM_X86_64_BMI2
666
682
 
667
- HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
683
+ HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
668
684
 
669
- static HUF_ASM_X86_64_BMI2_ATTRS
685
+ #endif
686
+
687
+ static HUF_FAST_BMI2_ATTRS
688
+ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
689
+ {
690
+ U64 bits[4];
691
+ BYTE const* ip[4];
692
+ BYTE* op[4];
693
+ U16 const* const dtable = (U16 const*)args->dt;
694
+ BYTE* const oend = args->oend;
695
+ BYTE const* const ilimit = args->ilimit;
696
+
697
+ /* Copy the arguments to local variables */
698
+ ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
699
+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
700
+ ZSTD_memcpy(&op, &args->op, sizeof(op));
701
+
702
+ assert(MEM_isLittleEndian());
703
+ assert(!MEM_32bits());
704
+
705
+ for (;;) {
706
+ BYTE* olimit;
707
+ int stream;
708
+ int symbol;
709
+
710
+ /* Assert loop preconditions */
711
+ #ifndef NDEBUG
712
+ for (stream = 0; stream < 4; ++stream) {
713
+ assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
714
+ assert(ip[stream] >= ilimit);
715
+ }
716
+ #endif
717
+ /* Compute olimit */
718
+ {
719
+ /* Each iteration produces 5 output symbols per stream */
720
+ size_t const oiters = (size_t)(oend - op[3]) / 5;
721
+ /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
722
+ * per stream.
723
+ */
724
+ size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
725
+ /* We can safely run iters iterations before running bounds checks */
726
+ size_t const iters = MIN(oiters, iiters);
727
+ size_t const symbols = iters * 5;
728
+
729
+ /* We can simply check that op[3] < olimit, instead of checking all
730
+ * of our bounds, since we can't hit the other bounds until we've run
731
+ * iters iterations, which only happens when op[3] == olimit.
732
+ */
733
+ olimit = op[3] + symbols;
734
+
735
+ /* Exit fast decoding loop once we get close to the end. */
736
+ if (op[3] + 20 > olimit)
737
+ break;
738
+
739
+ /* Exit the decoding loop if any input pointer has crossed the
740
+ * previous one. This indicates corruption, and a precondition
741
+ * to our loop is that ip[i] >= ip[0].
742
+ */
743
+ for (stream = 1; stream < 4; ++stream) {
744
+ if (ip[stream] < ip[stream - 1])
745
+ goto _out;
746
+ }
747
+ }
748
+
749
+ #ifndef NDEBUG
750
+ for (stream = 1; stream < 4; ++stream) {
751
+ assert(ip[stream] >= ip[stream - 1]);
752
+ }
753
+ #endif
754
+
755
+ do {
756
+ /* Decode 5 symbols in each of the 4 streams */
757
+ for (symbol = 0; symbol < 5; ++symbol) {
758
+ for (stream = 0; stream < 4; ++stream) {
759
+ int const index = (int)(bits[stream] >> 53);
760
+ int const entry = (int)dtable[index];
761
+ bits[stream] <<= (entry & 63);
762
+ op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
763
+ }
764
+ }
765
+ /* Reload the bitstreams */
766
+ for (stream = 0; stream < 4; ++stream) {
767
+ int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
768
+ int const nbBits = ctz & 7;
769
+ int const nbBytes = ctz >> 3;
770
+ op[stream] += 5;
771
+ ip[stream] -= nbBytes;
772
+ bits[stream] = MEM_read64(ip[stream]) | 1;
773
+ bits[stream] <<= nbBits;
774
+ }
775
+ } while (op[3] < olimit);
776
+ }
777
+
778
+ _out:
779
+
780
+ /* Save the final values of each of the state variables back to args. */
781
+ ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
782
+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
783
+ ZSTD_memcpy(&args->op, &op, sizeof(op));
784
+ }
785
+
786
+ /**
787
+ * @returns @p dstSize on success (>= 6)
788
+ * 0 if the fallback implementation should be used
789
+ * An error if an error occurred
790
+ */
791
+ static HUF_FAST_BMI2_ATTRS
670
792
  size_t
671
- HUF_decompress4X1_usingDTable_internal_bmi2_asm(
793
+ HUF_decompress4X1_usingDTable_internal_fast(
672
794
  void* dst, size_t dstSize,
673
795
  const void* cSrc, size_t cSrcSize,
674
- const HUF_DTable* DTable)
796
+ const HUF_DTable* DTable,
797
+ HUF_DecompressFastLoopFn loopFn)
675
798
  {
676
799
  void const* dt = DTable + 1;
677
800
  const BYTE* const iend = (const BYTE*)cSrc + 6;
678
801
  BYTE* const oend = (BYTE*)dst + dstSize;
679
- HUF_DecompressAsmArgs args;
680
- {
681
- size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
682
- FORWARD_IF_ERROR(ret, "Failed to init asm args");
683
- if (ret != 0)
684
- return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
802
+ HUF_DecompressFastArgs args;
803
+ { size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
804
+ FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
805
+ if (ret == 0)
806
+ return 0;
685
807
  }
686
808
 
687
809
  assert(args.ip[0] >= args.ilimit);
688
- HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
810
+ loopFn(&args);
689
811
 
690
812
  /* Our loop guarantees that ip[] >= ilimit and that we haven't
691
813
  * overwritten any op[].
@@ -698,8 +820,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
698
820
  (void)iend;
699
821
 
700
822
  /* finish bit streams one by one. */
701
- {
702
- size_t const segmentSize = (dstSize+3) / 4;
823
+ { size_t const segmentSize = (dstSize+3) / 4;
703
824
  BYTE* segmentEnd = (BYTE*)dst;
704
825
  int i;
705
826
  for (i = 0; i < 4; ++i) {
@@ -716,97 +837,59 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
716
837
  }
717
838
 
718
839
  /* decoded size */
840
+ assert(dstSize != 0);
719
841
  return dstSize;
720
842
  }
721
- #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
722
-
723
- typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
724
- const void *cSrc,
725
- size_t cSrcSize,
726
- const HUF_DTable *DTable);
727
843
 
728
844
  HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
729
845
 
730
846
  static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
731
- size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
847
+ size_t cSrcSize, HUF_DTable const* DTable, int flags)
732
848
  {
849
+ HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default;
850
+ HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop;
851
+
733
852
  #if DYNAMIC_BMI2
734
- if (bmi2) {
853
+ if (flags & HUF_flags_bmi2) {
854
+ fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
735
855
  # if ZSTD_ENABLE_ASM_X86_64_BMI2
736
- return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
737
- # else
738
- return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
856
+ if (!(flags & HUF_flags_disableAsm)) {
857
+ loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
858
+ }
739
859
  # endif
860
+ } else {
861
+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
740
862
  }
741
- #else
742
- (void)bmi2;
743
863
  #endif
744
864
 
745
865
  #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
746
- return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
747
- #else
748
- return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
866
+ if (!(flags & HUF_flags_disableAsm)) {
867
+ loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
868
+ }
749
869
  #endif
750
- }
751
870
 
752
-
753
- size_t HUF_decompress1X1_usingDTable(
754
- void* dst, size_t dstSize,
755
- const void* cSrc, size_t cSrcSize,
756
- const HUF_DTable* DTable)
757
- {
758
- DTableDesc dtd = HUF_getDTableDesc(DTable);
759
- if (dtd.tableType != 0) return ERROR(GENERIC);
760
- return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
761
- }
762
-
763
- size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
764
- const void* cSrc, size_t cSrcSize,
765
- void* workSpace, size_t wkspSize)
766
- {
767
- const BYTE* ip = (const BYTE*) cSrc;
768
-
769
- size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
770
- if (HUF_isError(hSize)) return hSize;
771
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
772
- ip += hSize; cSrcSize -= hSize;
773
-
774
- return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
775
- }
776
-
777
-
778
- size_t HUF_decompress4X1_usingDTable(
779
- void* dst, size_t dstSize,
780
- const void* cSrc, size_t cSrcSize,
781
- const HUF_DTable* DTable)
782
- {
783
- DTableDesc dtd = HUF_getDTableDesc(DTable);
784
- if (dtd.tableType != 0) return ERROR(GENERIC);
785
- return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
871
+ if (!(flags & HUF_flags_disableFast)) {
872
+ size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
873
+ if (ret != 0)
874
+ return ret;
875
+ }
876
+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
786
877
  }
787
878
 
788
- static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
879
+ static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
789
880
  const void* cSrc, size_t cSrcSize,
790
- void* workSpace, size_t wkspSize, int bmi2)
881
+ void* workSpace, size_t wkspSize, int flags)
791
882
  {
792
883
  const BYTE* ip = (const BYTE*) cSrc;
793
884
 
794
- size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
885
+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
795
886
  if (HUF_isError(hSize)) return hSize;
796
887
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
797
888
  ip += hSize; cSrcSize -= hSize;
798
889
 
799
- return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
800
- }
801
-
802
- size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
803
- const void* cSrc, size_t cSrcSize,
804
- void* workSpace, size_t wkspSize)
805
- {
806
- return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
890
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
807
891
  }
808
892
 
809
-
810
893
  #endif /* HUF_FORCE_DECOMPRESS_X2 */
811
894
 
812
895
 
@@ -989,7 +1072,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32
989
1072
 
990
1073
  static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
991
1074
  const sortedSymbol_t* sortedList,
992
- const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
1075
+ const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight,
993
1076
  const U32 nbBitsBaseline)
994
1077
  {
995
1078
  U32* const rankVal = rankValOrigin[0];
@@ -1044,14 +1127,7 @@ typedef struct {
1044
1127
 
1045
1128
  size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
1046
1129
  const void* src, size_t srcSize,
1047
- void* workSpace, size_t wkspSize)
1048
- {
1049
- return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
1050
- }
1051
-
1052
- size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
1053
- const void* src, size_t srcSize,
1054
- void* workSpace, size_t wkspSize, int bmi2)
1130
+ void* workSpace, size_t wkspSize, int flags)
1055
1131
  {
1056
1132
  U32 tableLog, maxW, nbSymbols;
1057
1133
  DTableDesc dtd = HUF_getDTableDesc(DTable);
@@ -1073,7 +1149,7 @@ size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
1073
1149
  if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
1074
1150
  /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
1075
1151
 
1076
- iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
1152
+ iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags);
1077
1153
  if (HUF_isError(iSize)) return iSize;
1078
1154
 
1079
1155
  /* check result */
@@ -1244,6 +1320,11 @@ HUF_decompress1X2_usingDTable_internal_body(
1244
1320
  /* decoded size */
1245
1321
  return dstSize;
1246
1322
  }
1323
+
1324
+ /* HUF_decompress4X2_usingDTable_internal_body():
1325
+ * Conditions:
1326
+ * @dstSize >= 6
1327
+ */
1247
1328
  FORCE_INLINE_TEMPLATE size_t
1248
1329
  HUF_decompress4X2_usingDTable_internal_body(
1249
1330
  void* dst, size_t dstSize,
@@ -1284,8 +1365,9 @@ HUF_decompress4X2_usingDTable_internal_body(
1284
1365
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
1285
1366
  U32 const dtLog = dtd.tableLog;
1286
1367
 
1287
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
1288
- if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
1368
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
1369
+ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
1370
+ if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
1289
1371
  CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
1290
1372
  CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
1291
1373
  CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
@@ -1370,36 +1452,177 @@ size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, vo
1370
1452
  }
1371
1453
  #endif
1372
1454
 
1373
- #if HUF_NEED_DEFAULT_FUNCTION
1374
1455
  static
1375
1456
  size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
1376
1457
  size_t cSrcSize, HUF_DTable const* DTable) {
1377
1458
  return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
1378
1459
  }
1379
- #endif
1380
1460
 
1381
1461
  #if ZSTD_ENABLE_ASM_X86_64_BMI2
1382
1462
 
1383
- HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
1463
+ HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
1464
+
1465
+ #endif
1466
+
1467
+ static HUF_FAST_BMI2_ATTRS
1468
+ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
1469
+ {
1470
+ U64 bits[4];
1471
+ BYTE const* ip[4];
1472
+ BYTE* op[4];
1473
+ BYTE* oend[4];
1474
+ HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
1475
+ BYTE const* const ilimit = args->ilimit;
1476
+
1477
+ /* Copy the arguments to local registers. */
1478
+ ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
1479
+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
1480
+ ZSTD_memcpy(&op, &args->op, sizeof(op));
1481
+
1482
+ oend[0] = op[1];
1483
+ oend[1] = op[2];
1484
+ oend[2] = op[3];
1485
+ oend[3] = args->oend;
1486
+
1487
+ assert(MEM_isLittleEndian());
1488
+ assert(!MEM_32bits());
1489
+
1490
+ for (;;) {
1491
+ BYTE* olimit;
1492
+ int stream;
1493
+ int symbol;
1494
+
1495
+ /* Assert loop preconditions */
1496
+ #ifndef NDEBUG
1497
+ for (stream = 0; stream < 4; ++stream) {
1498
+ assert(op[stream] <= oend[stream]);
1499
+ assert(ip[stream] >= ilimit);
1500
+ }
1501
+ #endif
1502
+ /* Compute olimit */
1503
+ {
1504
+ /* Each loop does 5 table lookups for each of the 4 streams.
1505
+ * Each table lookup consumes up to 11 bits of input, and produces
1506
+ * up to 2 bytes of output.
1507
+ */
1508
+ /* We can consume up to 7 bytes of input per iteration per stream.
1509
+ * We also know that each input pointer is >= ip[0]. So we can run
1510
+ * iters loops before running out of input.
1511
+ */
1512
+ size_t iters = (size_t)(ip[0] - ilimit) / 7;
1513
+ /* Each iteration can produce up to 10 bytes of output per stream.
1514
+ * Each output stream my advance at different rates. So take the
1515
+ * minimum number of safe iterations among all the output streams.
1516
+ */
1517
+ for (stream = 0; stream < 4; ++stream) {
1518
+ size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
1519
+ iters = MIN(iters, oiters);
1520
+ }
1521
+
1522
+ /* Each iteration produces at least 5 output symbols. So until
1523
+ * op[3] crosses olimit, we know we haven't executed iters
1524
+ * iterations yet. This saves us maintaining an iters counter,
1525
+ * at the expense of computing the remaining # of iterations
1526
+ * more frequently.
1527
+ */
1528
+ olimit = op[3] + (iters * 5);
1529
+
1530
+ /* Exit the fast decoding loop if we are too close to the end. */
1531
+ if (op[3] + 10 > olimit)
1532
+ break;
1533
+
1534
+ /* Exit the decoding loop if any input pointer has crossed the
1535
+ * previous one. This indicates corruption, and a precondition
1536
+ * to our loop is that ip[i] >= ip[0].
1537
+ */
1538
+ for (stream = 1; stream < 4; ++stream) {
1539
+ if (ip[stream] < ip[stream - 1])
1540
+ goto _out;
1541
+ }
1542
+ }
1543
+
1544
+ #ifndef NDEBUG
1545
+ for (stream = 1; stream < 4; ++stream) {
1546
+ assert(ip[stream] >= ip[stream - 1]);
1547
+ }
1548
+ #endif
1549
+
1550
+ do {
1551
+ /* Do 5 table lookups for each of the first 3 streams */
1552
+ for (symbol = 0; symbol < 5; ++symbol) {
1553
+ for (stream = 0; stream < 3; ++stream) {
1554
+ int const index = (int)(bits[stream] >> 53);
1555
+ HUF_DEltX2 const entry = dtable[index];
1556
+ MEM_write16(op[stream], entry.sequence);
1557
+ bits[stream] <<= (entry.nbBits);
1558
+ op[stream] += (entry.length);
1559
+ }
1560
+ }
1561
+ /* Do 1 table lookup from the final stream */
1562
+ {
1563
+ int const index = (int)(bits[3] >> 53);
1564
+ HUF_DEltX2 const entry = dtable[index];
1565
+ MEM_write16(op[3], entry.sequence);
1566
+ bits[3] <<= (entry.nbBits);
1567
+ op[3] += (entry.length);
1568
+ }
1569
+ /* Do 4 table lookups from the final stream & reload bitstreams */
1570
+ for (stream = 0; stream < 4; ++stream) {
1571
+ /* Do a table lookup from the final stream.
1572
+ * This is interleaved with the reloading to reduce register
1573
+ * pressure. This shouldn't be necessary, but compilers can
1574
+ * struggle with codegen with high register pressure.
1575
+ */
1576
+ {
1577
+ int const index = (int)(bits[3] >> 53);
1578
+ HUF_DEltX2 const entry = dtable[index];
1579
+ MEM_write16(op[3], entry.sequence);
1580
+ bits[3] <<= (entry.nbBits);
1581
+ op[3] += (entry.length);
1582
+ }
1583
+ /* Reload the bitstreams. The final bitstream must be reloaded
1584
+ * after the 5th symbol was decoded.
1585
+ */
1586
+ {
1587
+ int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
1588
+ int const nbBits = ctz & 7;
1589
+ int const nbBytes = ctz >> 3;
1590
+ ip[stream] -= nbBytes;
1591
+ bits[stream] = MEM_read64(ip[stream]) | 1;
1592
+ bits[stream] <<= nbBits;
1593
+ }
1594
+ }
1595
+ } while (op[3] < olimit);
1596
+ }
1384
1597
 
1385
- static HUF_ASM_X86_64_BMI2_ATTRS size_t
1386
- HUF_decompress4X2_usingDTable_internal_bmi2_asm(
1598
+ _out:
1599
+
1600
+ /* Save the final values of each of the state variables back to args. */
1601
+ ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
1602
+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
1603
+ ZSTD_memcpy(&args->op, &op, sizeof(op));
1604
+ }
1605
+
1606
+
1607
+ static HUF_FAST_BMI2_ATTRS size_t
1608
+ HUF_decompress4X2_usingDTable_internal_fast(
1387
1609
  void* dst, size_t dstSize,
1388
1610
  const void* cSrc, size_t cSrcSize,
1389
- const HUF_DTable* DTable) {
1611
+ const HUF_DTable* DTable,
1612
+ HUF_DecompressFastLoopFn loopFn) {
1390
1613
  void const* dt = DTable + 1;
1391
1614
  const BYTE* const iend = (const BYTE*)cSrc + 6;
1392
1615
  BYTE* const oend = (BYTE*)dst + dstSize;
1393
- HUF_DecompressAsmArgs args;
1616
+ HUF_DecompressFastArgs args;
1394
1617
  {
1395
- size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
1618
+ size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
1396
1619
  FORWARD_IF_ERROR(ret, "Failed to init asm args");
1397
- if (ret != 0)
1398
- return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
1620
+ if (ret == 0)
1621
+ return 0;
1399
1622
  }
1400
1623
 
1401
1624
  assert(args.ip[0] >= args.ilimit);
1402
- HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
1625
+ loopFn(&args);
1403
1626
 
1404
1627
  /* note : op4 already verified within main loop */
1405
1628
  assert(args.ip[0] >= iend);
@@ -1430,91 +1653,72 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm(
1430
1653
  /* decoded size */
1431
1654
  return dstSize;
1432
1655
  }
1433
- #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
1434
1656
 
1435
1657
  static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
1436
- size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
1658
+ size_t cSrcSize, HUF_DTable const* DTable, int flags)
1437
1659
  {
1660
+ HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default;
1661
+ HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop;
1662
+
1438
1663
  #if DYNAMIC_BMI2
1439
- if (bmi2) {
1664
+ if (flags & HUF_flags_bmi2) {
1665
+ fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
1440
1666
  # if ZSTD_ENABLE_ASM_X86_64_BMI2
1441
- return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
1442
- # else
1443
- return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
1667
+ if (!(flags & HUF_flags_disableAsm)) {
1668
+ loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
1669
+ }
1444
1670
  # endif
1671
+ } else {
1672
+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
1445
1673
  }
1446
- #else
1447
- (void)bmi2;
1448
1674
  #endif
1449
1675
 
1450
1676
  #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
1451
- return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
1452
- #else
1453
- return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
1677
+ if (!(flags & HUF_flags_disableAsm)) {
1678
+ loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
1679
+ }
1454
1680
  #endif
1681
+
1682
+ if (!(flags & HUF_flags_disableFast)) {
1683
+ size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
1684
+ if (ret != 0)
1685
+ return ret;
1686
+ }
1687
+ return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
1455
1688
  }
1456
1689
 
1457
1690
  HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
1458
1691
 
1459
- size_t HUF_decompress1X2_usingDTable(
1460
- void* dst, size_t dstSize,
1461
- const void* cSrc, size_t cSrcSize,
1462
- const HUF_DTable* DTable)
1463
- {
1464
- DTableDesc dtd = HUF_getDTableDesc(DTable);
1465
- if (dtd.tableType != 1) return ERROR(GENERIC);
1466
- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1467
- }
1468
-
1469
1692
  size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
1470
1693
  const void* cSrc, size_t cSrcSize,
1471
- void* workSpace, size_t wkspSize)
1694
+ void* workSpace, size_t wkspSize, int flags)
1472
1695
  {
1473
1696
  const BYTE* ip = (const BYTE*) cSrc;
1474
1697
 
1475
1698
  size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
1476
- workSpace, wkspSize);
1699
+ workSpace, wkspSize, flags);
1477
1700
  if (HUF_isError(hSize)) return hSize;
1478
1701
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1479
1702
  ip += hSize; cSrcSize -= hSize;
1480
1703
 
1481
- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
1482
- }
1483
-
1484
-
1485
- size_t HUF_decompress4X2_usingDTable(
1486
- void* dst, size_t dstSize,
1487
- const void* cSrc, size_t cSrcSize,
1488
- const HUF_DTable* DTable)
1489
- {
1490
- DTableDesc dtd = HUF_getDTableDesc(DTable);
1491
- if (dtd.tableType != 1) return ERROR(GENERIC);
1492
- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1704
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
1493
1705
  }
1494
1706
 
1495
- static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
1707
+ static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1496
1708
  const void* cSrc, size_t cSrcSize,
1497
- void* workSpace, size_t wkspSize, int bmi2)
1709
+ void* workSpace, size_t wkspSize, int flags)
1498
1710
  {
1499
1711
  const BYTE* ip = (const BYTE*) cSrc;
1500
1712
 
1501
1713
  size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
1502
- workSpace, wkspSize);
1714
+ workSpace, wkspSize, flags);
1503
1715
  if (HUF_isError(hSize)) return hSize;
1504
1716
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1505
1717
  ip += hSize; cSrcSize -= hSize;
1506
1718
 
1507
- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1719
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
1508
1720
  }
1509
1721
 
1510
- size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1511
- const void* cSrc, size_t cSrcSize,
1512
- void* workSpace, size_t wkspSize)
1513
- {
1514
- return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
1515
- }
1516
-
1517
-
1518
1722
  #endif /* HUF_FORCE_DECOMPRESS_X1 */
1519
1723
 
1520
1724
 
@@ -1522,44 +1726,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1522
1726
  /* Universal decompression selectors */
1523
1727
  /* ***********************************/
1524
1728
 
1525
- size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
1526
- const void* cSrc, size_t cSrcSize,
1527
- const HUF_DTable* DTable)
1528
- {
1529
- DTableDesc const dtd = HUF_getDTableDesc(DTable);
1530
- #if defined(HUF_FORCE_DECOMPRESS_X1)
1531
- (void)dtd;
1532
- assert(dtd.tableType == 0);
1533
- return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1534
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
1535
- (void)dtd;
1536
- assert(dtd.tableType == 1);
1537
- return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1538
- #else
1539
- return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1540
- HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1541
- #endif
1542
- }
1543
-
1544
- size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
1545
- const void* cSrc, size_t cSrcSize,
1546
- const HUF_DTable* DTable)
1547
- {
1548
- DTableDesc const dtd = HUF_getDTableDesc(DTable);
1549
- #if defined(HUF_FORCE_DECOMPRESS_X1)
1550
- (void)dtd;
1551
- assert(dtd.tableType == 0);
1552
- return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1553
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
1554
- (void)dtd;
1555
- assert(dtd.tableType == 1);
1556
- return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1557
- #else
1558
- return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1559
- HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1560
- #endif
1561
- }
1562
-
1563
1729
 
1564
1730
  #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1565
1731
  typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
@@ -1614,36 +1780,9 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1614
1780
  #endif
1615
1781
  }
1616
1782
 
1617
-
1618
- size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1619
- size_t dstSize, const void* cSrc,
1620
- size_t cSrcSize, void* workSpace,
1621
- size_t wkspSize)
1622
- {
1623
- /* validation checks */
1624
- if (dstSize == 0) return ERROR(dstSize_tooSmall);
1625
- if (cSrcSize == 0) return ERROR(corruption_detected);
1626
-
1627
- { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1628
- #if defined(HUF_FORCE_DECOMPRESS_X1)
1629
- (void)algoNb;
1630
- assert(algoNb == 0);
1631
- return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1632
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
1633
- (void)algoNb;
1634
- assert(algoNb == 1);
1635
- return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1636
- #else
1637
- return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1638
- cSrcSize, workSpace, wkspSize):
1639
- HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1640
- #endif
1641
- }
1642
- }
1643
-
1644
1783
  size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1645
1784
  const void* cSrc, size_t cSrcSize,
1646
- void* workSpace, size_t wkspSize)
1785
+ void* workSpace, size_t wkspSize, int flags)
1647
1786
  {
1648
1787
  /* validation checks */
1649
1788
  if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -1656,71 +1795,71 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1656
1795
  (void)algoNb;
1657
1796
  assert(algoNb == 0);
1658
1797
  return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1659
- cSrcSize, workSpace, wkspSize);
1798
+ cSrcSize, workSpace, wkspSize, flags);
1660
1799
  #elif defined(HUF_FORCE_DECOMPRESS_X2)
1661
1800
  (void)algoNb;
1662
1801
  assert(algoNb == 1);
1663
1802
  return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1664
- cSrcSize, workSpace, wkspSize);
1803
+ cSrcSize, workSpace, wkspSize, flags);
1665
1804
  #else
1666
1805
  return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1667
- cSrcSize, workSpace, wkspSize):
1806
+ cSrcSize, workSpace, wkspSize, flags):
1668
1807
  HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1669
- cSrcSize, workSpace, wkspSize);
1808
+ cSrcSize, workSpace, wkspSize, flags);
1670
1809
  #endif
1671
1810
  }
1672
1811
  }
1673
1812
 
1674
1813
 
1675
- size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1814
+ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
1676
1815
  {
1677
1816
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
1678
1817
  #if defined(HUF_FORCE_DECOMPRESS_X1)
1679
1818
  (void)dtd;
1680
1819
  assert(dtd.tableType == 0);
1681
- return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1820
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1682
1821
  #elif defined(HUF_FORCE_DECOMPRESS_X2)
1683
1822
  (void)dtd;
1684
1823
  assert(dtd.tableType == 1);
1685
- return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1824
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1686
1825
  #else
1687
- return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1688
- HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1826
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
1827
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1689
1828
  #endif
1690
1829
  }
1691
1830
 
1692
1831
  #ifndef HUF_FORCE_DECOMPRESS_X2
1693
- size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1832
+ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
1694
1833
  {
1695
1834
  const BYTE* ip = (const BYTE*) cSrc;
1696
1835
 
1697
- size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1836
+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
1698
1837
  if (HUF_isError(hSize)) return hSize;
1699
1838
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1700
1839
  ip += hSize; cSrcSize -= hSize;
1701
1840
 
1702
- return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1841
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
1703
1842
  }
1704
1843
  #endif
1705
1844
 
1706
- size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1845
+ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
1707
1846
  {
1708
1847
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
1709
1848
  #if defined(HUF_FORCE_DECOMPRESS_X1)
1710
1849
  (void)dtd;
1711
1850
  assert(dtd.tableType == 0);
1712
- return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1851
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1713
1852
  #elif defined(HUF_FORCE_DECOMPRESS_X2)
1714
1853
  (void)dtd;
1715
1854
  assert(dtd.tableType == 1);
1716
- return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1855
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1717
1856
  #else
1718
- return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1719
- HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1857
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
1858
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
1720
1859
  #endif
1721
1860
  }
1722
1861
 
1723
- size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1862
+ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
1724
1863
  {
1725
1864
  /* validation checks */
1726
1865
  if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -1730,160 +1869,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
1730
1869
  #if defined(HUF_FORCE_DECOMPRESS_X1)
1731
1870
  (void)algoNb;
1732
1871
  assert(algoNb == 0);
1733
- return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1872
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
1734
1873
  #elif defined(HUF_FORCE_DECOMPRESS_X2)
1735
1874
  (void)algoNb;
1736
1875
  assert(algoNb == 1);
1737
- return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1876
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
1738
1877
  #else
1739
- return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1740
- HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1878
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) :
1879
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
1741
1880
  #endif
1742
1881
  }
1743
1882
  }
1744
-
1745
- #ifndef ZSTD_NO_UNUSED_FUNCTIONS
1746
- #ifndef HUF_FORCE_DECOMPRESS_X2
1747
- size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
1748
- {
1749
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1750
- return HUF_readDTableX1_wksp(DTable, src, srcSize,
1751
- workSpace, sizeof(workSpace));
1752
- }
1753
-
1754
- size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
1755
- const void* cSrc, size_t cSrcSize)
1756
- {
1757
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1758
- return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
1759
- workSpace, sizeof(workSpace));
1760
- }
1761
-
1762
- size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1763
- {
1764
- HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1765
- return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
1766
- }
1767
- #endif
1768
-
1769
- #ifndef HUF_FORCE_DECOMPRESS_X1
1770
- size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
1771
- {
1772
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1773
- return HUF_readDTableX2_wksp(DTable, src, srcSize,
1774
- workSpace, sizeof(workSpace));
1775
- }
1776
-
1777
- size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
1778
- const void* cSrc, size_t cSrcSize)
1779
- {
1780
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1781
- return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
1782
- workSpace, sizeof(workSpace));
1783
- }
1784
-
1785
- size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1786
- {
1787
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1788
- return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1789
- }
1790
- #endif
1791
-
1792
- #ifndef HUF_FORCE_DECOMPRESS_X2
1793
- size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1794
- {
1795
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1796
- return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1797
- workSpace, sizeof(workSpace));
1798
- }
1799
- size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1800
- {
1801
- HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1802
- return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1803
- }
1804
- #endif
1805
-
1806
- #ifndef HUF_FORCE_DECOMPRESS_X1
1807
- size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1808
- const void* cSrc, size_t cSrcSize)
1809
- {
1810
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1811
- return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1812
- workSpace, sizeof(workSpace));
1813
- }
1814
-
1815
- size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1816
- {
1817
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1818
- return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1819
- }
1820
- #endif
1821
-
1822
- typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1823
-
1824
- size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1825
- {
1826
- #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1827
- static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1828
- #endif
1829
-
1830
- /* validation checks */
1831
- if (dstSize == 0) return ERROR(dstSize_tooSmall);
1832
- if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1833
- if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1834
- if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1835
-
1836
- { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1837
- #if defined(HUF_FORCE_DECOMPRESS_X1)
1838
- (void)algoNb;
1839
- assert(algoNb == 0);
1840
- return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1841
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
1842
- (void)algoNb;
1843
- assert(algoNb == 1);
1844
- return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1845
- #else
1846
- return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1847
- #endif
1848
- }
1849
- }
1850
-
1851
- size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1852
- {
1853
- /* validation checks */
1854
- if (dstSize == 0) return ERROR(dstSize_tooSmall);
1855
- if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1856
- if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1857
- if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1858
-
1859
- { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1860
- #if defined(HUF_FORCE_DECOMPRESS_X1)
1861
- (void)algoNb;
1862
- assert(algoNb == 0);
1863
- return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1864
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
1865
- (void)algoNb;
1866
- assert(algoNb == 1);
1867
- return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1868
- #else
1869
- return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1870
- HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1871
- #endif
1872
- }
1873
- }
1874
-
1875
- size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1876
- {
1877
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1878
- return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1879
- workSpace, sizeof(workSpace));
1880
- }
1881
-
1882
- size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1883
- const void* cSrc, size_t cSrcSize)
1884
- {
1885
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1886
- return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1887
- workSpace, sizeof(workSpace));
1888
- }
1889
- #endif