extzstd 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +28 -14
  3. data/contrib/zstd/CHANGELOG +114 -56
  4. data/contrib/zstd/CONTRIBUTING.md +14 -0
  5. data/contrib/zstd/Makefile +37 -31
  6. data/contrib/zstd/README.md +6 -0
  7. data/contrib/zstd/appveyor.yml +4 -1
  8. data/contrib/zstd/lib/Makefile +231 -134
  9. data/contrib/zstd/lib/README.md +28 -0
  10. data/contrib/zstd/lib/common/bitstream.h +24 -15
  11. data/contrib/zstd/lib/common/compiler.h +116 -3
  12. data/contrib/zstd/lib/common/cpu.h +0 -2
  13. data/contrib/zstd/lib/common/debug.h +11 -18
  14. data/contrib/zstd/lib/common/entropy_common.c +188 -42
  15. data/contrib/zstd/lib/common/error_private.c +1 -0
  16. data/contrib/zstd/lib/common/error_private.h +1 -1
  17. data/contrib/zstd/lib/common/fse.h +38 -11
  18. data/contrib/zstd/lib/common/fse_decompress.c +123 -16
  19. data/contrib/zstd/lib/common/huf.h +26 -5
  20. data/contrib/zstd/lib/common/mem.h +66 -93
  21. data/contrib/zstd/lib/common/pool.c +22 -16
  22. data/contrib/zstd/lib/common/pool.h +1 -1
  23. data/contrib/zstd/lib/common/threading.c +6 -5
  24. data/contrib/zstd/lib/common/xxhash.c +18 -56
  25. data/contrib/zstd/lib/common/xxhash.h +1 -1
  26. data/contrib/zstd/lib/common/zstd_common.c +9 -9
  27. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  28. data/contrib/zstd/lib/common/zstd_errors.h +1 -0
  29. data/contrib/zstd/lib/common/zstd_internal.h +89 -58
  30. data/contrib/zstd/lib/compress/fse_compress.c +30 -23
  31. data/contrib/zstd/lib/compress/hist.c +26 -28
  32. data/contrib/zstd/lib/compress/hist.h +1 -1
  33. data/contrib/zstd/lib/compress/huf_compress.c +210 -95
  34. data/contrib/zstd/lib/compress/zstd_compress.c +1339 -409
  35. data/contrib/zstd/lib/compress/zstd_compress_internal.h +119 -41
  36. data/contrib/zstd/lib/compress/zstd_compress_literals.c +4 -4
  37. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +17 -3
  38. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +23 -19
  39. data/contrib/zstd/lib/compress/zstd_cwksp.h +60 -24
  40. data/contrib/zstd/lib/compress/zstd_double_fast.c +22 -22
  41. data/contrib/zstd/lib/compress/zstd_fast.c +19 -19
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +351 -77
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +20 -0
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +59 -18
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +6 -0
  46. data/contrib/zstd/lib/compress/zstd_opt.c +190 -45
  47. data/contrib/zstd/lib/compress/zstdmt_compress.c +74 -406
  48. data/contrib/zstd/lib/compress/zstdmt_compress.h +26 -108
  49. data/contrib/zstd/lib/decompress/huf_decompress.c +302 -200
  50. data/contrib/zstd/lib/decompress/zstd_ddict.c +8 -8
  51. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  52. data/contrib/zstd/lib/decompress/zstd_decompress.c +125 -80
  53. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +145 -37
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +5 -2
  55. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +11 -10
  56. data/contrib/zstd/lib/dictBuilder/cover.c +29 -20
  57. data/contrib/zstd/lib/dictBuilder/cover.h +1 -1
  58. data/contrib/zstd/lib/dictBuilder/fastcover.c +20 -19
  59. data/contrib/zstd/lib/dictBuilder/zdict.c +15 -16
  60. data/contrib/zstd/lib/dictBuilder/zdict.h +1 -1
  61. data/contrib/zstd/lib/legacy/zstd_v01.c +5 -1
  62. data/contrib/zstd/lib/legacy/zstd_v02.c +5 -1
  63. data/contrib/zstd/lib/legacy/zstd_v03.c +5 -1
  64. data/contrib/zstd/lib/legacy/zstd_v04.c +6 -2
  65. data/contrib/zstd/lib/legacy/zstd_v05.c +5 -1
  66. data/contrib/zstd/lib/legacy/zstd_v06.c +5 -1
  67. data/contrib/zstd/lib/legacy/zstd_v07.c +5 -1
  68. data/contrib/zstd/lib/libzstd.pc.in +3 -3
  69. data/contrib/zstd/lib/zstd.h +348 -47
  70. data/ext/extzstd.c +6 -0
  71. data/ext/extzstd.h +6 -0
  72. data/gemstub.rb +3 -21
  73. data/lib/extzstd.rb +0 -2
  74. data/lib/extzstd/version.rb +6 -1
  75. data/test/test_basic.rb +0 -5
  76. metadata +5 -4
@@ -143,6 +143,14 @@ The file structure is designed to make this selection manually achievable for an
143
143
  Setting this macro will either force generation of the BMI2 dispatcher (1)
144
144
  or prevent it (0). It overrides automatic detection.
145
145
 
146
+ - The build macro `ZSTD_NO_UNUSED_FUNCTIONS` can be defined to hide the definitions of functions
147
+ that zstd does not use. Not all unused functions are hidden, but they can be if needed.
148
+ Currently, this macro will hide function definitions in FSE and HUF that use an excessive
149
+ amount of stack space.
150
+
151
+ - The build macro `ZSTD_NO_INTRINSICS` can be defined to disable all explicit intrinsics.
152
+ Compiler builtins are still used.
153
+
146
154
 
147
155
  #### Windows : using MinGW+MSYS to create DLL
148
156
 
@@ -160,6 +168,26 @@ file it should be linked with `dll\libzstd.dll`. For example:
160
168
  The compiled executable will require ZSTD DLL which is available at `dll\libzstd.dll`.
161
169
 
162
170
 
171
+ #### Advanced Build options
172
+
173
+ The build system requires a hash function in order to
174
+ separate object files created with different compilation flags.
175
+ By default, it tries to use `md5sum` or equivalent.
176
+ The hash function can be manually switched by setting the `HASH` variable.
177
+ For example : `make HASH=xxhsum`
178
+ The hash function needs to generate at least 64 bits of output, in hexadecimal format.
179
+ When no hash function is found,
180
+ the Makefile just generates all object files into the same default directory,
181
+ irrespective of compilation flags.
182
+ This functionality only matters if `libzstd` is compiled multiple times
183
+ with different build flags.
184
+
185
+ The build directory, where object files are stored
186
+ can also be manually controlled using variable `BUILD_DIR`,
187
+ for example `make BUILD_DIR=objectDir/v1`.
188
+ In which case, the hash function doesn't matter.
189
+
190
+
163
191
  #### Deprecated API
164
192
 
165
193
  Obsolete APIs on their way out are stored in directory `lib/deprecated`.
@@ -17,7 +17,6 @@
17
17
  #if defined (__cplusplus)
18
18
  extern "C" {
19
19
  #endif
20
-
21
20
  /*
22
21
  * This API consists of small unitary functions, which must be inlined for best performance.
23
22
  * Since link-time-optimization is not available for all compilers,
@@ -36,10 +35,12 @@ extern "C" {
36
35
  /*=========================================
37
36
  * Target specific
38
37
  =========================================*/
39
- #if defined(__BMI__) && defined(__GNUC__)
40
- # include <immintrin.h> /* support for bextr (experimental) */
41
- #elif defined(__ICCARM__)
42
- # include <intrinsics.h>
38
+ #ifndef ZSTD_NO_INTRINSICS
39
+ # if defined(__BMI__) && defined(__GNUC__)
40
+ # include <immintrin.h> /* support for bextr (experimental) */
41
+ # elif defined(__ICCARM__)
42
+ # include <intrinsics.h>
43
+ # endif
43
44
  #endif
44
45
 
45
46
  #define STREAM_ACCUMULATOR_MIN_32 25
@@ -141,8 +142,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
141
142
  assert(val != 0);
142
143
  {
143
144
  # if defined(_MSC_VER) /* Visual */
144
- unsigned long r=0;
145
- return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
145
+ # if STATIC_BMI2 == 1
146
+ return _lzcnt_u32(val) ^ 31;
147
+ # else
148
+ unsigned long r = 0;
149
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
150
+ # endif
146
151
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
147
152
  return __builtin_clz (val) ^ 31;
148
153
  # elif defined(__ICCARM__) /* IAR Intrinsic */
@@ -198,7 +203,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
198
203
  MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
199
204
  size_t value, unsigned nbBits)
200
205
  {
201
- MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
206
+ DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
202
207
  assert(nbBits < BIT_MASK_SIZE);
203
208
  assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
204
209
  bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
@@ -271,7 +276,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
271
276
  */
272
277
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
273
278
  {
274
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
279
+ if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
275
280
 
276
281
  bitD->start = (const char*)srcBuffer;
277
282
  bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@@ -317,12 +322,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
317
322
  return srcSize;
318
323
  }
319
324
 
320
- MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
325
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
321
326
  {
322
327
  return bitContainer >> start;
323
328
  }
324
329
 
325
- MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
330
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
326
331
  {
327
332
  U32 const regMask = sizeof(bitContainer)*8 - 1;
328
333
  /* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -330,10 +335,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
330
335
  return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
331
336
  }
332
337
 
333
- MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
338
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
334
339
  {
340
+ #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
341
+ return _bzhi_u64(bitContainer, nbBits);
342
+ #else
335
343
  assert(nbBits < BIT_MASK_SIZE);
336
344
  return bitContainer & BIT_mask[nbBits];
345
+ #endif
337
346
  }
338
347
 
339
348
  /*! BIT_lookBits() :
@@ -342,7 +351,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
342
351
  * On 32-bits, maxNbBits==24.
343
352
  * On 64-bits, maxNbBits==56.
344
353
  * @return : value extracted */
345
- MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
354
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
346
355
  {
347
356
  /* arbitrate between double-shift and shift+mask */
348
357
  #if 1
@@ -365,7 +374,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
365
374
  return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
366
375
  }
367
376
 
368
- MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
377
+ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
369
378
  {
370
379
  bitD->bitsConsumed += nbBits;
371
380
  }
@@ -374,7 +383,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
374
383
  * Read (consume) next n bits from local register and update.
375
384
  * Pay attention to not read more than nbBits contained into local register.
376
385
  * @return : extracted value. */
377
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
386
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
378
387
  {
379
388
  size_t const value = BIT_lookBits(bitD, nbBits);
380
389
  BIT_skipBits(bitD, nbBits);
@@ -38,6 +38,17 @@
38
38
 
39
39
  #endif
40
40
 
41
+ /**
42
+ On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
43
+ This explicitly marks such functions as __cdecl so that the code will still compile
44
+ if a CC other than __cdecl has been made the default.
45
+ */
46
+ #if defined(_MSC_VER)
47
+ # define WIN_CDECL __cdecl
48
+ #else
49
+ # define WIN_CDECL
50
+ #endif
51
+
41
52
  /**
42
53
  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
43
54
  * parameters. They must be inlined for the compiler to eliminate the constant
@@ -114,12 +125,12 @@
114
125
  # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
115
126
  # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
116
127
  # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
117
- # elif defined(__aarch64__)
118
- # define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
119
- # define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
120
128
  # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
121
129
  # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
122
130
  # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
131
+ # elif defined(__aarch64__)
132
+ # define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
133
+ # define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
123
134
  # else
124
135
  # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
125
136
  # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
@@ -172,4 +183,106 @@
172
183
  # pragma warning(disable : 4324) /* disable: C4324: padded structure */
173
184
  #endif
174
185
 
186
+ /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
187
+ #ifndef STATIC_BMI2
188
+ # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
189
+ # ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
190
+ # define STATIC_BMI2 1
191
+ # endif
192
+ # endif
193
+ #endif
194
+
195
+ #ifndef STATIC_BMI2
196
+ #define STATIC_BMI2 0
197
+ #endif
198
+
199
+ /* compat. with non-clang compilers */
200
+ #ifndef __has_builtin
201
+ # define __has_builtin(x) 0
202
+ #endif
203
+
204
+ /* compat. with non-clang compilers */
205
+ #ifndef __has_feature
206
+ # define __has_feature(x) 0
207
+ #endif
208
+
209
+ /* detects whether we are being compiled under msan */
210
+ #ifndef ZSTD_MEMORY_SANITIZER
211
+ # if __has_feature(memory_sanitizer)
212
+ # define ZSTD_MEMORY_SANITIZER 1
213
+ # else
214
+ # define ZSTD_MEMORY_SANITIZER 0
215
+ # endif
216
+ #endif
217
+
218
+ #if ZSTD_MEMORY_SANITIZER
219
+ /* Not all platforms that support msan provide sanitizers/msan_interface.h.
220
+ * We therefore declare the functions we need ourselves, rather than trying to
221
+ * include the header file... */
222
+ #include <stddef.h> /* size_t */
223
+ #define ZSTD_DEPS_NEED_STDINT
224
+ #include "zstd_deps.h" /* intptr_t */
225
+
226
+ /* Make memory region fully initialized (without changing its contents). */
227
+ void __msan_unpoison(const volatile void *a, size_t size);
228
+
229
+ /* Make memory region fully uninitialized (without changing its contents).
230
+ This is a legacy interface that does not update origin information. Use
231
+ __msan_allocated_memory() instead. */
232
+ void __msan_poison(const volatile void *a, size_t size);
233
+
234
+ /* Returns the offset of the first (at least partially) poisoned byte in the
235
+ memory range, or -1 if the whole range is good. */
236
+ intptr_t __msan_test_shadow(const volatile void *x, size_t size);
237
+ #endif
238
+
239
+ /* detects whether we are being compiled under asan */
240
+ #ifndef ZSTD_ADDRESS_SANITIZER
241
+ # if __has_feature(address_sanitizer)
242
+ # define ZSTD_ADDRESS_SANITIZER 1
243
+ # elif defined(__SANITIZE_ADDRESS__)
244
+ # define ZSTD_ADDRESS_SANITIZER 1
245
+ # else
246
+ # define ZSTD_ADDRESS_SANITIZER 0
247
+ # endif
248
+ #endif
249
+
250
+ #if ZSTD_ADDRESS_SANITIZER
251
+ /* Not all platforms that support asan provide sanitizers/asan_interface.h.
252
+ * We therefore declare the functions we need ourselves, rather than trying to
253
+ * include the header file... */
254
+ #include <stddef.h> /* size_t */
255
+
256
+ /**
257
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
258
+ *
259
+ * This memory must be previously allocated by your program. Instrumented
260
+ * code is forbidden from accessing addresses in this region until it is
261
+ * unpoisoned. This function is not guaranteed to poison the entire region -
262
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
263
+ * alignment restrictions.
264
+ *
265
+ * \note This function is not thread-safe because no two threads can poison or
266
+ * unpoison memory in the same memory region simultaneously.
267
+ *
268
+ * \param addr Start of memory region.
269
+ * \param size Size of memory region. */
270
+ void __asan_poison_memory_region(void const volatile *addr, size_t size);
271
+
272
+ /**
273
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
274
+ *
275
+ * This memory must be previously allocated by your program. Accessing
276
+ * addresses in this region is allowed until this region is poisoned again.
277
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
278
+ * to ASan alignment restrictions.
279
+ *
280
+ * \note This function is not thread-safe because no two threads can
281
+ * poison or unpoison memory in the same memory region simultaneously.
282
+ *
283
+ * \param addr Start of memory region.
284
+ * \param size Size of memory region. */
285
+ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
286
+ #endif
287
+
175
288
  #endif /* ZSTD_COMPILER_H */
@@ -16,8 +16,6 @@
16
16
  * https://github.com/facebook/folly/blob/master/folly/CpuId.h
17
17
  */
18
18
 
19
- #include <string.h>
20
-
21
19
  #include "mem.h"
22
20
 
23
21
  #ifdef _MSC_VER
@@ -51,15 +51,6 @@ extern "C" {
51
51
  #endif
52
52
 
53
53
 
54
- /* DEBUGFILE can be defined externally,
55
- * typically through compiler command line.
56
- * note : currently useless.
57
- * Value must be stderr or stdout */
58
- #ifndef DEBUGFILE
59
- # define DEBUGFILE stderr
60
- #endif
61
-
62
-
63
54
  /* recommended values for DEBUGLEVEL :
64
55
  * 0 : release mode, no debug, all run-time checks disabled
65
56
  * 1 : enables assert() only, no display
@@ -76,7 +67,8 @@ extern "C" {
76
67
  */
77
68
 
78
69
  #if (DEBUGLEVEL>=1)
79
- # include <assert.h>
70
+ # define ZSTD_DEPS_NEED_ASSERT
71
+ # include "zstd_deps.h"
80
72
  #else
81
73
  # ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
82
74
  # define assert(condition) ((void)0) /* disable assert (default) */
@@ -84,7 +76,8 @@ extern "C" {
84
76
  #endif
85
77
 
86
78
  #if (DEBUGLEVEL>=2)
87
- # include <stdio.h>
79
+ # define ZSTD_DEPS_NEED_IO
80
+ # include "zstd_deps.h"
88
81
  extern int g_debuglevel; /* the variable is only declared,
89
82
  it actually lives in debug.c,
90
83
  and is shared by the whole process.
@@ -92,14 +85,14 @@ extern int g_debuglevel; /* the variable is only declared,
92
85
  It's useful when enabling very verbose levels
93
86
  on selective conditions (such as position in src) */
94
87
 
95
- # define RAWLOG(l, ...) { \
96
- if (l<=g_debuglevel) { \
97
- fprintf(stderr, __VA_ARGS__); \
88
+ # define RAWLOG(l, ...) { \
89
+ if (l<=g_debuglevel) { \
90
+ ZSTD_DEBUG_PRINT(__VA_ARGS__); \
98
91
  } }
99
- # define DEBUGLOG(l, ...) { \
100
- if (l<=g_debuglevel) { \
101
- fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
102
- fprintf(stderr, " \n"); \
92
+ # define DEBUGLOG(l, ...) { \
93
+ if (l<=g_debuglevel) { \
94
+ ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
95
+ ZSTD_DEBUG_PRINT(" \n"); \
103
96
  } }
104
97
  #else
105
98
  # define RAWLOG(l, ...) {} /* disabled */
@@ -38,8 +38,31 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
38
38
  /*-**************************************************************
39
39
  * FSE NCount encoding-decoding
40
40
  ****************************************************************/
41
- size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
42
- const void* headerBuffer, size_t hbSize)
41
+ static U32 FSE_ctz(U32 val)
42
+ {
43
+ assert(val != 0);
44
+ {
45
+ # if defined(_MSC_VER) /* Visual */
46
+ unsigned long r=0;
47
+ return _BitScanForward(&r, val) ? (unsigned)r : 0;
48
+ # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
49
+ return __builtin_ctz(val);
50
+ # elif defined(__ICCARM__) /* IAR Intrinsic */
51
+ return __CTZ(val);
52
+ # else /* Software version */
53
+ U32 count = 0;
54
+ while ((val & 1) == 0) {
55
+ val >>= 1;
56
+ ++count;
57
+ }
58
+ return count;
59
+ # endif
60
+ }
61
+ }
62
+
63
+ FORCE_INLINE_TEMPLATE
64
+ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
65
+ const void* headerBuffer, size_t hbSize)
43
66
  {
44
67
  const BYTE* const istart = (const BYTE*) headerBuffer;
45
68
  const BYTE* const iend = istart + hbSize;
@@ -50,23 +73,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
50
73
  U32 bitStream;
51
74
  int bitCount;
52
75
  unsigned charnum = 0;
76
+ unsigned const maxSV1 = *maxSVPtr + 1;
53
77
  int previous0 = 0;
54
78
 
55
- if (hbSize < 4) {
56
- /* This function only works when hbSize >= 4 */
57
- char buffer[4];
58
- memset(buffer, 0, sizeof(buffer));
59
- memcpy(buffer, headerBuffer, hbSize);
79
+ if (hbSize < 8) {
80
+ /* This function only works when hbSize >= 8 */
81
+ char buffer[8] = {0};
82
+ ZSTD_memcpy(buffer, headerBuffer, hbSize);
60
83
  { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
61
84
  buffer, sizeof(buffer));
62
85
  if (FSE_isError(countSize)) return countSize;
63
86
  if (countSize > hbSize) return ERROR(corruption_detected);
64
87
  return countSize;
65
88
  } }
66
- assert(hbSize >= 4);
89
+ assert(hbSize >= 8);
67
90
 
68
91
  /* init */
69
- memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
92
+ ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
70
93
  bitStream = MEM_readLE32(ip);
71
94
  nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
72
95
  if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@@ -77,36 +100,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
77
100
  threshold = 1<<nbBits;
78
101
  nbBits++;
79
102
 
80
- while ((remaining>1) & (charnum<=*maxSVPtr)) {
103
+ for (;;) {
81
104
  if (previous0) {
82
- unsigned n0 = charnum;
83
- while ((bitStream & 0xFFFF) == 0xFFFF) {
84
- n0 += 24;
85
- if (ip < iend-5) {
86
- ip += 2;
87
- bitStream = MEM_readLE32(ip) >> bitCount;
105
+ /* Count the number of repeats. Each time the
106
+ * 2-bit repeat code is 0b11 there is another
107
+ * repeat.
108
+ * Avoid UB by setting the high bit to 1.
109
+ */
110
+ int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
111
+ while (repeats >= 12) {
112
+ charnum += 3 * 12;
113
+ if (LIKELY(ip <= iend-7)) {
114
+ ip += 3;
88
115
  } else {
89
- bitStream >>= 16;
90
- bitCount += 16;
91
- } }
92
- while ((bitStream & 3) == 3) {
93
- n0 += 3;
94
- bitStream >>= 2;
95
- bitCount += 2;
116
+ bitCount -= (int)(8 * (iend - 7 - ip));
117
+ bitCount &= 31;
118
+ ip = iend - 4;
119
+ }
120
+ bitStream = MEM_readLE32(ip) >> bitCount;
121
+ repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
96
122
  }
97
- n0 += bitStream & 3;
123
+ charnum += 3 * repeats;
124
+ bitStream >>= 2 * repeats;
125
+ bitCount += 2 * repeats;
126
+
127
+ /* Add the final repeat which isn't 0b11. */
128
+ assert((bitStream & 3) < 3);
129
+ charnum += bitStream & 3;
98
130
  bitCount += 2;
99
- if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
100
- while (charnum < n0) normalizedCounter[charnum++] = 0;
101
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
131
+
132
+ /* This is an error, but break and return an error
133
+ * at the end, because returning out of a loop makes
134
+ * it harder for the compiler to optimize.
135
+ */
136
+ if (charnum >= maxSV1) break;
137
+
138
+ /* We don't need to set the normalized count to 0
139
+ * because we already memset the whole buffer to 0.
140
+ */
141
+
142
+ if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
102
143
  assert((bitCount >> 3) <= 3); /* For first condition to work */
103
144
  ip += bitCount>>3;
104
145
  bitCount &= 7;
105
- bitStream = MEM_readLE32(ip) >> bitCount;
106
146
  } else {
107
- bitStream >>= 2;
108
- } }
109
- { int const max = (2*threshold-1) - remaining;
147
+ bitCount -= (int)(8 * (iend - 4 - ip));
148
+ bitCount &= 31;
149
+ ip = iend - 4;
150
+ }
151
+ bitStream = MEM_readLE32(ip) >> bitCount;
152
+ }
153
+ {
154
+ int const max = (2*threshold-1) - remaining;
110
155
  int count;
111
156
 
112
157
  if ((bitStream & (threshold-1)) < (U32)max) {
@@ -119,24 +164,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
119
164
  }
120
165
 
121
166
  count--; /* extra accuracy */
122
- remaining -= count < 0 ? -count : count; /* -1 means +1 */
167
+ /* When it matters (small blocks), this is a
168
+ * predictable branch, because we don't use -1.
169
+ */
170
+ if (count >= 0) {
171
+ remaining -= count;
172
+ } else {
173
+ assert(count == -1);
174
+ remaining += count;
175
+ }
123
176
  normalizedCounter[charnum++] = (short)count;
124
177
  previous0 = !count;
125
- while (remaining < threshold) {
126
- nbBits--;
127
- threshold >>= 1;
178
+
179
+ assert(threshold > 1);
180
+ if (remaining < threshold) {
181
+ /* This branch can be folded into the
182
+ * threshold update condition because we
183
+ * know that threshold > 1.
184
+ */
185
+ if (remaining <= 1) break;
186
+ nbBits = BIT_highbit32(remaining) + 1;
187
+ threshold = 1 << (nbBits - 1);
128
188
  }
189
+ if (charnum >= maxSV1) break;
129
190
 
130
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
191
+ if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
131
192
  ip += bitCount>>3;
132
193
  bitCount &= 7;
133
194
  } else {
134
195
  bitCount -= (int)(8 * (iend - 4 - ip));
196
+ bitCount &= 31;
135
197
  ip = iend - 4;
136
198
  }
137
- bitStream = MEM_readLE32(ip) >> (bitCount & 31);
138
- } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
199
+ bitStream = MEM_readLE32(ip) >> bitCount;
200
+ } }
139
201
  if (remaining != 1) return ERROR(corruption_detected);
202
+ /* Only possible when there are too many zeros. */
203
+ if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
140
204
  if (bitCount > 32) return ERROR(corruption_detected);
141
205
  *maxSVPtr = charnum-1;
142
206
 
@@ -144,6 +208,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
144
208
  return ip-istart;
145
209
  }
146
210
 
211
+ /* Avoids the FORCE_INLINE of the _body() function. */
212
+ static size_t FSE_readNCount_body_default(
213
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
214
+ const void* headerBuffer, size_t hbSize)
215
+ {
216
+ return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
217
+ }
218
+
219
+ #if DYNAMIC_BMI2
220
+ TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
221
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
222
+ const void* headerBuffer, size_t hbSize)
223
+ {
224
+ return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
225
+ }
226
+ #endif
227
+
228
+ size_t FSE_readNCount_bmi2(
229
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
230
+ const void* headerBuffer, size_t hbSize, int bmi2)
231
+ {
232
+ #if DYNAMIC_BMI2
233
+ if (bmi2) {
234
+ return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
235
+ }
236
+ #endif
237
+ (void)bmi2;
238
+ return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
239
+ }
240
+
241
+ size_t FSE_readNCount(
242
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
243
+ const void* headerBuffer, size_t hbSize)
244
+ {
245
+ return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
246
+ }
247
+
147
248
 
148
249
  /*! HUF_readStats() :
149
250
  Read compact Huffman tree, saved by HUF_writeCTable().
@@ -155,6 +256,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
155
256
  size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
156
257
  U32* nbSymbolsPtr, U32* tableLogPtr,
157
258
  const void* src, size_t srcSize)
259
+ {
260
+ U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
261
+ return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
262
+ }
263
+
264
+ FORCE_INLINE_TEMPLATE size_t
265
+ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
266
+ U32* nbSymbolsPtr, U32* tableLogPtr,
267
+ const void* src, size_t srcSize,
268
+ void* workSpace, size_t wkspSize,
269
+ int bmi2)
158
270
  {
159
271
  U32 weightTotal;
160
272
  const BYTE* ip = (const BYTE*) src;
@@ -163,7 +275,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
163
275
 
164
276
  if (!srcSize) return ERROR(srcSize_wrong);
165
277
  iSize = ip[0];
166
- /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
278
+ /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzers complain ... */
167
279
 
168
280
  if (iSize >= 128) { /* special header */
169
281
  oSize = iSize - 127;
@@ -177,14 +289,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
177
289
  huffWeight[n+1] = ip[n/2] & 15;
178
290
  } } }
179
291
  else { /* header compressed with FSE (normal case) */
180
- FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
181
292
  if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
182
- oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
293
+ /* max (hwSize-1) values decoded, as last one is implied */
294
+ oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
183
295
  if (FSE_isError(oSize)) return oSize;
184
296
  }
185
297
 
186
298
  /* collect weight stats */
187
- memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
299
+ ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
188
300
  weightTotal = 0;
189
301
  { U32 n; for (n=0; n<oSize; n++) {
190
302
  if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
@@ -214,3 +326,37 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
214
326
  *nbSymbolsPtr = (U32)(oSize+1);
215
327
  return iSize+1;
216
328
  }
329
+
330
+ /* Avoids the FORCE_INLINE of the _body() function. */
331
+ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
332
+ U32* nbSymbolsPtr, U32* tableLogPtr,
333
+ const void* src, size_t srcSize,
334
+ void* workSpace, size_t wkspSize)
335
+ {
336
+ return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
337
+ }
338
+
339
+ #if DYNAMIC_BMI2
340
+ static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
341
+ U32* nbSymbolsPtr, U32* tableLogPtr,
342
+ const void* src, size_t srcSize,
343
+ void* workSpace, size_t wkspSize)
344
+ {
345
+ return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
346
+ }
347
+ #endif
348
+
349
+ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
350
+ U32* nbSymbolsPtr, U32* tableLogPtr,
351
+ const void* src, size_t srcSize,
352
+ void* workSpace, size_t wkspSize,
353
+ int bmi2)
354
+ {
355
+ #if DYNAMIC_BMI2
356
+ if (bmi2) {
357
+ return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
358
+ }
359
+ #endif
360
+ (void)bmi2;
361
+ return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
362
+ }