zstd-ruby 1.4.0.0 → 1.4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +274 -107
  5. data/ext/zstdruby/libzstd/README.md +75 -16
  6. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  7. data/ext/zstdruby/libzstd/common/compiler.h +154 -5
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  12. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +7 -3
  14. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
  16. data/ext/zstdruby/libzstd/common/huf.h +41 -38
  17. data/ext/zstdruby/libzstd/common/mem.h +68 -22
  18. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  19. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
  23. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  26. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
  28. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
  72. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
  73. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  74. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  75. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
  77. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
  79. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
  81. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
  83. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
  85. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
  87. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
  89. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  90. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  91. data/ext/zstdruby/libzstd/zstd.h +655 -118
  92. data/lib/zstd-ruby/version.rb +1 -1
  93. data/zstd-ruby.gemspec +1 -1
  94. metadata +20 -10
  95. data/.travis.yml +0 -14
@@ -27,10 +27,10 @@ Enabling multithreading requires 2 conditions :
27
27
  Both conditions are automatically applied when invoking `make lib-mt` target.
28
28
 
29
29
  When linking a POSIX program with a multithreaded version of `libzstd`,
30
- note that it's necessary to request the `-pthread` flag during link stage.
30
+ note that it's necessary to invoke the `-pthread` flag during link stage.
31
31
 
32
32
  Multithreading capabilities are exposed
33
- via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
33
+ via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
34
34
 
35
35
 
36
36
  #### API
@@ -85,33 +85,72 @@ The file structure is designed to make this selection manually achievable for an
85
85
 
86
86
  - While invoking `make libzstd`, it's possible to define build macros
87
87
  `ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
88
- and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
89
- This will also disable compilation of all dependencies
90
- (eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
91
-
92
- - There are some additional build macros that can be used to minify the decoder.
93
-
94
- Zstandard often has more than one implementation of a piece of functionality,
95
- where each implementation optimizes for different scenarios. For example, the
96
- Huffman decoder has complementary implementations that decode the stream one
97
- symbol at a time or two symbols at a time. Zstd normally includes both (and
98
- dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
99
- or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
88
+ and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the
89
+ corresponding features. This will also disable compilation of all
90
+ dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
91
+ dictBuilder).
92
+
93
+ - There are a number of options that can help minimize the binary size of
94
+ `libzstd`.
95
+
96
+ The first step is to select the components needed (using the above-described
97
+ `ZSTD_LIB_COMPRESSION` etc.).
98
+
99
+ The next step is to set `ZSTD_LIB_MINIFY` to `1` when invoking `make`. This
100
+ disables various optional components and changes the compilation flags to
101
+ prioritize space-saving.
102
+
103
+ Detailed options: Zstandard's code and build environment are set up by default
104
+ to optimize above all else for performance. In pursuit of this goal, Zstandard
105
+ makes significant trade-offs in code size. For example, Zstandard often has
106
+ more than one implementation of a particular component, with each
107
+ implementation optimized for different scenarios. For example, the Huffman
108
+ decoder has complementary implementations that decode the stream one symbol at
109
+ a time or two symbols at a time. Zstd normally includes both (and dispatches
110
+ between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1` or
111
+ `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
100
112
  compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
101
113
  and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
102
114
  only one or the other of two decompression implementations. The smallest
103
115
  binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
104
- `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
116
+ `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` (implied by `ZSTD_LIB_MINIFY`).
105
117
 
106
118
  For squeezing the last ounce of size out, you can also define
107
119
  `ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
108
120
  which removes the error messages that are otherwise returned by
109
- `ZSTD_getErrorName`.
121
+ `ZSTD_getErrorName` (implied by `ZSTD_LIB_MINIFY`).
122
+
123
+ Finally, when integrating into your application, make sure you're doing link-
124
+ time optimization and unused symbol garbage collection (via some combination of,
125
+ e.g., `-flto`, `-ffat-lto-objects`, `-fuse-linker-plugin`,
126
+ `-ffunction-sections`, `-fdata-sections`, `-fmerge-all-constants`,
127
+ `-Wl,--gc-sections`, `-Wl,-z,norelro`, and an archiver that understands
128
+ the compiler's intermediate representation, e.g., `AR=gcc-ar`). Consult your
129
+ compiler's documentation.
110
130
 
111
131
  - While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
112
132
  will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
113
133
  the shared library, which is now hidden by default.
114
134
 
135
+ - The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
136
+ which can detect at runtime the presence of BMI2 instructions, and use them only if present.
137
+ These instructions contribute to better performance, notably on the decoder side.
138
+ By default, this feature is automatically enabled on detecting
139
+ the right instruction set (x64) and compiler (clang or gcc >= 5).
140
+ It's obviously disabled for different cpus,
141
+ or when BMI2 instruction set is _required_ by the compiler command line
142
+ (in this case, only the BMI2 code path is generated).
143
+ Setting this macro will either force generation of the BMI2 dispatcher (1)
144
+ or prevent it (0). It overrides automatic detection.
145
+
146
+ - The build macro `ZSTD_NO_UNUSED_FUNCTIONS` can be defined to hide the definitions of functions
147
+ that zstd does not use. Not all unused functions are hidden, but they can be if needed.
148
+ Currently, this macro will hide function definitions in FSE and HUF that use an excessive
149
+ amount of stack space.
150
+
151
+ - The build macro `ZSTD_NO_INTRINSICS` can be defined to disable all explicit intrinsics.
152
+ Compiler builtins are still used.
153
+
115
154
 
116
155
  #### Windows : using MinGW+MSYS to create DLL
117
156
 
@@ -129,6 +168,26 @@ file it should be linked with `dll\libzstd.dll`. For example:
129
168
  The compiled executable will require ZSTD DLL which is available at `dll\libzstd.dll`.
130
169
 
131
170
 
171
+ #### Advanced Build options
172
+
173
+ The build system requires a hash function in order to
174
+ separate object files created with different compilation flags.
175
+ By default, it tries to use `md5sum` or equivalent.
176
+ The hash function can be manually switched by setting the `HASH` variable.
177
+ For example : `make HASH=xxhsum`
178
+ The hash function needs to generate at least 64 bits of output in hexadecimal format.
179
+ When no hash function is found,
180
+ the Makefile just generates all object files into the same default directory,
181
+ irrespective of compilation flags.
182
+ This functionality only matters if `libzstd` is compiled multiple times
183
+ with different build flags.
184
+
185
+ The build directory, where object files are stored
186
+ can also be manually controlled using variable `BUILD_DIR`,
187
+ for example `make BUILD_DIR=objectDir/v1`.
188
+ In which case, the hash function doesn't matter.
189
+
190
+
132
191
  #### Deprecated API
133
192
 
134
193
  Obsolete API on their way out are stored in directory `lib/deprecated`.
@@ -1,35 +1,15 @@
1
1
  /* ******************************************************************
2
- bitstream
3
- Part of FSE library
4
- Copyright (C) 2013-present, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * bitstream
3
+ * Part of FSE library
4
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
  #ifndef BITSTREAM_H_MODULE
35
15
  #define BITSTREAM_H_MODULE
@@ -37,7 +17,6 @@
37
17
  #if defined (__cplusplus)
38
18
  extern "C" {
39
19
  #endif
40
-
41
20
  /*
42
21
  * This API consists of small unitary functions, which must be inlined for best performance.
43
22
  * Since link-time-optimization is not available for all compilers,
@@ -48,6 +27,7 @@ extern "C" {
48
27
  * Dependencies
49
28
  ******************************************/
50
29
  #include "mem.h" /* unaligned access routines */
30
+ #include "compiler.h" /* UNLIKELY() */
51
31
  #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
52
32
  #include "error_private.h" /* error codes and messages */
53
33
 
@@ -55,8 +35,12 @@ extern "C" {
55
35
  /*=========================================
56
36
  * Target specific
57
37
  =========================================*/
58
- #if defined(__BMI__) && defined(__GNUC__)
59
- # include <immintrin.h> /* support for bextr (experimental) */
38
+ #ifndef ZSTD_NO_INTRINSICS
39
+ # if defined(__BMI__) && defined(__GNUC__)
40
+ # include <immintrin.h> /* support for bextr (experimental) */
41
+ # elif defined(__ICCARM__)
42
+ # include <intrinsics.h>
43
+ # endif
60
44
  #endif
61
45
 
62
46
  #define STREAM_ACCUMULATOR_MIN_32 25
@@ -158,11 +142,16 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
158
142
  assert(val != 0);
159
143
  {
160
144
  # if defined(_MSC_VER) /* Visual */
161
- unsigned long r=0;
162
- _BitScanReverse ( &r, val );
163
- return (unsigned) r;
145
+ # if STATIC_BMI2 == 1
146
+ return _lzcnt_u32(val) ^ 31;
147
+ # else
148
+ unsigned long r = 0;
149
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
150
+ # endif
164
151
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
165
- return 31 - __builtin_clz (val);
152
+ return __builtin_clz (val) ^ 31;
153
+ # elif defined(__ICCARM__) /* IAR Intrinsic */
154
+ return 31 - __CLZ(val);
166
155
  # else /* Software version */
167
156
  static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
168
157
  11, 14, 16, 18, 22, 25, 3, 30,
@@ -214,7 +203,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
214
203
  MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
215
204
  size_t value, unsigned nbBits)
216
205
  {
217
- MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
206
+ DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
218
207
  assert(nbBits < BIT_MASK_SIZE);
219
208
  assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
220
209
  bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
@@ -240,9 +229,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
240
229
  {
241
230
  size_t const nbBytes = bitC->bitPos >> 3;
242
231
  assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
232
+ assert(bitC->ptr <= bitC->endPtr);
243
233
  MEM_writeLEST(bitC->ptr, bitC->bitContainer);
244
234
  bitC->ptr += nbBytes;
245
- assert(bitC->ptr <= bitC->endPtr);
246
235
  bitC->bitPos &= 7;
247
236
  bitC->bitContainer >>= nbBytes*8;
248
237
  }
@@ -256,6 +245,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
256
245
  {
257
246
  size_t const nbBytes = bitC->bitPos >> 3;
258
247
  assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
248
+ assert(bitC->ptr <= bitC->endPtr);
259
249
  MEM_writeLEST(bitC->ptr, bitC->bitContainer);
260
250
  bitC->ptr += nbBytes;
261
251
  if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
@@ -286,7 +276,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
286
276
  */
287
277
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
288
278
  {
289
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
279
+ if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
290
280
 
291
281
  bitD->start = (const char*)srcBuffer;
292
282
  bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@@ -332,12 +322,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
332
322
  return srcSize;
333
323
  }
334
324
 
335
- MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
325
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
336
326
  {
337
327
  return bitContainer >> start;
338
328
  }
339
329
 
340
- MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
330
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
341
331
  {
342
332
  U32 const regMask = sizeof(bitContainer)*8 - 1;
343
333
  /* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -345,10 +335,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
345
335
  return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
346
336
  }
347
337
 
348
- MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
338
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
349
339
  {
340
+ #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
341
+ return _bzhi_u64(bitContainer, nbBits);
342
+ #else
350
343
  assert(nbBits < BIT_MASK_SIZE);
351
344
  return bitContainer & BIT_mask[nbBits];
345
+ #endif
352
346
  }
353
347
 
354
348
  /*! BIT_lookBits() :
@@ -357,7 +351,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
357
351
  * On 32-bits, maxNbBits==24.
358
352
  * On 64-bits, maxNbBits==56.
359
353
  * @return : value extracted */
360
- MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
354
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
361
355
  {
362
356
  /* arbitrate between double-shift and shift+mask */
363
357
  #if 1
@@ -380,7 +374,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
380
374
  return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
381
375
  }
382
376
 
383
- MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
377
+ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
384
378
  {
385
379
  bitD->bitsConsumed += nbBits;
386
380
  }
@@ -389,7 +383,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
389
383
  * Read (consume) next n bits from local register and update.
390
384
  * Pay attention to not read more than nbBits contained into local register.
391
385
  * @return : extracted value. */
392
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
386
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
393
387
  {
394
388
  size_t const value = BIT_lookBits(bitD, nbBits);
395
389
  BIT_skipBits(bitD, nbBits);
@@ -406,6 +400,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
406
400
  return value;
407
401
  }
408
402
 
403
+ /*! BIT_reloadDStreamFast() :
404
+ * Similar to BIT_reloadDStream(), but with two differences:
405
+ * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
406
+ * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
407
+ * point you must use BIT_reloadDStream() to reload.
408
+ */
409
+ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
410
+ {
411
+ if (UNLIKELY(bitD->ptr < bitD->limitPtr))
412
+ return BIT_DStream_overflow;
413
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
414
+ bitD->ptr -= bitD->bitsConsumed >> 3;
415
+ bitD->bitsConsumed &= 7;
416
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
417
+ return BIT_DStream_unfinished;
418
+ }
419
+
409
420
  /*! BIT_reloadDStream() :
410
421
  * Refill `bitD` from buffer previously set in BIT_initDStream() .
411
422
  * This function is safe, it guarantees it will not read beyond src buffer.
@@ -417,10 +428,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
417
428
  return BIT_DStream_overflow;
418
429
 
419
430
  if (bitD->ptr >= bitD->limitPtr) {
420
- bitD->ptr -= bitD->bitsConsumed >> 3;
421
- bitD->bitsConsumed &= 7;
422
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
423
- return BIT_DStream_unfinished;
431
+ return BIT_reloadDStreamFast(bitD);
424
432
  }
425
433
  if (bitD->ptr == bitD->start) {
426
434
  if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -17,13 +17,13 @@
17
17
  /* force inlining */
18
18
 
19
19
  #if !defined(ZSTD_NO_INLINE)
20
- #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
20
+ #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
21
21
  # define INLINE_KEYWORD inline
22
22
  #else
23
23
  # define INLINE_KEYWORD
24
24
  #endif
25
25
 
26
- #if defined(__GNUC__)
26
+ #if defined(__GNUC__) || defined(__ICCARM__)
27
27
  # define FORCE_INLINE_ATTR __attribute__((always_inline))
28
28
  #elif defined(_MSC_VER)
29
29
  # define FORCE_INLINE_ATTR __forceinline
@@ -38,6 +38,17 @@
38
38
 
39
39
  #endif
40
40
 
41
+ /**
42
+ On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
43
+ This explicitly marks such functions as __cdecl so that the code will still compile
44
+ if a CC other than __cdecl has been made the default.
45
+ */
46
+ #if defined(_MSC_VER)
47
+ # define WIN_CDECL __cdecl
48
+ #else
49
+ # define WIN_CDECL
50
+ #endif
51
+
41
52
  /**
42
53
  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
43
54
  * parameters. They must be inlined for the compiler to eliminate the constant
@@ -61,22 +72,30 @@
61
72
  # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
62
73
  #endif
63
74
 
75
+ /* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
76
+ #if defined(__GNUC__)
77
+ # define UNUSED_ATTR __attribute__((unused))
78
+ #else
79
+ # define UNUSED_ATTR
80
+ #endif
81
+
64
82
  /* force no inlining */
65
83
  #ifdef _MSC_VER
66
84
  # define FORCE_NOINLINE static __declspec(noinline)
67
85
  #else
68
- # ifdef __GNUC__
86
+ # if defined(__GNUC__) || defined(__ICCARM__)
69
87
  # define FORCE_NOINLINE static __attribute__((__noinline__))
70
88
  # else
71
89
  # define FORCE_NOINLINE static
72
90
  # endif
73
91
  #endif
74
92
 
93
+
75
94
  /* target attribute */
76
95
  #ifndef __has_attribute
77
96
  #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
78
97
  #endif
79
- #if defined(__GNUC__)
98
+ #if defined(__GNUC__) || defined(__ICCARM__)
80
99
  # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
81
100
  #else
82
101
  # define TARGET_ATTRIBUTE(target)
@@ -110,6 +129,9 @@
110
129
  # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
111
130
  # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
112
131
  # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
132
+ # elif defined(__aarch64__)
133
+ # define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
134
+ # define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
113
135
  # else
114
136
  # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
115
137
  # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
@@ -127,6 +149,31 @@
127
149
  } \
128
150
  }
129
151
 
152
+ /* vectorization
153
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
154
+ #if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
155
+ # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
156
+ # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
157
+ # else
158
+ # define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
159
+ # endif
160
+ #else
161
+ # define DONT_VECTORIZE
162
+ #endif
163
+
164
+ /* Tell the compiler that a branch is likely or unlikely.
165
+ * Only use these macros if it causes the compiler to generate better code.
166
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
167
+ * and clang, please do.
168
+ */
169
+ #if defined(__GNUC__)
170
+ #define LIKELY(x) (__builtin_expect((x), 1))
171
+ #define UNLIKELY(x) (__builtin_expect((x), 0))
172
+ #else
173
+ #define LIKELY(x) (x)
174
+ #define UNLIKELY(x) (x)
175
+ #endif
176
+
130
177
  /* disable warnings */
131
178
  #ifdef _MSC_VER /* Visual Studio */
132
179
  # include <intrin.h> /* For Visual 2005 */
@@ -137,4 +184,106 @@
137
184
  # pragma warning(disable : 4324) /* disable: C4324: padded structure */
138
185
  #endif
139
186
 
187
+ /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
188
+ #ifndef STATIC_BMI2
189
+ # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
190
+ # ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
191
+ # define STATIC_BMI2 1
192
+ # endif
193
+ # endif
194
+ #endif
195
+
196
+ #ifndef STATIC_BMI2
197
+ #define STATIC_BMI2 0
198
+ #endif
199
+
200
+ /* compat. with non-clang compilers */
201
+ #ifndef __has_builtin
202
+ # define __has_builtin(x) 0
203
+ #endif
204
+
205
+ /* compat. with non-clang compilers */
206
+ #ifndef __has_feature
207
+ # define __has_feature(x) 0
208
+ #endif
209
+
210
+ /* detects whether we are being compiled under msan */
211
+ #ifndef ZSTD_MEMORY_SANITIZER
212
+ # if __has_feature(memory_sanitizer)
213
+ # define ZSTD_MEMORY_SANITIZER 1
214
+ # else
215
+ # define ZSTD_MEMORY_SANITIZER 0
216
+ # endif
217
+ #endif
218
+
219
+ #if ZSTD_MEMORY_SANITIZER
220
+ /* Not all platforms that support msan provide sanitizers/msan_interface.h.
221
+ * We therefore declare the functions we need ourselves, rather than trying to
222
+ * include the header file... */
223
+ #include <stddef.h> /* size_t */
224
+ #define ZSTD_DEPS_NEED_STDINT
225
+ #include "zstd_deps.h" /* intptr_t */
226
+
227
+ /* Make memory region fully initialized (without changing its contents). */
228
+ void __msan_unpoison(const volatile void *a, size_t size);
229
+
230
+ /* Make memory region fully uninitialized (without changing its contents).
231
+ This is a legacy interface that does not update origin information. Use
232
+ __msan_allocated_memory() instead. */
233
+ void __msan_poison(const volatile void *a, size_t size);
234
+
235
+ /* Returns the offset of the first (at least partially) poisoned byte in the
236
+ memory range, or -1 if the whole range is good. */
237
+ intptr_t __msan_test_shadow(const volatile void *x, size_t size);
238
+ #endif
239
+
240
+ /* detects whether we are being compiled under asan */
241
+ #ifndef ZSTD_ADDRESS_SANITIZER
242
+ # if __has_feature(address_sanitizer)
243
+ # define ZSTD_ADDRESS_SANITIZER 1
244
+ # elif defined(__SANITIZE_ADDRESS__)
245
+ # define ZSTD_ADDRESS_SANITIZER 1
246
+ # else
247
+ # define ZSTD_ADDRESS_SANITIZER 0
248
+ # endif
249
+ #endif
250
+
251
+ #if ZSTD_ADDRESS_SANITIZER
252
+ /* Not all platforms that support asan provide sanitizers/asan_interface.h.
253
+ * We therefore declare the functions we need ourselves, rather than trying to
254
+ * include the header file... */
255
+ #include <stddef.h> /* size_t */
256
+
257
+ /**
258
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
259
+ *
260
+ * This memory must be previously allocated by your program. Instrumented
261
+ * code is forbidden from accessing addresses in this region until it is
262
+ * unpoisoned. This function is not guaranteed to poison the entire region -
263
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
264
+ * alignment restrictions.
265
+ *
266
+ * \note This function is not thread-safe because no two threads can poison or
267
+ * unpoison memory in the same memory region simultaneously.
268
+ *
269
+ * \param addr Start of memory region.
270
+ * \param size Size of memory region. */
271
+ void __asan_poison_memory_region(void const volatile *addr, size_t size);
272
+
273
+ /**
274
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
275
+ *
276
+ * This memory must be previously allocated by your program. Accessing
277
+ * addresses in this region is allowed until this region is poisoned again.
278
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
279
+ * to ASan alignment restrictions.
280
+ *
281
+ * \note This function is not thread-safe because no two threads can
282
+ * poison or unpoison memory in the same memory region simultaneously.
283
+ *
284
+ * \param addr Start of memory region.
285
+ * \param size Size of memory region. */
286
+ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
287
+ #endif
288
+
140
289
  #endif /* ZSTD_COMPILER_H */