zstd-ruby 1.4.0.0 → 1.4.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +274 -107
  5. data/ext/zstdruby/libzstd/README.md +75 -16
  6. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  7. data/ext/zstdruby/libzstd/common/compiler.h +154 -5
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  12. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +7 -3
  14. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
  16. data/ext/zstdruby/libzstd/common/huf.h +41 -38
  17. data/ext/zstdruby/libzstd/common/mem.h +68 -22
  18. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  19. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
  23. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  26. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
  28. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
  72. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
  73. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  74. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  75. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
  77. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
  79. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
  81. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
  83. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
  85. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
  87. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
  89. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  90. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  91. data/ext/zstdruby/libzstd/zstd.h +655 -118
  92. data/lib/zstd-ruby/version.rb +1 -1
  93. data/zstd-ruby.gemspec +1 -1
  94. metadata +20 -10
  95. data/.travis.yml +0 -14
@@ -27,10 +27,10 @@ Enabling multithreading requires 2 conditions :
27
27
  Both conditions are automatically applied when invoking `make lib-mt` target.
28
28
 
29
29
  When linking a POSIX program with a multithreaded version of `libzstd`,
30
- note that it's necessary to request the `-pthread` flag during link stage.
30
+ note that it's necessary to invoke the `-pthread` flag during link stage.
31
31
 
32
32
  Multithreading capabilities are exposed
33
- via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
33
+ via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
34
34
 
35
35
 
36
36
  #### API
@@ -85,33 +85,72 @@ The file structure is designed to make this selection manually achievable for an
85
85
 
86
86
  - While invoking `make libzstd`, it's possible to define build macros
87
87
  `ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
88
- and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
89
- This will also disable compilation of all dependencies
90
- (eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
91
-
92
- - There are some additional build macros that can be used to minify the decoder.
93
-
94
- Zstandard often has more than one implementation of a piece of functionality,
95
- where each implementation optimizes for different scenarios. For example, the
96
- Huffman decoder has complementary implementations that decode the stream one
97
- symbol at a time or two symbols at a time. Zstd normally includes both (and
98
- dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
99
- or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
88
+ and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the
89
+ corresponding features. This will also disable compilation of all
90
+ dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
91
+ dictBuilder).
92
+
93
+ - There are a number of options that can help minimize the binary size of
94
+ `libzstd`.
95
+
96
+ The first step is to select the components needed (using the above-described
97
+ `ZSTD_LIB_COMPRESSION` etc.).
98
+
99
+ The next step is to set `ZSTD_LIB_MINIFY` to `1` when invoking `make`. This
100
+ disables various optional components and changes the compilation flags to
101
+ prioritize space-saving.
102
+
103
+ Detailed options: Zstandard's code and build environment are set up by default
104
+ to optimize above all else for performance. In pursuit of this goal, Zstandard
105
+ makes significant trade-offs in code size. For example, Zstandard often has
106
+ more than one implementation of a particular component, with each
107
+ implementation optimized for different scenarios. For example, the Huffman
108
+ decoder has complementary implementations that decode the stream one symbol at
109
+ a time or two symbols at a time. Zstd normally includes both (and dispatches
110
+ between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1` or
111
+ `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
100
112
  compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
101
113
  and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
102
114
  only one or the other of two decompression implementations. The smallest
103
115
  binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
104
- `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
116
+ `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` (implied by `ZSTD_LIB_MINIFY`).
105
117
 
106
118
  For squeezing the last ounce of size out, you can also define
107
119
  `ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
108
120
  which removes the error messages that are otherwise returned by
109
- `ZSTD_getErrorName`.
121
+ `ZSTD_getErrorName` (implied by `ZSTD_LIB_MINIFY`).
122
+
123
+ Finally, when integrating into your application, make sure you're doing link-
124
+ time optimization and unused symbol garbage collection (via some combination of,
125
+ e.g., `-flto`, `-ffat-lto-objects`, `-fuse-linker-plugin`,
126
+ `-ffunction-sections`, `-fdata-sections`, `-fmerge-all-constants`,
127
+ `-Wl,--gc-sections`, `-Wl,-z,norelro`, and an archiver that understands
128
+ the compiler's intermediate representation, e.g., `AR=gcc-ar`). Consult your
129
+ compiler's documentation.
110
130
 
111
131
  - While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
112
132
  will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
113
133
  the shared library, which is now hidden by default.
114
134
 
135
+ - The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
136
+ which can detect at runtime the presence of BMI2 instructions, and use them only if present.
137
+ These instructions contribute to better performance, notably on the decoder side.
138
+ By default, this feature is automatically enabled on detecting
139
+ the right instruction set (x64) and compiler (clang or gcc >= 5).
140
+ It's obviously disabled for different cpus,
141
+ or when BMI2 instruction set is _required_ by the compiler command line
142
+ (in this case, only the BMI2 code path is generated).
143
+ Setting this macro will either force to generate the BMI2 dispatcher (1)
144
+ or prevent it (0). It overrides automatic detection.
145
+
146
+ - The build macro `ZSTD_NO_UNUSED_FUNCTIONS` can be defined to hide the definitions of functions
147
+ that zstd does not use. Not all unused functions are hidden, but they can be if needed.
148
+ Currently, this macro will hide function definitions in FSE and HUF that use an excessive
149
+ amount of stack space.
150
+
151
+ - The build macro `ZSTD_NO_INTRINSICS` can be defined to disable all explicit intrinsics.
152
+ Compiler builtins are still used.
153
+
115
154
 
116
155
  #### Windows : using MinGW+MSYS to create DLL
117
156
 
@@ -129,6 +168,26 @@ file it should be linked with `dll\libzstd.dll`. For example:
129
168
  The compiled executable will require ZSTD DLL which is available at `dll\libzstd.dll`.
130
169
 
131
170
 
171
+ #### Advanced Build options
172
+
173
+ The build system requires a hash function in order to
174
+ separate object files created with different compilation flags.
175
+ By default, it tries to use `md5sum` or equivalent.
176
+ The hash function can be manually switched by setting the `HASH` variable.
177
+ For example : `make HASH=xxhsum`
178
+ The hash function needs to generate at least 64 bits of output in hexadecimal format.
179
+ When no hash function is found,
180
+ the Makefile just generates all object files into the same default directory,
181
+ irrespective of compilation flags.
182
+ This functionality only matters if `libzstd` is compiled multiple times
183
+ with different build flags.
184
+
185
+ The build directory, where object files are stored
186
+ can also be manually controlled using variable `BUILD_DIR`,
187
+ for example `make BUILD_DIR=objectDir/v1`.
188
+ In which case, the hash function doesn't matter.
189
+
190
+
132
191
  #### Deprecated API
133
192
 
134
193
  Obsolete API on their way out are stored in directory `lib/deprecated`.
@@ -1,35 +1,15 @@
1
1
  /* ******************************************************************
2
- bitstream
3
- Part of FSE library
4
- Copyright (C) 2013-present, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * bitstream
3
+ * Part of FSE library
4
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
  #ifndef BITSTREAM_H_MODULE
35
15
  #define BITSTREAM_H_MODULE
@@ -37,7 +17,6 @@
37
17
  #if defined (__cplusplus)
38
18
  extern "C" {
39
19
  #endif
40
-
41
20
  /*
42
21
  * This API consists of small unitary functions, which must be inlined for best performance.
43
22
  * Since link-time-optimization is not available for all compilers,
@@ -48,6 +27,7 @@ extern "C" {
48
27
  * Dependencies
49
28
  ******************************************/
50
29
  #include "mem.h" /* unaligned access routines */
30
+ #include "compiler.h" /* UNLIKELY() */
51
31
  #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
52
32
  #include "error_private.h" /* error codes and messages */
53
33
 
@@ -55,8 +35,12 @@ extern "C" {
55
35
  /*=========================================
56
36
  * Target specific
57
37
  =========================================*/
58
- #if defined(__BMI__) && defined(__GNUC__)
59
- # include <immintrin.h> /* support for bextr (experimental) */
38
+ #ifndef ZSTD_NO_INTRINSICS
39
+ # if defined(__BMI__) && defined(__GNUC__)
40
+ # include <immintrin.h> /* support for bextr (experimental) */
41
+ # elif defined(__ICCARM__)
42
+ # include <intrinsics.h>
43
+ # endif
60
44
  #endif
61
45
 
62
46
  #define STREAM_ACCUMULATOR_MIN_32 25
@@ -158,11 +142,16 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
158
142
  assert(val != 0);
159
143
  {
160
144
  # if defined(_MSC_VER) /* Visual */
161
- unsigned long r=0;
162
- _BitScanReverse ( &r, val );
163
- return (unsigned) r;
145
+ # if STATIC_BMI2 == 1
146
+ return _lzcnt_u32(val) ^ 31;
147
+ # else
148
+ unsigned long r = 0;
149
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
150
+ # endif
164
151
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
165
- return 31 - __builtin_clz (val);
152
+ return __builtin_clz (val) ^ 31;
153
+ # elif defined(__ICCARM__) /* IAR Intrinsic */
154
+ return 31 - __CLZ(val);
166
155
  # else /* Software version */
167
156
  static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
168
157
  11, 14, 16, 18, 22, 25, 3, 30,
@@ -214,7 +203,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
214
203
  MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
215
204
  size_t value, unsigned nbBits)
216
205
  {
217
- MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
206
+ DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
218
207
  assert(nbBits < BIT_MASK_SIZE);
219
208
  assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
220
209
  bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
@@ -240,9 +229,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
240
229
  {
241
230
  size_t const nbBytes = bitC->bitPos >> 3;
242
231
  assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
232
+ assert(bitC->ptr <= bitC->endPtr);
243
233
  MEM_writeLEST(bitC->ptr, bitC->bitContainer);
244
234
  bitC->ptr += nbBytes;
245
- assert(bitC->ptr <= bitC->endPtr);
246
235
  bitC->bitPos &= 7;
247
236
  bitC->bitContainer >>= nbBytes*8;
248
237
  }
@@ -256,6 +245,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
256
245
  {
257
246
  size_t const nbBytes = bitC->bitPos >> 3;
258
247
  assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
248
+ assert(bitC->ptr <= bitC->endPtr);
259
249
  MEM_writeLEST(bitC->ptr, bitC->bitContainer);
260
250
  bitC->ptr += nbBytes;
261
251
  if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
@@ -286,7 +276,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
286
276
  */
287
277
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
288
278
  {
289
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
279
+ if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
290
280
 
291
281
  bitD->start = (const char*)srcBuffer;
292
282
  bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@@ -332,12 +322,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
332
322
  return srcSize;
333
323
  }
334
324
 
335
- MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
325
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
336
326
  {
337
327
  return bitContainer >> start;
338
328
  }
339
329
 
340
- MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
330
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
341
331
  {
342
332
  U32 const regMask = sizeof(bitContainer)*8 - 1;
343
333
  /* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -345,10 +335,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
345
335
  return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
346
336
  }
347
337
 
348
- MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
338
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
349
339
  {
340
+ #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
341
+ return _bzhi_u64(bitContainer, nbBits);
342
+ #else
350
343
  assert(nbBits < BIT_MASK_SIZE);
351
344
  return bitContainer & BIT_mask[nbBits];
345
+ #endif
352
346
  }
353
347
 
354
348
  /*! BIT_lookBits() :
@@ -357,7 +351,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
357
351
  * On 32-bits, maxNbBits==24.
358
352
  * On 64-bits, maxNbBits==56.
359
353
  * @return : value extracted */
360
- MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
354
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
361
355
  {
362
356
  /* arbitrate between double-shift and shift+mask */
363
357
  #if 1
@@ -380,7 +374,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
380
374
  return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
381
375
  }
382
376
 
383
- MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
377
+ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
384
378
  {
385
379
  bitD->bitsConsumed += nbBits;
386
380
  }
@@ -389,7 +383,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
389
383
  * Read (consume) next n bits from local register and update.
390
384
  * Pay attention to not read more than nbBits contained into local register.
391
385
  * @return : extracted value. */
392
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
386
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
393
387
  {
394
388
  size_t const value = BIT_lookBits(bitD, nbBits);
395
389
  BIT_skipBits(bitD, nbBits);
@@ -406,6 +400,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
406
400
  return value;
407
401
  }
408
402
 
403
+ /*! BIT_reloadDStreamFast() :
404
+ * Similar to BIT_reloadDStream(), but with two differences:
405
+ * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
406
+ * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
407
+ * point you must use BIT_reloadDStream() to reload.
408
+ */
409
+ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
410
+ {
411
+ if (UNLIKELY(bitD->ptr < bitD->limitPtr))
412
+ return BIT_DStream_overflow;
413
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
414
+ bitD->ptr -= bitD->bitsConsumed >> 3;
415
+ bitD->bitsConsumed &= 7;
416
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
417
+ return BIT_DStream_unfinished;
418
+ }
419
+
409
420
  /*! BIT_reloadDStream() :
410
421
  * Refill `bitD` from buffer previously set in BIT_initDStream() .
411
422
  * This function is safe, it guarantees it will not read beyond src buffer.
@@ -417,10 +428,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
417
428
  return BIT_DStream_overflow;
418
429
 
419
430
  if (bitD->ptr >= bitD->limitPtr) {
420
- bitD->ptr -= bitD->bitsConsumed >> 3;
421
- bitD->bitsConsumed &= 7;
422
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
423
- return BIT_DStream_unfinished;
431
+ return BIT_reloadDStreamFast(bitD);
424
432
  }
425
433
  if (bitD->ptr == bitD->start) {
426
434
  if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -17,13 +17,13 @@
17
17
  /* force inlining */
18
18
 
19
19
  #if !defined(ZSTD_NO_INLINE)
20
- #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
20
+ #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
21
21
  # define INLINE_KEYWORD inline
22
22
  #else
23
23
  # define INLINE_KEYWORD
24
24
  #endif
25
25
 
26
- #if defined(__GNUC__)
26
+ #if defined(__GNUC__) || defined(__ICCARM__)
27
27
  # define FORCE_INLINE_ATTR __attribute__((always_inline))
28
28
  #elif defined(_MSC_VER)
29
29
  # define FORCE_INLINE_ATTR __forceinline
@@ -38,6 +38,17 @@
38
38
 
39
39
  #endif
40
40
 
41
+ /**
42
+ On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
43
+ This explicitly marks such functions as __cdecl so that the code will still compile
44
+ if a CC other than __cdecl has been made the default.
45
+ */
46
+ #if defined(_MSC_VER)
47
+ # define WIN_CDECL __cdecl
48
+ #else
49
+ # define WIN_CDECL
50
+ #endif
51
+
41
52
  /**
42
53
  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
43
54
  * parameters. They must be inlined for the compiler to eliminate the constant
@@ -61,22 +72,30 @@
61
72
  # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
62
73
  #endif
63
74
 
75
+ /* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
76
+ #if defined(__GNUC__)
77
+ # define UNUSED_ATTR __attribute__((unused))
78
+ #else
79
+ # define UNUSED_ATTR
80
+ #endif
81
+
64
82
  /* force no inlining */
65
83
  #ifdef _MSC_VER
66
84
  # define FORCE_NOINLINE static __declspec(noinline)
67
85
  #else
68
- # ifdef __GNUC__
86
+ # if defined(__GNUC__) || defined(__ICCARM__)
69
87
  # define FORCE_NOINLINE static __attribute__((__noinline__))
70
88
  # else
71
89
  # define FORCE_NOINLINE static
72
90
  # endif
73
91
  #endif
74
92
 
93
+
75
94
  /* target attribute */
76
95
  #ifndef __has_attribute
77
96
  #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
78
97
  #endif
79
- #if defined(__GNUC__)
98
+ #if defined(__GNUC__) || defined(__ICCARM__)
80
99
  # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
81
100
  #else
82
101
  # define TARGET_ATTRIBUTE(target)
@@ -110,6 +129,9 @@
110
129
  # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
111
130
  # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
112
131
  # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
132
+ # elif defined(__aarch64__)
133
+ # define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
134
+ # define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
113
135
  # else
114
136
  # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
115
137
  # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
@@ -127,6 +149,31 @@
127
149
  } \
128
150
  }
129
151
 
152
+ /* vectorization
153
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
154
+ #if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
155
+ # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
156
+ # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
157
+ # else
158
+ # define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
159
+ # endif
160
+ #else
161
+ # define DONT_VECTORIZE
162
+ #endif
163
+
164
+ /* Tell the compiler that a branch is likely or unlikely.
165
+ * Only use these macros if it causes the compiler to generate better code.
166
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
167
+ * and clang, please do.
168
+ */
169
+ #if defined(__GNUC__)
170
+ #define LIKELY(x) (__builtin_expect((x), 1))
171
+ #define UNLIKELY(x) (__builtin_expect((x), 0))
172
+ #else
173
+ #define LIKELY(x) (x)
174
+ #define UNLIKELY(x) (x)
175
+ #endif
176
+
130
177
  /* disable warnings */
131
178
  #ifdef _MSC_VER /* Visual Studio */
132
179
  # include <intrin.h> /* For Visual 2005 */
@@ -137,4 +184,106 @@
137
184
  # pragma warning(disable : 4324) /* disable: C4324: padded structure */
138
185
  #endif
139
186
 
187
+ /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
188
+ #ifndef STATIC_BMI2
189
+ # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
190
+ # ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
191
+ # define STATIC_BMI2 1
192
+ # endif
193
+ # endif
194
+ #endif
195
+
196
+ #ifndef STATIC_BMI2
197
+ #define STATIC_BMI2 0
198
+ #endif
199
+
200
+ /* compat. with non-clang compilers */
201
+ #ifndef __has_builtin
202
+ # define __has_builtin(x) 0
203
+ #endif
204
+
205
+ /* compat. with non-clang compilers */
206
+ #ifndef __has_feature
207
+ # define __has_feature(x) 0
208
+ #endif
209
+
210
+ /* detects whether we are being compiled under msan */
211
+ #ifndef ZSTD_MEMORY_SANITIZER
212
+ # if __has_feature(memory_sanitizer)
213
+ # define ZSTD_MEMORY_SANITIZER 1
214
+ # else
215
+ # define ZSTD_MEMORY_SANITIZER 0
216
+ # endif
217
+ #endif
218
+
219
+ #if ZSTD_MEMORY_SANITIZER
220
+ /* Not all platforms that support msan provide sanitizers/msan_interface.h.
221
+ * We therefore declare the functions we need ourselves, rather than trying to
222
+ * include the header file... */
223
+ #include <stddef.h> /* size_t */
224
+ #define ZSTD_DEPS_NEED_STDINT
225
+ #include "zstd_deps.h" /* intptr_t */
226
+
227
+ /* Make memory region fully initialized (without changing its contents). */
228
+ void __msan_unpoison(const volatile void *a, size_t size);
229
+
230
+ /* Make memory region fully uninitialized (without changing its contents).
231
+ This is a legacy interface that does not update origin information. Use
232
+ __msan_allocated_memory() instead. */
233
+ void __msan_poison(const volatile void *a, size_t size);
234
+
235
+ /* Returns the offset of the first (at least partially) poisoned byte in the
236
+ memory range, or -1 if the whole range is good. */
237
+ intptr_t __msan_test_shadow(const volatile void *x, size_t size);
238
+ #endif
239
+
240
+ /* detects whether we are being compiled under asan */
241
+ #ifndef ZSTD_ADDRESS_SANITIZER
242
+ # if __has_feature(address_sanitizer)
243
+ # define ZSTD_ADDRESS_SANITIZER 1
244
+ # elif defined(__SANITIZE_ADDRESS__)
245
+ # define ZSTD_ADDRESS_SANITIZER 1
246
+ # else
247
+ # define ZSTD_ADDRESS_SANITIZER 0
248
+ # endif
249
+ #endif
250
+
251
+ #if ZSTD_ADDRESS_SANITIZER
252
+ /* Not all platforms that support asan provide sanitizers/asan_interface.h.
253
+ * We therefore declare the functions we need ourselves, rather than trying to
254
+ * include the header file... */
255
+ #include <stddef.h> /* size_t */
256
+
257
+ /**
258
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
259
+ *
260
+ * This memory must be previously allocated by your program. Instrumented
261
+ * code is forbidden from accessing addresses in this region until it is
262
+ * unpoisoned. This function is not guaranteed to poison the entire region -
263
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
264
+ * alignment restrictions.
265
+ *
266
+ * \note This function is not thread-safe because no two threads can poison or
267
+ * unpoison memory in the same memory region simultaneously.
268
+ *
269
+ * \param addr Start of memory region.
270
+ * \param size Size of memory region. */
271
+ void __asan_poison_memory_region(void const volatile *addr, size_t size);
272
+
273
+ /**
274
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
275
+ *
276
+ * This memory must be previously allocated by your program. Accessing
277
+ * addresses in this region is allowed until this region is poisoned again.
278
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
279
+ * to ASan alignment restrictions.
280
+ *
281
+ * \note This function is not thread-safe because no two threads can
282
+ * poison or unpoison memory in the same memory region simultaneously.
283
+ *
284
+ * \param addr Start of memory region.
285
+ * \param size Size of memory region. */
286
+ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
287
+ #endif
288
+
140
289
  #endif /* ZSTD_COMPILER_H */