zstd-ruby 1.4.4.0 → 1.5.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/extconf.rb +1 -0
  6. data/ext/zstdruby/libzstd/BUCK +5 -7
  7. data/ext/zstdruby/libzstd/Makefile +241 -173
  8. data/ext/zstdruby/libzstd/README.md +76 -18
  9. data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
  10. data/ext/zstdruby/libzstd/common/compiler.h +196 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
  15. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +51 -42
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
  19. data/ext/zstdruby/libzstd/common/huf.h +60 -54
  20. data/ext/zstdruby/libzstd/common/mem.h +87 -98
  21. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  22. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +10 -8
  25. data/ext/zstdruby/libzstd/common/threading.h +4 -3
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
  67. data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
  68. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  69. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
  70. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  71. data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
  72. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  73. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  74. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
  75. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
  76. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  77. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  78. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  79. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
  80. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
  82. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
  84. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
  86. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
  88. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  89. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
  90. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  91. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
  92. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  93. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  94. data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
  95. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  96. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  97. data/ext/zstdruby/libzstd/zstd.h +760 -234
  98. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  99. data/ext/zstdruby/zstdruby.c +2 -2
  100. data/lib/zstd-ruby/version.rb +1 -1
  101. metadata +20 -9
  102. data/.travis.yml +0 -14
@@ -19,12 +19,16 @@ The scope can be reduced on demand (see paragraph _modular build_).
19
19
 
20
20
  #### Multithreading support
21
21
 
22
- Multithreading is disabled by default when building with `make`.
22
+ When building with `make`, by default the dynamic library is multithreaded and the static library is single-threaded (for compatibility reasons).
23
+
23
24
  Enabling multithreading requires 2 conditions :
24
25
  - set build macro `ZSTD_MULTITHREAD` (`-DZSTD_MULTITHREAD` for `gcc`)
25
26
  - for POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
26
27
 
27
- Both conditions are automatically applied when invoking `make lib-mt` target.
28
+ For convenience, we provide a build target to generate multi and single threaded libraries:
29
+ - Force enable multithreading on both dynamic and static libraries by appending `-mt` to the target, e.g. `make lib-mt`.
30
+ - Force disable multithreading on both dynamic and static libraries by appending `-nomt` to the target, e.g. `make lib-nomt`.
31
+ - By default, as mentioned before, the dynamic library is multithreaded and the static library is single-threaded, e.g. `make lib`.
28
32
 
29
33
  When linking a POSIX program with a multithreaded version of `libzstd`,
30
34
  note that it's necessary to invoke the `-pthread` flag during link stage.
@@ -42,8 +46,8 @@ Zstandard's stable API is exposed within [lib/zstd.h](zstd.h).
42
46
 
43
47
  Optional advanced features are exposed via :
44
48
 
45
- - `lib/common/zstd_errors.h` : translates `size_t` function results
46
- into a `ZSTD_ErrorCode`, for accurate error handling.
49
+ - `lib/zstd_errors.h` : translates `size_t` function results
50
+ into a `ZSTD_ErrorCode`, for accurate error handling.
47
51
 
48
52
  - `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
49
53
  it unlocks access to the experimental API,
@@ -85,28 +89,48 @@ The file structure is designed to make this selection manually achievable for an
85
89
 
86
90
  - While invoking `make libzstd`, it's possible to define build macros
87
91
  `ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
88
- and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
89
- This will also disable compilation of all dependencies
90
- (eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
91
-
92
- - There are some additional build macros that can be used to minify the decoder.
93
-
94
- Zstandard often has more than one implementation of a piece of functionality,
95
- where each implementation optimizes for different scenarios. For example, the
96
- Huffman decoder has complementary implementations that decode the stream one
97
- symbol at a time or two symbols at a time. Zstd normally includes both (and
98
- dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
99
- or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
92
+ and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the
93
+ corresponding features. This will also disable compilation of all
94
+ dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
95
+ dictBuilder).
96
+
97
+ - There are a number of options that can help minimize the binary size of
98
+ `libzstd`.
99
+
100
+ The first step is to select the components needed (using the above-described
101
+ `ZSTD_LIB_COMPRESSION` etc.).
102
+
103
+ The next step is to set `ZSTD_LIB_MINIFY` to `1` when invoking `make`. This
104
+ disables various optional components and changes the compilation flags to
105
+ prioritize space-saving.
106
+
107
+ Detailed options: Zstandard's code and build environment are set up by default
108
+ to optimize above all else for performance. In pursuit of this goal, Zstandard
109
+ makes significant trade-offs in code size. For example, Zstandard often has
110
+ more than one implementation of a particular component, with each
111
+ implementation optimized for different scenarios. For example, the Huffman
112
+ decoder has complementary implementations that decode the stream one symbol at
113
+ a time or two symbols at a time. Zstd normally includes both (and dispatches
114
+ between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1` or
115
+ `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
100
116
  compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
101
117
  and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
102
118
  only one or the other of two decompression implementations. The smallest
103
119
  binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
104
- `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
120
+ `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` (implied by `ZSTD_LIB_MINIFY`).
105
121
 
106
122
  For squeezing the last ounce of size out, you can also define
107
123
  `ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
108
124
  which removes the error messages that are otherwise returned by
109
- `ZSTD_getErrorName`.
125
+ `ZSTD_getErrorName` (implied by `ZSTD_LIB_MINIFY`).
126
+
127
+ Finally, when integrating into your application, make sure you're doing link-
128
+ time optimization and unused symbol garbage collection (via some combination of,
129
+ e.g., `-flto`, `-ffat-lto-objects`, `-fuse-linker-plugin`,
130
+ `-ffunction-sections`, `-fdata-sections`, `-fmerge-all-constants`,
131
+ `-Wl,--gc-sections`, `-Wl,-z,norelro`, and an archiver that understands
132
+ the compiler's intermediate representation, e.g., `AR=gcc-ar`). Consult your
133
+ compiler's documentation.
110
134
 
111
135
  - While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
112
136
  will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
@@ -123,6 +147,20 @@ The file structure is designed to make this selection manually achievable for an
123
147
  Setting this macro will either force to generate the BMI2 dispatcher (1)
124
148
  or prevent it (0). It overrides automatic detection.
125
149
 
150
+ - The build macro `ZSTD_NO_UNUSED_FUNCTIONS` can be defined to hide the definitions of functions
151
+ that zstd does not use. Not all unused functions are hidden, but they can be if needed.
152
+ Currently, this macro will hide function definitions in FSE and HUF that use an excessive
153
+ amount of stack space.
154
+
155
+ - The build macro `ZSTD_NO_INTRINSICS` can be defined to disable all explicit intrinsics.
156
+ Compiler builtins are still used.
157
+
158
+ - The build macro `ZSTD_DECODER_INTERNAL_BUFFER` can be set to control
159
+ the amount of extra memory used during decompression to store literals.
160
+ This defaults to 64kB. Reducing this value reduces the memory footprint of
161
+ `ZSTD_DCtx` decompression contexts,
162
+ but might also result in a small decompression speed cost.
163
+
126
164
 
127
165
  #### Windows : using MinGW+MSYS to create DLL
128
166
 
@@ -140,6 +178,26 @@ file it should be linked with `dll\libzstd.dll`. For example:
140
178
  The compiled executable will require ZSTD DLL which is available at `dll\libzstd.dll`.
141
179
 
142
180
 
181
+ #### Advanced Build options
182
+
183
+ The build system requires a hash function in order to
184
+ separate object files created with different compilation flags.
185
+ By default, it tries to use `md5sum` or equivalent.
186
+ The hash function can be manually switched by setting the `HASH` variable.
187
+ For example : `make HASH=xxhsum`
188
+ The hash function needs to generate a hash of at least 64 bits, in hexadecimal format.
189
+ When no hash function is found,
190
+ the Makefile just generates all object files into the same default directory,
191
+ irrespective of compilation flags.
192
+ This functionality only matters if `libzstd` is compiled multiple times
193
+ with different build flags.
194
+
195
+ The build directory, where object files are stored
196
+ can also be manually controlled using variable `BUILD_DIR`,
197
+ for example `make BUILD_DIR=objectDir/v1`.
198
+ In that case, the hash function doesn't matter.
199
+
200
+
143
201
  #### Deprecated API
144
202
 
145
203
  Obsolete API on their way out are stored in directory `lib/deprecated`.
@@ -1,35 +1,15 @@
1
1
  /* ******************************************************************
2
- bitstream
3
- Part of FSE library
4
- Copyright (C) 2013-present, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * bitstream
3
+ * Part of FSE library
4
+ * Copyright (c) Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
  #ifndef BITSTREAM_H_MODULE
35
15
  #define BITSTREAM_H_MODULE
@@ -37,7 +17,6 @@
37
17
  #if defined (__cplusplus)
38
18
  extern "C" {
39
19
  #endif
40
-
41
20
  /*
42
21
  * This API consists of small unitary functions, which must be inlined for best performance.
43
22
  * Since link-time-optimization is not available for all compilers,
@@ -48,6 +27,7 @@ extern "C" {
48
27
  * Dependencies
49
28
  ******************************************/
50
29
  #include "mem.h" /* unaligned access routines */
30
+ #include "compiler.h" /* UNLIKELY() */
51
31
  #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
52
32
  #include "error_private.h" /* error codes and messages */
53
33
 
@@ -55,10 +35,12 @@ extern "C" {
55
35
  /*=========================================
56
36
  * Target specific
57
37
  =========================================*/
58
- #if defined(__BMI__) && defined(__GNUC__)
59
- # include <immintrin.h> /* support for bextr (experimental) */
60
- #elif defined(__ICCARM__)
61
- # include <intrinsics.h>
38
+ #ifndef ZSTD_NO_INTRINSICS
39
+ # if defined(__BMI__) && defined(__GNUC__)
40
+ # include <immintrin.h> /* support for bextr (experimental) */
41
+ # elif defined(__ICCARM__)
42
+ # include <intrinsics.h>
43
+ # endif
62
44
  #endif
63
45
 
64
46
  #define STREAM_ACCUMULATOR_MIN_32 25
@@ -160,9 +142,18 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
160
142
  assert(val != 0);
161
143
  {
162
144
  # if defined(_MSC_VER) /* Visual */
163
- unsigned long r=0;
164
- _BitScanReverse ( &r, val );
165
- return (unsigned) r;
145
+ # if STATIC_BMI2 == 1
146
+ return _lzcnt_u32(val) ^ 31;
147
+ # else
148
+ if (val != 0) {
149
+ unsigned long r;
150
+ _BitScanReverse(&r, val);
151
+ return (unsigned)r;
152
+ } else {
153
+ /* Should not reach this code path */
154
+ __assume(0);
155
+ }
156
+ # endif
166
157
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
167
158
  return __builtin_clz (val) ^ 31;
168
159
  # elif defined(__ICCARM__) /* IAR Intrinsic */
@@ -218,7 +209,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
218
209
  MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
219
210
  size_t value, unsigned nbBits)
220
211
  {
221
- MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
212
+ DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
222
213
  assert(nbBits < BIT_MASK_SIZE);
223
214
  assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
224
215
  bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
@@ -291,7 +282,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
291
282
  */
292
283
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
293
284
  {
294
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
285
+ if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
295
286
 
296
287
  bitD->start = (const char*)srcBuffer;
297
288
  bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@@ -308,22 +299,22 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
308
299
  switch(srcSize)
309
300
  {
310
301
  case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
311
- /* fall-through */
302
+ ZSTD_FALLTHROUGH;
312
303
 
313
304
  case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
314
- /* fall-through */
305
+ ZSTD_FALLTHROUGH;
315
306
 
316
307
  case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
317
- /* fall-through */
308
+ ZSTD_FALLTHROUGH;
318
309
 
319
310
  case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
320
- /* fall-through */
311
+ ZSTD_FALLTHROUGH;
321
312
 
322
313
  case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
323
- /* fall-through */
314
+ ZSTD_FALLTHROUGH;
324
315
 
325
316
  case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
326
- /* fall-through */
317
+ ZSTD_FALLTHROUGH;
327
318
 
328
319
  default: break;
329
320
  }
@@ -337,23 +328,36 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
337
328
  return srcSize;
338
329
  }
339
330
 
340
- MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
331
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
341
332
  {
342
333
  return bitContainer >> start;
343
334
  }
344
335
 
345
- MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
336
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
346
337
  {
347
338
  U32 const regMask = sizeof(bitContainer)*8 - 1;
348
339
  /* if start > regMask, bitstream is corrupted, and result is undefined */
349
340
  assert(nbBits < BIT_MASK_SIZE);
341
+ /* On x86, compilers transform & ((1 << nbBits) - 1) into a bzhi instruction, which is better
342
+ * than accessing memory. When bmi2 instruction is not present, we consider
343
+ * such cpus old (pre-Haswell, 2013) and their performance is not of that
344
+ * importance.
345
+ */
346
+ #if defined(__x86_64__) || defined(_M_X86)
347
+ return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
348
+ #else
350
349
  return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
350
+ #endif
351
351
  }
352
352
 
353
- MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
353
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
354
354
  {
355
+ #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
356
+ return _bzhi_u64(bitContainer, nbBits);
357
+ #else
355
358
  assert(nbBits < BIT_MASK_SIZE);
356
359
  return bitContainer & BIT_mask[nbBits];
360
+ #endif
357
361
  }
358
362
 
359
363
  /*! BIT_lookBits() :
@@ -362,7 +366,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
362
366
  * On 32-bits, maxNbBits==24.
363
367
  * On 64-bits, maxNbBits==56.
364
368
  * @return : value extracted */
365
- MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
369
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
366
370
  {
367
371
  /* arbitrate between double-shift and shift+mask */
368
372
  #if 1
@@ -385,7 +389,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
385
389
  return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
386
390
  }
387
391
 
388
- MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
392
+ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
389
393
  {
390
394
  bitD->bitsConsumed += nbBits;
391
395
  }
@@ -394,7 +398,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
394
398
  * Read (consume) next n bits from local register and update.
395
399
  * Pay attention to not read more than nbBits contained into local register.
396
400
  * @return : extracted value. */
397
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
401
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
398
402
  {
399
403
  size_t const value = BIT_lookBits(bitD, nbBits);
400
404
  BIT_skipBits(bitD, nbBits);
@@ -411,6 +415,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
411
415
  return value;
412
416
  }
413
417
 
418
+ /*! BIT_reloadDStreamFast() :
419
+ * Similar to BIT_reloadDStream(), but with two differences:
420
+ * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
421
+ * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
422
+ * point you must use BIT_reloadDStream() to reload.
423
+ */
424
+ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
425
+ {
426
+ if (UNLIKELY(bitD->ptr < bitD->limitPtr))
427
+ return BIT_DStream_overflow;
428
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
429
+ bitD->ptr -= bitD->bitsConsumed >> 3;
430
+ bitD->bitsConsumed &= 7;
431
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
432
+ return BIT_DStream_unfinished;
433
+ }
434
+
414
435
  /*! BIT_reloadDStream() :
415
436
  * Refill `bitD` from buffer previously set in BIT_initDStream() .
416
437
  * This function is safe, it guarantees it will not read beyond src buffer.
@@ -422,10 +443,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
422
443
  return BIT_DStream_overflow;
423
444
 
424
445
  if (bitD->ptr >= bitD->limitPtr) {
425
- bitD->ptr -= bitD->bitsConsumed >> 3;
426
- bitD->bitsConsumed &= 7;
427
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
428
- return BIT_DStream_unfinished;
446
+ return BIT_reloadDStreamFast(bitD);
429
447
  }
430
448
  if (bitD->ptr == bitD->start) {
431
449
  if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,13 +11,15 @@
11
11
  #ifndef ZSTD_COMPILER_H
12
12
  #define ZSTD_COMPILER_H
13
13
 
14
+ #include "portability_macros.h"
15
+
14
16
  /*-*******************************************************
15
17
  * Compiler specifics
16
18
  *********************************************************/
17
19
  /* force inlining */
18
20
 
19
21
  #if !defined(ZSTD_NO_INLINE)
20
- #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
22
+ #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
21
23
  # define INLINE_KEYWORD inline
22
24
  #else
23
25
  # define INLINE_KEYWORD
@@ -38,6 +40,17 @@
38
40
 
39
41
  #endif
40
42
 
43
+ /**
44
+ On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
45
+ This explicitly marks such functions as __cdecl so that the code will still compile
46
+ if a CC other than __cdecl has been made the default.
47
+ */
48
+ #if defined(_MSC_VER)
49
+ # define WIN_CDECL __cdecl
50
+ #else
51
+ # define WIN_CDECL
52
+ #endif
53
+
41
54
  /**
42
55
  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
43
56
  * parameters. They must be inlined for the compiler to eliminate the constant
@@ -79,30 +92,19 @@
79
92
  # endif
80
93
  #endif
81
94
 
95
+
82
96
  /* target attribute */
83
- #ifndef __has_attribute
84
- #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
85
- #endif
86
97
  #if defined(__GNUC__) || defined(__ICCARM__)
87
98
  # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
88
99
  #else
89
100
  # define TARGET_ATTRIBUTE(target)
90
101
  #endif
91
102
 
92
- /* Enable runtime BMI2 dispatch based on the CPU.
93
- * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
103
+ /* Target attribute for BMI2 dynamic dispatch.
104
+ * Enable lzcnt, bmi, and bmi2.
105
+ * We test for bmi1 & bmi2. lzcnt is included in bmi1.
94
106
  */
95
- #ifndef DYNAMIC_BMI2
96
- #if ((defined(__clang__) && __has_attribute(__target__)) \
97
- || (defined(__GNUC__) \
98
- && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
99
- && (defined(__x86_64__) || defined(_M_X86)) \
100
- && !defined(__BMI2__)
101
- # define DYNAMIC_BMI2 1
102
- #else
103
- # define DYNAMIC_BMI2 0
104
- #endif
105
- #endif
107
+ #define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
106
108
 
107
109
  /* prefetch
108
110
  * can be disabled, by declaring NO_PREFETCH build macro */
@@ -117,6 +119,9 @@
117
119
  # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
118
120
  # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
119
121
  # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
122
+ # elif defined(__aarch64__)
123
+ # define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
124
+ # define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
120
125
  # else
121
126
  # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
122
127
  # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
@@ -135,8 +140,9 @@
135
140
  }
136
141
 
137
142
  /* vectorization
138
- * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
139
- #if !defined(__clang__) && defined(__GNUC__)
143
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
144
+ * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
145
+ #if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
140
146
  # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
141
147
  # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
142
148
  # else
@@ -146,6 +152,19 @@
146
152
  # define DONT_VECTORIZE
147
153
  #endif
148
154
 
155
+ /* Tell the compiler that a branch is likely or unlikely.
156
+ * Only use these macros if it causes the compiler to generate better code.
157
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
158
+ * and clang, please do.
159
+ */
160
+ #if defined(__GNUC__)
161
+ #define LIKELY(x) (__builtin_expect((x), 1))
162
+ #define UNLIKELY(x) (__builtin_expect((x), 0))
163
+ #else
164
+ #define LIKELY(x) (x)
165
+ #define UNLIKELY(x) (x)
166
+ #endif
167
+
149
168
  /* disable warnings */
150
169
  #ifdef _MSC_VER /* Visual Studio */
151
170
  # include <intrin.h> /* For Visual 2005 */
@@ -156,4 +175,161 @@
156
175
  # pragma warning(disable : 4324) /* disable: C4324: padded structure */
157
176
  #endif
158
177
 
178
+ /* Like DYNAMIC_BMI2, but for compile-time determination of BMI2 support */
179
+ #ifndef STATIC_BMI2
180
+ # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
181
+ # ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
182
+ # define STATIC_BMI2 1
183
+ # endif
184
+ # endif
185
+ #endif
186
+
187
+ #ifndef STATIC_BMI2
188
+ #define STATIC_BMI2 0
189
+ #endif
190
+
191
+ /* compile time determination of SIMD support */
192
+ #if !defined(ZSTD_NO_INTRINSICS)
193
+ # if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
194
+ # define ZSTD_ARCH_X86_SSE2
195
+ # endif
196
+ # if defined(__ARM_NEON) || defined(_M_ARM64)
197
+ # define ZSTD_ARCH_ARM_NEON
198
+ # endif
199
+ #
200
+ # if defined(ZSTD_ARCH_X86_SSE2)
201
+ # include <emmintrin.h>
202
+ # elif defined(ZSTD_ARCH_ARM_NEON)
203
+ # include <arm_neon.h>
204
+ # endif
205
+ #endif
206
+
207
+ /* C-language attributes were added in C23. */
208
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
209
+ # define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
210
+ #else
211
+ # define ZSTD_HAS_C_ATTRIBUTE(x) 0
212
+ #endif
213
+
214
+ /* Only use C++ attributes in C++. Some compilers report support for C++
215
+ * attributes when compiling with C.
216
+ */
217
+ #if defined(__cplusplus) && defined(__has_cpp_attribute)
218
+ # define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
219
+ #else
220
+ # define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
221
+ #endif
222
+
223
+ /* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
224
+ * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
225
+ * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
226
+ * - Else: __attribute__((__fallthrough__))
227
+ */
228
+ #ifndef ZSTD_FALLTHROUGH
229
+ # if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
230
+ # define ZSTD_FALLTHROUGH [[fallthrough]]
231
+ # elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
232
+ # define ZSTD_FALLTHROUGH [[fallthrough]]
233
+ # elif __has_attribute(__fallthrough__)
234
+ /* Leading semicolon is to satisfy gcc-11 with -pedantic. Without the semicolon
235
+ * gcc complains about: a label can only be part of a statement and a declaration is not a statement.
236
+ */
237
+ # define ZSTD_FALLTHROUGH ; __attribute__((__fallthrough__))
238
+ # else
239
+ # define ZSTD_FALLTHROUGH
240
+ # endif
241
+ #endif
242
+
243
+ /*-**************************************************************
244
+ * Alignment check
245
+ *****************************************************************/
246
+
247
+ /* this test was initially positioned in mem.h,
248
+ * but that file is removed (or replaced) for the Linux kernel,
249
+ * so it's now hosted in compiler.h,
250
+ * which remains valid for both user & kernel spaces.
251
+ */
252
+
253
+ #ifndef ZSTD_ALIGNOF
254
+ # if defined(__GNUC__) || defined(_MSC_VER)
255
+ /* covers gcc, clang & MSVC */
256
+ /* note : this section must come first, before C11,
257
+ * due to a limitation in the kernel source generator */
258
+ # define ZSTD_ALIGNOF(T) __alignof(T)
259
+
260
+ # elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
261
+ /* C11 support */
262
+ # include <stdalign.h>
263
+ # define ZSTD_ALIGNOF(T) alignof(T)
264
+
265
+ # else
266
+ /* No known support for alignof() - imperfect backup */
267
+ # define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T))
268
+
269
+ # endif
270
+ #endif /* ZSTD_ALIGNOF */
271
+
272
+ /*-**************************************************************
273
+ * Sanitizer
274
+ *****************************************************************/
275
+
276
+ #if ZSTD_MEMORY_SANITIZER
277
+ /* Not all platforms that support msan provide sanitizers/msan_interface.h.
278
+ * We therefore declare the functions we need ourselves, rather than trying to
279
+ * include the header file... */
280
+ #include <stddef.h> /* size_t */
281
+ #define ZSTD_DEPS_NEED_STDINT
282
+ #include "zstd_deps.h" /* intptr_t */
283
+
284
+ /* Make memory region fully initialized (without changing its contents). */
285
+ void __msan_unpoison(const volatile void *a, size_t size);
286
+
287
+ /* Make memory region fully uninitialized (without changing its contents).
288
+ This is a legacy interface that does not update origin information. Use
289
+ __msan_allocated_memory() instead. */
290
+ void __msan_poison(const volatile void *a, size_t size);
291
+
292
+ /* Returns the offset of the first (at least partially) poisoned byte in the
293
+ memory range, or -1 if the whole range is good. */
294
+ intptr_t __msan_test_shadow(const volatile void *x, size_t size);
295
+ #endif
296
+
297
+ #if ZSTD_ADDRESS_SANITIZER
298
+ /* Not all platforms that support asan provide sanitizers/asan_interface.h.
299
+ * We therefore declare the functions we need ourselves, rather than trying to
300
+ * include the header file... */
301
+ #include <stddef.h> /* size_t */
302
+
303
+ /**
304
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
305
+ *
306
+ * This memory must be previously allocated by your program. Instrumented
307
+ * code is forbidden from accessing addresses in this region until it is
308
+ * unpoisoned. This function is not guaranteed to poison the entire region -
309
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
310
+ * alignment restrictions.
311
+ *
312
+ * \note This function is not thread-safe because no two threads can poison or
313
+ * unpoison memory in the same memory region simultaneously.
314
+ *
315
+ * \param addr Start of memory region.
316
+ * \param size Size of memory region. */
317
+ void __asan_poison_memory_region(void const volatile *addr, size_t size);
318
+
319
+ /**
320
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
321
+ *
322
+ * This memory must be previously allocated by your program. Accessing
323
+ * addresses in this region is allowed until this region is poisoned again.
324
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
325
+ * to ASan alignment restrictions.
326
+ *
327
+ * \note This function is not thread-safe because no two threads can
328
+ * poison or unpoison memory in the same memory region simultaneously.
329
+ *
330
+ * \param addr Start of memory region.
331
+ * \param size Size of memory region. */
332
+ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
333
+ #endif
334
+
159
335
  #endif /* ZSTD_COMPILER_H */