zstd-ruby 1.4.1.0 → 1.5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/libzstd/BUCK +5 -7
  6. data/ext/zstdruby/libzstd/Makefile +304 -113
  7. data/ext/zstdruby/libzstd/README.md +83 -20
  8. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  9. data/ext/zstdruby/libzstd/common/compiler.h +150 -8
  10. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  11. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  12. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  13. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  14. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  15. data/ext/zstdruby/libzstd/common/error_private.h +8 -4
  16. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  17. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -55
  18. data/ext/zstdruby/libzstd/common/huf.h +43 -39
  19. data/ext/zstdruby/libzstd/common/mem.h +69 -25
  20. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  21. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  22. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  23. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  24. data/ext/zstdruby/libzstd/common/xxhash.c +40 -92
  25. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  26. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  27. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  28. data/ext/zstdruby/libzstd/common/zstd_internal.h +230 -111
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +154 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +332 -193
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3614 -1696
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +546 -86
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +441 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +572 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +662 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +43 -41
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +85 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1184 -111
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +333 -208
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +228 -129
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +151 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +395 -276
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +630 -231
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +606 -380
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +39 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -46
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -31
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +53 -30
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +24 -14
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +17 -8
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +17 -8
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +25 -11
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +43 -32
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +27 -19
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +32 -20
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  90. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  91. data/ext/zstdruby/libzstd/zstd.h +740 -153
  92. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  93. data/lib/zstd-ruby/version.rb +1 -1
  94. data/zstd-ruby.gemspec +1 -1
  95. metadata +21 -10
  96. data/.travis.yml +0 -14
@@ -19,18 +19,22 @@ The scope can be reduced on demand (see paragraph _modular build_).
19
19
 
20
20
  #### Multithreading support
21
21
 
22
- Multithreading is disabled by default when building with `make`.
22
+ When building with `make`, by default the dynamic library is multithreaded and static library is single-threaded (for compatibility reasons).
23
+
23
24
  Enabling multithreading requires 2 conditions :
24
25
  - set build macro `ZSTD_MULTITHREAD` (`-DZSTD_MULTITHREAD` for `gcc`)
25
26
  - for POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
26
27
 
27
- Both conditions are automatically applied when invoking `make lib-mt` target.
28
+ For convenience, we provide a build target to generate multi and single threaded libraries:
29
+ - Force enable multithreading on both dynamic and static libraries by appending `-mt` to the target, e.g. `make lib-mt`.
30
+ - Force disable multithreading on both dynamic and static libraries by appending `-nomt` to the target, e.g. `make lib-nomt`.
31
+ - By default, as mentioned before, dynamic library is multithreaded, and static library is single-threaded, e.g. `make lib`.
28
32
 
29
33
  When linking a POSIX program with a multithreaded version of `libzstd`,
30
- note that it's necessary to request the `-pthread` flag during link stage.
34
+ note that it's necessary to invoke the `-pthread` flag during link stage.
31
35
 
32
36
  Multithreading capabilities are exposed
33
- via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
37
+ via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
34
38
 
35
39
 
36
40
  #### API
@@ -42,8 +46,8 @@ Zstandard's stable API is exposed within [lib/zstd.h](zstd.h).
42
46
 
43
47
  Optional advanced features are exposed via :
44
48
 
45
- - `lib/common/zstd_errors.h` : translates `size_t` function results
46
- into a `ZSTD_ErrorCode`, for accurate error handling.
49
+ - `lib/zstd_errors.h` : translates `size_t` function results
50
+ into a `ZSTD_ErrorCode`, for accurate error handling.
47
51
 
48
52
  - `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
49
53
  it unlocks access to the experimental API,
@@ -85,33 +89,72 @@ The file structure is designed to make this selection manually achievable for an
85
89
 
86
90
  - While invoking `make libzstd`, it's possible to define build macros
87
91
  `ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
88
- and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
89
- This will also disable compilation of all dependencies
90
- (eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
91
-
92
- - There are some additional build macros that can be used to minify the decoder.
93
-
94
- Zstandard often has more than one implementation of a piece of functionality,
95
- where each implementation optimizes for different scenarios. For example, the
96
- Huffman decoder has complementary implementations that decode the stream one
97
- symbol at a time or two symbols at a time. Zstd normally includes both (and
98
- dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
99
- or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
92
+ and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the
93
+ corresponding features. This will also disable compilation of all
94
+ dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
95
+ dictBuilder).
96
+
97
+ - There are a number of options that can help minimize the binary size of
98
+ `libzstd`.
99
+
100
+ The first step is to select the components needed (using the above-described
101
+ `ZSTD_LIB_COMPRESSION` etc.).
102
+
103
+ The next step is to set `ZSTD_LIB_MINIFY` to `1` when invoking `make`. This
104
+ disables various optional components and changes the compilation flags to
105
+ prioritize space-saving.
106
+
107
+ Detailed options: Zstandard's code and build environment is set up by default
108
+ to optimize above all else for performance. In pursuit of this goal, Zstandard
109
+ makes significant trade-offs in code size. For example, Zstandard often has
110
+ more than one implementation of a particular component, with each
111
+ implementation optimized for different scenarios. For example, the Huffman
112
+ decoder has complementary implementations that decode the stream one symbol at
113
+ a time or two symbols at a time. Zstd normally includes both (and dispatches
114
+ between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1` or
115
+ `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
100
116
  compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
101
117
  and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
102
118
  only one or the other of two decompression implementations. The smallest
103
119
  binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
104
- `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
120
+ `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` (implied by `ZSTD_LIB_MINIFY`).
105
121
 
106
122
  For squeezing the last ounce of size out, you can also define
107
123
  `ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
108
124
  which removes the error messages that are otherwise returned by
109
- `ZSTD_getErrorName`.
125
+ `ZSTD_getErrorName` (implied by `ZSTD_LIB_MINIFY`).
126
+
127
+ Finally, when integrating into your application, make sure you're doing link-
128
+ time optimation and unused symbol garbage collection (via some combination of,
129
+ e.g., `-flto`, `-ffat-lto-objects`, `-fuse-linker-plugin`,
130
+ `-ffunction-sections`, `-fdata-sections`, `-fmerge-all-constants`,
131
+ `-Wl,--gc-sections`, `-Wl,-z,norelro`, and an archiver that understands
132
+ the compiler's intermediate representation, e.g., `AR=gcc-ar`). Consult your
133
+ compiler's documentation.
110
134
 
111
135
  - While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
112
136
  will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
113
137
  the shared library, which is now hidden by default.
114
138
 
139
+ - The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
140
+ which can detect at runtime the presence of BMI2 instructions, and use them only if present.
141
+ These instructions contribute to better performance, notably on the decoder side.
142
+ By default, this feature is automatically enabled on detecting
143
+ the right instruction set (x64) and compiler (clang or gcc >= 5).
144
+ It's obviously disabled for different cpus,
145
+ or when BMI2 instruction set is _required_ by the compiler command line
146
+ (in this case, only the BMI2 code path is generated).
147
+ Setting this macro will either force to generate the BMI2 dispatcher (1)
148
+ or prevent it (0). It overrides automatic detection.
149
+
150
+ - The build macro `ZSTD_NO_UNUSED_FUNCTIONS` can be defined to hide the definitions of functions
151
+ that zstd does not use. Not all unused functions are hidden, but they can be if needed.
152
+ Currently, this macro will hide function definitions in FSE and HUF that use an excessive
153
+ amount of stack space.
154
+
155
+ - The build macro `ZSTD_NO_INTRINSICS` can be defined to disable all explicit intrinsics.
156
+ Compiler builtins are still used.
157
+
115
158
 
116
159
  #### Windows : using MinGW+MSYS to create DLL
117
160
 
@@ -129,6 +172,26 @@ file it should be linked with `dll\libzstd.dll`. For example:
129
172
  The compiled executable will require ZSTD DLL which is available at `dll\libzstd.dll`.
130
173
 
131
174
 
175
+ #### Advanced Build options
176
+
177
+ The build system requires a hash function in order to
178
+ separate object files created with different compilation flags.
179
+ By default, it tries to use `md5sum` or equivalent.
180
+ The hash function can be manually switched by setting the `HASH` variable.
181
+ For example : `make HASH=xxhsum`
182
+ The hash function needs to generate at least 64-bit using hexadecimal format.
183
+ When no hash function is found,
184
+ the Makefile just generates all object files into the same default directory,
185
+ irrespective of compilation flags.
186
+ This functionality only matters if `libzstd` is compiled multiple times
187
+ with different build flags.
188
+
189
+ The build directory, where object files are stored
190
+ can also be manually controlled using variable `BUILD_DIR`,
191
+ for example `make BUILD_DIR=objectDir/v1`.
192
+ In which case, the hash function doesn't matter.
193
+
194
+
132
195
  #### Deprecated API
133
196
 
134
197
  Obsolete API on their way out are stored in directory `lib/deprecated`.
@@ -1,35 +1,15 @@
1
1
  /* ******************************************************************
2
- bitstream
3
- Part of FSE library
4
- Copyright (C) 2013-present, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * bitstream
3
+ * Part of FSE library
4
+ * Copyright (c) Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
  #ifndef BITSTREAM_H_MODULE
35
15
  #define BITSTREAM_H_MODULE
@@ -37,7 +17,6 @@
37
17
  #if defined (__cplusplus)
38
18
  extern "C" {
39
19
  #endif
40
-
41
20
  /*
42
21
  * This API consists of small unitary functions, which must be inlined for best performance.
43
22
  * Since link-time-optimization is not available for all compilers,
@@ -48,6 +27,7 @@ extern "C" {
48
27
  * Dependencies
49
28
  ******************************************/
50
29
  #include "mem.h" /* unaligned access routines */
30
+ #include "compiler.h" /* UNLIKELY() */
51
31
  #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
52
32
  #include "error_private.h" /* error codes and messages */
53
33
 
@@ -55,8 +35,12 @@ extern "C" {
55
35
  /*=========================================
56
36
  * Target specific
57
37
  =========================================*/
58
- #if defined(__BMI__) && defined(__GNUC__)
59
- # include <immintrin.h> /* support for bextr (experimental) */
38
+ #ifndef ZSTD_NO_INTRINSICS
39
+ # if defined(__BMI__) && defined(__GNUC__)
40
+ # include <immintrin.h> /* support for bextr (experimental) */
41
+ # elif defined(__ICCARM__)
42
+ # include <intrinsics.h>
43
+ # endif
60
44
  #endif
61
45
 
62
46
  #define STREAM_ACCUMULATOR_MIN_32 25
@@ -158,11 +142,16 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
158
142
  assert(val != 0);
159
143
  {
160
144
  # if defined(_MSC_VER) /* Visual */
161
- unsigned long r=0;
162
- _BitScanReverse ( &r, val );
163
- return (unsigned) r;
145
+ # if STATIC_BMI2 == 1
146
+ return _lzcnt_u32(val) ^ 31;
147
+ # else
148
+ unsigned long r = 0;
149
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
150
+ # endif
164
151
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
165
- return 31 - __builtin_clz (val);
152
+ return __builtin_clz (val) ^ 31;
153
+ # elif defined(__ICCARM__) /* IAR Intrinsic */
154
+ return 31 - __CLZ(val);
166
155
  # else /* Software version */
167
156
  static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
168
157
  11, 14, 16, 18, 22, 25, 3, 30,
@@ -214,7 +203,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
214
203
  MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
215
204
  size_t value, unsigned nbBits)
216
205
  {
217
- MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
206
+ DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
218
207
  assert(nbBits < BIT_MASK_SIZE);
219
208
  assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
220
209
  bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
@@ -240,9 +229,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
240
229
  {
241
230
  size_t const nbBytes = bitC->bitPos >> 3;
242
231
  assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
232
+ assert(bitC->ptr <= bitC->endPtr);
243
233
  MEM_writeLEST(bitC->ptr, bitC->bitContainer);
244
234
  bitC->ptr += nbBytes;
245
- assert(bitC->ptr <= bitC->endPtr);
246
235
  bitC->bitPos &= 7;
247
236
  bitC->bitContainer >>= nbBytes*8;
248
237
  }
@@ -256,6 +245,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
256
245
  {
257
246
  size_t const nbBytes = bitC->bitPos >> 3;
258
247
  assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
248
+ assert(bitC->ptr <= bitC->endPtr);
259
249
  MEM_writeLEST(bitC->ptr, bitC->bitContainer);
260
250
  bitC->ptr += nbBytes;
261
251
  if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
@@ -286,7 +276,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
286
276
  */
287
277
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
288
278
  {
289
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
279
+ if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
290
280
 
291
281
  bitD->start = (const char*)srcBuffer;
292
282
  bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@@ -332,12 +322,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
332
322
  return srcSize;
333
323
  }
334
324
 
335
- MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
325
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
336
326
  {
337
327
  return bitContainer >> start;
338
328
  }
339
329
 
340
- MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
330
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
341
331
  {
342
332
  U32 const regMask = sizeof(bitContainer)*8 - 1;
343
333
  /* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -345,10 +335,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
345
335
  return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
346
336
  }
347
337
 
348
- MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
338
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
349
339
  {
340
+ #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
341
+ return _bzhi_u64(bitContainer, nbBits);
342
+ #else
350
343
  assert(nbBits < BIT_MASK_SIZE);
351
344
  return bitContainer & BIT_mask[nbBits];
345
+ #endif
352
346
  }
353
347
 
354
348
  /*! BIT_lookBits() :
@@ -357,7 +351,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
357
351
  * On 32-bits, maxNbBits==24.
358
352
  * On 64-bits, maxNbBits==56.
359
353
  * @return : value extracted */
360
- MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
354
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
361
355
  {
362
356
  /* arbitrate between double-shift and shift+mask */
363
357
  #if 1
@@ -380,7 +374,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
380
374
  return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
381
375
  }
382
376
 
383
- MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
377
+ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
384
378
  {
385
379
  bitD->bitsConsumed += nbBits;
386
380
  }
@@ -389,7 +383,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
389
383
  * Read (consume) next n bits from local register and update.
390
384
  * Pay attention to not read more than nbBits contained into local register.
391
385
  * @return : extracted value. */
392
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
386
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
393
387
  {
394
388
  size_t const value = BIT_lookBits(bitD, nbBits);
395
389
  BIT_skipBits(bitD, nbBits);
@@ -406,6 +400,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
406
400
  return value;
407
401
  }
408
402
 
403
+ /*! BIT_reloadDStreamFast() :
404
+ * Similar to BIT_reloadDStream(), but with two differences:
405
+ * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
406
+ * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
407
+ * point you must use BIT_reloadDStream() to reload.
408
+ */
409
+ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
410
+ {
411
+ if (UNLIKELY(bitD->ptr < bitD->limitPtr))
412
+ return BIT_DStream_overflow;
413
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
414
+ bitD->ptr -= bitD->bitsConsumed >> 3;
415
+ bitD->bitsConsumed &= 7;
416
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
417
+ return BIT_DStream_unfinished;
418
+ }
419
+
409
420
  /*! BIT_reloadDStream() :
410
421
  * Refill `bitD` from buffer previously set in BIT_initDStream() .
411
422
  * This function is safe, it guarantees it will not read beyond src buffer.
@@ -417,10 +428,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
417
428
  return BIT_DStream_overflow;
418
429
 
419
430
  if (bitD->ptr >= bitD->limitPtr) {
420
- bitD->ptr -= bitD->bitsConsumed >> 3;
421
- bitD->bitsConsumed &= 7;
422
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
423
- return BIT_DStream_unfinished;
431
+ return BIT_reloadDStreamFast(bitD);
424
432
  }
425
433
  if (bitD->ptr == bitD->start) {
426
434
  if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -17,13 +17,13 @@
17
17
  /* force inlining */
18
18
 
19
19
  #if !defined(ZSTD_NO_INLINE)
20
- #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
20
+ #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
21
21
  # define INLINE_KEYWORD inline
22
22
  #else
23
23
  # define INLINE_KEYWORD
24
24
  #endif
25
25
 
26
- #if defined(__GNUC__)
26
+ #if defined(__GNUC__) || defined(__ICCARM__)
27
27
  # define FORCE_INLINE_ATTR __attribute__((always_inline))
28
28
  #elif defined(_MSC_VER)
29
29
  # define FORCE_INLINE_ATTR __forceinline
@@ -38,6 +38,17 @@
38
38
 
39
39
  #endif
40
40
 
41
+ /**
42
+ On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
43
+ This explictly marks such functions as __cdecl so that the code will still compile
44
+ if a CC other than __cdecl has been made the default.
45
+ */
46
+ #if defined(_MSC_VER)
47
+ # define WIN_CDECL __cdecl
48
+ #else
49
+ # define WIN_CDECL
50
+ #endif
51
+
41
52
  /**
42
53
  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
43
54
  * parameters. They must be inlined for the compiler to eliminate the constant
@@ -61,22 +72,30 @@
61
72
  # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
62
73
  #endif
63
74
 
75
+ /* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
76
+ #if defined(__GNUC__)
77
+ # define UNUSED_ATTR __attribute__((unused))
78
+ #else
79
+ # define UNUSED_ATTR
80
+ #endif
81
+
64
82
  /* force no inlining */
65
83
  #ifdef _MSC_VER
66
84
  # define FORCE_NOINLINE static __declspec(noinline)
67
85
  #else
68
- # ifdef __GNUC__
86
+ # if defined(__GNUC__) || defined(__ICCARM__)
69
87
  # define FORCE_NOINLINE static __attribute__((__noinline__))
70
88
  # else
71
89
  # define FORCE_NOINLINE static
72
90
  # endif
73
91
  #endif
74
92
 
93
+
75
94
  /* target attribute */
76
95
  #ifndef __has_attribute
77
96
  #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
78
97
  #endif
79
- #if defined(__GNUC__)
98
+ #if defined(__GNUC__) || defined(__ICCARM__)
80
99
  # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
81
100
  #else
82
101
  # define TARGET_ATTRIBUTE(target)
@@ -110,6 +129,9 @@
110
129
  # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
111
130
  # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
112
131
  # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
132
+ # elif defined(__aarch64__)
133
+ # define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
134
+ # define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
113
135
  # else
114
136
  # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
115
137
  # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
@@ -127,13 +149,31 @@
127
149
  } \
128
150
  }
129
151
 
130
- /* vectorization */
131
- #if !defined(__clang__) && defined(__GNUC__)
132
- # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
152
+ /* vectorization
153
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
154
+ #if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
155
+ # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
156
+ # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
157
+ # else
158
+ # define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
159
+ # endif
133
160
  #else
134
161
  # define DONT_VECTORIZE
135
162
  #endif
136
163
 
164
+ /* Tell the compiler that a branch is likely or unlikely.
165
+ * Only use these macros if it causes the compiler to generate better code.
166
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
167
+ * and clang, please do.
168
+ */
169
+ #if defined(__GNUC__)
170
+ #define LIKELY(x) (__builtin_expect((x), 1))
171
+ #define UNLIKELY(x) (__builtin_expect((x), 0))
172
+ #else
173
+ #define LIKELY(x) (x)
174
+ #define UNLIKELY(x) (x)
175
+ #endif
176
+
137
177
  /* disable warnings */
138
178
  #ifdef _MSC_VER /* Visual Studio */
139
179
  # include <intrin.h> /* For Visual 2005 */
@@ -144,4 +184,106 @@
144
184
  # pragma warning(disable : 4324) /* disable: C4324: padded structure */
145
185
  #endif
146
186
 
187
+ /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
188
+ #ifndef STATIC_BMI2
189
+ # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
190
+ # ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
191
+ # define STATIC_BMI2 1
192
+ # endif
193
+ # endif
194
+ #endif
195
+
196
+ #ifndef STATIC_BMI2
197
+ #define STATIC_BMI2 0
198
+ #endif
199
+
200
+ /* compat. with non-clang compilers */
201
+ #ifndef __has_builtin
202
+ # define __has_builtin(x) 0
203
+ #endif
204
+
205
+ /* compat. with non-clang compilers */
206
+ #ifndef __has_feature
207
+ # define __has_feature(x) 0
208
+ #endif
209
+
210
+ /* detects whether we are being compiled under msan */
211
+ #ifndef ZSTD_MEMORY_SANITIZER
212
+ # if __has_feature(memory_sanitizer)
213
+ # define ZSTD_MEMORY_SANITIZER 1
214
+ # else
215
+ # define ZSTD_MEMORY_SANITIZER 0
216
+ # endif
217
+ #endif
218
+
219
+ #if ZSTD_MEMORY_SANITIZER
220
+ /* Not all platforms that support msan provide sanitizers/msan_interface.h.
221
+ * We therefore declare the functions we need ourselves, rather than trying to
222
+ * include the header file... */
223
+ #include <stddef.h> /* size_t */
224
+ #define ZSTD_DEPS_NEED_STDINT
225
+ #include "zstd_deps.h" /* intptr_t */
226
+
227
+ /* Make memory region fully initialized (without changing its contents). */
228
+ void __msan_unpoison(const volatile void *a, size_t size);
229
+
230
+ /* Make memory region fully uninitialized (without changing its contents).
231
+ This is a legacy interface that does not update origin information. Use
232
+ __msan_allocated_memory() instead. */
233
+ void __msan_poison(const volatile void *a, size_t size);
234
+
235
+ /* Returns the offset of the first (at least partially) poisoned byte in the
236
+ memory range, or -1 if the whole range is good. */
237
+ intptr_t __msan_test_shadow(const volatile void *x, size_t size);
238
+ #endif
239
+
240
+ /* detects whether we are being compiled under asan */
241
+ #ifndef ZSTD_ADDRESS_SANITIZER
242
+ # if __has_feature(address_sanitizer)
243
+ # define ZSTD_ADDRESS_SANITIZER 1
244
+ # elif defined(__SANITIZE_ADDRESS__)
245
+ # define ZSTD_ADDRESS_SANITIZER 1
246
+ # else
247
+ # define ZSTD_ADDRESS_SANITIZER 0
248
+ # endif
249
+ #endif
250
+
251
+ #if ZSTD_ADDRESS_SANITIZER
252
+ /* Not all platforms that support asan provide sanitizers/asan_interface.h.
253
+ * We therefore declare the functions we need ourselves, rather than trying to
254
+ * include the header file... */
255
+ #include <stddef.h> /* size_t */
256
+
257
+ /**
258
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
259
+ *
260
+ * This memory must be previously allocated by your program. Instrumented
261
+ * code is forbidden from accessing addresses in this region until it is
262
+ * unpoisoned. This function is not guaranteed to poison the entire region -
263
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
264
+ * alignment restrictions.
265
+ *
266
+ * \note This function is not thread-safe because no two threads can poison or
267
+ * unpoison memory in the same memory region simultaneously.
268
+ *
269
+ * \param addr Start of memory region.
270
+ * \param size Size of memory region. */
271
+ void __asan_poison_memory_region(void const volatile *addr, size_t size);
272
+
273
+ /**
274
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
275
+ *
276
+ * This memory must be previously allocated by your program. Accessing
277
+ * addresses in this region is allowed until this region is poisoned again.
278
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
279
+ * to ASan alignment restrictions.
280
+ *
281
+ * \note This function is not thread-safe because no two threads can
282
+ * poison or unpoison memory in the same memory region simultaneously.
283
+ *
284
+ * \param addr Start of memory region.
285
+ * \param size Size of memory region. */
286
+ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
287
+ #endif
288
+
147
289
  #endif /* ZSTD_COMPILER_H */