zstd-ruby 1.4.0.0 → 1.4.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/Makefile +274 -107
- data/ext/zstdruby/libzstd/README.md +75 -16
- data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
- data/ext/zstdruby/libzstd/common/compiler.h +154 -5
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +7 -3
- data/ext/zstdruby/libzstd/common/fse.h +50 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
- data/ext/zstdruby/libzstd/common/huf.h +41 -38
- data/ext/zstdruby/libzstd/common/mem.h +68 -22
- data/ext/zstdruby/libzstd/common/pool.c +30 -20
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/threading.c +51 -4
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
- data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
- data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
- data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
- data/ext/zstdruby/libzstd/zstd.h +655 -118
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +20 -10
- data/.travis.yml +0 -14
@@ -27,10 +27,10 @@ Enabling multithreading requires 2 conditions :
|
|
27
27
|
Both conditions are automatically applied when invoking `make lib-mt` target.
|
28
28
|
|
29
29
|
When linking a POSIX program with a multithreaded version of `libzstd`,
|
30
|
-
note that it's necessary to
|
30
|
+
note that it's necessary to invoke the `-pthread` flag during link stage.
|
31
31
|
|
32
32
|
Multithreading capabilities are exposed
|
33
|
-
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3
|
33
|
+
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
|
34
34
|
|
35
35
|
|
36
36
|
#### API
|
@@ -85,33 +85,72 @@ The file structure is designed to make this selection manually achievable for an
|
|
85
85
|
|
86
86
|
- While invoking `make libzstd`, it's possible to define build macros
|
87
87
|
`ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
|
88
|
-
and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the
|
89
|
-
This will also disable compilation of all
|
90
|
-
(eg. `ZSTD_LIB_COMPRESSION=0` will also disable
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
88
|
+
and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the
|
89
|
+
corresponding features. This will also disable compilation of all
|
90
|
+
dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
|
91
|
+
dictBuilder).
|
92
|
+
|
93
|
+
- There are a number of options that can help minimize the binary size of
|
94
|
+
`libzstd`.
|
95
|
+
|
96
|
+
The first step is to select the components needed (using the above-described
|
97
|
+
`ZSTD_LIB_COMPRESSION` etc.).
|
98
|
+
|
99
|
+
The next step is to set `ZSTD_LIB_MINIFY` to `1` when invoking `make`. This
|
100
|
+
disables various optional components and changes the compilation flags to
|
101
|
+
prioritize space-saving.
|
102
|
+
|
103
|
+
Detailed options: Zstandard's code and build environment are set up by default
|
104
|
+
to optimize above all else for performance. In pursuit of this goal, Zstandard
|
105
|
+
makes significant trade-offs in code size. For example, Zstandard often has
|
106
|
+
more than one implementation of a particular component, with each
|
107
|
+
implementation optimized for different scenarios. For example, the Huffman
|
108
|
+
decoder has complementary implementations that decode the stream one symbol at
|
109
|
+
a time or two symbols at a time. Zstd normally includes both (and dispatches
|
110
|
+
between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1` or
|
111
|
+
`HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
|
100
112
|
compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
|
101
113
|
and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
|
102
114
|
only one or the other of two decompression implementations. The smallest
|
103
115
|
binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
|
104
|
-
`ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
116
|
+
`ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` (implied by `ZSTD_LIB_MINIFY`).
|
105
117
|
|
106
118
|
For squeezing the last ounce of size out, you can also define
|
107
119
|
`ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
|
108
120
|
which removes the error messages that are otherwise returned by
|
109
|
-
`ZSTD_getErrorName
|
121
|
+
`ZSTD_getErrorName` (implied by `ZSTD_LIB_MINIFY`).
|
122
|
+
|
123
|
+
Finally, when integrating into your application, make sure you're doing link-
|
124
|
+
time optimization and unused symbol garbage collection (via some combination of,
|
125
|
+
e.g., `-flto`, `-ffat-lto-objects`, `-fuse-linker-plugin`,
|
126
|
+
`-ffunction-sections`, `-fdata-sections`, `-fmerge-all-constants`,
|
127
|
+
`-Wl,--gc-sections`, `-Wl,-z,norelro`, and an archiver that understands
|
128
|
+
the compiler's intermediate representation, e.g., `AR=gcc-ar`). Consult your
|
129
|
+
compiler's documentation.
|
110
130
|
|
111
131
|
- While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
|
112
132
|
will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
|
113
133
|
the shared library, which is now hidden by default.
|
114
134
|
|
135
|
+
- The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
|
136
|
+
which can detect at runtime the presence of BMI2 instructions, and use them only if present.
|
137
|
+
These instructions contribute to better performance, notably on the decoder side.
|
138
|
+
By default, this feature is automatically enabled on detecting
|
139
|
+
the right instruction set (x64) and compiler (clang or gcc >= 5).
|
140
|
+
It's obviously disabled for different cpus,
|
141
|
+
or when BMI2 instruction set is _required_ by the compiler command line
|
142
|
+
(in this case, only the BMI2 code path is generated).
|
143
|
+
Setting this macro will either force to generate the BMI2 dispatcher (1)
|
144
|
+
or prevent it (0). It overrides automatic detection.
|
145
|
+
|
146
|
+
- The build macro `ZSTD_NO_UNUSED_FUNCTIONS` can be defined to hide the definitions of functions
|
147
|
+
that zstd does not use. Not all unused functions are hidden, but they can be if needed.
|
148
|
+
Currently, this macro will hide function definitions in FSE and HUF that use an excessive
|
149
|
+
amount of stack space.
|
150
|
+
|
151
|
+
- The build macro `ZSTD_NO_INTRINSICS` can be defined to disable all explicit intrinsics.
|
152
|
+
Compiler builtins are still used.
|
153
|
+
|
115
154
|
|
116
155
|
#### Windows : using MinGW+MSYS to create DLL
|
117
156
|
|
@@ -129,6 +168,26 @@ file it should be linked with `dll\libzstd.dll`. For example:
|
|
129
168
|
The compiled executable will require ZSTD DLL which is available at `dll\libzstd.dll`.
|
130
169
|
|
131
170
|
|
171
|
+
#### Advanced Build options
|
172
|
+
|
173
|
+
The build system requires a hash function in order to
|
174
|
+
separate object files created with different compilation flags.
|
175
|
+
By default, it tries to use `md5sum` or equivalent.
|
176
|
+
The hash function can be manually switched by setting the `HASH` variable.
|
177
|
+
For example : `make HASH=xxhsum`
|
178
|
+
The hash function needs to generate at least 64 bits of output in hexadecimal format.
|
179
|
+
When no hash function is found,
|
180
|
+
the Makefile just generates all object files into the same default directory,
|
181
|
+
irrespective of compilation flags.
|
182
|
+
This functionality only matters if `libzstd` is compiled multiple times
|
183
|
+
with different build flags.
|
184
|
+
|
185
|
+
The build directory, where object files are stored
|
186
|
+
can also be manually controlled using variable `BUILD_DIR`,
|
187
|
+
for example `make BUILD_DIR=objectDir/v1`.
|
188
|
+
In which case, the hash function doesn't matter.
|
189
|
+
|
190
|
+
|
132
191
|
#### Deprecated API
|
133
192
|
|
134
193
|
Obsolete API on their way out are stored in directory `lib/deprecated`.
|
@@ -1,35 +1,15 @@
|
|
1
1
|
/* ******************************************************************
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
notice, this list of conditions and the following disclaimer.
|
14
|
-
* Redistributions in binary form must reproduce the above
|
15
|
-
copyright notice, this list of conditions and the following disclaimer
|
16
|
-
in the documentation and/or other materials provided with the
|
17
|
-
distribution.
|
18
|
-
|
19
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
20
|
-
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
21
|
-
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
22
|
-
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
23
|
-
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
24
|
-
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
25
|
-
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
26
|
-
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
27
|
-
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
28
|
-
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
30
|
-
|
31
|
-
You can contact the author at :
|
32
|
-
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
2
|
+
* bitstream
|
3
|
+
* Part of FSE library
|
4
|
+
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
|
5
|
+
*
|
6
|
+
* You can contact the author at :
|
7
|
+
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
8
|
+
*
|
9
|
+
* This source code is licensed under both the BSD-style license (found in the
|
10
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
11
|
+
* in the COPYING file in the root directory of this source tree).
|
12
|
+
* You may select, at your option, one of the above-listed licenses.
|
33
13
|
****************************************************************** */
|
34
14
|
#ifndef BITSTREAM_H_MODULE
|
35
15
|
#define BITSTREAM_H_MODULE
|
@@ -37,7 +17,6 @@
|
|
37
17
|
#if defined (__cplusplus)
|
38
18
|
extern "C" {
|
39
19
|
#endif
|
40
|
-
|
41
20
|
/*
|
42
21
|
* This API consists of small unitary functions, which must be inlined for best performance.
|
43
22
|
* Since link-time-optimization is not available for all compilers,
|
@@ -48,6 +27,7 @@ extern "C" {
|
|
48
27
|
* Dependencies
|
49
28
|
******************************************/
|
50
29
|
#include "mem.h" /* unaligned access routines */
|
30
|
+
#include "compiler.h" /* UNLIKELY() */
|
51
31
|
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
|
52
32
|
#include "error_private.h" /* error codes and messages */
|
53
33
|
|
@@ -55,8 +35,12 @@ extern "C" {
|
|
55
35
|
/*=========================================
|
56
36
|
* Target specific
|
57
37
|
=========================================*/
|
58
|
-
#
|
59
|
-
#
|
38
|
+
#ifndef ZSTD_NO_INTRINSICS
|
39
|
+
# if defined(__BMI__) && defined(__GNUC__)
|
40
|
+
# include <immintrin.h> /* support for bextr (experimental) */
|
41
|
+
# elif defined(__ICCARM__)
|
42
|
+
# include <intrinsics.h>
|
43
|
+
# endif
|
60
44
|
#endif
|
61
45
|
|
62
46
|
#define STREAM_ACCUMULATOR_MIN_32 25
|
@@ -158,11 +142,16 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
|
|
158
142
|
assert(val != 0);
|
159
143
|
{
|
160
144
|
# if defined(_MSC_VER) /* Visual */
|
161
|
-
|
162
|
-
|
163
|
-
|
145
|
+
# if STATIC_BMI2 == 1
|
146
|
+
return _lzcnt_u32(val) ^ 31;
|
147
|
+
# else
|
148
|
+
unsigned long r = 0;
|
149
|
+
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
150
|
+
# endif
|
164
151
|
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
165
|
-
return
|
152
|
+
return __builtin_clz (val) ^ 31;
|
153
|
+
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
154
|
+
return 31 - __CLZ(val);
|
166
155
|
# else /* Software version */
|
167
156
|
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
|
168
157
|
11, 14, 16, 18, 22, 25, 3, 30,
|
@@ -214,7 +203,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
|
|
214
203
|
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
215
204
|
size_t value, unsigned nbBits)
|
216
205
|
{
|
217
|
-
|
206
|
+
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
|
218
207
|
assert(nbBits < BIT_MASK_SIZE);
|
219
208
|
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
220
209
|
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
|
@@ -240,9 +229,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
|
|
240
229
|
{
|
241
230
|
size_t const nbBytes = bitC->bitPos >> 3;
|
242
231
|
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
232
|
+
assert(bitC->ptr <= bitC->endPtr);
|
243
233
|
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
|
244
234
|
bitC->ptr += nbBytes;
|
245
|
-
assert(bitC->ptr <= bitC->endPtr);
|
246
235
|
bitC->bitPos &= 7;
|
247
236
|
bitC->bitContainer >>= nbBytes*8;
|
248
237
|
}
|
@@ -256,6 +245,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
|
|
256
245
|
{
|
257
246
|
size_t const nbBytes = bitC->bitPos >> 3;
|
258
247
|
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
248
|
+
assert(bitC->ptr <= bitC->endPtr);
|
259
249
|
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
|
260
250
|
bitC->ptr += nbBytes;
|
261
251
|
if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
|
@@ -286,7 +276,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
|
|
286
276
|
*/
|
287
277
|
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
|
288
278
|
{
|
289
|
-
if (srcSize < 1) {
|
279
|
+
if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
|
290
280
|
|
291
281
|
bitD->start = (const char*)srcBuffer;
|
292
282
|
bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
|
@@ -332,12 +322,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
|
332
322
|
return srcSize;
|
333
323
|
}
|
334
324
|
|
335
|
-
MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
325
|
+
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
336
326
|
{
|
337
327
|
return bitContainer >> start;
|
338
328
|
}
|
339
329
|
|
340
|
-
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
330
|
+
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
341
331
|
{
|
342
332
|
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
343
333
|
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
@@ -345,10 +335,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
|
|
345
335
|
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
346
336
|
}
|
347
337
|
|
348
|
-
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
338
|
+
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
349
339
|
{
|
340
|
+
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
|
341
|
+
return _bzhi_u64(bitContainer, nbBits);
|
342
|
+
#else
|
350
343
|
assert(nbBits < BIT_MASK_SIZE);
|
351
344
|
return bitContainer & BIT_mask[nbBits];
|
345
|
+
#endif
|
352
346
|
}
|
353
347
|
|
354
348
|
/*! BIT_lookBits() :
|
@@ -357,7 +351,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
|
357
351
|
* On 32-bits, maxNbBits==24.
|
358
352
|
* On 64-bits, maxNbBits==56.
|
359
353
|
* @return : value extracted */
|
360
|
-
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t*
|
354
|
+
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
361
355
|
{
|
362
356
|
/* arbitrate between double-shift and shift+mask */
|
363
357
|
#if 1
|
@@ -380,7 +374,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
|
|
380
374
|
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
|
381
375
|
}
|
382
376
|
|
383
|
-
MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
377
|
+
MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
384
378
|
{
|
385
379
|
bitD->bitsConsumed += nbBits;
|
386
380
|
}
|
@@ -389,7 +383,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
|
389
383
|
* Read (consume) next n bits from local register and update.
|
390
384
|
* Pay attention to not read more than nbBits contained into local register.
|
391
385
|
* @return : extracted value. */
|
392
|
-
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
386
|
+
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
393
387
|
{
|
394
388
|
size_t const value = BIT_lookBits(bitD, nbBits);
|
395
389
|
BIT_skipBits(bitD, nbBits);
|
@@ -406,6 +400,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
|
406
400
|
return value;
|
407
401
|
}
|
408
402
|
|
403
|
+
/*! BIT_reloadDStreamFast() :
|
404
|
+
* Similar to BIT_reloadDStream(), but with two differences:
|
405
|
+
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
|
406
|
+
* 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
|
407
|
+
* point you must use BIT_reloadDStream() to reload.
|
408
|
+
*/
|
409
|
+
MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
|
410
|
+
{
|
411
|
+
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
|
412
|
+
return BIT_DStream_overflow;
|
413
|
+
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
|
414
|
+
bitD->ptr -= bitD->bitsConsumed >> 3;
|
415
|
+
bitD->bitsConsumed &= 7;
|
416
|
+
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
417
|
+
return BIT_DStream_unfinished;
|
418
|
+
}
|
419
|
+
|
409
420
|
/*! BIT_reloadDStream() :
|
410
421
|
* Refill `bitD` from buffer previously set in BIT_initDStream() .
|
411
422
|
* This function is safe, it guarantees it will not read beyond src buffer.
|
@@ -417,10 +428,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
|
417
428
|
return BIT_DStream_overflow;
|
418
429
|
|
419
430
|
if (bitD->ptr >= bitD->limitPtr) {
|
420
|
-
|
421
|
-
bitD->bitsConsumed &= 7;
|
422
|
-
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
423
|
-
return BIT_DStream_unfinished;
|
431
|
+
return BIT_reloadDStreamFast(bitD);
|
424
432
|
}
|
425
433
|
if (bitD->ptr == bitD->start) {
|
426
434
|
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -17,13 +17,13 @@
|
|
17
17
|
/* force inlining */
|
18
18
|
|
19
19
|
#if !defined(ZSTD_NO_INLINE)
|
20
|
-
#if defined
|
20
|
+
#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
|
21
21
|
# define INLINE_KEYWORD inline
|
22
22
|
#else
|
23
23
|
# define INLINE_KEYWORD
|
24
24
|
#endif
|
25
25
|
|
26
|
-
#if defined(__GNUC__)
|
26
|
+
#if defined(__GNUC__) || defined(__ICCARM__)
|
27
27
|
# define FORCE_INLINE_ATTR __attribute__((always_inline))
|
28
28
|
#elif defined(_MSC_VER)
|
29
29
|
# define FORCE_INLINE_ATTR __forceinline
|
@@ -38,6 +38,17 @@
|
|
38
38
|
|
39
39
|
#endif
|
40
40
|
|
41
|
+
/**
|
42
|
+
On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
|
43
|
+
This explicitly marks such functions as __cdecl so that the code will still compile
|
44
|
+
if a CC other than __cdecl has been made the default.
|
45
|
+
*/
|
46
|
+
#if defined(_MSC_VER)
|
47
|
+
# define WIN_CDECL __cdecl
|
48
|
+
#else
|
49
|
+
# define WIN_CDECL
|
50
|
+
#endif
|
51
|
+
|
41
52
|
/**
|
42
53
|
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
|
43
54
|
* parameters. They must be inlined for the compiler to eliminate the constant
|
@@ -61,22 +72,30 @@
|
|
61
72
|
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
|
62
73
|
#endif
|
63
74
|
|
75
|
+
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
|
76
|
+
#if defined(__GNUC__)
|
77
|
+
# define UNUSED_ATTR __attribute__((unused))
|
78
|
+
#else
|
79
|
+
# define UNUSED_ATTR
|
80
|
+
#endif
|
81
|
+
|
64
82
|
/* force no inlining */
|
65
83
|
#ifdef _MSC_VER
|
66
84
|
# define FORCE_NOINLINE static __declspec(noinline)
|
67
85
|
#else
|
68
|
-
#
|
86
|
+
# if defined(__GNUC__) || defined(__ICCARM__)
|
69
87
|
# define FORCE_NOINLINE static __attribute__((__noinline__))
|
70
88
|
# else
|
71
89
|
# define FORCE_NOINLINE static
|
72
90
|
# endif
|
73
91
|
#endif
|
74
92
|
|
93
|
+
|
75
94
|
/* target attribute */
|
76
95
|
#ifndef __has_attribute
|
77
96
|
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
|
78
97
|
#endif
|
79
|
-
#if defined(__GNUC__)
|
98
|
+
#if defined(__GNUC__) || defined(__ICCARM__)
|
80
99
|
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
|
81
100
|
#else
|
82
101
|
# define TARGET_ATTRIBUTE(target)
|
@@ -110,6 +129,9 @@
|
|
110
129
|
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
111
130
|
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
|
112
131
|
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
|
132
|
+
# elif defined(__aarch64__)
|
133
|
+
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
|
134
|
+
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
|
113
135
|
# else
|
114
136
|
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
|
115
137
|
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
|
@@ -127,6 +149,31 @@
|
|
127
149
|
} \
|
128
150
|
}
|
129
151
|
|
152
|
+
/* vectorization
|
153
|
+
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
|
154
|
+
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
|
155
|
+
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
|
156
|
+
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
|
157
|
+
# else
|
158
|
+
# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
|
159
|
+
# endif
|
160
|
+
#else
|
161
|
+
# define DONT_VECTORIZE
|
162
|
+
#endif
|
163
|
+
|
164
|
+
/* Tell the compiler that a branch is likely or unlikely.
|
165
|
+
* Only use these macros if it causes the compiler to generate better code.
|
166
|
+
* If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
|
167
|
+
* and clang, please do.
|
168
|
+
*/
|
169
|
+
#if defined(__GNUC__)
|
170
|
+
#define LIKELY(x) (__builtin_expect((x), 1))
|
171
|
+
#define UNLIKELY(x) (__builtin_expect((x), 0))
|
172
|
+
#else
|
173
|
+
#define LIKELY(x) (x)
|
174
|
+
#define UNLIKELY(x) (x)
|
175
|
+
#endif
|
176
|
+
|
130
177
|
/* disable warnings */
|
131
178
|
#ifdef _MSC_VER /* Visual Studio */
|
132
179
|
# include <intrin.h> /* For Visual 2005 */
|
@@ -137,4 +184,106 @@
|
|
137
184
|
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
|
138
185
|
#endif
|
139
186
|
|
187
|
+
/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
|
188
|
+
#ifndef STATIC_BMI2
|
189
|
+
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
|
190
|
+
# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
|
191
|
+
# define STATIC_BMI2 1
|
192
|
+
# endif
|
193
|
+
# endif
|
194
|
+
#endif
|
195
|
+
|
196
|
+
#ifndef STATIC_BMI2
|
197
|
+
#define STATIC_BMI2 0
|
198
|
+
#endif
|
199
|
+
|
200
|
+
/* compat. with non-clang compilers */
|
201
|
+
#ifndef __has_builtin
|
202
|
+
# define __has_builtin(x) 0
|
203
|
+
#endif
|
204
|
+
|
205
|
+
/* compat. with non-clang compilers */
|
206
|
+
#ifndef __has_feature
|
207
|
+
# define __has_feature(x) 0
|
208
|
+
#endif
|
209
|
+
|
210
|
+
/* detects whether we are being compiled under msan */
|
211
|
+
#ifndef ZSTD_MEMORY_SANITIZER
|
212
|
+
# if __has_feature(memory_sanitizer)
|
213
|
+
# define ZSTD_MEMORY_SANITIZER 1
|
214
|
+
# else
|
215
|
+
# define ZSTD_MEMORY_SANITIZER 0
|
216
|
+
# endif
|
217
|
+
#endif
|
218
|
+
|
219
|
+
#if ZSTD_MEMORY_SANITIZER
|
220
|
+
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
|
221
|
+
* We therefore declare the functions we need ourselves, rather than trying to
|
222
|
+
* include the header file... */
|
223
|
+
#include <stddef.h> /* size_t */
|
224
|
+
#define ZSTD_DEPS_NEED_STDINT
|
225
|
+
#include "zstd_deps.h" /* intptr_t */
|
226
|
+
|
227
|
+
/* Make memory region fully initialized (without changing its contents). */
|
228
|
+
void __msan_unpoison(const volatile void *a, size_t size);
|
229
|
+
|
230
|
+
/* Make memory region fully uninitialized (without changing its contents).
|
231
|
+
This is a legacy interface that does not update origin information. Use
|
232
|
+
__msan_allocated_memory() instead. */
|
233
|
+
void __msan_poison(const volatile void *a, size_t size);
|
234
|
+
|
235
|
+
/* Returns the offset of the first (at least partially) poisoned byte in the
|
236
|
+
memory range, or -1 if the whole range is good. */
|
237
|
+
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
|
238
|
+
#endif
|
239
|
+
|
240
|
+
/* detects whether we are being compiled under asan */
|
241
|
+
#ifndef ZSTD_ADDRESS_SANITIZER
|
242
|
+
# if __has_feature(address_sanitizer)
|
243
|
+
# define ZSTD_ADDRESS_SANITIZER 1
|
244
|
+
# elif defined(__SANITIZE_ADDRESS__)
|
245
|
+
# define ZSTD_ADDRESS_SANITIZER 1
|
246
|
+
# else
|
247
|
+
# define ZSTD_ADDRESS_SANITIZER 0
|
248
|
+
# endif
|
249
|
+
#endif
|
250
|
+
|
251
|
+
#if ZSTD_ADDRESS_SANITIZER
|
252
|
+
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
|
253
|
+
* We therefore declare the functions we need ourselves, rather than trying to
|
254
|
+
* include the header file... */
|
255
|
+
#include <stddef.h> /* size_t */
|
256
|
+
|
257
|
+
/**
|
258
|
+
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
|
259
|
+
*
|
260
|
+
* This memory must be previously allocated by your program. Instrumented
|
261
|
+
* code is forbidden from accessing addresses in this region until it is
|
262
|
+
* unpoisoned. This function is not guaranteed to poison the entire region -
|
263
|
+
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
|
264
|
+
* alignment restrictions.
|
265
|
+
*
|
266
|
+
* \note This function is not thread-safe because no two threads can poison or
|
267
|
+
* unpoison memory in the same memory region simultaneously.
|
268
|
+
*
|
269
|
+
* \param addr Start of memory region.
|
270
|
+
* \param size Size of memory region. */
|
271
|
+
void __asan_poison_memory_region(void const volatile *addr, size_t size);
|
272
|
+
|
273
|
+
/**
|
274
|
+
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
|
275
|
+
*
|
276
|
+
* This memory must be previously allocated by your program. Accessing
|
277
|
+
* addresses in this region is allowed until this region is poisoned again.
|
278
|
+
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
|
279
|
+
* to ASan alignment restrictions.
|
280
|
+
*
|
281
|
+
* \note This function is not thread-safe because no two threads can
|
282
|
+
* poison or unpoison memory in the same memory region simultaneously.
|
283
|
+
*
|
284
|
+
* \param addr Start of memory region.
|
285
|
+
* \param size Size of memory region. */
|
286
|
+
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
|
287
|
+
#endif
|
288
|
+
|
140
289
|
#endif /* ZSTD_COMPILER_H */
|