extzstd 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +13 -0
- data/README.md +17 -14
- data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
- data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
- data/contrib/zstd/Makefile +99 -53
- data/contrib/zstd/README.md +59 -39
- data/contrib/zstd/TESTING.md +1 -1
- data/contrib/zstd/appveyor.yml +17 -6
- data/contrib/zstd/lib/BUCK +29 -2
- data/contrib/zstd/lib/Makefile +118 -21
- data/contrib/zstd/lib/README.md +84 -44
- data/contrib/zstd/lib/common/bitstream.h +17 -33
- data/contrib/zstd/lib/common/compiler.h +62 -8
- data/contrib/zstd/lib/common/cpu.h +215 -0
- data/contrib/zstd/lib/common/debug.c +44 -0
- data/contrib/zstd/lib/common/debug.h +134 -0
- data/contrib/zstd/lib/common/entropy_common.c +16 -1
- data/contrib/zstd/lib/common/error_private.c +7 -0
- data/contrib/zstd/lib/common/fse.h +48 -44
- data/contrib/zstd/lib/common/fse_decompress.c +3 -3
- data/contrib/zstd/lib/common/huf.h +169 -113
- data/contrib/zstd/lib/common/mem.h +20 -2
- data/contrib/zstd/lib/common/pool.c +135 -49
- data/contrib/zstd/lib/common/pool.h +40 -21
- data/contrib/zstd/lib/common/threading.c +2 -2
- data/contrib/zstd/lib/common/threading.h +12 -12
- data/contrib/zstd/lib/common/xxhash.c +3 -2
- data/contrib/zstd/lib/common/zstd_common.c +3 -6
- data/contrib/zstd/lib/common/zstd_errors.h +17 -7
- data/contrib/zstd/lib/common/zstd_internal.h +76 -48
- data/contrib/zstd/lib/compress/fse_compress.c +89 -209
- data/contrib/zstd/lib/compress/hist.c +203 -0
- data/contrib/zstd/lib/compress/hist.h +95 -0
- data/contrib/zstd/lib/compress/huf_compress.c +188 -80
- data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
- data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
- data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
- data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
- data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
- data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
- data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
- data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
- data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
- data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
- data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
- data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
- data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
- data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
- data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
- data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
- data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
- data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
- data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
- data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
- data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
- data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
- data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
- data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
- data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
- data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
- data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
- data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
- data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
- data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
- data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
- data/contrib/zstd/lib/zstd.h +1346 -832
- data/ext/extzstd.c +27 -19
- data/ext/extzstd_stream.c +20 -4
- data/ext/zstd_compress.c +1 -0
- data/ext/zstd_decompress.c +4 -0
- data/ext/zstd_dictbuilder.c +4 -0
- data/ext/zstd_dictbuilder_fastcover.c +5 -0
- data/lib/extzstd.rb +52 -220
- data/lib/extzstd/version.rb +1 -1
- metadata +21 -7
- data/contrib/zstd/circle.yml +0 -63
data/contrib/zstd/lib/README.md
CHANGED
@@ -2,15 +2,35 @@ Zstandard library files
|
|
2
2
|
================================
|
3
3
|
|
4
4
|
The __lib__ directory is split into several sub-directories,
|
5
|
-
in order to make it easier to select or exclude
|
5
|
+
in order to make it easier to select or exclude features.
|
6
6
|
|
7
7
|
|
8
8
|
#### Building
|
9
9
|
|
10
|
-
`Makefile` script is provided, supporting
|
11
|
-
|
10
|
+
`Makefile` script is provided, supporting [Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html#Makefile-Conventions),
|
11
|
+
including command variables, staged install, directory variables and standard targets.
|
12
12
|
- `make` : generates both static and dynamic libraries
|
13
|
-
- `make install` : install libraries in
|
13
|
+
- `make install` : install libraries and headers in target system directories
|
14
|
+
|
15
|
+
`libzstd` default scope is pretty large, including compression, decompression, dictionary builder,
|
16
|
+
and support for decoding legacy formats >= v0.5.0.
|
17
|
+
The scope can be reduced on demand (see paragraph _modular build_).
|
18
|
+
|
19
|
+
|
20
|
+
#### Multithreading support
|
21
|
+
|
22
|
+
Multithreading is disabled by default when building with `make`.
|
23
|
+
Enabling multithreading requires 2 conditions :
|
24
|
+
- set build macro `ZSTD_MULTITHREAD` (`-DZSTD_MULTITHREAD` for `gcc`)
|
25
|
+
- for POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
|
26
|
+
|
27
|
+
Both conditions are automatically applied when invoking `make lib-mt` target.
|
28
|
+
|
29
|
+
When linking a POSIX program with a multithreaded version of `libzstd`,
|
30
|
+
note that it's necessary to request the `-pthread` flag during link stage.
|
31
|
+
|
32
|
+
Multithreading capabilities are exposed
|
33
|
+
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
|
14
34
|
|
15
35
|
|
16
36
|
#### API
|
@@ -23,53 +43,74 @@ Zstandard's stable API is exposed within [lib/zstd.h](zstd.h).
|
|
23
43
|
Optional advanced features are exposed via :
|
24
44
|
|
25
45
|
- `lib/common/zstd_errors.h` : translates `size_t` function results
|
26
|
-
|
46
|
+
into a `ZSTD_ErrorCode`, for accurate error handling.
|
47
|
+
|
27
48
|
- `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
|
28
|
-
it unlocks access to
|
29
|
-
exposed in second part of `zstd.h`.
|
30
|
-
|
31
|
-
|
49
|
+
it unlocks access to the experimental API,
|
50
|
+
exposed in the second part of `zstd.h`.
|
51
|
+
All definitions in the experimental APIs are unstable,
|
52
|
+
they may still change in the future, or even be removed.
|
53
|
+
As a consequence, experimental definitions shall ___never be used with dynamic library___ !
|
32
54
|
Only static linking is allowed.
|
33
55
|
|
34
56
|
|
35
57
|
#### Modular build
|
36
58
|
|
37
|
-
|
38
|
-
|
39
|
-
- Decompression source code lies in `lib/decompress`
|
40
|
-
- It's possible to include only `compress` or only `decompress`, they don't depend on each other.
|
41
|
-
- `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples.
|
42
|
-
The API is exposed in `lib/dictBuilder/zdict.h`.
|
43
|
-
This module depends on both `lib/common` and `lib/compress` .
|
44
|
-
- `lib/legacy` : source code to decompress older zstd formats, starting from `v0.1`.
|
45
|
-
This module depends on `lib/common` and `lib/decompress`.
|
46
|
-
To enable this feature, it's necessary to define `ZSTD_LEGACY_SUPPORT = 1` during compilation.
|
47
|
-
Typically, with `gcc`, add argument `-DZSTD_LEGACY_SUPPORT=1`.
|
48
|
-
Using higher number limits the number of version supported.
|
49
|
-
For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats starting from v0.2+".
|
50
|
-
The API is exposed in `lib/legacy/zstd_legacy.h`.
|
51
|
-
Each version also provides a (dedicated) set of advanced API.
|
52
|
-
For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
|
59
|
+
It's possible to compile only a limited set of features within `libzstd`.
|
60
|
+
The file structure is designed to make this selection manually achievable for any build system :
|
53
61
|
|
62
|
+
- Directory `lib/common` is always required, for all variants.
|
54
63
|
|
55
|
-
|
64
|
+
- Compression source code lies in `lib/compress`
|
56
65
|
|
57
|
-
|
58
|
-
Enabling multithreading requires 2 conditions :
|
59
|
-
- set macro `ZSTD_MULTITHREAD`
|
60
|
-
- on POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc` for example)
|
66
|
+
- Decompression source code lies in `lib/decompress`
|
61
67
|
|
62
|
-
|
63
|
-
Note that, when linking a POSIX program with a multithreaded version of `libzstd`,
|
64
|
-
it's necessary to trigger `-pthread` flag during link stage.
|
68
|
+
- It's possible to include only `compress` or only `decompress`, they don't depend on each other.
|
65
69
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
-
|
71
|
-
|
72
|
-
|
70
|
+
- `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples.
|
71
|
+
The API is exposed in `lib/dictBuilder/zdict.h`.
|
72
|
+
This module depends on both `lib/common` and `lib/compress` .
|
73
|
+
|
74
|
+
- `lib/legacy` : makes it possible to decompress legacy zstd formats, starting from `v0.1.0`.
|
75
|
+
This module depends on `lib/common` and `lib/decompress`.
|
76
|
+
To enable this feature, define `ZSTD_LEGACY_SUPPORT` during compilation.
|
77
|
+
Specifying a number limits versions supported to that version onward.
|
78
|
+
For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats >= v0.2.0".
|
79
|
+
Conversely, `ZSTD_LEGACY_SUPPORT=0` means "do __not__ support legacy formats".
|
80
|
+
By default, this build macro is set as `ZSTD_LEGACY_SUPPORT=5`.
|
81
|
+
Decoding supported legacy format is a transparent capability triggered within decompression functions.
|
82
|
+
It's also allowed to invoke legacy API directly, exposed in `lib/legacy/zstd_legacy.h`.
|
83
|
+
Each version also provides its own set of advanced APIs.
|
84
|
+
For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
|
85
|
+
|
86
|
+
- While invoking `make libzstd`, it's possible to define build macros
|
87
|
+
`ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
|
88
|
+
and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
|
89
|
+
This will also disable compilation of all dependencies
|
90
|
+
(eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
|
91
|
+
|
92
|
+
- There are some additional build macros that can be used to minify the decoder.
|
93
|
+
|
94
|
+
Zstandard often has more than one implementation of a piece of functionality,
|
95
|
+
where each implementation optimizes for different scenarios. For example, the
|
96
|
+
Huffman decoder has complementary implementations that decode the stream one
|
97
|
+
symbol at a time or two symbols at a time. Zstd normally includes both (and
|
98
|
+
dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
|
99
|
+
or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
|
100
|
+
compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
|
101
|
+
and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
|
102
|
+
only one or the other of two decompression implementations. The smallest
|
103
|
+
binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
|
104
|
+
`ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
|
105
|
+
|
106
|
+
For squeezing the last ounce of size out, you can also define
|
107
|
+
`ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
|
108
|
+
which removes the error messages that are otherwise returned by
|
109
|
+
`ZSTD_getErrorName`.
|
110
|
+
|
111
|
+
- While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
|
112
|
+
will expose the deprecated `ZSTDMT` API (declared in `zstdmt_compress.h`) in
|
113
|
+
the shared library, which is now hidden by default.
|
73
114
|
|
74
115
|
|
75
116
|
#### Windows : using MinGW+MSYS to create DLL
|
@@ -92,7 +133,6 @@ The compiled executable will require ZSTD DLL which is available at `dll\libzstd
|
|
92
133
|
|
93
134
|
Obsolete APIs on their way out are stored in directory `lib/deprecated`.
|
94
135
|
At this stage, it contains older streaming prototypes, in `lib/deprecated/zbuff.h`.
|
95
|
-
Presence in this directory is temporary.
|
96
136
|
These prototypes will be removed in some future version.
|
97
137
|
Consider migrating code towards supported streaming API exposed in `zstd.h`.
|
98
138
|
|
@@ -101,8 +141,8 @@ Consider migrating code towards supported streaming API exposed in `zstd.h`.
|
|
101
141
|
|
102
142
|
The other files are not source code. They are :
|
103
143
|
|
104
|
-
- `LICENSE` : contains the BSD license text
|
105
|
-
- `Makefile` : `make` script to build and install zstd library (static and dynamic)
|
106
144
|
- `BUCK` : support for `buck` build system (https://buckbuild.com/)
|
107
|
-
- `
|
145
|
+
- `Makefile` : `make` script to build and install zstd library (static and dynamic)
|
108
146
|
- `README.md` : this file
|
147
|
+
- `dll/` : resources directory for Windows compilation
|
148
|
+
- `libzstd.pc.in` : script for `pkg-config` (used in `make install`)
|
@@ -1,8 +1,7 @@
|
|
1
1
|
/* ******************************************************************
|
2
2
|
bitstream
|
3
3
|
Part of FSE library
|
4
|
-
|
5
|
-
Copyright (C) 2013-2017, Yann Collet.
|
4
|
+
Copyright (C) 2013-present, Yann Collet.
|
6
5
|
|
7
6
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
8
7
|
|
@@ -49,21 +48,10 @@ extern "C" {
|
|
49
48
|
* Dependencies
|
50
49
|
******************************************/
|
51
50
|
#include "mem.h" /* unaligned access routines */
|
51
|
+
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
|
52
52
|
#include "error_private.h" /* error codes and messages */
|
53
53
|
|
54
54
|
|
55
|
-
/*-*************************************
|
56
|
-
* Debug
|
57
|
-
***************************************/
|
58
|
-
#if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
|
59
|
-
# include <assert.h>
|
60
|
-
#else
|
61
|
-
# ifndef assert
|
62
|
-
# define assert(condition) ((void)0)
|
63
|
-
# endif
|
64
|
-
#endif
|
65
|
-
|
66
|
-
|
67
55
|
/*=========================================
|
68
56
|
* Target specific
|
69
57
|
=========================================*/
|
@@ -83,8 +71,7 @@ extern "C" {
|
|
83
71
|
* A critical property of these streams is that they encode and decode in **reverse** direction.
|
84
72
|
* So the first bit sequence you add will be the last to be read, like a LIFO stack.
|
85
73
|
*/
|
86
|
-
typedef struct
|
87
|
-
{
|
74
|
+
typedef struct {
|
88
75
|
size_t bitContainer;
|
89
76
|
unsigned bitPos;
|
90
77
|
char* startPtr;
|
@@ -118,8 +105,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
|
|
118
105
|
/*-********************************************
|
119
106
|
* bitStream decoding API (read backward)
|
120
107
|
**********************************************/
|
121
|
-
typedef struct
|
122
|
-
{
|
108
|
+
typedef struct {
|
123
109
|
size_t bitContainer;
|
124
110
|
unsigned bitsConsumed;
|
125
111
|
const char* ptr;
|
@@ -236,7 +222,8 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
|
236
222
|
}
|
237
223
|
|
238
224
|
/*! BIT_addBitsFast() :
|
239
|
-
* works only if `value` is _clean_,
|
225
|
+
* works only if `value` is _clean_,
|
226
|
+
* meaning all high bits above nbBits are 0 */
|
240
227
|
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
|
241
228
|
size_t value, unsigned nbBits)
|
242
229
|
{
|
@@ -352,17 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
|
352
339
|
|
353
340
|
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
354
341
|
{
|
355
|
-
|
356
|
-
|
357
|
-
if (sizeof(bitContainer)==8)
|
358
|
-
return _bextr_u64(bitContainer, start, nbBits);
|
359
|
-
else
|
360
|
-
# endif
|
361
|
-
return _bextr_u32(bitContainer, start, nbBits);
|
362
|
-
#else
|
342
|
+
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
343
|
+
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
363
344
|
assert(nbBits < BIT_MASK_SIZE);
|
364
|
-
return (bitContainer >> start) & BIT_mask[nbBits];
|
365
|
-
#endif
|
345
|
+
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
366
346
|
}
|
367
347
|
|
368
348
|
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
@@ -379,9 +359,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
|
379
359
|
* @return : value extracted */
|
380
360
|
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
381
361
|
{
|
382
|
-
|
362
|
+
/* arbitrate between double-shift and shift+mask */
|
363
|
+
#if 1
|
364
|
+
/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
|
365
|
+
* bitstream is likely corrupted, and result is undefined */
|
383
366
|
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
|
384
367
|
#else
|
368
|
+
/* this code path is slower on my os-x laptop */
|
385
369
|
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
|
386
370
|
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
|
387
371
|
#endif
|
@@ -405,7 +389,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
|
405
389
|
* Read (consume) next n bits from local register and update.
|
406
390
|
* Take care not to read more bits than the local register currently contains.
|
407
391
|
* @return : extracted value. */
|
408
|
-
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD,
|
392
|
+
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
409
393
|
{
|
410
394
|
size_t const value = BIT_lookBits(bitD, nbBits);
|
411
395
|
BIT_skipBits(bitD, nbBits);
|
@@ -414,7 +398,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
|
|
414
398
|
|
415
399
|
/*! BIT_readBitsFast() :
|
416
400
|
* unsafe version; only works if nbBits >= 1 */
|
417
|
-
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD,
|
401
|
+
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
418
402
|
{
|
419
403
|
size_t const value = BIT_lookBitsFast(bitD, nbBits);
|
420
404
|
assert(nbBits >= 1);
|
@@ -426,7 +410,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
|
|
426
410
|
* Refill `bitD` from buffer previously set in BIT_initDStream() .
|
427
411
|
* This function is safe, it guarantees it will not read beyond src buffer.
|
428
412
|
* @return : status of `BIT_DStream_t` internal register.
|
429
|
-
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57
|
413
|
+
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
|
430
414
|
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
431
415
|
{
|
432
416
|
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
|
@@ -15,6 +15,8 @@
|
|
15
15
|
* Compiler specifics
|
16
16
|
*********************************************************/
|
17
17
|
/* force inlining */
|
18
|
+
|
19
|
+
#if !defined(ZSTD_NO_INLINE)
|
18
20
|
#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
|
19
21
|
# define INLINE_KEYWORD inline
|
20
22
|
#else
|
@@ -29,9 +31,16 @@
|
|
29
31
|
# define FORCE_INLINE_ATTR
|
30
32
|
#endif
|
31
33
|
|
34
|
+
#else
|
35
|
+
|
36
|
+
#define INLINE_KEYWORD
|
37
|
+
#define FORCE_INLINE_ATTR
|
38
|
+
|
39
|
+
#endif
|
40
|
+
|
32
41
|
/**
|
33
42
|
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
|
34
|
-
* parameters. They must be inlined for the compiler to
|
43
|
+
* parameters. They must be inlined for the compiler to eliminate the constant
|
35
44
|
* branches.
|
36
45
|
*/
|
37
46
|
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
|
@@ -63,16 +72,61 @@
|
|
63
72
|
# endif
|
64
73
|
#endif
|
65
74
|
|
66
|
-
/*
|
67
|
-
#
|
68
|
-
#
|
69
|
-
#
|
70
|
-
#
|
71
|
-
# define
|
75
|
+
/* target attribute */
|
76
|
+
#ifndef __has_attribute
|
77
|
+
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
|
78
|
+
#endif
|
79
|
+
#if defined(__GNUC__)
|
80
|
+
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
|
72
81
|
#else
|
73
|
-
# define
|
82
|
+
# define TARGET_ATTRIBUTE(target)
|
74
83
|
#endif
|
75
84
|
|
85
|
+
/* Enable runtime BMI2 dispatch based on the CPU.
|
86
|
+
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
|
87
|
+
*/
|
88
|
+
#ifndef DYNAMIC_BMI2
|
89
|
+
#if ((defined(__clang__) && __has_attribute(__target__)) \
|
90
|
+
|| (defined(__GNUC__) \
|
91
|
+
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
|
92
|
+
&& (defined(__x86_64__) || defined(_M_X86)) \
|
93
|
+
&& !defined(__BMI2__)
|
94
|
+
# define DYNAMIC_BMI2 1
|
95
|
+
#else
|
96
|
+
# define DYNAMIC_BMI2 0
|
97
|
+
#endif
|
98
|
+
#endif
|
99
|
+
|
100
|
+
/* prefetch
|
101
|
+
* can be disabled, by declaring NO_PREFETCH build macro */
|
102
|
+
#if defined(NO_PREFETCH)
|
103
|
+
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
|
104
|
+
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
|
105
|
+
#else
|
106
|
+
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
|
107
|
+
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
108
|
+
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
|
109
|
+
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
|
110
|
+
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
111
|
+
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
|
112
|
+
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
|
113
|
+
# else
|
114
|
+
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
|
115
|
+
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
|
116
|
+
# endif
|
117
|
+
#endif /* NO_PREFETCH */
|
118
|
+
|
119
|
+
#define CACHELINE_SIZE 64
|
120
|
+
|
121
|
+
#define PREFETCH_AREA(p, s) { \
|
122
|
+
const char* const _ptr = (const char*)(p); \
|
123
|
+
size_t const _size = (size_t)(s); \
|
124
|
+
size_t _pos; \
|
125
|
+
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
|
126
|
+
PREFETCH_L2(_ptr + _pos); \
|
127
|
+
} \
|
128
|
+
}
|
129
|
+
|
76
130
|
/* disable warnings */
|
77
131
|
#ifdef _MSC_VER /* Visual Studio */
|
78
132
|
# include <intrin.h> /* For Visual 2005 */
|
@@ -0,0 +1,215 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2018-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef ZSTD_COMMON_CPU_H
|
12
|
+
#define ZSTD_COMMON_CPU_H
|
13
|
+
|
14
|
+
/**
|
15
|
+
* Implementation taken from folly/CpuId.h
|
16
|
+
* https://github.com/facebook/folly/blob/master/folly/CpuId.h
|
17
|
+
*/
|
18
|
+
|
19
|
+
#include <string.h>
|
20
|
+
|
21
|
+
#include "mem.h"
|
22
|
+
|
23
|
+
#ifdef _MSC_VER
|
24
|
+
#include <intrin.h>
|
25
|
+
#endif
|
26
|
+
|
27
|
+
typedef struct {
|
28
|
+
U32 f1c;
|
29
|
+
U32 f1d;
|
30
|
+
U32 f7b;
|
31
|
+
U32 f7c;
|
32
|
+
} ZSTD_cpuid_t;
|
33
|
+
|
34
|
+
MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
|
35
|
+
U32 f1c = 0;
|
36
|
+
U32 f1d = 0;
|
37
|
+
U32 f7b = 0;
|
38
|
+
U32 f7c = 0;
|
39
|
+
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
|
40
|
+
int reg[4];
|
41
|
+
__cpuid((int*)reg, 0);
|
42
|
+
{
|
43
|
+
int const n = reg[0];
|
44
|
+
if (n >= 1) {
|
45
|
+
__cpuid((int*)reg, 1);
|
46
|
+
f1c = (U32)reg[2];
|
47
|
+
f1d = (U32)reg[3];
|
48
|
+
}
|
49
|
+
if (n >= 7) {
|
50
|
+
__cpuidex((int*)reg, 7, 0);
|
51
|
+
f7b = (U32)reg[1];
|
52
|
+
f7c = (U32)reg[2];
|
53
|
+
}
|
54
|
+
}
|
55
|
+
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
|
56
|
+
/* The following block is like the normal cpuid branch below, but gcc
|
57
|
+
* reserves ebx for use of its pic register so we must specially
|
58
|
+
* handle the save and restore to avoid clobbering the register
|
59
|
+
*/
|
60
|
+
U32 n;
|
61
|
+
__asm__(
|
62
|
+
"pushl %%ebx\n\t"
|
63
|
+
"cpuid\n\t"
|
64
|
+
"popl %%ebx\n\t"
|
65
|
+
: "=a"(n)
|
66
|
+
: "a"(0)
|
67
|
+
: "ecx", "edx");
|
68
|
+
if (n >= 1) {
|
69
|
+
U32 f1a;
|
70
|
+
__asm__(
|
71
|
+
"pushl %%ebx\n\t"
|
72
|
+
"cpuid\n\t"
|
73
|
+
"popl %%ebx\n\t"
|
74
|
+
: "=a"(f1a), "=c"(f1c), "=d"(f1d)
|
75
|
+
: "a"(1));
|
76
|
+
}
|
77
|
+
if (n >= 7) {
|
78
|
+
__asm__(
|
79
|
+
"pushl %%ebx\n\t"
|
80
|
+
"cpuid\n\t"
|
81
|
+
"movl %%ebx, %%eax\n\t"
|
82
|
+
"popl %%ebx"
|
83
|
+
: "=a"(f7b), "=c"(f7c)
|
84
|
+
: "a"(7), "c"(0)
|
85
|
+
: "edx");
|
86
|
+
}
|
87
|
+
#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
|
88
|
+
U32 n;
|
89
|
+
__asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
|
90
|
+
if (n >= 1) {
|
91
|
+
U32 f1a;
|
92
|
+
__asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
|
93
|
+
}
|
94
|
+
if (n >= 7) {
|
95
|
+
U32 f7a;
|
96
|
+
__asm__("cpuid"
|
97
|
+
: "=a"(f7a), "=b"(f7b), "=c"(f7c)
|
98
|
+
: "a"(7), "c"(0)
|
99
|
+
: "edx");
|
100
|
+
}
|
101
|
+
#endif
|
102
|
+
{
|
103
|
+
ZSTD_cpuid_t cpuid;
|
104
|
+
cpuid.f1c = f1c;
|
105
|
+
cpuid.f1d = f1d;
|
106
|
+
cpuid.f7b = f7b;
|
107
|
+
cpuid.f7c = f7c;
|
108
|
+
return cpuid;
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
#define X(name, r, bit) \
|
113
|
+
MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
|
114
|
+
return ((cpuid.r) & (1U << bit)) != 0; \
|
115
|
+
}
|
116
|
+
|
117
|
+
/* cpuid(1): Processor Info and Feature Bits. */
|
118
|
+
#define C(name, bit) X(name, f1c, bit)
|
119
|
+
C(sse3, 0)
|
120
|
+
C(pclmuldq, 1)
|
121
|
+
C(dtes64, 2)
|
122
|
+
C(monitor, 3)
|
123
|
+
C(dscpl, 4)
|
124
|
+
C(vmx, 5)
|
125
|
+
C(smx, 6)
|
126
|
+
C(eist, 7)
|
127
|
+
C(tm2, 8)
|
128
|
+
C(ssse3, 9)
|
129
|
+
C(cnxtid, 10)
|
130
|
+
C(fma, 12)
|
131
|
+
C(cx16, 13)
|
132
|
+
C(xtpr, 14)
|
133
|
+
C(pdcm, 15)
|
134
|
+
C(pcid, 17)
|
135
|
+
C(dca, 18)
|
136
|
+
C(sse41, 19)
|
137
|
+
C(sse42, 20)
|
138
|
+
C(x2apic, 21)
|
139
|
+
C(movbe, 22)
|
140
|
+
C(popcnt, 23)
|
141
|
+
C(tscdeadline, 24)
|
142
|
+
C(aes, 25)
|
143
|
+
C(xsave, 26)
|
144
|
+
C(osxsave, 27)
|
145
|
+
C(avx, 28)
|
146
|
+
C(f16c, 29)
|
147
|
+
C(rdrand, 30)
|
148
|
+
#undef C
|
149
|
+
#define D(name, bit) X(name, f1d, bit)
|
150
|
+
D(fpu, 0)
|
151
|
+
D(vme, 1)
|
152
|
+
D(de, 2)
|
153
|
+
D(pse, 3)
|
154
|
+
D(tsc, 4)
|
155
|
+
D(msr, 5)
|
156
|
+
D(pae, 6)
|
157
|
+
D(mce, 7)
|
158
|
+
D(cx8, 8)
|
159
|
+
D(apic, 9)
|
160
|
+
D(sep, 11)
|
161
|
+
D(mtrr, 12)
|
162
|
+
D(pge, 13)
|
163
|
+
D(mca, 14)
|
164
|
+
D(cmov, 15)
|
165
|
+
D(pat, 16)
|
166
|
+
D(pse36, 17)
|
167
|
+
D(psn, 18)
|
168
|
+
D(clfsh, 19)
|
169
|
+
D(ds, 21)
|
170
|
+
D(acpi, 22)
|
171
|
+
D(mmx, 23)
|
172
|
+
D(fxsr, 24)
|
173
|
+
D(sse, 25)
|
174
|
+
D(sse2, 26)
|
175
|
+
D(ss, 27)
|
176
|
+
D(htt, 28)
|
177
|
+
D(tm, 29)
|
178
|
+
D(pbe, 31)
|
179
|
+
#undef D
|
180
|
+
|
181
|
+
/* cpuid(7): Extended Features. */
|
182
|
+
#define B(name, bit) X(name, f7b, bit)
|
183
|
+
B(bmi1, 3)
|
184
|
+
B(hle, 4)
|
185
|
+
B(avx2, 5)
|
186
|
+
B(smep, 7)
|
187
|
+
B(bmi2, 8)
|
188
|
+
B(erms, 9)
|
189
|
+
B(invpcid, 10)
|
190
|
+
B(rtm, 11)
|
191
|
+
B(mpx, 14)
|
192
|
+
B(avx512f, 16)
|
193
|
+
B(avx512dq, 17)
|
194
|
+
B(rdseed, 18)
|
195
|
+
B(adx, 19)
|
196
|
+
B(smap, 20)
|
197
|
+
B(avx512ifma, 21)
|
198
|
+
B(pcommit, 22)
|
199
|
+
B(clflushopt, 23)
|
200
|
+
B(clwb, 24)
|
201
|
+
B(avx512pf, 26)
|
202
|
+
B(avx512er, 27)
|
203
|
+
B(avx512cd, 28)
|
204
|
+
B(sha, 29)
|
205
|
+
B(avx512bw, 30)
|
206
|
+
B(avx512vl, 31)
|
207
|
+
#undef B
|
208
|
+
#define C(name, bit) X(name, f7c, bit)
|
209
|
+
C(prefetchwt1, 0)
|
210
|
+
C(avx512vbmi, 1)
|
211
|
+
#undef C
|
212
|
+
|
213
|
+
#undef X
|
214
|
+
|
215
|
+
#endif /* ZSTD_COMMON_CPU_H */
|