extzstd 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +13 -0
- data/README.md +17 -14
- data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
- data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
- data/contrib/zstd/Makefile +99 -53
- data/contrib/zstd/README.md +59 -39
- data/contrib/zstd/TESTING.md +1 -1
- data/contrib/zstd/appveyor.yml +17 -6
- data/contrib/zstd/lib/BUCK +29 -2
- data/contrib/zstd/lib/Makefile +118 -21
- data/contrib/zstd/lib/README.md +84 -44
- data/contrib/zstd/lib/common/bitstream.h +17 -33
- data/contrib/zstd/lib/common/compiler.h +62 -8
- data/contrib/zstd/lib/common/cpu.h +215 -0
- data/contrib/zstd/lib/common/debug.c +44 -0
- data/contrib/zstd/lib/common/debug.h +134 -0
- data/contrib/zstd/lib/common/entropy_common.c +16 -1
- data/contrib/zstd/lib/common/error_private.c +7 -0
- data/contrib/zstd/lib/common/fse.h +48 -44
- data/contrib/zstd/lib/common/fse_decompress.c +3 -3
- data/contrib/zstd/lib/common/huf.h +169 -113
- data/contrib/zstd/lib/common/mem.h +20 -2
- data/contrib/zstd/lib/common/pool.c +135 -49
- data/contrib/zstd/lib/common/pool.h +40 -21
- data/contrib/zstd/lib/common/threading.c +2 -2
- data/contrib/zstd/lib/common/threading.h +12 -12
- data/contrib/zstd/lib/common/xxhash.c +3 -2
- data/contrib/zstd/lib/common/zstd_common.c +3 -6
- data/contrib/zstd/lib/common/zstd_errors.h +17 -7
- data/contrib/zstd/lib/common/zstd_internal.h +76 -48
- data/contrib/zstd/lib/compress/fse_compress.c +89 -209
- data/contrib/zstd/lib/compress/hist.c +203 -0
- data/contrib/zstd/lib/compress/hist.h +95 -0
- data/contrib/zstd/lib/compress/huf_compress.c +188 -80
- data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
- data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
- data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
- data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
- data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
- data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
- data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
- data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
- data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
- data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
- data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
- data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
- data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
- data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
- data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
- data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
- data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
- data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
- data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
- data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
- data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
- data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
- data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
- data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
- data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
- data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
- data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
- data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
- data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
- data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
- data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
- data/contrib/zstd/lib/zstd.h +1346 -832
- data/ext/extzstd.c +27 -19
- data/ext/extzstd_stream.c +20 -4
- data/ext/zstd_compress.c +1 -0
- data/ext/zstd_decompress.c +4 -0
- data/ext/zstd_dictbuilder.c +4 -0
- data/ext/zstd_dictbuilder_fastcover.c +5 -0
- data/lib/extzstd.rb +52 -220
- data/lib/extzstd/version.rb +1 -1
- metadata +21 -7
- data/contrib/zstd/circle.yml +0 -63
data/contrib/zstd/lib/README.md
CHANGED
|
@@ -2,15 +2,35 @@ Zstandard library files
|
|
|
2
2
|
================================
|
|
3
3
|
|
|
4
4
|
The __lib__ directory is split into several sub-directories,
|
|
5
|
-
in order to make it easier to select or exclude
|
|
5
|
+
in order to make it easier to select or exclude features.
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
#### Building
|
|
9
9
|
|
|
10
|
-
`Makefile` script is provided, supporting
|
|
11
|
-
|
|
10
|
+
`Makefile` script is provided, supporting [Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html#Makefile-Conventions),
|
|
11
|
+
including command variables, staged install, directory variables and standard targets.
|
|
12
12
|
- `make` : generates both static and dynamic libraries
|
|
13
|
-
- `make install` : install libraries in
|
|
13
|
+
- `make install` : install libraries and headers in target system directories
|
|
14
|
+
|
|
15
|
+
`libzstd` default scope is pretty large, including compression, decompression, dictionary builder,
|
|
16
|
+
and support for decoding legacy formats >= v0.5.0.
|
|
17
|
+
The scope can be reduced on demand (see paragraph _modular build_).
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
#### Multithreading support
|
|
21
|
+
|
|
22
|
+
Multithreading is disabled by default when building with `make`.
|
|
23
|
+
Enabling multithreading requires 2 conditions :
|
|
24
|
+
- set build macro `ZSTD_MULTITHREAD` (`-DZSTD_MULTITHREAD` for `gcc`)
|
|
25
|
+
- for POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
|
|
26
|
+
|
|
27
|
+
Both conditions are automatically applied when invoking `make lib-mt` target.
|
|
28
|
+
|
|
29
|
+
When linking a POSIX program with a multithreaded version of `libzstd`,
|
|
30
|
+
note that it's necessary to request the `-pthread` flag during link stage.
|
|
31
|
+
|
|
32
|
+
Multithreading capabilities are exposed
|
|
33
|
+
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
|
|
14
34
|
|
|
15
35
|
|
|
16
36
|
#### API
|
|
@@ -23,53 +43,74 @@ Zstandard's stable API is exposed within [lib/zstd.h](zstd.h).
|
|
|
23
43
|
Optional advanced features are exposed via :
|
|
24
44
|
|
|
25
45
|
- `lib/common/zstd_errors.h` : translates `size_t` function results
|
|
26
|
-
|
|
46
|
+
into a `ZSTD_ErrorCode`, for accurate error handling.
|
|
47
|
+
|
|
27
48
|
- `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
|
|
28
|
-
it unlocks access to
|
|
29
|
-
exposed in second part of `zstd.h`.
|
|
30
|
-
|
|
31
|
-
|
|
49
|
+
it unlocks access to the experimental API,
|
|
50
|
+
exposed in the second part of `zstd.h`.
|
|
51
|
+
All definitions in the experimental APIs are unstable,
|
|
52
|
+
they may still change in the future, or even be removed.
|
|
53
|
+
As a consequence, experimental definitions shall ___never be used with dynamic library___ !
|
|
32
54
|
Only static linking is allowed.
|
|
33
55
|
|
|
34
56
|
|
|
35
57
|
#### Modular build
|
|
36
58
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
- Decompression source code lies in `lib/decompress`
|
|
40
|
-
- It's possible to include only `compress` or only `decompress`, they don't depend on each other.
|
|
41
|
-
- `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples.
|
|
42
|
-
The API is exposed in `lib/dictBuilder/zdict.h`.
|
|
43
|
-
This module depends on both `lib/common` and `lib/compress` .
|
|
44
|
-
- `lib/legacy` : source code to decompress older zstd formats, starting from `v0.1`.
|
|
45
|
-
This module depends on `lib/common` and `lib/decompress`.
|
|
46
|
-
To enable this feature, it's necessary to define `ZSTD_LEGACY_SUPPORT = 1` during compilation.
|
|
47
|
-
Typically, with `gcc`, add argument `-DZSTD_LEGACY_SUPPORT=1`.
|
|
48
|
-
Using higher number limits the number of version supported.
|
|
49
|
-
For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats starting from v0.2+".
|
|
50
|
-
The API is exposed in `lib/legacy/zstd_legacy.h`.
|
|
51
|
-
Each version also provides a (dedicated) set of advanced API.
|
|
52
|
-
For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
|
|
59
|
+
It's possible to compile only a limited set of features within `libzstd`.
|
|
60
|
+
The file structure is designed to make this selection manually achievable for any build system :
|
|
53
61
|
|
|
62
|
+
- Directory `lib/common` is always required, for all variants.
|
|
54
63
|
|
|
55
|
-
|
|
64
|
+
- Compression source code lies in `lib/compress`
|
|
56
65
|
|
|
57
|
-
|
|
58
|
-
Enabling multithreading requires 2 conditions :
|
|
59
|
-
- set macro `ZSTD_MULTITHREAD`
|
|
60
|
-
- on POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc` for example)
|
|
66
|
+
- Decompression source code lies in `lib/decompress`
|
|
61
67
|
|
|
62
|
-
|
|
63
|
-
Note that, when linking a POSIX program with a multithreaded version of `libzstd`,
|
|
64
|
-
it's necessary to trigger `-pthread` flag during link stage.
|
|
68
|
+
- It's possible to include only `compress` or only `decompress`, they don't depend on each other.
|
|
65
69
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
-
|
|
71
|
-
|
|
72
|
-
|
|
70
|
+
- `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples.
|
|
71
|
+
The API is exposed in `lib/dictBuilder/zdict.h`.
|
|
72
|
+
This module depends on both `lib/common` and `lib/compress` .
|
|
73
|
+
|
|
74
|
+
- `lib/legacy` : makes it possible to decompress legacy zstd formats, starting from `v0.1.0`.
|
|
75
|
+
This module depends on `lib/common` and `lib/decompress`.
|
|
76
|
+
To enable this feature, define `ZSTD_LEGACY_SUPPORT` during compilation.
|
|
77
|
+
Specifying a number limits versions supported to that version onward.
|
|
78
|
+
For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats >= v0.2.0".
|
|
79
|
+
Conversely, `ZSTD_LEGACY_SUPPORT=0` means "do __not__ support legacy formats".
|
|
80
|
+
By default, this build macro is set as `ZSTD_LEGACY_SUPPORT=5`.
|
|
81
|
+
Decoding a supported legacy format is a transparent capability triggered within decompression functions.
|
|
82
|
+
It's also allowed to invoke legacy API directly, exposed in `lib/legacy/zstd_legacy.h`.
|
|
83
|
+
Each version does also provide its own set of advanced API.
|
|
84
|
+
For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
|
|
85
|
+
|
|
86
|
+
- While invoking `make libzstd`, it's possible to define build macros
|
|
87
|
+
`ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
|
|
88
|
+
and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
|
|
89
|
+
This will also disable compilation of all dependencies
|
|
90
|
+
(eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
|
|
91
|
+
|
|
92
|
+
- There are some additional build macros that can be used to minify the decoder.
|
|
93
|
+
|
|
94
|
+
Zstandard often has more than one implementation of a piece of functionality,
|
|
95
|
+
where each implementation optimizes for different scenarios. For example, the
|
|
96
|
+
Huffman decoder has complementary implementations that decode the stream one
|
|
97
|
+
symbol at a time or two symbols at a time. Zstd normally includes both (and
|
|
98
|
+
dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
|
|
99
|
+
or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
|
|
100
|
+
compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
|
|
101
|
+
and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
|
|
102
|
+
only one or the other of two decompression implementations. The smallest
|
|
103
|
+
binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
|
|
104
|
+
`ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
|
|
105
|
+
|
|
106
|
+
For squeezing the last ounce of size out, you can also define
|
|
107
|
+
`ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
|
|
108
|
+
which removes the error messages that are otherwise returned by
|
|
109
|
+
`ZSTD_getErrorName`.
|
|
110
|
+
|
|
111
|
+
- While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
|
|
112
|
+
will expose the deprecated `ZSTDMT` API declared in `zstdmt_compress.h` in
|
|
113
|
+
the shared library, which is now hidden by default.
|
|
73
114
|
|
|
74
115
|
|
|
75
116
|
#### Windows : using MinGW+MSYS to create DLL
|
|
@@ -92,7 +133,6 @@ The compiled executable will require ZSTD DLL which is available at `dll\libzstd
|
|
|
92
133
|
|
|
93
134
|
Obsolete API on their way out are stored in directory `lib/deprecated`.
|
|
94
135
|
At this stage, it contains older streaming prototypes, in `lib/deprecated/zbuff.h`.
|
|
95
|
-
Presence in this directory is temporary.
|
|
96
136
|
These prototypes will be removed in some future version.
|
|
97
137
|
Consider migrating code towards supported streaming API exposed in `zstd.h`.
|
|
98
138
|
|
|
@@ -101,8 +141,8 @@ Consider migrating code towards supported streaming API exposed in `zstd.h`.
|
|
|
101
141
|
|
|
102
142
|
The other files are not source code. They are :
|
|
103
143
|
|
|
104
|
-
- `LICENSE` : contains the BSD license text
|
|
105
|
-
- `Makefile` : `make` script to build and install zstd library (static and dynamic)
|
|
106
144
|
- `BUCK` : support for `buck` build system (https://buckbuild.com/)
|
|
107
|
-
- `
|
|
145
|
+
- `Makefile` : `make` script to build and install zstd library (static and dynamic)
|
|
108
146
|
- `README.md` : this file
|
|
147
|
+
- `dll/` : resources directory for Windows compilation
|
|
148
|
+
- `libzstd.pc.in` : script for `pkg-config` (used in `make install`)
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
2
|
bitstream
|
|
3
3
|
Part of FSE library
|
|
4
|
-
|
|
5
|
-
Copyright (C) 2013-2017, Yann Collet.
|
|
4
|
+
Copyright (C) 2013-present, Yann Collet.
|
|
6
5
|
|
|
7
6
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
8
7
|
|
|
@@ -49,21 +48,10 @@ extern "C" {
|
|
|
49
48
|
* Dependencies
|
|
50
49
|
******************************************/
|
|
51
50
|
#include "mem.h" /* unaligned access routines */
|
|
51
|
+
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
|
|
52
52
|
#include "error_private.h" /* error codes and messages */
|
|
53
53
|
|
|
54
54
|
|
|
55
|
-
/*-*************************************
|
|
56
|
-
* Debug
|
|
57
|
-
***************************************/
|
|
58
|
-
#if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
|
|
59
|
-
# include <assert.h>
|
|
60
|
-
#else
|
|
61
|
-
# ifndef assert
|
|
62
|
-
# define assert(condition) ((void)0)
|
|
63
|
-
# endif
|
|
64
|
-
#endif
|
|
65
|
-
|
|
66
|
-
|
|
67
55
|
/*=========================================
|
|
68
56
|
* Target specific
|
|
69
57
|
=========================================*/
|
|
@@ -83,8 +71,7 @@ extern "C" {
|
|
|
83
71
|
* A critical property of these streams is that they encode and decode in **reverse** direction.
|
|
84
72
|
* So the first bit sequence you add will be the last to be read, like a LIFO stack.
|
|
85
73
|
*/
|
|
86
|
-
typedef struct
|
|
87
|
-
{
|
|
74
|
+
typedef struct {
|
|
88
75
|
size_t bitContainer;
|
|
89
76
|
unsigned bitPos;
|
|
90
77
|
char* startPtr;
|
|
@@ -118,8 +105,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
|
|
|
118
105
|
/*-********************************************
|
|
119
106
|
* bitStream decoding API (read backward)
|
|
120
107
|
**********************************************/
|
|
121
|
-
typedef struct
|
|
122
|
-
{
|
|
108
|
+
typedef struct {
|
|
123
109
|
size_t bitContainer;
|
|
124
110
|
unsigned bitsConsumed;
|
|
125
111
|
const char* ptr;
|
|
@@ -236,7 +222,8 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
|
|
236
222
|
}
|
|
237
223
|
|
|
238
224
|
/*! BIT_addBitsFast() :
|
|
239
|
-
* works only if `value` is _clean_,
|
|
225
|
+
* works only if `value` is _clean_,
|
|
226
|
+
* meaning all high bits above nbBits are 0 */
|
|
240
227
|
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
|
|
241
228
|
size_t value, unsigned nbBits)
|
|
242
229
|
{
|
|
@@ -352,17 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
|
|
352
339
|
|
|
353
340
|
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
|
354
341
|
{
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
if (sizeof(bitContainer)==8)
|
|
358
|
-
return _bextr_u64(bitContainer, start, nbBits);
|
|
359
|
-
else
|
|
360
|
-
# endif
|
|
361
|
-
return _bextr_u32(bitContainer, start, nbBits);
|
|
362
|
-
#else
|
|
342
|
+
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
|
343
|
+
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
|
363
344
|
assert(nbBits < BIT_MASK_SIZE);
|
|
364
|
-
return (bitContainer >> start) & BIT_mask[nbBits];
|
|
365
|
-
#endif
|
|
345
|
+
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
|
366
346
|
}
|
|
367
347
|
|
|
368
348
|
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
|
@@ -379,9 +359,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
|
|
379
359
|
* @return : value extracted */
|
|
380
360
|
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
|
381
361
|
{
|
|
382
|
-
|
|
362
|
+
/* arbitrate between double-shift and shift+mask */
|
|
363
|
+
#if 1
|
|
364
|
+
/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
|
|
365
|
+
* bitstream is likely corrupted, and result is undefined */
|
|
383
366
|
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
|
|
384
367
|
#else
|
|
368
|
+
/* this code path is slower on my os-x laptop */
|
|
385
369
|
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
|
|
386
370
|
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
|
|
387
371
|
#endif
|
|
@@ -405,7 +389,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
|
|
405
389
|
* Read (consume) next n bits from local register and update.
|
|
406
390
|
* Take care not to read more bits than are contained in the local register.
|
|
407
391
|
* @return : extracted value. */
|
|
408
|
-
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD,
|
|
392
|
+
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
|
409
393
|
{
|
|
410
394
|
size_t const value = BIT_lookBits(bitD, nbBits);
|
|
411
395
|
BIT_skipBits(bitD, nbBits);
|
|
@@ -414,7 +398,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
|
|
|
414
398
|
|
|
415
399
|
/*! BIT_readBitsFast() :
|
|
416
400
|
* unsafe version; only works if nbBits >= 1 */
|
|
417
|
-
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD,
|
|
401
|
+
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
|
418
402
|
{
|
|
419
403
|
size_t const value = BIT_lookBitsFast(bitD, nbBits);
|
|
420
404
|
assert(nbBits >= 1);
|
|
@@ -426,7 +410,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
|
|
|
426
410
|
* Refill `bitD` from buffer previously set in BIT_initDStream() .
|
|
427
411
|
* This function is safe, it guarantees it will not read beyond src buffer.
|
|
428
412
|
* @return : status of `BIT_DStream_t` internal register.
|
|
429
|
-
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57
|
|
413
|
+
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
|
|
430
414
|
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
|
431
415
|
{
|
|
432
416
|
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
|
|
@@ -15,6 +15,8 @@
|
|
|
15
15
|
* Compiler specifics
|
|
16
16
|
*********************************************************/
|
|
17
17
|
/* force inlining */
|
|
18
|
+
|
|
19
|
+
#if !defined(ZSTD_NO_INLINE)
|
|
18
20
|
#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
|
|
19
21
|
# define INLINE_KEYWORD inline
|
|
20
22
|
#else
|
|
@@ -29,9 +31,16 @@
|
|
|
29
31
|
# define FORCE_INLINE_ATTR
|
|
30
32
|
#endif
|
|
31
33
|
|
|
34
|
+
#else
|
|
35
|
+
|
|
36
|
+
#define INLINE_KEYWORD
|
|
37
|
+
#define FORCE_INLINE_ATTR
|
|
38
|
+
|
|
39
|
+
#endif
|
|
40
|
+
|
|
32
41
|
/**
|
|
33
42
|
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
|
|
34
|
-
* parameters. They must be inlined for the compiler to
|
|
43
|
+
* parameters. They must be inlined for the compiler to eliminate the constant
|
|
35
44
|
* branches.
|
|
36
45
|
*/
|
|
37
46
|
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
|
|
@@ -63,16 +72,61 @@
|
|
|
63
72
|
# endif
|
|
64
73
|
#endif
|
|
65
74
|
|
|
66
|
-
/*
|
|
67
|
-
#
|
|
68
|
-
#
|
|
69
|
-
#
|
|
70
|
-
#
|
|
71
|
-
# define
|
|
75
|
+
/* target attribute */
|
|
76
|
+
#ifndef __has_attribute
|
|
77
|
+
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
|
|
78
|
+
#endif
|
|
79
|
+
#if defined(__GNUC__)
|
|
80
|
+
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
|
|
72
81
|
#else
|
|
73
|
-
# define
|
|
82
|
+
# define TARGET_ATTRIBUTE(target)
|
|
74
83
|
#endif
|
|
75
84
|
|
|
85
|
+
/* Enable runtime BMI2 dispatch based on the CPU.
|
|
86
|
+
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
|
|
87
|
+
*/
|
|
88
|
+
#ifndef DYNAMIC_BMI2
|
|
89
|
+
#if ((defined(__clang__) && __has_attribute(__target__)) \
|
|
90
|
+
|| (defined(__GNUC__) \
|
|
91
|
+
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
|
|
92
|
+
&& (defined(__x86_64__) || defined(_M_X86)) \
|
|
93
|
+
&& !defined(__BMI2__)
|
|
94
|
+
# define DYNAMIC_BMI2 1
|
|
95
|
+
#else
|
|
96
|
+
# define DYNAMIC_BMI2 0
|
|
97
|
+
#endif
|
|
98
|
+
#endif
|
|
99
|
+
|
|
100
|
+
/* prefetch
|
|
101
|
+
* can be disabled, by declaring NO_PREFETCH build macro */
|
|
102
|
+
#if defined(NO_PREFETCH)
|
|
103
|
+
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
|
|
104
|
+
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
|
|
105
|
+
#else
|
|
106
|
+
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
|
|
107
|
+
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
|
108
|
+
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
|
|
109
|
+
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
|
|
110
|
+
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
|
111
|
+
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
|
|
112
|
+
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
|
|
113
|
+
# else
|
|
114
|
+
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
|
|
115
|
+
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
|
|
116
|
+
# endif
|
|
117
|
+
#endif /* NO_PREFETCH */
|
|
118
|
+
|
|
119
|
+
#define CACHELINE_SIZE 64
|
|
120
|
+
|
|
121
|
+
#define PREFETCH_AREA(p, s) { \
|
|
122
|
+
const char* const _ptr = (const char*)(p); \
|
|
123
|
+
size_t const _size = (size_t)(s); \
|
|
124
|
+
size_t _pos; \
|
|
125
|
+
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
|
|
126
|
+
PREFETCH_L2(_ptr + _pos); \
|
|
127
|
+
} \
|
|
128
|
+
}
|
|
129
|
+
|
|
76
130
|
/* disable warnings */
|
|
77
131
|
#ifdef _MSC_VER /* Visual Studio */
|
|
78
132
|
# include <intrin.h> /* For Visual 2005 */
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) 2018-present, Facebook, Inc.
|
|
3
|
+
* All rights reserved.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
#ifndef ZSTD_COMMON_CPU_H
|
|
12
|
+
#define ZSTD_COMMON_CPU_H
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Implementation taken from folly/CpuId.h
|
|
16
|
+
* https://github.com/facebook/folly/blob/master/folly/CpuId.h
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#include <string.h>
|
|
20
|
+
|
|
21
|
+
#include "mem.h"
|
|
22
|
+
|
|
23
|
+
#ifdef _MSC_VER
|
|
24
|
+
#include <intrin.h>
|
|
25
|
+
#endif
|
|
26
|
+
|
|
27
|
+
typedef struct {
|
|
28
|
+
U32 f1c;
|
|
29
|
+
U32 f1d;
|
|
30
|
+
U32 f7b;
|
|
31
|
+
U32 f7c;
|
|
32
|
+
} ZSTD_cpuid_t;
|
|
33
|
+
|
|
34
|
+
MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
|
|
35
|
+
U32 f1c = 0;
|
|
36
|
+
U32 f1d = 0;
|
|
37
|
+
U32 f7b = 0;
|
|
38
|
+
U32 f7c = 0;
|
|
39
|
+
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
|
|
40
|
+
int reg[4];
|
|
41
|
+
__cpuid((int*)reg, 0);
|
|
42
|
+
{
|
|
43
|
+
int const n = reg[0];
|
|
44
|
+
if (n >= 1) {
|
|
45
|
+
__cpuid((int*)reg, 1);
|
|
46
|
+
f1c = (U32)reg[2];
|
|
47
|
+
f1d = (U32)reg[3];
|
|
48
|
+
}
|
|
49
|
+
if (n >= 7) {
|
|
50
|
+
__cpuidex((int*)reg, 7, 0);
|
|
51
|
+
f7b = (U32)reg[1];
|
|
52
|
+
f7c = (U32)reg[2];
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
|
|
56
|
+
/* The following block is like the normal cpuid branch below, but gcc
|
|
57
|
+
* reserves ebx for use of its pic register so we must specially
|
|
58
|
+
* handle the save and restore to avoid clobbering the register
|
|
59
|
+
*/
|
|
60
|
+
U32 n;
|
|
61
|
+
__asm__(
|
|
62
|
+
"pushl %%ebx\n\t"
|
|
63
|
+
"cpuid\n\t"
|
|
64
|
+
"popl %%ebx\n\t"
|
|
65
|
+
: "=a"(n)
|
|
66
|
+
: "a"(0)
|
|
67
|
+
: "ecx", "edx");
|
|
68
|
+
if (n >= 1) {
|
|
69
|
+
U32 f1a;
|
|
70
|
+
__asm__(
|
|
71
|
+
"pushl %%ebx\n\t"
|
|
72
|
+
"cpuid\n\t"
|
|
73
|
+
"popl %%ebx\n\t"
|
|
74
|
+
: "=a"(f1a), "=c"(f1c), "=d"(f1d)
|
|
75
|
+
: "a"(1));
|
|
76
|
+
}
|
|
77
|
+
if (n >= 7) {
|
|
78
|
+
__asm__(
|
|
79
|
+
"pushl %%ebx\n\t"
|
|
80
|
+
"cpuid\n\t"
|
|
81
|
+
"movl %%ebx, %%eax\n\t"
|
|
82
|
+
"popl %%ebx"
|
|
83
|
+
: "=a"(f7b), "=c"(f7c)
|
|
84
|
+
: "a"(7), "c"(0)
|
|
85
|
+
: "edx");
|
|
86
|
+
}
|
|
87
|
+
#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
|
|
88
|
+
U32 n;
|
|
89
|
+
__asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
|
|
90
|
+
if (n >= 1) {
|
|
91
|
+
U32 f1a;
|
|
92
|
+
__asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
|
|
93
|
+
}
|
|
94
|
+
if (n >= 7) {
|
|
95
|
+
U32 f7a;
|
|
96
|
+
__asm__("cpuid"
|
|
97
|
+
: "=a"(f7a), "=b"(f7b), "=c"(f7c)
|
|
98
|
+
: "a"(7), "c"(0)
|
|
99
|
+
: "edx");
|
|
100
|
+
}
|
|
101
|
+
#endif
|
|
102
|
+
{
|
|
103
|
+
ZSTD_cpuid_t cpuid;
|
|
104
|
+
cpuid.f1c = f1c;
|
|
105
|
+
cpuid.f1d = f1d;
|
|
106
|
+
cpuid.f7b = f7b;
|
|
107
|
+
cpuid.f7c = f7c;
|
|
108
|
+
return cpuid;
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
#define X(name, r, bit) \
|
|
113
|
+
MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
|
|
114
|
+
return ((cpuid.r) & (1U << bit)) != 0; \
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/* cpuid(1): Processor Info and Feature Bits. */
|
|
118
|
+
#define C(name, bit) X(name, f1c, bit)
|
|
119
|
+
C(sse3, 0)
|
|
120
|
+
C(pclmuldq, 1)
|
|
121
|
+
C(dtes64, 2)
|
|
122
|
+
C(monitor, 3)
|
|
123
|
+
C(dscpl, 4)
|
|
124
|
+
C(vmx, 5)
|
|
125
|
+
C(smx, 6)
|
|
126
|
+
C(eist, 7)
|
|
127
|
+
C(tm2, 8)
|
|
128
|
+
C(ssse3, 9)
|
|
129
|
+
C(cnxtid, 10)
|
|
130
|
+
C(fma, 12)
|
|
131
|
+
C(cx16, 13)
|
|
132
|
+
C(xtpr, 14)
|
|
133
|
+
C(pdcm, 15)
|
|
134
|
+
C(pcid, 17)
|
|
135
|
+
C(dca, 18)
|
|
136
|
+
C(sse41, 19)
|
|
137
|
+
C(sse42, 20)
|
|
138
|
+
C(x2apic, 21)
|
|
139
|
+
C(movbe, 22)
|
|
140
|
+
C(popcnt, 23)
|
|
141
|
+
C(tscdeadline, 24)
|
|
142
|
+
C(aes, 25)
|
|
143
|
+
C(xsave, 26)
|
|
144
|
+
C(osxsave, 27)
|
|
145
|
+
C(avx, 28)
|
|
146
|
+
C(f16c, 29)
|
|
147
|
+
C(rdrand, 30)
|
|
148
|
+
#undef C
|
|
149
|
+
#define D(name, bit) X(name, f1d, bit)
|
|
150
|
+
D(fpu, 0)
|
|
151
|
+
D(vme, 1)
|
|
152
|
+
D(de, 2)
|
|
153
|
+
D(pse, 3)
|
|
154
|
+
D(tsc, 4)
|
|
155
|
+
D(msr, 5)
|
|
156
|
+
D(pae, 6)
|
|
157
|
+
D(mce, 7)
|
|
158
|
+
D(cx8, 8)
|
|
159
|
+
D(apic, 9)
|
|
160
|
+
D(sep, 11)
|
|
161
|
+
D(mtrr, 12)
|
|
162
|
+
D(pge, 13)
|
|
163
|
+
D(mca, 14)
|
|
164
|
+
D(cmov, 15)
|
|
165
|
+
D(pat, 16)
|
|
166
|
+
D(pse36, 17)
|
|
167
|
+
D(psn, 18)
|
|
168
|
+
D(clfsh, 19)
|
|
169
|
+
D(ds, 21)
|
|
170
|
+
D(acpi, 22)
|
|
171
|
+
D(mmx, 23)
|
|
172
|
+
D(fxsr, 24)
|
|
173
|
+
D(sse, 25)
|
|
174
|
+
D(sse2, 26)
|
|
175
|
+
D(ss, 27)
|
|
176
|
+
D(htt, 28)
|
|
177
|
+
D(tm, 29)
|
|
178
|
+
D(pbe, 31)
|
|
179
|
+
#undef D
|
|
180
|
+
|
|
181
|
+
/* cpuid(7): Extended Features. */
|
|
182
|
+
#define B(name, bit) X(name, f7b, bit)
|
|
183
|
+
B(bmi1, 3)
|
|
184
|
+
B(hle, 4)
|
|
185
|
+
B(avx2, 5)
|
|
186
|
+
B(smep, 7)
|
|
187
|
+
B(bmi2, 8)
|
|
188
|
+
B(erms, 9)
|
|
189
|
+
B(invpcid, 10)
|
|
190
|
+
B(rtm, 11)
|
|
191
|
+
B(mpx, 14)
|
|
192
|
+
B(avx512f, 16)
|
|
193
|
+
B(avx512dq, 17)
|
|
194
|
+
B(rdseed, 18)
|
|
195
|
+
B(adx, 19)
|
|
196
|
+
B(smap, 20)
|
|
197
|
+
B(avx512ifma, 21)
|
|
198
|
+
B(pcommit, 22)
|
|
199
|
+
B(clflushopt, 23)
|
|
200
|
+
B(clwb, 24)
|
|
201
|
+
B(avx512pf, 26)
|
|
202
|
+
B(avx512er, 27)
|
|
203
|
+
B(avx512cd, 28)
|
|
204
|
+
B(sha, 29)
|
|
205
|
+
B(avx512bw, 30)
|
|
206
|
+
B(avx512vl, 31)
|
|
207
|
+
#undef B
|
|
208
|
+
#define C(name, bit) X(name, f7c, bit)
|
|
209
|
+
C(prefetchwt1, 0)
|
|
210
|
+
C(avx512vbmi, 1)
|
|
211
|
+
#undef C
|
|
212
|
+
|
|
213
|
+
#undef X
|
|
214
|
+
|
|
215
|
+
#endif /* ZSTD_COMMON_CPU_H */
|