extzstd 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
@@ -2,15 +2,35 @@ Zstandard library files
2
2
  ================================
3
3
 
4
4
  The __lib__ directory is split into several sub-directories,
5
- in order to make it easier to select or exclude specific features.
5
+ in order to make it easier to select or exclude features.
6
6
 
7
7
 
8
8
  #### Building
9
9
 
10
- `Makefile` script is provided, supporting the standard set of commands,
11
- directories, and variables (see https://www.gnu.org/prep/standards/html_node/Command-Variables.html).
10
+ `Makefile` script is provided, supporting [Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html#Makefile-Conventions),
11
+ including command variables, staged install, directory variables and standard targets.
12
12
  - `make` : generates both static and dynamic libraries
13
- - `make install` : install libraries in default system directories
13
+ - `make install` : install libraries and headers in target system directories
14
+
15
+ `libzstd` default scope is pretty large, including compression, decompression, dictionary builder,
16
+ and support for decoding legacy formats >= v0.5.0.
17
+ The scope can be reduced on demand (see paragraph _modular build_).
18
+
19
+
20
+ #### Multithreading support
21
+
22
+ Multithreading is disabled by default when building with `make`.
23
+ Enabling multithreading requires 2 conditions :
24
+ - set build macro `ZSTD_MULTITHREAD` (`-DZSTD_MULTITHREAD` for `gcc`)
25
+ - for POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
26
+
27
+ Both conditions are automatically applied when invoking `make lib-mt` target.
28
+
29
+ When linking a POSIX program with a multithreaded version of `libzstd`,
30
+ note that it's necessary to request the `-pthread` flag during link stage.
31
+
32
+ Multithreading capabilities are exposed
33
+ via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
14
34
 
15
35
 
16
36
  #### API
@@ -23,53 +43,74 @@ Zstandard's stable API is exposed within [lib/zstd.h](zstd.h).
23
43
  Optional advanced features are exposed via :
24
44
 
25
45
  - `lib/common/zstd_errors.h` : translates `size_t` function results
26
- into an `ZSTD_ErrorCode`, for accurate error handling.
46
+ into a `ZSTD_ErrorCode`, for accurate error handling.
47
+
27
48
  - `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
28
- it unlocks access to advanced experimental API,
29
- exposed in second part of `zstd.h`.
30
- These APIs shall ___never be used with dynamic library___ !
31
- They are not "stable", their definition may change in the future.
49
+ it unlocks access to the experimental API,
50
+ exposed in the second part of `zstd.h`.
51
+ All definitions in the experimental APIs are unstable,
52
+ they may still change in the future, or even be removed.
53
+ As a consequence, experimental definitions shall ___never be used with dynamic library___ !
32
54
  Only static linking is allowed.
33
55
 
34
56
 
35
57
  #### Modular build
36
58
 
37
- - Directory `lib/common` is always required, for all variants.
38
- - Compression source code lies in `lib/compress`
39
- - Decompression source code lies in `lib/decompress`
40
- - It's possible to include only `compress` or only `decompress`, they don't depend on each other.
41
- - `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples.
42
- The API is exposed in `lib/dictBuilder/zdict.h`.
43
- This module depends on both `lib/common` and `lib/compress` .
44
- - `lib/legacy` : source code to decompress older zstd formats, starting from `v0.1`.
45
- This module depends on `lib/common` and `lib/decompress`.
46
- To enable this feature, it's necessary to define `ZSTD_LEGACY_SUPPORT = 1` during compilation.
47
- Typically, with `gcc`, add argument `-DZSTD_LEGACY_SUPPORT=1`.
48
- Using higher number limits the number of version supported.
49
- For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats starting from v0.2+".
50
- The API is exposed in `lib/legacy/zstd_legacy.h`.
51
- Each version also provides a (dedicated) set of advanced API.
52
- For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
59
+ It's possible to compile only a limited set of features within `libzstd`.
60
+ The file structure is designed to make this selection manually achievable for any build system :
53
61
 
62
+ - Directory `lib/common` is always required, for all variants.
54
63
 
55
- #### Multithreading support
64
+ - Compression source code lies in `lib/compress`
56
65
 
57
- Multithreading is disabled by default when building with `make`.
58
- Enabling multithreading requires 2 conditions :
59
- - set macro `ZSTD_MULTITHREAD`
60
- - on POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc` for example)
66
+ - Decompression source code lies in `lib/decompress`
61
67
 
62
- Both conditions are automatically triggered by invoking `make lib-mt` target.
63
- Note that, when linking a POSIX program with a multithreaded version of `libzstd`,
64
- it's necessary to trigger `-pthread` flag during link stage.
68
+ - It's possible to include only `compress` or only `decompress`, they don't depend on each other.
65
69
 
66
- Multithreading capabilities are exposed via :
67
- - private API `lib/compress/zstdmt_compress.h`.
68
- Symbols defined in this header are currently exposed in `libzstd`, hence usable.
69
- Note however that this API is planned to be locked and remain strictly internal in the future.
70
- - advanced API `ZSTD_compress_generic()`, defined in `lib/zstd.h`, experimental section.
71
- This API is still considered experimental, but is designed to be labelled "stable" at some point in the future.
72
- It's the recommended entry point for multi-threading operations.
70
+ - `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples.
71
+ The API is exposed in `lib/dictBuilder/zdict.h`.
72
+ This module depends on both `lib/common` and `lib/compress` .
73
+
74
+ - `lib/legacy` : makes it possible to decompress legacy zstd formats, starting from `v0.1.0`.
75
+ This module depends on `lib/common` and `lib/decompress`.
76
+ To enable this feature, define `ZSTD_LEGACY_SUPPORT` during compilation.
77
+ Specifying a number limits versions supported to that version onward.
78
+ For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats >= v0.2.0".
79
+ Conversely, `ZSTD_LEGACY_SUPPORT=0` means "do __not__ support legacy formats".
80
+ By default, this build macro is set as `ZSTD_LEGACY_SUPPORT=5`.
81
+ Decoding supported legacy formats is a transparent capability triggered within decompression functions.
82
+ It's also allowed to invoke legacy API directly, exposed in `lib/legacy/zstd_legacy.h`.
83
+ Each version does also provide its own set of advanced API.
84
+ For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
85
+
86
+ - While invoking `make libzstd`, it's possible to define build macros
87
+ `ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
88
+ and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
89
+ This will also disable compilation of all dependencies
90
+ (eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
91
+
92
+ - There are some additional build macros that can be used to minify the decoder.
93
+
94
+ Zstandard often has more than one implementation of a piece of functionality,
95
+ where each implementation optimizes for different scenarios. For example, the
96
+ Huffman decoder has complementary implementations that decode the stream one
97
+ symbol at a time or two symbols at a time. Zstd normally includes both (and
98
+ dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
99
+ or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
100
+ compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
101
+ and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
102
+ only one or the other of two decompression implementations. The smallest
103
+ binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
104
+ `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
105
+
106
+ For squeezing the last ounce of size out, you can also define
107
+ `ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
108
+ which removes the error messages that are otherwise returned by
109
+ `ZSTD_getErrorName`.
110
+
111
+ - While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
112
+ will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
113
+ the shared library, which is now hidden by default.
73
114
 
74
115
 
75
116
  #### Windows : using MinGW+MSYS to create DLL
@@ -92,7 +133,6 @@ The compiled executable will require ZSTD DLL which is available at `dll\libzstd
92
133
 
93
134
  Obsolete API on their way out are stored in directory `lib/deprecated`.
94
135
  At this stage, it contains older streaming prototypes, in `lib/deprecated/zbuff.h`.
95
- Presence in this directory is temporary.
96
136
  These prototypes will be removed in some future version.
97
137
  Consider migrating code towards supported streaming API exposed in `zstd.h`.
98
138
 
@@ -101,8 +141,8 @@ Consider migrating code towards supported streaming API exposed in `zstd.h`.
101
141
 
102
142
  The other files are not source code. They are :
103
143
 
104
- - `LICENSE` : contains the BSD license text
105
- - `Makefile` : `make` script to build and install zstd library (static and dynamic)
106
144
  - `BUCK` : support for `buck` build system (https://buckbuild.com/)
107
- - `libzstd.pc.in` : for `pkg-config` (used in `make install`)
145
+ - `Makefile` : `make` script to build and install zstd library (static and dynamic)
108
146
  - `README.md` : this file
147
+ - `dll/` : resources directory for Windows compilation
148
+ - `libzstd.pc.in` : script for `pkg-config` (used in `make install`)
@@ -1,8 +1,7 @@
1
1
  /* ******************************************************************
2
2
  bitstream
3
3
  Part of FSE library
4
- header file (to include)
5
- Copyright (C) 2013-2017, Yann Collet.
4
+ Copyright (C) 2013-present, Yann Collet.
6
5
 
7
6
  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8
7
 
@@ -49,21 +48,10 @@ extern "C" {
49
48
  * Dependencies
50
49
  ******************************************/
51
50
  #include "mem.h" /* unaligned access routines */
51
+ #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
52
52
  #include "error_private.h" /* error codes and messages */
53
53
 
54
54
 
55
- /*-*************************************
56
- * Debug
57
- ***************************************/
58
- #if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
59
- # include <assert.h>
60
- #else
61
- # ifndef assert
62
- # define assert(condition) ((void)0)
63
- # endif
64
- #endif
65
-
66
-
67
55
  /*=========================================
68
56
  * Target specific
69
57
  =========================================*/
@@ -83,8 +71,7 @@ extern "C" {
83
71
  * A critical property of these streams is that they encode and decode in **reverse** direction.
84
72
  * So the first bit sequence you add will be the last to be read, like a LIFO stack.
85
73
  */
86
- typedef struct
87
- {
74
+ typedef struct {
88
75
  size_t bitContainer;
89
76
  unsigned bitPos;
90
77
  char* startPtr;
@@ -118,8 +105,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
118
105
  /*-********************************************
119
106
  * bitStream decoding API (read backward)
120
107
  **********************************************/
121
- typedef struct
122
- {
108
+ typedef struct {
123
109
  size_t bitContainer;
124
110
  unsigned bitsConsumed;
125
111
  const char* ptr;
@@ -236,7 +222,8 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
236
222
  }
237
223
 
238
224
  /*! BIT_addBitsFast() :
239
- * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
225
+ * works only if `value` is _clean_,
226
+ * meaning all high bits above nbBits are 0 */
240
227
  MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
241
228
  size_t value, unsigned nbBits)
242
229
  {
@@ -352,17 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
352
339
 
353
340
  MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
354
341
  {
355
- #if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */
356
- # if defined(__x86_64__)
357
- if (sizeof(bitContainer)==8)
358
- return _bextr_u64(bitContainer, start, nbBits);
359
- else
360
- # endif
361
- return _bextr_u32(bitContainer, start, nbBits);
362
- #else
342
+ U32 const regMask = sizeof(bitContainer)*8 - 1;
343
+ /* if start > regMask, bitstream is corrupted, and result is undefined */
363
344
  assert(nbBits < BIT_MASK_SIZE);
364
- return (bitContainer >> start) & BIT_mask[nbBits];
365
- #endif
345
+ return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
366
346
  }
367
347
 
368
348
  MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
@@ -379,9 +359,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
379
359
  * @return : value extracted */
380
360
  MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
381
361
  {
382
- #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
362
+ /* arbitrate between double-shift and shift+mask */
363
+ #if 1
364
+ /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
365
+ * bitstream is likely corrupted, and result is undefined */
383
366
  return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
384
367
  #else
368
+ /* this code path is slower on my os-x laptop */
385
369
  U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
386
370
  return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
387
371
  #endif
@@ -405,7 +389,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
405
389
  * Read (consume) next n bits from local register and update.
406
390
  * Take care not to read more bits than the local register currently contains.
407
391
  * @return : extracted value. */
408
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
392
+ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
409
393
  {
410
394
  size_t const value = BIT_lookBits(bitD, nbBits);
411
395
  BIT_skipBits(bitD, nbBits);
@@ -414,7 +398,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
414
398
 
415
399
  /*! BIT_readBitsFast() :
416
400
  * unsafe version; only works if nbBits >= 1 */
417
- MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
401
+ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
418
402
  {
419
403
  size_t const value = BIT_lookBitsFast(bitD, nbBits);
420
404
  assert(nbBits >= 1);
@@ -426,7 +410,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
426
410
  * Refill `bitD` from buffer previously set in BIT_initDStream() .
427
411
  * This function is safe, it guarantees it will not read beyond src buffer.
428
412
  * @return : status of `BIT_DStream_t` internal register.
429
- * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
413
+ * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
430
414
  MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
431
415
  {
432
416
  if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
@@ -15,6 +15,8 @@
15
15
  * Compiler specifics
16
16
  *********************************************************/
17
17
  /* force inlining */
18
+
19
+ #if !defined(ZSTD_NO_INLINE)
18
20
  #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
19
21
  # define INLINE_KEYWORD inline
20
22
  #else
@@ -29,9 +31,16 @@
29
31
  # define FORCE_INLINE_ATTR
30
32
  #endif
31
33
 
34
+ #else
35
+
36
+ #define INLINE_KEYWORD
37
+ #define FORCE_INLINE_ATTR
38
+
39
+ #endif
40
+
32
41
  /**
33
42
  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
34
- * parameters. They must be inlined for the compiler to elimininate the constant
43
+ * parameters. They must be inlined for the compiler to eliminate the constant
35
44
  * branches.
36
45
  */
37
46
  #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
@@ -63,16 +72,61 @@
63
72
  # endif
64
73
  #endif
65
74
 
66
- /* prefetch */
67
- #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
68
- # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
69
- # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
70
- #elif defined(__GNUC__)
71
- # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
75
+ /* target attribute */
76
+ #ifndef __has_attribute
77
+ #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
78
+ #endif
79
+ #if defined(__GNUC__)
80
+ # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
72
81
  #else
73
- # define PREFETCH(ptr) /* disabled */
82
+ # define TARGET_ATTRIBUTE(target)
74
83
  #endif
75
84
 
85
+ /* Enable runtime BMI2 dispatch based on the CPU.
86
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
87
+ */
88
+ #ifndef DYNAMIC_BMI2
89
+ #if ((defined(__clang__) && __has_attribute(__target__)) \
90
+ || (defined(__GNUC__) \
91
+ && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
92
+ && (defined(__x86_64__) || defined(_M_X86)) \
93
+ && !defined(__BMI2__)
94
+ # define DYNAMIC_BMI2 1
95
+ #else
96
+ # define DYNAMIC_BMI2 0
97
+ #endif
98
+ #endif
99
+
100
+ /* prefetch
101
+ * can be disabled, by declaring NO_PREFETCH build macro */
102
+ #if defined(NO_PREFETCH)
103
+ # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
104
+ # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
105
+ #else
106
+ # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
107
+ # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
108
+ # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
109
+ # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
110
+ # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
111
+ # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
112
+ # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
113
+ # else
114
+ # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
115
+ # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
116
+ # endif
117
+ #endif /* NO_PREFETCH */
118
+
119
+ #define CACHELINE_SIZE 64
120
+
121
+ #define PREFETCH_AREA(p, s) { \
122
+ const char* const _ptr = (const char*)(p); \
123
+ size_t const _size = (size_t)(s); \
124
+ size_t _pos; \
125
+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
126
+ PREFETCH_L2(_ptr + _pos); \
127
+ } \
128
+ }
129
+
76
130
  /* disable warnings */
77
131
  #ifdef _MSC_VER /* Visual Studio */
78
132
  # include <intrin.h> /* For Visual 2005 */
@@ -0,0 +1,215 @@
1
+ /*
2
+ * Copyright (c) 2018-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_COMMON_CPU_H
12
+ #define ZSTD_COMMON_CPU_H
13
+
14
+ /**
15
+ * Implementation taken from folly/CpuId.h
16
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
17
+ */
18
+
19
+ #include <string.h>
20
+
21
+ #include "mem.h"
22
+
23
+ #ifdef _MSC_VER
24
+ #include <intrin.h>
25
+ #endif
26
+
27
+ typedef struct {
28
+ U32 f1c;
29
+ U32 f1d;
30
+ U32 f7b;
31
+ U32 f7c;
32
+ } ZSTD_cpuid_t;
33
+
34
+ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
35
+ U32 f1c = 0;
36
+ U32 f1d = 0;
37
+ U32 f7b = 0;
38
+ U32 f7c = 0;
39
+ #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
40
+ int reg[4];
41
+ __cpuid((int*)reg, 0);
42
+ {
43
+ int const n = reg[0];
44
+ if (n >= 1) {
45
+ __cpuid((int*)reg, 1);
46
+ f1c = (U32)reg[2];
47
+ f1d = (U32)reg[3];
48
+ }
49
+ if (n >= 7) {
50
+ __cpuidex((int*)reg, 7, 0);
51
+ f7b = (U32)reg[1];
52
+ f7c = (U32)reg[2];
53
+ }
54
+ }
55
+ #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
56
+ /* The following block is like the normal cpuid branch below, but gcc
57
+ * reserves ebx for use of its pic register so we must specially
58
+ * handle the save and restore to avoid clobbering the register
59
+ */
60
+ U32 n;
61
+ __asm__(
62
+ "pushl %%ebx\n\t"
63
+ "cpuid\n\t"
64
+ "popl %%ebx\n\t"
65
+ : "=a"(n)
66
+ : "a"(0)
67
+ : "ecx", "edx");
68
+ if (n >= 1) {
69
+ U32 f1a;
70
+ __asm__(
71
+ "pushl %%ebx\n\t"
72
+ "cpuid\n\t"
73
+ "popl %%ebx\n\t"
74
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
75
+ : "a"(1));
76
+ }
77
+ if (n >= 7) {
78
+ __asm__(
79
+ "pushl %%ebx\n\t"
80
+ "cpuid\n\t"
81
+ "movl %%ebx, %%eax\n\t"
82
+ "popl %%ebx"
83
+ : "=a"(f7b), "=c"(f7c)
84
+ : "a"(7), "c"(0)
85
+ : "edx");
86
+ }
87
+ #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
88
+ U32 n;
89
+ __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
90
+ if (n >= 1) {
91
+ U32 f1a;
92
+ __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
93
+ }
94
+ if (n >= 7) {
95
+ U32 f7a;
96
+ __asm__("cpuid"
97
+ : "=a"(f7a), "=b"(f7b), "=c"(f7c)
98
+ : "a"(7), "c"(0)
99
+ : "edx");
100
+ }
101
+ #endif
102
+ {
103
+ ZSTD_cpuid_t cpuid;
104
+ cpuid.f1c = f1c;
105
+ cpuid.f1d = f1d;
106
+ cpuid.f7b = f7b;
107
+ cpuid.f7c = f7c;
108
+ return cpuid;
109
+ }
110
+ }
111
+
112
+ #define X(name, r, bit) \
113
+ MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
114
+ return ((cpuid.r) & (1U << bit)) != 0; \
115
+ }
116
+
117
+ /* cpuid(1): Processor Info and Feature Bits. */
118
+ #define C(name, bit) X(name, f1c, bit)
119
+ C(sse3, 0)
120
+ C(pclmuldq, 1)
121
+ C(dtes64, 2)
122
+ C(monitor, 3)
123
+ C(dscpl, 4)
124
+ C(vmx, 5)
125
+ C(smx, 6)
126
+ C(eist, 7)
127
+ C(tm2, 8)
128
+ C(ssse3, 9)
129
+ C(cnxtid, 10)
130
+ C(fma, 12)
131
+ C(cx16, 13)
132
+ C(xtpr, 14)
133
+ C(pdcm, 15)
134
+ C(pcid, 17)
135
+ C(dca, 18)
136
+ C(sse41, 19)
137
+ C(sse42, 20)
138
+ C(x2apic, 21)
139
+ C(movbe, 22)
140
+ C(popcnt, 23)
141
+ C(tscdeadline, 24)
142
+ C(aes, 25)
143
+ C(xsave, 26)
144
+ C(osxsave, 27)
145
+ C(avx, 28)
146
+ C(f16c, 29)
147
+ C(rdrand, 30)
148
+ #undef C
149
+ #define D(name, bit) X(name, f1d, bit)
150
+ D(fpu, 0)
151
+ D(vme, 1)
152
+ D(de, 2)
153
+ D(pse, 3)
154
+ D(tsc, 4)
155
+ D(msr, 5)
156
+ D(pae, 6)
157
+ D(mce, 7)
158
+ D(cx8, 8)
159
+ D(apic, 9)
160
+ D(sep, 11)
161
+ D(mtrr, 12)
162
+ D(pge, 13)
163
+ D(mca, 14)
164
+ D(cmov, 15)
165
+ D(pat, 16)
166
+ D(pse36, 17)
167
+ D(psn, 18)
168
+ D(clfsh, 19)
169
+ D(ds, 21)
170
+ D(acpi, 22)
171
+ D(mmx, 23)
172
+ D(fxsr, 24)
173
+ D(sse, 25)
174
+ D(sse2, 26)
175
+ D(ss, 27)
176
+ D(htt, 28)
177
+ D(tm, 29)
178
+ D(pbe, 31)
179
+ #undef D
180
+
181
+ /* cpuid(7): Extended Features. */
182
+ #define B(name, bit) X(name, f7b, bit)
183
+ B(bmi1, 3)
184
+ B(hle, 4)
185
+ B(avx2, 5)
186
+ B(smep, 7)
187
+ B(bmi2, 8)
188
+ B(erms, 9)
189
+ B(invpcid, 10)
190
+ B(rtm, 11)
191
+ B(mpx, 14)
192
+ B(avx512f, 16)
193
+ B(avx512dq, 17)
194
+ B(rdseed, 18)
195
+ B(adx, 19)
196
+ B(smap, 20)
197
+ B(avx512ifma, 21)
198
+ B(pcommit, 22)
199
+ B(clflushopt, 23)
200
+ B(clwb, 24)
201
+ B(avx512pf, 26)
202
+ B(avx512er, 27)
203
+ B(avx512cd, 28)
204
+ B(sha, 29)
205
+ B(avx512bw, 30)
206
+ B(avx512vl, 31)
207
+ #undef B
208
+ #define C(name, bit) X(name, f7c, bit)
209
+ C(prefetchwt1, 0)
210
+ C(avx512vbmi, 1)
211
+ #undef C
212
+
213
+ #undef X
214
+
215
+ #endif /* ZSTD_COMMON_CPU_H */