extzstd 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
@@ -2,15 +2,35 @@ Zstandard library files
2
2
  ================================
3
3
 
4
4
  The __lib__ directory is split into several sub-directories,
5
- in order to make it easier to select or exclude specific features.
5
+ in order to make it easier to select or exclude features.
6
6
 
7
7
 
8
8
  #### Building
9
9
 
10
- `Makefile` script is provided, supporting the standard set of commands,
11
- directories, and variables (see https://www.gnu.org/prep/standards/html_node/Command-Variables.html).
10
+ `Makefile` script is provided, supporting [Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html#Makefile-Conventions),
11
+ including command variables, staged install, directory variables and standard targets.
12
12
  - `make` : generates both static and dynamic libraries
13
- - `make install` : install libraries in default system directories
13
+ - `make install` : install libraries and headers in target system directories
14
+
15
+ `libzstd` default scope is pretty large, including compression, decompression, dictionary builder,
16
+ and support for decoding legacy formats >= v0.5.0.
17
+ The scope can be reduced on demand (see paragraph _modular build_).
18
+
19
+
20
+ #### Multithreading support
21
+
22
+ Multithreading is disabled by default when building with `make`.
23
+ Enabling multithreading requires 2 conditions :
24
+ - set build macro `ZSTD_MULTITHREAD` (`-DZSTD_MULTITHREAD` for `gcc`)
25
+ - for POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
26
+
27
+ Both conditions are automatically applied when invoking `make lib-mt` target.
28
+
29
+ When linking a POSIX program with a multithreaded version of `libzstd`,
30
+ note that it's necessary to request the `-pthread` flag during link stage.
31
+
32
+ Multithreading capabilities are exposed
33
+ via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
14
34
 
15
35
 
16
36
  #### API
@@ -23,53 +43,74 @@ Zstandard's stable API is exposed within [lib/zstd.h](zstd.h).
23
43
  Optional advanced features are exposed via :
24
44
 
25
45
  - `lib/common/zstd_errors.h` : translates `size_t` function results
26
- into an `ZSTD_ErrorCode`, for accurate error handling.
46
+ into a `ZSTD_ErrorCode`, for accurate error handling.
47
+
27
48
  - `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
28
- it unlocks access to advanced experimental API,
29
- exposed in second part of `zstd.h`.
30
- These APIs shall ___never be used with dynamic library___ !
31
- They are not "stable", their definition may change in the future.
49
+ it unlocks access to the experimental API,
50
+ exposed in the second part of `zstd.h`.
51
+ All definitions in the experimental APIs are unstable,
52
+ they may still change in the future, or even be removed.
53
+ As a consequence, experimental definitions shall ___never be used with dynamic library___ !
32
54
  Only static linking is allowed.
33
55
 
34
56
 
35
57
  #### Modular build
36
58
 
37
- - Directory `lib/common` is always required, for all variants.
38
- - Compression source code lies in `lib/compress`
39
- - Decompression source code lies in `lib/decompress`
40
- - It's possible to include only `compress` or only `decompress`, they don't depend on each other.
41
- - `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples.
42
- The API is exposed in `lib/dictBuilder/zdict.h`.
43
- This module depends on both `lib/common` and `lib/compress` .
44
- - `lib/legacy` : source code to decompress older zstd formats, starting from `v0.1`.
45
- This module depends on `lib/common` and `lib/decompress`.
46
- To enable this feature, it's necessary to define `ZSTD_LEGACY_SUPPORT = 1` during compilation.
47
- Typically, with `gcc`, add argument `-DZSTD_LEGACY_SUPPORT=1`.
48
- Using higher number limits the number of version supported.
49
- For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats starting from v0.2+".
50
- The API is exposed in `lib/legacy/zstd_legacy.h`.
51
- Each version also provides a (dedicated) set of advanced API.
52
- For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
59
+ It's possible to compile only a limited set of features within `libzstd`.
60
+ The file structure is designed to make this selection manually achievable for any build system :
53
61
 
62
+ - Directory `lib/common` is always required, for all variants.
54
63
 
55
- #### Multithreading support
64
+ - Compression source code lies in `lib/compress`
56
65
 
57
- Multithreading is disabled by default when building with `make`.
58
- Enabling multithreading requires 2 conditions :
59
- - set macro `ZSTD_MULTITHREAD`
60
- - on POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc` for example)
66
+ - Decompression source code lies in `lib/decompress`
61
67
 
62
- Both conditions are automatically triggered by invoking `make lib-mt` target.
63
- Note that, when linking a POSIX program with a multithreaded version of `libzstd`,
64
- it's necessary to trigger `-pthread` flag during link stage.
68
+ - It's possible to include only `compress` or only `decompress`, they don't depend on each other.
65
69
 
66
- Multithreading capabilities are exposed via :
67
- - private API `lib/compress/zstdmt_compress.h`.
68
- Symbols defined in this header are currently exposed in `libzstd`, hence usable.
69
- Note however that this API is planned to be locked and remain strictly internal in the future.
70
- - advanced API `ZSTD_compress_generic()`, defined in `lib/zstd.h`, experimental section.
71
- This API is still considered experimental, but is designed to be labelled "stable" at some point in the future.
72
- It's the recommended entry point for multi-threading operations.
70
+ - `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples.
71
+ The API is exposed in `lib/dictBuilder/zdict.h`.
72
+ This module depends on both `lib/common` and `lib/compress` .
73
+
74
+ - `lib/legacy` : makes it possible to decompress legacy zstd formats, starting from `v0.1.0`.
75
+ This module depends on `lib/common` and `lib/decompress`.
76
+ To enable this feature, define `ZSTD_LEGACY_SUPPORT` during compilation.
77
+ Specifying a number limits versions supported to that version onward.
78
+ For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats >= v0.2.0".
79
+ Conversely, `ZSTD_LEGACY_SUPPORT=0` means "do __not__ support legacy formats".
80
+ By default, this build macro is set as `ZSTD_LEGACY_SUPPORT=5`.
81
+ Decoding supported legacy formats is a transparent capability triggered within decompression functions.
82
+ It's also allowed to invoke legacy API directly, exposed in `lib/legacy/zstd_legacy.h`.
83
+ Each version does also provide its own set of advanced API.
84
+ For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
85
+
86
+ - While invoking `make libzstd`, it's possible to define build macros
87
+ `ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
88
+ and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
89
+ This will also disable compilation of all dependencies
90
+ (eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
91
+
92
+ - There are some additional build macros that can be used to minify the decoder.
93
+
94
+ Zstandard often has more than one implementation of a piece of functionality,
95
+ where each implementation optimizes for different scenarios. For example, the
96
+ Huffman decoder has complementary implementations that decode the stream one
97
+ symbol at a time or two symbols at a time. Zstd normally includes both (and
98
+ dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
99
+ or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
100
+ compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
101
+ and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
102
+ only one or the other of two decompression implementations. The smallest
103
+ binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
104
+ `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
105
+
106
+ For squeezing the last ounce of size out, you can also define
107
+ `ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
108
+ which removes the error messages that are otherwise returned by
109
+ `ZSTD_getErrorName`.
110
+
111
+ - While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
112
+ will expose the deprecated `ZSTDMT` API declared in `zstdmt_compress.h` in
113
+ the shared library, which is now hidden by default.
73
114
 
74
115
 
75
116
  #### Windows : using MinGW+MSYS to create DLL
@@ -92,7 +133,6 @@ The compiled executable will require ZSTD DLL which is available at `dll\libzstd
92
133
 
93
134
  Obsolete API on their way out are stored in directory `lib/deprecated`.
94
135
  At this stage, it contains older streaming prototypes, in `lib/deprecated/zbuff.h`.
95
- Presence in this directory is temporary.
96
136
  These prototypes will be removed in some future version.
97
137
  Consider migrating code towards supported streaming API exposed in `zstd.h`.
98
138
 
@@ -101,8 +141,8 @@ Consider migrating code towards supported streaming API exposed in `zstd.h`.
101
141
 
102
142
  The other files are not source code. They are :
103
143
 
104
- - `LICENSE` : contains the BSD license text
105
- - `Makefile` : `make` script to build and install zstd library (static and dynamic)
106
144
  - `BUCK` : support for `buck` build system (https://buckbuild.com/)
107
- - `libzstd.pc.in` : for `pkg-config` (used in `make install`)
145
+ - `Makefile` : `make` script to build and install zstd library (static and dynamic)
108
146
  - `README.md` : this file
147
+ - `dll/` : resources directory for Windows compilation
148
+ - `libzstd.pc.in` : script for `pkg-config` (used in `make install`)
@@ -1,8 +1,7 @@
1
1
  /* ******************************************************************
2
2
  bitstream
3
3
  Part of FSE library
4
- header file (to include)
5
- Copyright (C) 2013-2017, Yann Collet.
4
+ Copyright (C) 2013-present, Yann Collet.
6
5
 
7
6
  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8
7
 
@@ -49,21 +48,10 @@ extern "C" {
49
48
  * Dependencies
50
49
  ******************************************/
51
50
  #include "mem.h" /* unaligned access routines */
51
+ #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
52
52
  #include "error_private.h" /* error codes and messages */
53
53
 
54
54
 
55
- /*-*************************************
56
- * Debug
57
- ***************************************/
58
- #if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
59
- # include <assert.h>
60
- #else
61
- # ifndef assert
62
- # define assert(condition) ((void)0)
63
- # endif
64
- #endif
65
-
66
-
67
55
  /*=========================================
68
56
  * Target specific
69
57
  =========================================*/
@@ -83,8 +71,7 @@ extern "C" {
83
71
  * A critical property of these streams is that they encode and decode in **reverse** direction.
84
72
  * So the first bit sequence you add will be the last to be read, like a LIFO stack.
85
73
  */
86
- typedef struct
87
- {
74
+ typedef struct {
88
75
  size_t bitContainer;
89
76
  unsigned bitPos;
90
77
  char* startPtr;
@@ -118,8 +105,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
118
105
  /*-********************************************
119
106
  * bitStream decoding API (read backward)
120
107
  **********************************************/
121
- typedef struct
122
- {
108
+ typedef struct {
123
109
  size_t bitContainer;
124
110
  unsigned bitsConsumed;
125
111
  const char* ptr;
@@ -236,7 +222,8 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
236
222
  }
237
223
 
238
224
  /*! BIT_addBitsFast() :
239
- * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
225
+ * works only if `value` is _clean_,
226
+ * meaning all high bits above nbBits are 0 */
240
227
  MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
241
228
  size_t value, unsigned nbBits)
242
229
  {
@@ -352,17 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
352
339
 
353
340
  MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
354
341
  {
355
- #if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */
356
- # if defined(__x86_64__)
357
- if (sizeof(bitContainer)==8)
358
- return _bextr_u64(bitContainer, start, nbBits);
359
- else
360
- # endif
361
- return _bextr_u32(bitContainer, start, nbBits);
362
- #else
342
+ U32 const regMask = sizeof(bitContainer)*8 - 1;
343
+ /* if start > regMask, bitstream is corrupted, and result is undefined */
363
344
  assert(nbBits < BIT_MASK_SIZE);
364
- return (bitContainer >> start) & BIT_mask[nbBits];
365
- #endif
345
+ return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
366
346
  }
367
347
 
368
348
  MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
@@ -379,9 +359,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
379
359
  * @return : value extracted */
380
360
  MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
381
361
  {
382
- #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
362
+ /* arbitrate between double-shift and shift+mask */
363
+ #if 1
364
+ /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
365
+ * bitstream is likely corrupted, and result is undefined */
383
366
  return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
384
367
  #else
368
+ /* this code path is slower on my os-x laptop */
385
369
  U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
386
370
  return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
387
371
  #endif
@@ -405,7 +389,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
405
389
  * Read (consume) next n bits from local register and update.
406
390
  * Pay attention to not read more than nbBits contained into local register.
407
391
  * @return : extracted value. */
408
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
392
+ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
409
393
  {
410
394
  size_t const value = BIT_lookBits(bitD, nbBits);
411
395
  BIT_skipBits(bitD, nbBits);
@@ -414,7 +398,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
414
398
 
415
399
  /*! BIT_readBitsFast() :
416
400
  * unsafe version; only works if nbBits >= 1 */
417
- MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
401
+ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
418
402
  {
419
403
  size_t const value = BIT_lookBitsFast(bitD, nbBits);
420
404
  assert(nbBits >= 1);
@@ -426,7 +410,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
426
410
  * Refill `bitD` from buffer previously set in BIT_initDStream() .
427
411
  * This function is safe, it guarantees it will not read beyond src buffer.
428
412
  * @return : status of `BIT_DStream_t` internal register.
429
- * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
413
+ * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
430
414
  MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
431
415
  {
432
416
  if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
@@ -15,6 +15,8 @@
15
15
  * Compiler specifics
16
16
  *********************************************************/
17
17
  /* force inlining */
18
+
19
+ #if !defined(ZSTD_NO_INLINE)
18
20
  #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
19
21
  # define INLINE_KEYWORD inline
20
22
  #else
@@ -29,9 +31,16 @@
29
31
  # define FORCE_INLINE_ATTR
30
32
  #endif
31
33
 
34
+ #else
35
+
36
+ #define INLINE_KEYWORD
37
+ #define FORCE_INLINE_ATTR
38
+
39
+ #endif
40
+
32
41
  /**
33
42
  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
34
- * parameters. They must be inlined for the compiler to elimininate the constant
43
+ * parameters. They must be inlined for the compiler to eliminate the constant
35
44
  * branches.
36
45
  */
37
46
  #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
@@ -63,16 +72,61 @@
63
72
  # endif
64
73
  #endif
65
74
 
66
- /* prefetch */
67
- #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
68
- # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
69
- # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
70
- #elif defined(__GNUC__)
71
- # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
75
+ /* target attribute */
76
+ #ifndef __has_attribute
77
+ #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
78
+ #endif
79
+ #if defined(__GNUC__)
80
+ # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
72
81
  #else
73
- # define PREFETCH(ptr) /* disabled */
82
+ # define TARGET_ATTRIBUTE(target)
74
83
  #endif
75
84
 
85
+ /* Enable runtime BMI2 dispatch based on the CPU.
86
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
87
+ */
88
+ #ifndef DYNAMIC_BMI2
89
+ #if ((defined(__clang__) && __has_attribute(__target__)) \
90
+ || (defined(__GNUC__) \
91
+ && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
92
+ && (defined(__x86_64__) || defined(_M_X86)) \
93
+ && !defined(__BMI2__)
94
+ # define DYNAMIC_BMI2 1
95
+ #else
96
+ # define DYNAMIC_BMI2 0
97
+ #endif
98
+ #endif
99
+
100
+ /* prefetch
101
+ * can be disabled, by declaring NO_PREFETCH build macro */
102
+ #if defined(NO_PREFETCH)
103
+ # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
104
+ # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
105
+ #else
106
+ # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
107
+ # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
108
+ # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
109
+ # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
110
+ # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
111
+ # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
112
+ # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
113
+ # else
114
+ # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
115
+ # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
116
+ # endif
117
+ #endif /* NO_PREFETCH */
118
+
119
+ #define CACHELINE_SIZE 64
120
+
121
+ #define PREFETCH_AREA(p, s) { \
122
+ const char* const _ptr = (const char*)(p); \
123
+ size_t const _size = (size_t)(s); \
124
+ size_t _pos; \
125
+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
126
+ PREFETCH_L2(_ptr + _pos); \
127
+ } \
128
+ }
129
+
76
130
  /* disable warnings */
77
131
  #ifdef _MSC_VER /* Visual Studio */
78
132
  # include <intrin.h> /* For Visual 2005 */
@@ -0,0 +1,215 @@
1
+ /*
2
+ * Copyright (c) 2018-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_COMMON_CPU_H
12
+ #define ZSTD_COMMON_CPU_H
13
+
14
+ /**
15
+ * Implementation taken from folly/CpuId.h
16
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
17
+ */
18
+
19
+ #include <string.h>
20
+
21
+ #include "mem.h"
22
+
23
+ #ifdef _MSC_VER
24
+ #include <intrin.h>
25
+ #endif
26
+
27
+ typedef struct {
28
+ U32 f1c;
29
+ U32 f1d;
30
+ U32 f7b;
31
+ U32 f7c;
32
+ } ZSTD_cpuid_t;
33
+
34
+ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
35
+ U32 f1c = 0;
36
+ U32 f1d = 0;
37
+ U32 f7b = 0;
38
+ U32 f7c = 0;
39
+ #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
40
+ int reg[4];
41
+ __cpuid((int*)reg, 0);
42
+ {
43
+ int const n = reg[0];
44
+ if (n >= 1) {
45
+ __cpuid((int*)reg, 1);
46
+ f1c = (U32)reg[2];
47
+ f1d = (U32)reg[3];
48
+ }
49
+ if (n >= 7) {
50
+ __cpuidex((int*)reg, 7, 0);
51
+ f7b = (U32)reg[1];
52
+ f7c = (U32)reg[2];
53
+ }
54
+ }
55
+ #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
56
+ /* The following block is like the normal cpuid branch below, but gcc
57
+ * reserves ebx for use of its pic register so we must specially
58
+ * handle the save and restore to avoid clobbering the register
59
+ */
60
+ U32 n;
61
+ __asm__(
62
+ "pushl %%ebx\n\t"
63
+ "cpuid\n\t"
64
+ "popl %%ebx\n\t"
65
+ : "=a"(n)
66
+ : "a"(0)
67
+ : "ecx", "edx");
68
+ if (n >= 1) {
69
+ U32 f1a;
70
+ __asm__(
71
+ "pushl %%ebx\n\t"
72
+ "cpuid\n\t"
73
+ "popl %%ebx\n\t"
74
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
75
+ : "a"(1));
76
+ }
77
+ if (n >= 7) {
78
+ __asm__(
79
+ "pushl %%ebx\n\t"
80
+ "cpuid\n\t"
81
+ "movl %%ebx, %%eax\n\t"
82
+ "popl %%ebx"
83
+ : "=a"(f7b), "=c"(f7c)
84
+ : "a"(7), "c"(0)
85
+ : "edx");
86
+ }
87
+ #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
88
+ U32 n;
89
+ __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
90
+ if (n >= 1) {
91
+ U32 f1a;
92
+ __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
93
+ }
94
+ if (n >= 7) {
95
+ U32 f7a;
96
+ __asm__("cpuid"
97
+ : "=a"(f7a), "=b"(f7b), "=c"(f7c)
98
+ : "a"(7), "c"(0)
99
+ : "edx");
100
+ }
101
+ #endif
102
+ {
103
+ ZSTD_cpuid_t cpuid;
104
+ cpuid.f1c = f1c;
105
+ cpuid.f1d = f1d;
106
+ cpuid.f7b = f7b;
107
+ cpuid.f7c = f7c;
108
+ return cpuid;
109
+ }
110
+ }
111
+
112
+ #define X(name, r, bit) \
113
+ MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
114
+ return ((cpuid.r) & (1U << bit)) != 0; \
115
+ }
116
+
117
+ /* cpuid(1): Processor Info and Feature Bits. */
118
+ #define C(name, bit) X(name, f1c, bit)
119
+ C(sse3, 0)
120
+ C(pclmuldq, 1)
121
+ C(dtes64, 2)
122
+ C(monitor, 3)
123
+ C(dscpl, 4)
124
+ C(vmx, 5)
125
+ C(smx, 6)
126
+ C(eist, 7)
127
+ C(tm2, 8)
128
+ C(ssse3, 9)
129
+ C(cnxtid, 10)
130
+ C(fma, 12)
131
+ C(cx16, 13)
132
+ C(xtpr, 14)
133
+ C(pdcm, 15)
134
+ C(pcid, 17)
135
+ C(dca, 18)
136
+ C(sse41, 19)
137
+ C(sse42, 20)
138
+ C(x2apic, 21)
139
+ C(movbe, 22)
140
+ C(popcnt, 23)
141
+ C(tscdeadline, 24)
142
+ C(aes, 25)
143
+ C(xsave, 26)
144
+ C(osxsave, 27)
145
+ C(avx, 28)
146
+ C(f16c, 29)
147
+ C(rdrand, 30)
148
+ #undef C
149
+ #define D(name, bit) X(name, f1d, bit)
150
+ D(fpu, 0)
151
+ D(vme, 1)
152
+ D(de, 2)
153
+ D(pse, 3)
154
+ D(tsc, 4)
155
+ D(msr, 5)
156
+ D(pae, 6)
157
+ D(mce, 7)
158
+ D(cx8, 8)
159
+ D(apic, 9)
160
+ D(sep, 11)
161
+ D(mtrr, 12)
162
+ D(pge, 13)
163
+ D(mca, 14)
164
+ D(cmov, 15)
165
+ D(pat, 16)
166
+ D(pse36, 17)
167
+ D(psn, 18)
168
+ D(clfsh, 19)
169
+ D(ds, 21)
170
+ D(acpi, 22)
171
+ D(mmx, 23)
172
+ D(fxsr, 24)
173
+ D(sse, 25)
174
+ D(sse2, 26)
175
+ D(ss, 27)
176
+ D(htt, 28)
177
+ D(tm, 29)
178
+ D(pbe, 31)
179
+ #undef D
180
+
181
+ /* cpuid(7): Extended Features. */
182
+ #define B(name, bit) X(name, f7b, bit)
183
+ B(bmi1, 3)
184
+ B(hle, 4)
185
+ B(avx2, 5)
186
+ B(smep, 7)
187
+ B(bmi2, 8)
188
+ B(erms, 9)
189
+ B(invpcid, 10)
190
+ B(rtm, 11)
191
+ B(mpx, 14)
192
+ B(avx512f, 16)
193
+ B(avx512dq, 17)
194
+ B(rdseed, 18)
195
+ B(adx, 19)
196
+ B(smap, 20)
197
+ B(avx512ifma, 21)
198
+ B(pcommit, 22)
199
+ B(clflushopt, 23)
200
+ B(clwb, 24)
201
+ B(avx512pf, 26)
202
+ B(avx512er, 27)
203
+ B(avx512cd, 28)
204
+ B(sha, 29)
205
+ B(avx512bw, 30)
206
+ B(avx512vl, 31)
207
+ #undef B
208
+ #define C(name, bit) X(name, f7c, bit)
209
+ C(prefetchwt1, 0)
210
+ C(avx512vbmi, 1)
211
+ #undef C
212
+
213
+ #undef X
214
+
215
+ #endif /* ZSTD_COMMON_CPU_H */