extzstd 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +28 -14
  3. data/contrib/zstd/CHANGELOG +301 -56
  4. data/contrib/zstd/CONTRIBUTING.md +169 -72
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +116 -87
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +62 -32
  9. data/contrib/zstd/TESTING.md +2 -3
  10. data/contrib/zstd/appveyor.yml +52 -136
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +225 -222
  13. data/contrib/zstd/lib/README.md +51 -6
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +45 -62
  17. data/contrib/zstd/lib/common/compiler.h +205 -22
  18. data/contrib/zstd/lib/common/cpu.h +1 -3
  19. data/contrib/zstd/lib/common/debug.c +1 -1
  20. data/contrib/zstd/lib/common/debug.h +12 -19
  21. data/contrib/zstd/lib/common/entropy_common.c +172 -48
  22. data/contrib/zstd/lib/common/error_private.c +10 -2
  23. data/contrib/zstd/lib/common/error_private.h +82 -3
  24. data/contrib/zstd/lib/common/fse.h +37 -86
  25. data/contrib/zstd/lib/common/fse_decompress.c +117 -92
  26. data/contrib/zstd/lib/common/huf.h +99 -166
  27. data/contrib/zstd/lib/common/mem.h +124 -142
  28. data/contrib/zstd/lib/common/pool.c +54 -27
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +156 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -19
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +7 -847
  34. data/contrib/zstd/lib/common/xxhash.h +5568 -167
  35. data/contrib/zstd/lib/common/zstd_common.c +2 -37
  36. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  37. data/contrib/zstd/lib/common/zstd_internal.h +132 -187
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +83 -157
  41. data/contrib/zstd/lib/compress/hist.c +27 -29
  42. data/contrib/zstd/lib/compress/hist.h +2 -2
  43. data/contrib/zstd/lib/compress/huf_compress.c +916 -279
  44. data/contrib/zstd/lib/compress/zstd_compress.c +3773 -1019
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +610 -203
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +119 -42
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +42 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +49 -317
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +320 -103
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +388 -151
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +729 -265
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1270 -251
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +61 -1
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +324 -219
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +9 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +481 -209
  63. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +181 -457
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +34 -113
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1199 -565
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -12
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +627 -157
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1086 -326
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +19 -5
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +62 -13
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +73 -52
  79. data/contrib/zstd/lib/dictBuilder/cover.h +7 -6
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +44 -35
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +103 -111
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +21 -54
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +29 -70
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +30 -73
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +29 -71
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +40 -86
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +47 -88
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +40 -83
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +214 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +7 -6
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +203 -34
  102. data/contrib/zstd/lib/zstd.h +1217 -287
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +28 -8
  104. data/ext/extconf.rb +7 -6
  105. data/ext/extzstd.c +19 -10
  106. data/ext/extzstd.h +6 -0
  107. data/ext/libzstd_conf.h +0 -1
  108. data/ext/zstd_decompress_asm.S +1 -0
  109. data/gemstub.rb +3 -21
  110. data/lib/extzstd/version.rb +6 -1
  111. data/lib/extzstd.rb +0 -2
  112. data/test/test_basic.rb +0 -5
  113. metadata +18 -6
@@ -0,0 +1,55 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ /* This file provides custom allocation primitives
12
+ */
13
+
14
+ #define ZSTD_DEPS_NEED_MALLOC
15
+ #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
16
+
17
+ #include "mem.h" /* MEM_STATIC */
18
+ #define ZSTD_STATIC_LINKING_ONLY
19
+ #include "../zstd.h" /* ZSTD_customMem */
20
+
21
+ #ifndef ZSTD_ALLOCATIONS_H
22
+ #define ZSTD_ALLOCATIONS_H
23
+
24
+ /* custom memory allocation functions */
25
+
26
+ MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
27
+ {
28
+ if (customMem.customAlloc)
29
+ return customMem.customAlloc(customMem.opaque, size);
30
+ return ZSTD_malloc(size);
31
+ }
32
+
33
+ MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
34
+ {
35
+ if (customMem.customAlloc) {
36
+ /* calloc implemented as malloc+memset;
37
+ * not as efficient as calloc, but next best guess for custom malloc */
38
+ void* const ptr = customMem.customAlloc(customMem.opaque, size);
39
+ ZSTD_memset(ptr, 0, size);
40
+ return ptr;
41
+ }
42
+ return ZSTD_calloc(1, size);
43
+ }
44
+
45
+ MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
46
+ {
47
+ if (ptr!=NULL) {
48
+ if (customMem.customFree)
49
+ customMem.customFree(customMem.opaque, ptr);
50
+ else
51
+ ZSTD_free(ptr);
52
+ }
53
+ }
54
+
55
+ #endif /* ZSTD_ALLOCATIONS_H */
@@ -0,0 +1,200 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_BITS_H
12
+ #define ZSTD_BITS_H
13
+
14
+ #include "mem.h"
15
+
16
+ MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
17
+ {
18
+ assert(val != 0);
19
+ {
20
+ static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
21
+ 30, 22, 20, 15, 25, 17, 4, 8,
22
+ 31, 27, 13, 23, 21, 19, 16, 7,
23
+ 26, 12, 18, 6, 11, 5, 10, 9};
24
+ return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
25
+ }
26
+ }
27
+
28
+ MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
29
+ {
30
+ assert(val != 0);
31
+ # if defined(_MSC_VER)
32
+ # if STATIC_BMI2 == 1
33
+ return (unsigned)_tzcnt_u32(val);
34
+ # else
35
+ if (val != 0) {
36
+ unsigned long r;
37
+ _BitScanForward(&r, val);
38
+ return (unsigned)r;
39
+ } else {
40
+ /* Should not reach this code path */
41
+ __assume(0);
42
+ }
43
+ # endif
44
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
45
+ return (unsigned)__builtin_ctz(val);
46
+ # else
47
+ return ZSTD_countTrailingZeros32_fallback(val);
48
+ # endif
49
+ }
50
+
51
+ MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
52
+ assert(val != 0);
53
+ {
54
+ static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
55
+ 11, 14, 16, 18, 22, 25, 3, 30,
56
+ 8, 12, 20, 28, 15, 17, 24, 7,
57
+ 19, 27, 23, 6, 26, 5, 4, 31};
58
+ val |= val >> 1;
59
+ val |= val >> 2;
60
+ val |= val >> 4;
61
+ val |= val >> 8;
62
+ val |= val >> 16;
63
+ return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
64
+ }
65
+ }
66
+
67
+ MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
68
+ {
69
+ assert(val != 0);
70
+ # if defined(_MSC_VER)
71
+ # if STATIC_BMI2 == 1
72
+ return (unsigned)_lzcnt_u32(val);
73
+ # else
74
+ if (val != 0) {
75
+ unsigned long r;
76
+ _BitScanReverse(&r, val);
77
+ return (unsigned)(31 - r);
78
+ } else {
79
+ /* Should not reach this code path */
80
+ __assume(0);
81
+ }
82
+ # endif
83
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
84
+ return (unsigned)__builtin_clz(val);
85
+ # else
86
+ return ZSTD_countLeadingZeros32_fallback(val);
87
+ # endif
88
+ }
89
+
90
+ MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
91
+ {
92
+ assert(val != 0);
93
+ # if defined(_MSC_VER) && defined(_WIN64)
94
+ # if STATIC_BMI2 == 1
95
+ return (unsigned)_tzcnt_u64(val);
96
+ # else
97
+ if (val != 0) {
98
+ unsigned long r;
99
+ _BitScanForward64(&r, val);
100
+ return (unsigned)r;
101
+ } else {
102
+ /* Should not reach this code path */
103
+ __assume(0);
104
+ }
105
+ # endif
106
+ # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
107
+ return (unsigned)__builtin_ctzll(val);
108
+ # else
109
+ {
110
+ U32 mostSignificantWord = (U32)(val >> 32);
111
+ U32 leastSignificantWord = (U32)val;
112
+ if (leastSignificantWord == 0) {
113
+ return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
114
+ } else {
115
+ return ZSTD_countTrailingZeros32(leastSignificantWord);
116
+ }
117
+ }
118
+ # endif
119
+ }
120
+
121
+ MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
122
+ {
123
+ assert(val != 0);
124
+ # if defined(_MSC_VER) && defined(_WIN64)
125
+ # if STATIC_BMI2 == 1
126
+ return (unsigned)_lzcnt_u64(val);
127
+ # else
128
+ if (val != 0) {
129
+ unsigned long r;
130
+ _BitScanReverse64(&r, val);
131
+ return (unsigned)(63 - r);
132
+ } else {
133
+ /* Should not reach this code path */
134
+ __assume(0);
135
+ }
136
+ # endif
137
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
138
+ return (unsigned)(__builtin_clzll(val));
139
+ # else
140
+ {
141
+ U32 mostSignificantWord = (U32)(val >> 32);
142
+ U32 leastSignificantWord = (U32)val;
143
+ if (mostSignificantWord == 0) {
144
+ return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
145
+ } else {
146
+ return ZSTD_countLeadingZeros32(mostSignificantWord);
147
+ }
148
+ }
149
+ # endif
150
+ }
151
+
152
+ MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
153
+ {
154
+ if (MEM_isLittleEndian()) {
155
+ if (MEM_64bits()) {
156
+ return ZSTD_countTrailingZeros64((U64)val) >> 3;
157
+ } else {
158
+ return ZSTD_countTrailingZeros32((U32)val) >> 3;
159
+ }
160
+ } else { /* Big Endian CPU */
161
+ if (MEM_64bits()) {
162
+ return ZSTD_countLeadingZeros64((U64)val) >> 3;
163
+ } else {
164
+ return ZSTD_countLeadingZeros32((U32)val) >> 3;
165
+ }
166
+ }
167
+ }
168
+
169
+ MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
170
+ {
171
+ assert(val != 0);
172
+ return 31 - ZSTD_countLeadingZeros32(val);
173
+ }
174
+
175
+ /* ZSTD_rotateRight_*():
176
+ * Rotates a bitfield to the right by "count" bits.
177
+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
178
+ */
179
+ MEM_STATIC
180
+ U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
181
+ assert(count < 64);
182
+ count &= 0x3F; /* for fickle pattern recognition */
183
+ return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
184
+ }
185
+
186
+ MEM_STATIC
187
+ U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
188
+ assert(count < 32);
189
+ count &= 0x1F; /* for fickle pattern recognition */
190
+ return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
191
+ }
192
+
193
+ MEM_STATIC
194
+ U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
195
+ assert(count < 16);
196
+ count &= 0x0F; /* for fickle pattern recognition */
197
+ return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
198
+ }
199
+
200
+ #endif /* ZSTD_BITS_H */
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * bitstream
3
3
  * Part of FSE library
4
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -17,7 +17,6 @@
17
17
  #if defined (__cplusplus)
18
18
  extern "C" {
19
19
  #endif
20
-
21
20
  /*
22
21
  * This API consists of small unitary functions, which must be inlined for best performance.
23
22
  * Since link-time-optimization is not available for all compilers,
@@ -31,15 +30,18 @@ extern "C" {
31
30
  #include "compiler.h" /* UNLIKELY() */
32
31
  #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
33
32
  #include "error_private.h" /* error codes and messages */
33
+ #include "bits.h" /* ZSTD_highbit32 */
34
34
 
35
35
 
36
36
  /*=========================================
37
37
  * Target specific
38
38
  =========================================*/
39
- #if defined(__BMI__) && defined(__GNUC__)
40
- # include <immintrin.h> /* support for bextr (experimental) */
41
- #elif defined(__ICCARM__)
42
- # include <intrinsics.h>
39
+ #ifndef ZSTD_NO_INTRINSICS
40
+ # if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
41
+ # include <immintrin.h> /* support for bextr (experimental)/bzhi */
42
+ # elif defined(__ICCARM__)
43
+ # include <intrinsics.h>
44
+ # endif
43
45
  #endif
44
46
 
45
47
  #define STREAM_ACCUMULATOR_MIN_32 25
@@ -131,38 +133,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
131
133
  MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
132
134
  /* faster, but works only if nbBits >= 1 */
133
135
 
134
-
135
-
136
- /*-**************************************************************
137
- * Internal functions
138
- ****************************************************************/
139
- MEM_STATIC unsigned BIT_highbit32 (U32 val)
140
- {
141
- assert(val != 0);
142
- {
143
- # if defined(_MSC_VER) /* Visual */
144
- unsigned long r=0;
145
- return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
146
- # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
147
- return __builtin_clz (val) ^ 31;
148
- # elif defined(__ICCARM__) /* IAR Intrinsic */
149
- return 31 - __CLZ(val);
150
- # else /* Software version */
151
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
152
- 11, 14, 16, 18, 22, 25, 3, 30,
153
- 8, 12, 20, 28, 15, 17, 24, 7,
154
- 19, 27, 23, 6, 26, 5, 4, 31 };
155
- U32 v = val;
156
- v |= v >> 1;
157
- v |= v >> 2;
158
- v |= v >> 4;
159
- v |= v >> 8;
160
- v |= v >> 16;
161
- return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
162
- # endif
163
- }
164
- }
165
-
166
136
  /*===== Local Constants =====*/
167
137
  static const unsigned BIT_mask[] = {
168
138
  0, 1, 3, 7, 0xF, 0x1F,
@@ -192,16 +162,26 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
192
162
  return 0;
193
163
  }
194
164
 
165
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
166
+ {
167
+ #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
168
+ return _bzhi_u64(bitContainer, nbBits);
169
+ #else
170
+ assert(nbBits < BIT_MASK_SIZE);
171
+ return bitContainer & BIT_mask[nbBits];
172
+ #endif
173
+ }
174
+
195
175
  /*! BIT_addBits() :
196
176
  * can add up to 31 bits into `bitC`.
197
177
  * Note : does not check for register overflow ! */
198
178
  MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
199
179
  size_t value, unsigned nbBits)
200
180
  {
201
- MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
181
+ DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
202
182
  assert(nbBits < BIT_MASK_SIZE);
203
183
  assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
204
- bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
184
+ bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
205
185
  bitC->bitPos += nbBits;
206
186
  }
207
187
 
@@ -271,7 +251,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
271
251
  */
272
252
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
273
253
  {
274
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
254
+ if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
275
255
 
276
256
  bitD->start = (const char*)srcBuffer;
277
257
  bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@@ -280,7 +260,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
280
260
  bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
281
261
  bitD->bitContainer = MEM_readLEST(bitD->ptr);
282
262
  { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
283
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
263
+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
284
264
  if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
285
265
  } else {
286
266
  bitD->ptr = bitD->start;
@@ -288,27 +268,27 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
288
268
  switch(srcSize)
289
269
  {
290
270
  case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
291
- /* fall-through */
271
+ ZSTD_FALLTHROUGH;
292
272
 
293
273
  case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
294
- /* fall-through */
274
+ ZSTD_FALLTHROUGH;
295
275
 
296
276
  case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
297
- /* fall-through */
277
+ ZSTD_FALLTHROUGH;
298
278
 
299
279
  case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
300
- /* fall-through */
280
+ ZSTD_FALLTHROUGH;
301
281
 
302
282
  case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
303
- /* fall-through */
283
+ ZSTD_FALLTHROUGH;
304
284
 
305
285
  case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
306
- /* fall-through */
286
+ ZSTD_FALLTHROUGH;
307
287
 
308
288
  default: break;
309
289
  }
310
290
  { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
311
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
291
+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
312
292
  if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
313
293
  }
314
294
  bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
@@ -317,23 +297,26 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
317
297
  return srcSize;
318
298
  }
319
299
 
320
- MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
300
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
321
301
  {
322
302
  return bitContainer >> start;
323
303
  }
324
304
 
325
- MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
305
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
326
306
  {
327
307
  U32 const regMask = sizeof(bitContainer)*8 - 1;
328
308
  /* if start > regMask, bitstream is corrupted, and result is undefined */
329
309
  assert(nbBits < BIT_MASK_SIZE);
310
+ /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
311
+ * than accessing memory. When bmi2 instruction is not present, we consider
312
+ * such cpus old (pre-Haswell, 2013) and their performance is not of that
313
+ * importance.
314
+ */
315
+ #if defined(__x86_64__) || defined(_M_X86)
316
+ return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
317
+ #else
330
318
  return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
331
- }
332
-
333
- MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
334
- {
335
- assert(nbBits < BIT_MASK_SIZE);
336
- return bitContainer & BIT_mask[nbBits];
319
+ #endif
337
320
  }
338
321
 
339
322
  /*! BIT_lookBits() :
@@ -342,7 +325,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
342
325
  * On 32-bits, maxNbBits==24.
343
326
  * On 64-bits, maxNbBits==56.
344
327
  * @return : value extracted */
345
- MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
328
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
346
329
  {
347
330
  /* arbitrate between double-shift and shift+mask */
348
331
  #if 1
@@ -365,7 +348,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
365
348
  return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
366
349
  }
367
350
 
368
- MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
351
+ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
369
352
  {
370
353
  bitD->bitsConsumed += nbBits;
371
354
  }
@@ -374,7 +357,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
374
357
  * Read (consume) next n bits from local register and update.
375
358
  * Pay attention to not read more than nbBits contained into local register.
376
359
  * @return : extracted value. */
377
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
360
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
378
361
  {
379
362
  size_t const value = BIT_lookBits(bitD, nbBits);
380
363
  BIT_skipBits(bitD, nbBits);
@@ -382,7 +365,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
382
365
  }
383
366
 
384
367
  /*! BIT_readBitsFast() :
385
- * unsafe version; only works only if nbBits >= 1 */
368
+ * unsafe version; only works if nbBits >= 1 */
386
369
  MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
387
370
  {
388
371
  size_t const value = BIT_lookBitsFast(bitD, nbBits);
@@ -413,7 +396,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
413
396
  * This function is safe, it guarantees it will not read beyond src buffer.
414
397
  * @return : status of `BIT_DStream_t` internal register.
415
398
  * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
416
- MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
399
+ MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
417
400
  {
418
401
  if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
419
402
  return BIT_DStream_overflow;