extzstd 0.3.1 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +28 -14
  3. data/contrib/zstd/CHANGELOG +301 -56
  4. data/contrib/zstd/CONTRIBUTING.md +169 -72
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +116 -87
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +62 -32
  9. data/contrib/zstd/TESTING.md +2 -3
  10. data/contrib/zstd/appveyor.yml +52 -136
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +225 -222
  13. data/contrib/zstd/lib/README.md +51 -6
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +45 -62
  17. data/contrib/zstd/lib/common/compiler.h +205 -22
  18. data/contrib/zstd/lib/common/cpu.h +1 -3
  19. data/contrib/zstd/lib/common/debug.c +1 -1
  20. data/contrib/zstd/lib/common/debug.h +12 -19
  21. data/contrib/zstd/lib/common/entropy_common.c +172 -48
  22. data/contrib/zstd/lib/common/error_private.c +10 -2
  23. data/contrib/zstd/lib/common/error_private.h +82 -3
  24. data/contrib/zstd/lib/common/fse.h +37 -86
  25. data/contrib/zstd/lib/common/fse_decompress.c +117 -92
  26. data/contrib/zstd/lib/common/huf.h +99 -166
  27. data/contrib/zstd/lib/common/mem.h +124 -142
  28. data/contrib/zstd/lib/common/pool.c +54 -27
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +156 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -19
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +7 -847
  34. data/contrib/zstd/lib/common/xxhash.h +5568 -167
  35. data/contrib/zstd/lib/common/zstd_common.c +2 -37
  36. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  37. data/contrib/zstd/lib/common/zstd_internal.h +132 -187
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +83 -157
  41. data/contrib/zstd/lib/compress/hist.c +27 -29
  42. data/contrib/zstd/lib/compress/hist.h +2 -2
  43. data/contrib/zstd/lib/compress/huf_compress.c +916 -279
  44. data/contrib/zstd/lib/compress/zstd_compress.c +3773 -1019
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +610 -203
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +119 -42
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +42 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +49 -317
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +320 -103
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +388 -151
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +729 -265
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1270 -251
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +61 -1
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +324 -219
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +9 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +481 -209
  63. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +181 -457
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +34 -113
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1199 -565
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -12
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +627 -157
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1086 -326
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +19 -5
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +62 -13
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +73 -52
  79. data/contrib/zstd/lib/dictBuilder/cover.h +7 -6
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +44 -35
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +103 -111
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +21 -54
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +29 -70
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +30 -73
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +29 -71
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +40 -86
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +47 -88
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +40 -83
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +214 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +7 -6
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +203 -34
  102. data/contrib/zstd/lib/zstd.h +1217 -287
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +28 -8
  104. data/ext/extconf.rb +7 -6
  105. data/ext/extzstd.c +19 -10
  106. data/ext/extzstd.h +6 -0
  107. data/ext/libzstd_conf.h +0 -1
  108. data/ext/zstd_decompress_asm.S +1 -0
  109. data/gemstub.rb +3 -21
  110. data/lib/extzstd/version.rb +6 -1
  111. data/lib/extzstd.rb +0 -2
  112. data/test/test_basic.rb +0 -5
  113. metadata +18 -6
@@ -0,0 +1,55 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ /* This file provides custom allocation primitives
12
+ */
13
+
14
+ #define ZSTD_DEPS_NEED_MALLOC
15
+ #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
16
+
17
+ #include "mem.h" /* MEM_STATIC */
18
+ #define ZSTD_STATIC_LINKING_ONLY
19
+ #include "../zstd.h" /* ZSTD_customMem */
20
+
21
+ #ifndef ZSTD_ALLOCATIONS_H
22
+ #define ZSTD_ALLOCATIONS_H
23
+
24
+ /* custom memory allocation functions */
25
+
26
+ MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
27
+ {
28
+ if (customMem.customAlloc)
29
+ return customMem.customAlloc(customMem.opaque, size);
30
+ return ZSTD_malloc(size);
31
+ }
32
+
33
+ MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
34
+ {
35
+ if (customMem.customAlloc) {
36
+ /* calloc implemented as malloc+memset;
37
+ * not as efficient as calloc, but next best guess for custom malloc */
38
+ void* const ptr = customMem.customAlloc(customMem.opaque, size);
39
+ ZSTD_memset(ptr, 0, size);
40
+ return ptr;
41
+ }
42
+ return ZSTD_calloc(1, size);
43
+ }
44
+
45
+ MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
46
+ {
47
+ if (ptr!=NULL) {
48
+ if (customMem.customFree)
49
+ customMem.customFree(customMem.opaque, ptr);
50
+ else
51
+ ZSTD_free(ptr);
52
+ }
53
+ }
54
+
55
+ #endif /* ZSTD_ALLOCATIONS_H */
@@ -0,0 +1,200 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_BITS_H
12
+ #define ZSTD_BITS_H
13
+
14
+ #include "mem.h"
15
+
16
+ MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
17
+ {
18
+ assert(val != 0);
19
+ {
20
+ static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
21
+ 30, 22, 20, 15, 25, 17, 4, 8,
22
+ 31, 27, 13, 23, 21, 19, 16, 7,
23
+ 26, 12, 18, 6, 11, 5, 10, 9};
24
+ return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
25
+ }
26
+ }
27
+
28
+ MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
29
+ {
30
+ assert(val != 0);
31
+ # if defined(_MSC_VER)
32
+ # if STATIC_BMI2 == 1
33
+ return (unsigned)_tzcnt_u32(val);
34
+ # else
35
+ if (val != 0) {
36
+ unsigned long r;
37
+ _BitScanForward(&r, val);
38
+ return (unsigned)r;
39
+ } else {
40
+ /* Should not reach this code path */
41
+ __assume(0);
42
+ }
43
+ # endif
44
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
45
+ return (unsigned)__builtin_ctz(val);
46
+ # else
47
+ return ZSTD_countTrailingZeros32_fallback(val);
48
+ # endif
49
+ }
50
+
51
+ MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
52
+ assert(val != 0);
53
+ {
54
+ static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
55
+ 11, 14, 16, 18, 22, 25, 3, 30,
56
+ 8, 12, 20, 28, 15, 17, 24, 7,
57
+ 19, 27, 23, 6, 26, 5, 4, 31};
58
+ val |= val >> 1;
59
+ val |= val >> 2;
60
+ val |= val >> 4;
61
+ val |= val >> 8;
62
+ val |= val >> 16;
63
+ return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
64
+ }
65
+ }
66
+
67
+ MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
68
+ {
69
+ assert(val != 0);
70
+ # if defined(_MSC_VER)
71
+ # if STATIC_BMI2 == 1
72
+ return (unsigned)_lzcnt_u32(val);
73
+ # else
74
+ if (val != 0) {
75
+ unsigned long r;
76
+ _BitScanReverse(&r, val);
77
+ return (unsigned)(31 - r);
78
+ } else {
79
+ /* Should not reach this code path */
80
+ __assume(0);
81
+ }
82
+ # endif
83
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
84
+ return (unsigned)__builtin_clz(val);
85
+ # else
86
+ return ZSTD_countLeadingZeros32_fallback(val);
87
+ # endif
88
+ }
89
+
90
+ MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
91
+ {
92
+ assert(val != 0);
93
+ # if defined(_MSC_VER) && defined(_WIN64)
94
+ # if STATIC_BMI2 == 1
95
+ return (unsigned)_tzcnt_u64(val);
96
+ # else
97
+ if (val != 0) {
98
+ unsigned long r;
99
+ _BitScanForward64(&r, val);
100
+ return (unsigned)r;
101
+ } else {
102
+ /* Should not reach this code path */
103
+ __assume(0);
104
+ }
105
+ # endif
106
+ # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
107
+ return (unsigned)__builtin_ctzll(val);
108
+ # else
109
+ {
110
+ U32 mostSignificantWord = (U32)(val >> 32);
111
+ U32 leastSignificantWord = (U32)val;
112
+ if (leastSignificantWord == 0) {
113
+ return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
114
+ } else {
115
+ return ZSTD_countTrailingZeros32(leastSignificantWord);
116
+ }
117
+ }
118
+ # endif
119
+ }
120
+
121
+ MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
122
+ {
123
+ assert(val != 0);
124
+ # if defined(_MSC_VER) && defined(_WIN64)
125
+ # if STATIC_BMI2 == 1
126
+ return (unsigned)_lzcnt_u64(val);
127
+ # else
128
+ if (val != 0) {
129
+ unsigned long r;
130
+ _BitScanReverse64(&r, val);
131
+ return (unsigned)(63 - r);
132
+ } else {
133
+ /* Should not reach this code path */
134
+ __assume(0);
135
+ }
136
+ # endif
137
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
138
+ return (unsigned)(__builtin_clzll(val));
139
+ # else
140
+ {
141
+ U32 mostSignificantWord = (U32)(val >> 32);
142
+ U32 leastSignificantWord = (U32)val;
143
+ if (mostSignificantWord == 0) {
144
+ return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
145
+ } else {
146
+ return ZSTD_countLeadingZeros32(mostSignificantWord);
147
+ }
148
+ }
149
+ # endif
150
+ }
151
+
152
+ MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
153
+ {
154
+ if (MEM_isLittleEndian()) {
155
+ if (MEM_64bits()) {
156
+ return ZSTD_countTrailingZeros64((U64)val) >> 3;
157
+ } else {
158
+ return ZSTD_countTrailingZeros32((U32)val) >> 3;
159
+ }
160
+ } else { /* Big Endian CPU */
161
+ if (MEM_64bits()) {
162
+ return ZSTD_countLeadingZeros64((U64)val) >> 3;
163
+ } else {
164
+ return ZSTD_countLeadingZeros32((U32)val) >> 3;
165
+ }
166
+ }
167
+ }
168
+
169
+ MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
170
+ {
171
+ assert(val != 0);
172
+ return 31 - ZSTD_countLeadingZeros32(val);
173
+ }
174
+
175
+ /* ZSTD_rotateRight_*():
176
+ * Rotates a bitfield to the right by "count" bits.
177
+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
178
+ */
179
+ MEM_STATIC
180
+ U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
181
+ assert(count < 64);
182
+ count &= 0x3F; /* for fickle pattern recognition */
183
+ return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
184
+ }
185
+
186
+ MEM_STATIC
187
+ U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
188
+ assert(count < 32);
189
+ count &= 0x1F; /* for fickle pattern recognition */
190
+ return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
191
+ }
192
+
193
+ MEM_STATIC
194
+ U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
195
+ assert(count < 16);
196
+ count &= 0x0F; /* for fickle pattern recognition */
197
+ return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
198
+ }
199
+
200
+ #endif /* ZSTD_BITS_H */
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * bitstream
3
3
  * Part of FSE library
4
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -17,7 +17,6 @@
17
17
  #if defined (__cplusplus)
18
18
  extern "C" {
19
19
  #endif
20
-
21
20
  /*
22
21
  * This API consists of small unitary functions, which must be inlined for best performance.
23
22
  * Since link-time-optimization is not available for all compilers,
@@ -31,15 +30,18 @@ extern "C" {
31
30
  #include "compiler.h" /* UNLIKELY() */
32
31
  #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
33
32
  #include "error_private.h" /* error codes and messages */
33
+ #include "bits.h" /* ZSTD_highbit32 */
34
34
 
35
35
 
36
36
  /*=========================================
37
37
  * Target specific
38
38
  =========================================*/
39
- #if defined(__BMI__) && defined(__GNUC__)
40
- # include <immintrin.h> /* support for bextr (experimental) */
41
- #elif defined(__ICCARM__)
42
- # include <intrinsics.h>
39
+ #ifndef ZSTD_NO_INTRINSICS
40
+ # if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
41
+ # include <immintrin.h> /* support for bextr (experimental)/bzhi */
42
+ # elif defined(__ICCARM__)
43
+ # include <intrinsics.h>
44
+ # endif
43
45
  #endif
44
46
 
45
47
  #define STREAM_ACCUMULATOR_MIN_32 25
@@ -131,38 +133,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
131
133
  MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
132
134
  /* faster, but works only if nbBits >= 1 */
133
135
 
134
-
135
-
136
- /*-**************************************************************
137
- * Internal functions
138
- ****************************************************************/
139
- MEM_STATIC unsigned BIT_highbit32 (U32 val)
140
- {
141
- assert(val != 0);
142
- {
143
- # if defined(_MSC_VER) /* Visual */
144
- unsigned long r=0;
145
- return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
146
- # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
147
- return __builtin_clz (val) ^ 31;
148
- # elif defined(__ICCARM__) /* IAR Intrinsic */
149
- return 31 - __CLZ(val);
150
- # else /* Software version */
151
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
152
- 11, 14, 16, 18, 22, 25, 3, 30,
153
- 8, 12, 20, 28, 15, 17, 24, 7,
154
- 19, 27, 23, 6, 26, 5, 4, 31 };
155
- U32 v = val;
156
- v |= v >> 1;
157
- v |= v >> 2;
158
- v |= v >> 4;
159
- v |= v >> 8;
160
- v |= v >> 16;
161
- return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
162
- # endif
163
- }
164
- }
165
-
166
136
  /*===== Local Constants =====*/
167
137
  static const unsigned BIT_mask[] = {
168
138
  0, 1, 3, 7, 0xF, 0x1F,
@@ -192,16 +162,26 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
192
162
  return 0;
193
163
  }
194
164
 
165
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
166
+ {
167
+ #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
168
+ return _bzhi_u64(bitContainer, nbBits);
169
+ #else
170
+ assert(nbBits < BIT_MASK_SIZE);
171
+ return bitContainer & BIT_mask[nbBits];
172
+ #endif
173
+ }
174
+
195
175
  /*! BIT_addBits() :
196
176
  * can add up to 31 bits into `bitC`.
197
177
  * Note : does not check for register overflow ! */
198
178
  MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
199
179
  size_t value, unsigned nbBits)
200
180
  {
201
- MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
181
+ DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
202
182
  assert(nbBits < BIT_MASK_SIZE);
203
183
  assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
204
- bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
184
+ bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
205
185
  bitC->bitPos += nbBits;
206
186
  }
207
187
 
@@ -271,7 +251,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
271
251
  */
272
252
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
273
253
  {
274
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
254
+ if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
275
255
 
276
256
  bitD->start = (const char*)srcBuffer;
277
257
  bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@@ -280,7 +260,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
280
260
  bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
281
261
  bitD->bitContainer = MEM_readLEST(bitD->ptr);
282
262
  { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
283
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
263
+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
284
264
  if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
285
265
  } else {
286
266
  bitD->ptr = bitD->start;
@@ -288,27 +268,27 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
288
268
  switch(srcSize)
289
269
  {
290
270
  case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
291
- /* fall-through */
271
+ ZSTD_FALLTHROUGH;
292
272
 
293
273
  case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
294
- /* fall-through */
274
+ ZSTD_FALLTHROUGH;
295
275
 
296
276
  case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
297
- /* fall-through */
277
+ ZSTD_FALLTHROUGH;
298
278
 
299
279
  case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
300
- /* fall-through */
280
+ ZSTD_FALLTHROUGH;
301
281
 
302
282
  case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
303
- /* fall-through */
283
+ ZSTD_FALLTHROUGH;
304
284
 
305
285
  case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
306
- /* fall-through */
286
+ ZSTD_FALLTHROUGH;
307
287
 
308
288
  default: break;
309
289
  }
310
290
  { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
311
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
291
+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
312
292
  if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
313
293
  }
314
294
  bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
@@ -317,23 +297,26 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
317
297
  return srcSize;
318
298
  }
319
299
 
320
- MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
300
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
321
301
  {
322
302
  return bitContainer >> start;
323
303
  }
324
304
 
325
- MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
305
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
326
306
  {
327
307
  U32 const regMask = sizeof(bitContainer)*8 - 1;
328
308
  /* if start > regMask, bitstream is corrupted, and result is undefined */
329
309
  assert(nbBits < BIT_MASK_SIZE);
310
+ /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
311
+ * than accessing memory. When bmi2 instruction is not present, we consider
312
+ * such cpus old (pre-Haswell, 2013) and their performance is not of that
313
+ * importance.
314
+ */
315
+ #if defined(__x86_64__) || defined(_M_X86)
316
+ return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
317
+ #else
330
318
  return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
331
- }
332
-
333
- MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
334
- {
335
- assert(nbBits < BIT_MASK_SIZE);
336
- return bitContainer & BIT_mask[nbBits];
319
+ #endif
337
320
  }
338
321
 
339
322
  /*! BIT_lookBits() :
@@ -342,7 +325,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
342
325
  * On 32-bits, maxNbBits==24.
343
326
  * On 64-bits, maxNbBits==56.
344
327
  * @return : value extracted */
345
- MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
328
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
346
329
  {
347
330
  /* arbitrate between double-shift and shift+mask */
348
331
  #if 1
@@ -365,7 +348,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
365
348
  return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
366
349
  }
367
350
 
368
- MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
351
+ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
369
352
  {
370
353
  bitD->bitsConsumed += nbBits;
371
354
  }
@@ -374,7 +357,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
374
357
  * Read (consume) next n bits from local register and update.
375
358
  * Pay attention to not read more than nbBits contained into local register.
376
359
  * @return : extracted value. */
377
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
360
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
378
361
  {
379
362
  size_t const value = BIT_lookBits(bitD, nbBits);
380
363
  BIT_skipBits(bitD, nbBits);
@@ -382,7 +365,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
382
365
  }
383
366
 
384
367
  /*! BIT_readBitsFast() :
385
- * unsafe version; only works only if nbBits >= 1 */
368
+ * unsafe version; only works if nbBits >= 1 */
386
369
  MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
387
370
  {
388
371
  size_t const value = BIT_lookBitsFast(bitD, nbBits);
@@ -413,7 +396,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
413
396
  * This function is safe, it guarantees it will not read beyond src buffer.
414
397
  * @return : status of `BIT_DStream_t` internal register.
415
398
  * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
416
- MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
399
+ MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
417
400
  {
418
401
  if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
419
402
  return BIT_DStream_overflow;