extzstd 0.3.2 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
@@ -0,0 +1,200 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_BITS_H
12
+ #define ZSTD_BITS_H
13
+
14
+ #include "mem.h"
15
+
16
+ MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
17
+ {
18
+ assert(val != 0);
19
+ {
20
+ static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
21
+ 30, 22, 20, 15, 25, 17, 4, 8,
22
+ 31, 27, 13, 23, 21, 19, 16, 7,
23
+ 26, 12, 18, 6, 11, 5, 10, 9};
24
+ return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
25
+ }
26
+ }
27
+
28
+ MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
29
+ {
30
+ assert(val != 0);
31
+ # if defined(_MSC_VER)
32
+ # if STATIC_BMI2 == 1
33
+ return (unsigned)_tzcnt_u32(val);
34
+ # else
35
+ if (val != 0) {
36
+ unsigned long r;
37
+ _BitScanForward(&r, val);
38
+ return (unsigned)r;
39
+ } else {
40
+ /* Should not reach this code path */
41
+ __assume(0);
42
+ }
43
+ # endif
44
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
45
+ return (unsigned)__builtin_ctz(val);
46
+ # else
47
+ return ZSTD_countTrailingZeros32_fallback(val);
48
+ # endif
49
+ }
50
+
51
+ MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
52
+ assert(val != 0);
53
+ {
54
+ static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
55
+ 11, 14, 16, 18, 22, 25, 3, 30,
56
+ 8, 12, 20, 28, 15, 17, 24, 7,
57
+ 19, 27, 23, 6, 26, 5, 4, 31};
58
+ val |= val >> 1;
59
+ val |= val >> 2;
60
+ val |= val >> 4;
61
+ val |= val >> 8;
62
+ val |= val >> 16;
63
+ return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
64
+ }
65
+ }
66
+
67
+ MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
68
+ {
69
+ assert(val != 0);
70
+ # if defined(_MSC_VER)
71
+ # if STATIC_BMI2 == 1
72
+ return (unsigned)_lzcnt_u32(val);
73
+ # else
74
+ if (val != 0) {
75
+ unsigned long r;
76
+ _BitScanReverse(&r, val);
77
+ return (unsigned)(31 - r);
78
+ } else {
79
+ /* Should not reach this code path */
80
+ __assume(0);
81
+ }
82
+ # endif
83
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
84
+ return (unsigned)__builtin_clz(val);
85
+ # else
86
+ return ZSTD_countLeadingZeros32_fallback(val);
87
+ # endif
88
+ }
89
+
90
+ MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
91
+ {
92
+ assert(val != 0);
93
+ # if defined(_MSC_VER) && defined(_WIN64)
94
+ # if STATIC_BMI2 == 1
95
+ return (unsigned)_tzcnt_u64(val);
96
+ # else
97
+ if (val != 0) {
98
+ unsigned long r;
99
+ _BitScanForward64(&r, val);
100
+ return (unsigned)r;
101
+ } else {
102
+ /* Should not reach this code path */
103
+ __assume(0);
104
+ }
105
+ # endif
106
+ # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
107
+ return (unsigned)__builtin_ctzll(val);
108
+ # else
109
+ {
110
+ U32 mostSignificantWord = (U32)(val >> 32);
111
+ U32 leastSignificantWord = (U32)val;
112
+ if (leastSignificantWord == 0) {
113
+ return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
114
+ } else {
115
+ return ZSTD_countTrailingZeros32(leastSignificantWord);
116
+ }
117
+ }
118
+ # endif
119
+ }
120
+
121
+ MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
122
+ {
123
+ assert(val != 0);
124
+ # if defined(_MSC_VER) && defined(_WIN64)
125
+ # if STATIC_BMI2 == 1
126
+ return (unsigned)_lzcnt_u64(val);
127
+ # else
128
+ if (val != 0) {
129
+ unsigned long r;
130
+ _BitScanReverse64(&r, val);
131
+ return (unsigned)(63 - r);
132
+ } else {
133
+ /* Should not reach this code path */
134
+ __assume(0);
135
+ }
136
+ # endif
137
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
138
+ return (unsigned)(__builtin_clzll(val));
139
+ # else
140
+ {
141
+ U32 mostSignificantWord = (U32)(val >> 32);
142
+ U32 leastSignificantWord = (U32)val;
143
+ if (mostSignificantWord == 0) {
144
+ return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
145
+ } else {
146
+ return ZSTD_countLeadingZeros32(mostSignificantWord);
147
+ }
148
+ }
149
+ # endif
150
+ }
151
+
152
+ MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
153
+ {
154
+ if (MEM_isLittleEndian()) {
155
+ if (MEM_64bits()) {
156
+ return ZSTD_countTrailingZeros64((U64)val) >> 3;
157
+ } else {
158
+ return ZSTD_countTrailingZeros32((U32)val) >> 3;
159
+ }
160
+ } else { /* Big Endian CPU */
161
+ if (MEM_64bits()) {
162
+ return ZSTD_countLeadingZeros64((U64)val) >> 3;
163
+ } else {
164
+ return ZSTD_countLeadingZeros32((U32)val) >> 3;
165
+ }
166
+ }
167
+ }
168
+
169
+ MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
170
+ {
171
+ assert(val != 0);
172
+ return 31 - ZSTD_countLeadingZeros32(val);
173
+ }
174
+
175
+ /* ZSTD_rotateRight_*():
176
+ * Rotates a bitfield to the right by "count" bits.
177
+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
178
+ */
179
+ MEM_STATIC
180
+ U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
181
+ assert(count < 64);
182
+ count &= 0x3F; /* for fickle pattern recognition */
183
+ return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
184
+ }
185
+
186
+ MEM_STATIC
187
+ U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
188
+ assert(count < 32);
189
+ count &= 0x1F; /* for fickle pattern recognition */
190
+ return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
191
+ }
192
+
193
+ MEM_STATIC
194
+ U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
195
+ assert(count < 16);
196
+ count &= 0x0F; /* for fickle pattern recognition */
197
+ return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
198
+ }
199
+
200
+ #endif /* ZSTD_BITS_H */
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * bitstream
3
3
  * Part of FSE library
4
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -30,14 +30,15 @@ extern "C" {
30
30
  #include "compiler.h" /* UNLIKELY() */
31
31
  #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
32
32
  #include "error_private.h" /* error codes and messages */
33
+ #include "bits.h" /* ZSTD_highbit32 */
33
34
 
34
35
 
35
36
  /*=========================================
36
37
  * Target specific
37
38
  =========================================*/
38
39
  #ifndef ZSTD_NO_INTRINSICS
39
- # if defined(__BMI__) && defined(__GNUC__)
40
- # include <immintrin.h> /* support for bextr (experimental) */
40
+ # if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
41
+ # include <immintrin.h> /* support for bextr (experimental)/bzhi */
41
42
  # elif defined(__ICCARM__)
42
43
  # include <intrinsics.h>
43
44
  # endif
@@ -89,19 +90,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
89
90
  /*-********************************************
90
91
  * bitStream decoding API (read backward)
91
92
  **********************************************/
93
+ typedef size_t BitContainerType;
92
94
  typedef struct {
93
- size_t bitContainer;
95
+ BitContainerType bitContainer;
94
96
  unsigned bitsConsumed;
95
97
  const char* ptr;
96
98
  const char* start;
97
99
  const char* limitPtr;
98
100
  } BIT_DStream_t;
99
101
 
100
- typedef enum { BIT_DStream_unfinished = 0,
101
- BIT_DStream_endOfBuffer = 1,
102
- BIT_DStream_completed = 2,
103
- BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
104
- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
102
+ typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
103
+ BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
104
+ BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
105
+ BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
106
+ } BIT_DStream_status; /* result of BIT_reloadDStream() */
105
107
 
106
108
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
107
109
  MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
@@ -111,7 +113,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
111
113
 
112
114
  /* Start by invoking BIT_initDStream().
113
115
  * A chunk of the bitStream is then stored into a local register.
114
- * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
116
+ * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
115
117
  * You can then retrieve bitFields stored into the local register, **in reverse order**.
116
118
  * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
117
119
  * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
@@ -132,42 +134,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
132
134
  MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
133
135
  /* faster, but works only if nbBits >= 1 */
134
136
 
135
-
136
-
137
- /*-**************************************************************
138
- * Internal functions
139
- ****************************************************************/
140
- MEM_STATIC unsigned BIT_highbit32 (U32 val)
141
- {
142
- assert(val != 0);
143
- {
144
- # if defined(_MSC_VER) /* Visual */
145
- # if STATIC_BMI2 == 1
146
- return _lzcnt_u32(val) ^ 31;
147
- # else
148
- unsigned long r = 0;
149
- return _BitScanReverse(&r, val) ? (unsigned)r : 0;
150
- # endif
151
- # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
152
- return __builtin_clz (val) ^ 31;
153
- # elif defined(__ICCARM__) /* IAR Intrinsic */
154
- return 31 - __CLZ(val);
155
- # else /* Software version */
156
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
157
- 11, 14, 16, 18, 22, 25, 3, 30,
158
- 8, 12, 20, 28, 15, 17, 24, 7,
159
- 19, 27, 23, 6, 26, 5, 4, 31 };
160
- U32 v = val;
161
- v |= v >> 1;
162
- v |= v >> 2;
163
- v |= v >> 4;
164
- v |= v >> 8;
165
- v |= v >> 16;
166
- return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
167
- # endif
168
- }
169
- }
170
-
171
137
  /*===== Local Constants =====*/
172
138
  static const unsigned BIT_mask[] = {
173
139
  0, 1, 3, 7, 0xF, 0x1F,
@@ -197,6 +163,16 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
197
163
  return 0;
198
164
  }
199
165
 
166
+ FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
167
+ {
168
+ #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
169
+ return _bzhi_u64(bitContainer, nbBits);
170
+ #else
171
+ assert(nbBits < BIT_MASK_SIZE);
172
+ return bitContainer & BIT_mask[nbBits];
173
+ #endif
174
+ }
175
+
200
176
  /*! BIT_addBits() :
201
177
  * can add up to 31 bits into `bitC`.
202
178
  * Note : does not check for register overflow ! */
@@ -206,7 +182,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
206
182
  DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
207
183
  assert(nbBits < BIT_MASK_SIZE);
208
184
  assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
209
- bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
185
+ bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
210
186
  bitC->bitPos += nbBits;
211
187
  }
212
188
 
@@ -285,35 +261,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
285
261
  bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
286
262
  bitD->bitContainer = MEM_readLEST(bitD->ptr);
287
263
  { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
288
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
264
+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
289
265
  if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
290
266
  } else {
291
267
  bitD->ptr = bitD->start;
292
268
  bitD->bitContainer = *(const BYTE*)(bitD->start);
293
269
  switch(srcSize)
294
270
  {
295
- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
296
- /* fall-through */
271
+ case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
272
+ ZSTD_FALLTHROUGH;
297
273
 
298
- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
299
- /* fall-through */
274
+ case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
275
+ ZSTD_FALLTHROUGH;
300
276
 
301
- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
302
- /* fall-through */
277
+ case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
278
+ ZSTD_FALLTHROUGH;
303
279
 
304
- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
305
- /* fall-through */
280
+ case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
281
+ ZSTD_FALLTHROUGH;
306
282
 
307
- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
308
- /* fall-through */
283
+ case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
284
+ ZSTD_FALLTHROUGH;
309
285
 
310
- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
311
- /* fall-through */
286
+ case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
287
+ ZSTD_FALLTHROUGH;
312
288
 
313
289
  default: break;
314
290
  }
315
291
  { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
316
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
292
+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
317
293
  if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
318
294
  }
319
295
  bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
@@ -322,26 +298,25 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
322
298
  return srcSize;
323
299
  }
324
300
 
325
- MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
301
+ FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
326
302
  {
327
303
  return bitContainer >> start;
328
304
  }
329
305
 
330
- MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
306
+ FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
331
307
  {
332
308
  U32 const regMask = sizeof(bitContainer)*8 - 1;
333
309
  /* if start > regMask, bitstream is corrupted, and result is undefined */
334
310
  assert(nbBits < BIT_MASK_SIZE);
335
- return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
336
- }
337
-
338
- MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
339
- {
340
- #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
341
- return _bzhi_u64(bitContainer, nbBits);
311
+ /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
312
+ * than accessing memory. When bmi2 instruction is not present, we consider
313
+ * such cpus old (pre-Haswell, 2013) and their performance is not of that
314
+ * importance.
315
+ */
316
+ #if defined(__x86_64__) || defined(_M_X86)
317
+ return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
342
318
  #else
343
- assert(nbBits < BIT_MASK_SIZE);
344
- return bitContainer & BIT_mask[nbBits];
319
+ return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
345
320
  #endif
346
321
  }
347
322
 
@@ -351,7 +326,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 co
351
326
  * On 32-bits, maxNbBits==24.
352
327
  * On 64-bits, maxNbBits==56.
353
328
  * @return : value extracted */
354
- MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
329
+ FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
355
330
  {
356
331
  /* arbitrate between double-shift and shift+mask */
357
332
  #if 1
@@ -374,7 +349,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
374
349
  return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
375
350
  }
376
351
 
377
- MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
352
+ FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
378
353
  {
379
354
  bitD->bitsConsumed += nbBits;
380
355
  }
@@ -383,7 +358,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
383
358
  * Read (consume) next n bits from local register and update.
384
359
  * Pay attention to not read more than nbBits contained into local register.
385
360
  * @return : extracted value. */
386
- MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
361
+ FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
387
362
  {
388
363
  size_t const value = BIT_lookBits(bitD, nbBits);
389
364
  BIT_skipBits(bitD, nbBits);
@@ -391,7 +366,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n
391
366
  }
392
367
 
393
368
  /*! BIT_readBitsFast() :
394
- * unsafe version; only works only if nbBits >= 1 */
369
+ * unsafe version; only works if nbBits >= 1 */
395
370
  MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
396
371
  {
397
372
  size_t const value = BIT_lookBitsFast(bitD, nbBits);
@@ -400,6 +375,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
400
375
  return value;
401
376
  }
402
377
 
378
+ /*! BIT_reloadDStream_internal() :
379
+ * Simple variant of BIT_reloadDStream(), with two conditions:
380
+ * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
381
+ * 2. look window is valid after shifted down : bitD->ptr >= bitD->start
382
+ */
383
+ MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
384
+ {
385
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
386
+ bitD->ptr -= bitD->bitsConsumed >> 3;
387
+ assert(bitD->ptr >= bitD->start);
388
+ bitD->bitsConsumed &= 7;
389
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
390
+ return BIT_DStream_unfinished;
391
+ }
392
+
403
393
  /*! BIT_reloadDStreamFast() :
404
394
  * Similar to BIT_reloadDStream(), but with two differences:
405
395
  * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
@@ -410,31 +400,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
410
400
  {
411
401
  if (UNLIKELY(bitD->ptr < bitD->limitPtr))
412
402
  return BIT_DStream_overflow;
413
- assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
414
- bitD->ptr -= bitD->bitsConsumed >> 3;
415
- bitD->bitsConsumed &= 7;
416
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
417
- return BIT_DStream_unfinished;
403
+ return BIT_reloadDStream_internal(bitD);
418
404
  }
419
405
 
420
406
  /*! BIT_reloadDStream() :
421
407
  * Refill `bitD` from buffer previously set in BIT_initDStream() .
422
- * This function is safe, it guarantees it will not read beyond src buffer.
408
+ * This function is safe, it guarantees it will not never beyond src buffer.
423
409
  * @return : status of `BIT_DStream_t` internal register.
424
410
  * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
425
- MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
411
+ FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
426
412
  {
427
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
413
+ /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
414
+ if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
415
+ static const BitContainerType zeroFilled = 0;
416
+ bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
417
+ /* overflow detected, erroneous scenario or end of stream: no update */
428
418
  return BIT_DStream_overflow;
419
+ }
420
+
421
+ assert(bitD->ptr >= bitD->start);
429
422
 
430
423
  if (bitD->ptr >= bitD->limitPtr) {
431
- return BIT_reloadDStreamFast(bitD);
424
+ return BIT_reloadDStream_internal(bitD);
432
425
  }
433
426
  if (bitD->ptr == bitD->start) {
427
+ /* reached end of bitStream => no update */
434
428
  if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
435
429
  return BIT_DStream_completed;
436
430
  }
437
- /* start < ptr < limitPtr */
431
+ /* start < ptr < limitPtr => cautious update */
438
432
  { U32 nbBytes = bitD->bitsConsumed >> 3;
439
433
  BIT_DStream_status result = BIT_DStream_unfinished;
440
434
  if (bitD->ptr - nbBytes < bitD->start) {