extzstd 0.3.2 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (112) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
@@ -0,0 +1,200 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_BITS_H
12
+ #define ZSTD_BITS_H
13
+
14
+ #include "mem.h"
15
+
16
+ MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
17
+ {
18
+ assert(val != 0);
19
+ {
20
+ static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
21
+ 30, 22, 20, 15, 25, 17, 4, 8,
22
+ 31, 27, 13, 23, 21, 19, 16, 7,
23
+ 26, 12, 18, 6, 11, 5, 10, 9};
24
+ return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
25
+ }
26
+ }
27
+
28
+ MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
29
+ {
30
+ assert(val != 0);
31
+ # if defined(_MSC_VER)
32
+ # if STATIC_BMI2 == 1
33
+ return (unsigned)_tzcnt_u32(val);
34
+ # else
35
+ if (val != 0) {
36
+ unsigned long r;
37
+ _BitScanForward(&r, val);
38
+ return (unsigned)r;
39
+ } else {
40
+ /* Should not reach this code path */
41
+ __assume(0);
42
+ }
43
+ # endif
44
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
45
+ return (unsigned)__builtin_ctz(val);
46
+ # else
47
+ return ZSTD_countTrailingZeros32_fallback(val);
48
+ # endif
49
+ }
50
+
51
+ MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
52
+ assert(val != 0);
53
+ {
54
+ static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
55
+ 11, 14, 16, 18, 22, 25, 3, 30,
56
+ 8, 12, 20, 28, 15, 17, 24, 7,
57
+ 19, 27, 23, 6, 26, 5, 4, 31};
58
+ val |= val >> 1;
59
+ val |= val >> 2;
60
+ val |= val >> 4;
61
+ val |= val >> 8;
62
+ val |= val >> 16;
63
+ return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
64
+ }
65
+ }
66
+
67
+ MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
68
+ {
69
+ assert(val != 0);
70
+ # if defined(_MSC_VER)
71
+ # if STATIC_BMI2 == 1
72
+ return (unsigned)_lzcnt_u32(val);
73
+ # else
74
+ if (val != 0) {
75
+ unsigned long r;
76
+ _BitScanReverse(&r, val);
77
+ return (unsigned)(31 - r);
78
+ } else {
79
+ /* Should not reach this code path */
80
+ __assume(0);
81
+ }
82
+ # endif
83
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
84
+ return (unsigned)__builtin_clz(val);
85
+ # else
86
+ return ZSTD_countLeadingZeros32_fallback(val);
87
+ # endif
88
+ }
89
+
90
+ MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
91
+ {
92
+ assert(val != 0);
93
+ # if defined(_MSC_VER) && defined(_WIN64)
94
+ # if STATIC_BMI2 == 1
95
+ return (unsigned)_tzcnt_u64(val);
96
+ # else
97
+ if (val != 0) {
98
+ unsigned long r;
99
+ _BitScanForward64(&r, val);
100
+ return (unsigned)r;
101
+ } else {
102
+ /* Should not reach this code path */
103
+ __assume(0);
104
+ }
105
+ # endif
106
+ # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
107
+ return (unsigned)__builtin_ctzll(val);
108
+ # else
109
+ {
110
+ U32 mostSignificantWord = (U32)(val >> 32);
111
+ U32 leastSignificantWord = (U32)val;
112
+ if (leastSignificantWord == 0) {
113
+ return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
114
+ } else {
115
+ return ZSTD_countTrailingZeros32(leastSignificantWord);
116
+ }
117
+ }
118
+ # endif
119
+ }
120
+
121
+ MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
122
+ {
123
+ assert(val != 0);
124
+ # if defined(_MSC_VER) && defined(_WIN64)
125
+ # if STATIC_BMI2 == 1
126
+ return (unsigned)_lzcnt_u64(val);
127
+ # else
128
+ if (val != 0) {
129
+ unsigned long r;
130
+ _BitScanReverse64(&r, val);
131
+ return (unsigned)(63 - r);
132
+ } else {
133
+ /* Should not reach this code path */
134
+ __assume(0);
135
+ }
136
+ # endif
137
+ # elif defined(__GNUC__) && (__GNUC__ >= 4)
138
+ return (unsigned)(__builtin_clzll(val));
139
+ # else
140
+ {
141
+ U32 mostSignificantWord = (U32)(val >> 32);
142
+ U32 leastSignificantWord = (U32)val;
143
+ if (mostSignificantWord == 0) {
144
+ return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
145
+ } else {
146
+ return ZSTD_countLeadingZeros32(mostSignificantWord);
147
+ }
148
+ }
149
+ # endif
150
+ }
151
+
152
+ MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
153
+ {
154
+ if (MEM_isLittleEndian()) {
155
+ if (MEM_64bits()) {
156
+ return ZSTD_countTrailingZeros64((U64)val) >> 3;
157
+ } else {
158
+ return ZSTD_countTrailingZeros32((U32)val) >> 3;
159
+ }
160
+ } else { /* Big Endian CPU */
161
+ if (MEM_64bits()) {
162
+ return ZSTD_countLeadingZeros64((U64)val) >> 3;
163
+ } else {
164
+ return ZSTD_countLeadingZeros32((U32)val) >> 3;
165
+ }
166
+ }
167
+ }
168
+
169
+ MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
170
+ {
171
+ assert(val != 0);
172
+ return 31 - ZSTD_countLeadingZeros32(val);
173
+ }
174
+
175
+ /* ZSTD_rotateRight_*():
176
+ * Rotates a bitfield to the right by "count" bits.
177
+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
178
+ */
179
+ MEM_STATIC
180
+ U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
181
+ assert(count < 64);
182
+ count &= 0x3F; /* for fickle pattern recognition */
183
+ return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
184
+ }
185
+
186
+ MEM_STATIC
187
+ U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
188
+ assert(count < 32);
189
+ count &= 0x1F; /* for fickle pattern recognition */
190
+ return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
191
+ }
192
+
193
+ MEM_STATIC
194
+ U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
195
+ assert(count < 16);
196
+ count &= 0x0F; /* for fickle pattern recognition */
197
+ return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
198
+ }
199
+
200
+ #endif /* ZSTD_BITS_H */
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * bitstream
3
3
  * Part of FSE library
4
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -30,14 +30,15 @@ extern "C" {
30
30
  #include "compiler.h" /* UNLIKELY() */
31
31
  #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
32
32
  #include "error_private.h" /* error codes and messages */
33
+ #include "bits.h" /* ZSTD_highbit32 */
33
34
 
34
35
 
35
36
  /*=========================================
36
37
  * Target specific
37
38
  =========================================*/
38
39
  #ifndef ZSTD_NO_INTRINSICS
39
- # if defined(__BMI__) && defined(__GNUC__)
40
- # include <immintrin.h> /* support for bextr (experimental) */
40
+ # if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
41
+ # include <immintrin.h> /* support for bextr (experimental)/bzhi */
41
42
  # elif defined(__ICCARM__)
42
43
  # include <intrinsics.h>
43
44
  # endif
@@ -89,19 +90,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
89
90
  /*-********************************************
90
91
  * bitStream decoding API (read backward)
91
92
  **********************************************/
93
+ typedef size_t BitContainerType;
92
94
  typedef struct {
93
- size_t bitContainer;
95
+ BitContainerType bitContainer;
94
96
  unsigned bitsConsumed;
95
97
  const char* ptr;
96
98
  const char* start;
97
99
  const char* limitPtr;
98
100
  } BIT_DStream_t;
99
101
 
100
- typedef enum { BIT_DStream_unfinished = 0,
101
- BIT_DStream_endOfBuffer = 1,
102
- BIT_DStream_completed = 2,
103
- BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
104
- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
102
+ typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
103
+ BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
104
+ BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
105
+ BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
106
+ } BIT_DStream_status; /* result of BIT_reloadDStream() */
105
107
 
106
108
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
107
109
  MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
@@ -111,7 +113,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
111
113
 
112
114
  /* Start by invoking BIT_initDStream().
113
115
  * A chunk of the bitStream is then stored into a local register.
114
- * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
116
+ * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
115
117
  * You can then retrieve bitFields stored into the local register, **in reverse order**.
116
118
  * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
117
119
  * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
@@ -132,42 +134,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
132
134
  MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
133
135
  /* faster, but works only if nbBits >= 1 */
134
136
 
135
-
136
-
137
- /*-**************************************************************
138
- * Internal functions
139
- ****************************************************************/
140
- MEM_STATIC unsigned BIT_highbit32 (U32 val)
141
- {
142
- assert(val != 0);
143
- {
144
- # if defined(_MSC_VER) /* Visual */
145
- # if STATIC_BMI2 == 1
146
- return _lzcnt_u32(val) ^ 31;
147
- # else
148
- unsigned long r = 0;
149
- return _BitScanReverse(&r, val) ? (unsigned)r : 0;
150
- # endif
151
- # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
152
- return __builtin_clz (val) ^ 31;
153
- # elif defined(__ICCARM__) /* IAR Intrinsic */
154
- return 31 - __CLZ(val);
155
- # else /* Software version */
156
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
157
- 11, 14, 16, 18, 22, 25, 3, 30,
158
- 8, 12, 20, 28, 15, 17, 24, 7,
159
- 19, 27, 23, 6, 26, 5, 4, 31 };
160
- U32 v = val;
161
- v |= v >> 1;
162
- v |= v >> 2;
163
- v |= v >> 4;
164
- v |= v >> 8;
165
- v |= v >> 16;
166
- return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
167
- # endif
168
- }
169
- }
170
-
171
137
  /*===== Local Constants =====*/
172
138
  static const unsigned BIT_mask[] = {
173
139
  0, 1, 3, 7, 0xF, 0x1F,
@@ -197,6 +163,16 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
197
163
  return 0;
198
164
  }
199
165
 
166
+ FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
167
+ {
168
+ #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
169
+ return _bzhi_u64(bitContainer, nbBits);
170
+ #else
171
+ assert(nbBits < BIT_MASK_SIZE);
172
+ return bitContainer & BIT_mask[nbBits];
173
+ #endif
174
+ }
175
+
200
176
  /*! BIT_addBits() :
201
177
  * can add up to 31 bits into `bitC`.
202
178
  * Note : does not check for register overflow ! */
@@ -206,7 +182,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
206
182
  DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
207
183
  assert(nbBits < BIT_MASK_SIZE);
208
184
  assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
209
- bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
185
+ bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
210
186
  bitC->bitPos += nbBits;
211
187
  }
212
188
 
@@ -285,35 +261,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
285
261
  bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
286
262
  bitD->bitContainer = MEM_readLEST(bitD->ptr);
287
263
  { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
288
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
264
+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
289
265
  if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
290
266
  } else {
291
267
  bitD->ptr = bitD->start;
292
268
  bitD->bitContainer = *(const BYTE*)(bitD->start);
293
269
  switch(srcSize)
294
270
  {
295
- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
296
- /* fall-through */
271
+ case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
272
+ ZSTD_FALLTHROUGH;
297
273
 
298
- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
299
- /* fall-through */
274
+ case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
275
+ ZSTD_FALLTHROUGH;
300
276
 
301
- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
302
- /* fall-through */
277
+ case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
278
+ ZSTD_FALLTHROUGH;
303
279
 
304
- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
305
- /* fall-through */
280
+ case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
281
+ ZSTD_FALLTHROUGH;
306
282
 
307
- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
308
- /* fall-through */
283
+ case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
284
+ ZSTD_FALLTHROUGH;
309
285
 
310
- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
311
- /* fall-through */
286
+ case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
287
+ ZSTD_FALLTHROUGH;
312
288
 
313
289
  default: break;
314
290
  }
315
291
  { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
316
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
292
+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
317
293
  if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
318
294
  }
319
295
  bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
@@ -322,26 +298,25 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
322
298
  return srcSize;
323
299
  }
324
300
 
325
- MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
301
+ FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
326
302
  {
327
303
  return bitContainer >> start;
328
304
  }
329
305
 
330
- MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
306
+ FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
331
307
  {
332
308
  U32 const regMask = sizeof(bitContainer)*8 - 1;
333
309
  /* if start > regMask, bitstream is corrupted, and result is undefined */
334
310
  assert(nbBits < BIT_MASK_SIZE);
335
- return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
336
- }
337
-
338
- MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
339
- {
340
- #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
341
- return _bzhi_u64(bitContainer, nbBits);
311
+ /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
312
+ * than accessing memory. When bmi2 instruction is not present, we consider
313
+ * such cpus old (pre-Haswell, 2013) and their performance is not of that
314
+ * importance.
315
+ */
316
+ #if defined(__x86_64__) || defined(_M_X86)
317
+ return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
342
318
  #else
343
- assert(nbBits < BIT_MASK_SIZE);
344
- return bitContainer & BIT_mask[nbBits];
319
+ return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
345
320
  #endif
346
321
  }
347
322
 
@@ -351,7 +326,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 co
351
326
  * On 32-bits, maxNbBits==24.
352
327
  * On 64-bits, maxNbBits==56.
353
328
  * @return : value extracted */
354
- MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
329
+ FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
355
330
  {
356
331
  /* arbitrate between double-shift and shift+mask */
357
332
  #if 1
@@ -374,7 +349,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
374
349
  return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
375
350
  }
376
351
 
377
- MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
352
+ FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
378
353
  {
379
354
  bitD->bitsConsumed += nbBits;
380
355
  }
@@ -383,7 +358,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
383
358
  * Read (consume) next n bits from local register and update.
384
359
  * Pay attention to not read more than nbBits contained into local register.
385
360
  * @return : extracted value. */
386
- MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
361
+ FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
387
362
  {
388
363
  size_t const value = BIT_lookBits(bitD, nbBits);
389
364
  BIT_skipBits(bitD, nbBits);
@@ -391,7 +366,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n
391
366
  }
392
367
 
393
368
  /*! BIT_readBitsFast() :
394
- * unsafe version; only works only if nbBits >= 1 */
369
+ * unsafe version; only works if nbBits >= 1 */
395
370
  MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
396
371
  {
397
372
  size_t const value = BIT_lookBitsFast(bitD, nbBits);
@@ -400,6 +375,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
400
375
  return value;
401
376
  }
402
377
 
378
+ /*! BIT_reloadDStream_internal() :
379
+ * Simple variant of BIT_reloadDStream(), with two conditions:
380
+ * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
381
+ * 2. look window is valid after shifted down : bitD->ptr >= bitD->start
382
+ */
383
+ MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
384
+ {
385
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
386
+ bitD->ptr -= bitD->bitsConsumed >> 3;
387
+ assert(bitD->ptr >= bitD->start);
388
+ bitD->bitsConsumed &= 7;
389
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
390
+ return BIT_DStream_unfinished;
391
+ }
392
+
403
393
  /*! BIT_reloadDStreamFast() :
404
394
  * Similar to BIT_reloadDStream(), but with two differences:
405
395
  * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
@@ -410,31 +400,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
410
400
  {
411
401
  if (UNLIKELY(bitD->ptr < bitD->limitPtr))
412
402
  return BIT_DStream_overflow;
413
- assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
414
- bitD->ptr -= bitD->bitsConsumed >> 3;
415
- bitD->bitsConsumed &= 7;
416
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
417
- return BIT_DStream_unfinished;
403
+ return BIT_reloadDStream_internal(bitD);
418
404
  }
419
405
 
420
406
  /*! BIT_reloadDStream() :
421
407
  * Refill `bitD` from buffer previously set in BIT_initDStream() .
422
- * This function is safe, it guarantees it will not read beyond src buffer.
408
+ * This function is safe, it guarantees it will not never beyond src buffer.
423
409
  * @return : status of `BIT_DStream_t` internal register.
424
410
  * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
425
- MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
411
+ FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
426
412
  {
427
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
413
+ /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
414
+ if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
415
+ static const BitContainerType zeroFilled = 0;
416
+ bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
417
+ /* overflow detected, erroneous scenario or end of stream: no update */
428
418
  return BIT_DStream_overflow;
419
+ }
420
+
421
+ assert(bitD->ptr >= bitD->start);
429
422
 
430
423
  if (bitD->ptr >= bitD->limitPtr) {
431
- return BIT_reloadDStreamFast(bitD);
424
+ return BIT_reloadDStream_internal(bitD);
432
425
  }
433
426
  if (bitD->ptr == bitD->start) {
427
+ /* reached end of bitStream => no update */
434
428
  if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
435
429
  return BIT_DStream_completed;
436
430
  }
437
- /* start < ptr < limitPtr */
431
+ /* start < ptr < limitPtr => cautious update */
438
432
  { U32 nbBytes = bitD->bitsConsumed >> 3;
439
433
  BIT_DStream_status result = BIT_DStream_unfinished;
440
434
  if (bitD->ptr - nbBytes < bitD->start) {