extzstd 0.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (134) hide show
  1. checksums.yaml +5 -5
  2. data/HISTORY.ja.md +39 -0
  3. data/README.md +38 -56
  4. data/contrib/zstd/CHANGELOG +613 -0
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/CONTRIBUTING.md +406 -0
  7. data/contrib/zstd/COPYING +339 -0
  8. data/contrib/zstd/Makefile +420 -0
  9. data/contrib/zstd/README.md +179 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +292 -0
  12. data/contrib/zstd/lib/BUCK +234 -0
  13. data/contrib/zstd/lib/Makefile +451 -0
  14. data/contrib/zstd/lib/README.md +207 -0
  15. data/contrib/zstd/{common → lib/common}/bitstream.h +187 -138
  16. data/contrib/zstd/lib/common/compiler.h +288 -0
  17. data/contrib/zstd/lib/common/cpu.h +213 -0
  18. data/contrib/zstd/lib/common/debug.c +24 -0
  19. data/contrib/zstd/lib/common/debug.h +107 -0
  20. data/contrib/zstd/lib/common/entropy_common.c +362 -0
  21. data/contrib/zstd/{common → lib/common}/error_private.c +25 -12
  22. data/contrib/zstd/{common → lib/common}/error_private.h +14 -10
  23. data/contrib/zstd/{common → lib/common}/fse.h +173 -92
  24. data/contrib/zstd/{common → lib/common}/fse_decompress.c +149 -85
  25. data/contrib/zstd/lib/common/huf.h +361 -0
  26. data/contrib/zstd/{common → lib/common}/mem.h +115 -59
  27. data/contrib/zstd/lib/common/pool.c +350 -0
  28. data/contrib/zstd/lib/common/pool.h +84 -0
  29. data/contrib/zstd/lib/common/threading.c +122 -0
  30. data/contrib/zstd/lib/common/threading.h +155 -0
  31. data/contrib/zstd/{common → lib/common}/xxhash.c +55 -96
  32. data/contrib/zstd/{common → lib/common}/xxhash.h +23 -47
  33. data/contrib/zstd/lib/common/zstd_common.c +83 -0
  34. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  35. data/contrib/zstd/lib/common/zstd_errors.h +95 -0
  36. data/contrib/zstd/lib/common/zstd_internal.h +478 -0
  37. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +214 -319
  38. data/contrib/zstd/lib/compress/hist.c +181 -0
  39. data/contrib/zstd/lib/compress/hist.h +75 -0
  40. data/contrib/zstd/lib/compress/huf_compress.c +913 -0
  41. data/contrib/zstd/lib/compress/zstd_compress.c +5208 -0
  42. data/contrib/zstd/lib/compress/zstd_compress_internal.h +1203 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  45. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +433 -0
  46. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  47. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +849 -0
  48. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  49. data/contrib/zstd/lib/compress/zstd_cwksp.h +561 -0
  50. data/contrib/zstd/lib/compress/zstd_double_fast.c +521 -0
  51. data/contrib/zstd/lib/compress/zstd_double_fast.h +38 -0
  52. data/contrib/zstd/lib/compress/zstd_fast.c +496 -0
  53. data/contrib/zstd/lib/compress/zstd_fast.h +37 -0
  54. data/contrib/zstd/lib/compress/zstd_lazy.c +1412 -0
  55. data/contrib/zstd/lib/compress/zstd_lazy.h +87 -0
  56. data/contrib/zstd/lib/compress/zstd_ldm.c +660 -0
  57. data/contrib/zstd/lib/compress/zstd_ldm.h +116 -0
  58. data/contrib/zstd/lib/compress/zstd_opt.c +1345 -0
  59. data/contrib/zstd/lib/compress/zstd_opt.h +56 -0
  60. data/contrib/zstd/lib/compress/zstdmt_compress.c +1811 -0
  61. data/contrib/zstd/lib/compress/zstdmt_compress.h +110 -0
  62. data/contrib/zstd/lib/decompress/huf_decompress.c +1350 -0
  63. data/contrib/zstd/lib/decompress/zstd_ddict.c +244 -0
  64. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  65. data/contrib/zstd/lib/decompress/zstd_decompress.c +1930 -0
  66. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1540 -0
  67. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +62 -0
  68. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +190 -0
  69. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +68 -45
  70. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  71. data/contrib/zstd/lib/deprecated/zbuff_compress.c +147 -0
  72. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +75 -0
  73. data/contrib/zstd/lib/dictBuilder/cover.c +1245 -0
  74. data/contrib/zstd/lib/dictBuilder/cover.h +157 -0
  75. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +3 -3
  76. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  77. data/contrib/zstd/lib/dictBuilder/fastcover.c +758 -0
  78. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +318 -194
  79. data/contrib/zstd/lib/dictBuilder/zdict.h +305 -0
  80. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +171 -15
  81. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +191 -124
  82. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +19 -5
  83. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +125 -125
  84. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +19 -5
  85. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +125 -124
  86. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +20 -6
  87. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +151 -299
  88. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +19 -5
  89. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +237 -243
  90. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +19 -6
  91. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +130 -143
  92. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +18 -5
  93. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +158 -157
  94. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +19 -5
  95. data/contrib/zstd/lib/libzstd.pc.in +15 -0
  96. data/contrib/zstd/lib/zstd.h +2391 -0
  97. data/ext/depend +2 -0
  98. data/ext/extconf.rb +15 -6
  99. data/ext/extzstd.c +76 -145
  100. data/ext/extzstd.h +80 -31
  101. data/ext/extzstd_stream.c +417 -142
  102. data/ext/libzstd_conf.h +8 -0
  103. data/ext/zstd_common.c +10 -7
  104. data/ext/zstd_compress.c +14 -5
  105. data/ext/zstd_decompress.c +5 -4
  106. data/ext/zstd_dictbuilder.c +9 -4
  107. data/ext/zstd_dictbuilder_fastcover.c +3 -0
  108. data/ext/zstd_legacy_v01.c +3 -1
  109. data/ext/zstd_legacy_v02.c +3 -1
  110. data/ext/zstd_legacy_v03.c +3 -1
  111. data/ext/zstd_legacy_v04.c +3 -1
  112. data/ext/zstd_legacy_v05.c +3 -1
  113. data/ext/zstd_legacy_v06.c +3 -1
  114. data/ext/zstd_legacy_v07.c +3 -1
  115. data/gemstub.rb +10 -24
  116. data/lib/extzstd.rb +64 -179
  117. data/lib/extzstd/version.rb +6 -1
  118. data/test/test_basic.rb +9 -6
  119. metadata +113 -57
  120. data/HISTORY.ja +0 -5
  121. data/contrib/zstd/common/entropy_common.c +0 -225
  122. data/contrib/zstd/common/huf.h +0 -228
  123. data/contrib/zstd/common/zstd_common.c +0 -83
  124. data/contrib/zstd/common/zstd_errors.h +0 -60
  125. data/contrib/zstd/common/zstd_internal.h +0 -267
  126. data/contrib/zstd/compress/huf_compress.c +0 -533
  127. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  128. data/contrib/zstd/compress/zstd_compress.c +0 -3264
  129. data/contrib/zstd/compress/zstd_opt.h +0 -900
  130. data/contrib/zstd/decompress/huf_decompress.c +0 -883
  131. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  132. data/contrib/zstd/decompress/zstd_decompress.c +0 -1842
  133. data/contrib/zstd/dictBuilder/zdict.h +0 -111
  134. data/contrib/zstd/zstd.h +0 -640
@@ -1,36 +1,15 @@
1
1
  /* ******************************************************************
2
- bitstream
3
- Part of FSE library
4
- header file (to include)
5
- Copyright (C) 2013-2016, Yann Collet.
6
-
7
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8
-
9
- Redistribution and use in source and binary forms, with or without
10
- modification, are permitted provided that the following conditions are
11
- met:
12
-
13
- * Redistributions of source code must retain the above copyright
14
- notice, this list of conditions and the following disclaimer.
15
- * Redistributions in binary form must reproduce the above
16
- copyright notice, this list of conditions and the following disclaimer
17
- in the documentation and/or other materials provided with the
18
- distribution.
19
-
20
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
-
32
- You can contact the author at :
33
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * bitstream
3
+ * Part of FSE library
4
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
34
13
  ****************************************************************** */
35
14
  #ifndef BITSTREAM_H_MODULE
36
15
  #define BITSTREAM_H_MODULE
@@ -38,8 +17,6 @@
38
17
  #if defined (__cplusplus)
39
18
  extern "C" {
40
19
  #endif
41
-
42
-
43
20
  /*
44
21
  * This API consists of small unitary functions, which must be inlined for best performance.
45
22
  * Since link-time-optimization is not available for all compilers,
@@ -50,28 +27,37 @@ extern "C" {
50
27
  * Dependencies
51
28
  ******************************************/
52
29
  #include "mem.h" /* unaligned access routines */
30
+ #include "compiler.h" /* UNLIKELY() */
31
+ #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
53
32
  #include "error_private.h" /* error codes and messages */
54
33
 
55
34
 
56
35
  /*=========================================
57
36
  * Target specific
58
37
  =========================================*/
59
- #if defined(__BMI__) && defined(__GNUC__)
60
- # include <immintrin.h> /* support for bextr (experimental) */
38
+ #ifndef ZSTD_NO_INTRINSICS
39
+ # if defined(__BMI__) && defined(__GNUC__)
40
+ # include <immintrin.h> /* support for bextr (experimental) */
41
+ # elif defined(__ICCARM__)
42
+ # include <intrinsics.h>
43
+ # endif
61
44
  #endif
62
45
 
46
+ #define STREAM_ACCUMULATOR_MIN_32 25
47
+ #define STREAM_ACCUMULATOR_MIN_64 57
48
+ #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
49
+
63
50
 
64
51
  /*-******************************************
65
52
  * bitStream encoding API (write forward)
66
53
  ********************************************/
67
54
  /* bitStream can mix input from multiple sources.
68
- * A critical property of these streams is that they encode and decode in **reverse** direction.
69
- * So the first bit sequence you add will be the last to be read, like a LIFO stack.
70
- */
71
- typedef struct
72
- {
55
+ * A critical property of these streams is that they encode and decode in **reverse** direction.
56
+ * So the first bit sequence you add will be the last to be read, like a LIFO stack.
57
+ */
58
+ typedef struct {
73
59
  size_t bitContainer;
74
- int bitPos;
60
+ unsigned bitPos;
75
61
  char* startPtr;
76
62
  char* ptr;
77
63
  char* endPtr;
@@ -103,12 +89,12 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
103
89
  /*-********************************************
104
90
  * bitStream decoding API (read backward)
105
91
  **********************************************/
106
- typedef struct
107
- {
92
+ typedef struct {
108
93
  size_t bitContainer;
109
94
  unsigned bitsConsumed;
110
95
  const char* ptr;
111
96
  const char* start;
97
+ const char* limitPtr;
112
98
  } BIT_DStream_t;
113
99
 
114
100
  typedef enum { BIT_DStream_unfinished = 0,
@@ -151,168 +137,212 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
151
137
  /*-**************************************************************
152
138
  * Internal functions
153
139
  ****************************************************************/
154
- MEM_STATIC unsigned BIT_highbit32 (register U32 val)
140
+ MEM_STATIC unsigned BIT_highbit32 (U32 val)
155
141
  {
142
+ assert(val != 0);
143
+ {
156
144
  # if defined(_MSC_VER) /* Visual */
157
- unsigned long r=0;
158
- _BitScanReverse ( &r, val );
159
- return (unsigned) r;
145
+ # if STATIC_BMI2 == 1
146
+ return _lzcnt_u32(val) ^ 31;
147
+ # else
148
+ unsigned long r = 0;
149
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
150
+ # endif
160
151
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
161
- return 31 - __builtin_clz (val);
152
+ return __builtin_clz (val) ^ 31;
153
+ # elif defined(__ICCARM__) /* IAR Intrinsic */
154
+ return 31 - __CLZ(val);
162
155
  # else /* Software version */
163
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
164
- U32 v = val;
165
- v |= v >> 1;
166
- v |= v >> 2;
167
- v |= v >> 4;
168
- v |= v >> 8;
169
- v |= v >> 16;
170
- return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
156
+ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
157
+ 11, 14, 16, 18, 22, 25, 3, 30,
158
+ 8, 12, 20, 28, 15, 17, 24, 7,
159
+ 19, 27, 23, 6, 26, 5, 4, 31 };
160
+ U32 v = val;
161
+ v |= v >> 1;
162
+ v |= v >> 2;
163
+ v |= v >> 4;
164
+ v |= v >> 8;
165
+ v |= v >> 16;
166
+ return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
171
167
  # endif
168
+ }
172
169
  }
173
170
 
174
171
  /*===== Local Constants =====*/
175
- static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */
176
-
172
+ static const unsigned BIT_mask[] = {
173
+ 0, 1, 3, 7, 0xF, 0x1F,
174
+ 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
175
+ 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
176
+ 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
177
+ 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
178
+ 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
179
+ #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
177
180
 
178
181
  /*-**************************************************************
179
182
  * bitStream encoding
180
183
  ****************************************************************/
181
184
  /*! BIT_initCStream() :
182
- * `dstCapacity` must be > sizeof(void*)
185
+ * `dstCapacity` must be > sizeof(size_t)
183
186
  * @return : 0 if success,
184
- otherwise an error code (can be tested using ERR_isError() ) */
185
- MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity)
187
+ * otherwise an error code (can be tested using ERR_isError()) */
188
+ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
189
+ void* startPtr, size_t dstCapacity)
186
190
  {
187
191
  bitC->bitContainer = 0;
188
192
  bitC->bitPos = 0;
189
193
  bitC->startPtr = (char*)startPtr;
190
194
  bitC->ptr = bitC->startPtr;
191
- bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
192
- if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
195
+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
196
+ if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
193
197
  return 0;
194
198
  }
195
199
 
196
200
  /*! BIT_addBits() :
197
- can add up to 26 bits into `bitC`.
198
- Does not check for register overflow ! */
199
- MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
201
+ * can add up to 31 bits into `bitC`.
202
+ * Note : does not check for register overflow ! */
203
+ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
204
+ size_t value, unsigned nbBits)
200
205
  {
206
+ DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
207
+ assert(nbBits < BIT_MASK_SIZE);
208
+ assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
201
209
  bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
202
210
  bitC->bitPos += nbBits;
203
211
  }
204
212
 
205
213
  /*! BIT_addBitsFast() :
206
- * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
207
- MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
214
+ * works only if `value` is _clean_,
215
+ * meaning all high bits above nbBits are 0 */
216
+ MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
217
+ size_t value, unsigned nbBits)
208
218
  {
219
+ assert((value>>nbBits) == 0);
220
+ assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
209
221
  bitC->bitContainer |= value << bitC->bitPos;
210
222
  bitC->bitPos += nbBits;
211
223
  }
212
224
 
213
225
  /*! BIT_flushBitsFast() :
226
+ * assumption : bitContainer has not overflowed
214
227
  * unsafe version; does not check buffer overflow */
215
228
  MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
216
229
  {
217
230
  size_t const nbBytes = bitC->bitPos >> 3;
231
+ assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
232
+ assert(bitC->ptr <= bitC->endPtr);
218
233
  MEM_writeLEST(bitC->ptr, bitC->bitContainer);
219
234
  bitC->ptr += nbBytes;
220
235
  bitC->bitPos &= 7;
221
- bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
236
+ bitC->bitContainer >>= nbBytes*8;
222
237
  }
223
238
 
224
239
  /*! BIT_flushBits() :
240
+ * assumption : bitContainer has not overflowed
225
241
  * safe version; check for buffer overflow, and prevents it.
226
- * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
242
+ * note : does not signal buffer overflow.
243
+ * overflow will be revealed later on using BIT_closeCStream() */
227
244
  MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
228
245
  {
229
246
  size_t const nbBytes = bitC->bitPos >> 3;
247
+ assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
248
+ assert(bitC->ptr <= bitC->endPtr);
230
249
  MEM_writeLEST(bitC->ptr, bitC->bitContainer);
231
250
  bitC->ptr += nbBytes;
232
251
  if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
233
252
  bitC->bitPos &= 7;
234
- bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
253
+ bitC->bitContainer >>= nbBytes*8;
235
254
  }
236
255
 
237
256
  /*! BIT_closeCStream() :
238
257
  * @return : size of CStream, in bytes,
239
- or 0 if it could not fit into dstBuffer */
258
+ * or 0 if it could not fit into dstBuffer */
240
259
  MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
241
260
  {
242
261
  BIT_addBitsFast(bitC, 1, 1); /* endMark */
243
262
  BIT_flushBits(bitC);
244
-
245
- if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */
246
-
263
+ if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
247
264
  return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
248
265
  }
249
266
 
250
267
 
251
268
  /*-********************************************************
252
- * bitStream decoding
269
+ * bitStream decoding
253
270
  **********************************************************/
254
271
  /*! BIT_initDStream() :
255
- * Initialize a BIT_DStream_t.
256
- * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
257
- * `srcSize` must be the *exact* size of the bitStream, in bytes.
258
- * @return : size of stream (== srcSize) or an errorCode if a problem is detected
259
- */
272
+ * Initialize a BIT_DStream_t.
273
+ * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
274
+ * `srcSize` must be the *exact* size of the bitStream, in bytes.
275
+ * @return : size of stream (== srcSize), or an errorCode if a problem is detected
276
+ */
260
277
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
261
278
  {
262
- if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
279
+ if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
280
+
281
+ bitD->start = (const char*)srcBuffer;
282
+ bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
263
283
 
264
284
  if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
265
- bitD->start = (const char*)srcBuffer;
266
285
  bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
267
286
  bitD->bitContainer = MEM_readLEST(bitD->ptr);
268
287
  { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
269
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
288
+ bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
270
289
  if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
271
290
  } else {
272
- bitD->start = (const char*)srcBuffer;
273
291
  bitD->ptr = bitD->start;
274
292
  bitD->bitContainer = *(const BYTE*)(bitD->start);
275
293
  switch(srcSize)
276
294
  {
277
- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
278
- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
279
- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
280
- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
281
- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
282
- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
283
- default:;
295
+ case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
296
+ /* fall-through */
297
+
298
+ case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
299
+ /* fall-through */
300
+
301
+ case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
302
+ /* fall-through */
303
+
304
+ case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
305
+ /* fall-through */
306
+
307
+ case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
308
+ /* fall-through */
309
+
310
+ case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
311
+ /* fall-through */
312
+
313
+ default: break;
314
+ }
315
+ { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
316
+ bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
317
+ if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
284
318
  }
285
- { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
286
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
287
- if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
288
319
  bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
289
320
  }
290
321
 
291
322
  return srcSize;
292
323
  }
293
324
 
294
- MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
325
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
295
326
  {
296
327
  return bitContainer >> start;
297
328
  }
298
329
 
299
- MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
330
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
300
331
  {
301
- #if defined(__BMI__) && defined(__GNUC__) /* experimental */
302
- # if defined(__x86_64__)
303
- if (sizeof(bitContainer)==8)
304
- return _bextr_u64(bitContainer, start, nbBits);
305
- else
306
- # endif
307
- return _bextr_u32(bitContainer, start, nbBits);
308
- #else
309
- return (bitContainer >> start) & BIT_mask[nbBits];
310
- #endif
332
+ U32 const regMask = sizeof(bitContainer)*8 - 1;
333
+ /* if start > regMask, bitstream is corrupted, and result is undefined */
334
+ assert(nbBits < BIT_MASK_SIZE);
335
+ return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
311
336
  }
312
337
 
313
- MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
338
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
314
339
  {
340
+ #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
341
+ return _bzhi_u64(bitContainer, nbBits);
342
+ #else
343
+ assert(nbBits < BIT_MASK_SIZE);
315
344
  return bitContainer & BIT_mask[nbBits];
345
+ #endif
316
346
  }
317
347
 
318
348
  /*! BIT_lookBits() :
@@ -320,27 +350,31 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
320
350
  * local register is not modified.
321
351
  * On 32-bits, maxNbBits==24.
322
352
  * On 64-bits, maxNbBits==56.
323
- * @return : value extracted
324
- */
325
- MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
353
+ * @return : value extracted */
354
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
326
355
  {
327
- #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
356
+ /* arbitrate between double-shift and shift+mask */
357
+ #if 1
358
+ /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
359
+ * bitstream is likely corrupted, and result is undefined */
328
360
  return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
329
361
  #else
330
- U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
331
- return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
362
+ /* this code path is slower on my os-x laptop */
363
+ U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
364
+ return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
332
365
  #endif
333
366
  }
334
367
 
335
368
  /*! BIT_lookBitsFast() :
336
- * unsafe version; only works only if nbBits >= 1 */
369
+ * unsafe version; only works if nbBits >= 1 */
337
370
  MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
338
371
  {
339
- U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
340
- return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
372
+ U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
373
+ assert(nbBits >= 1);
374
+ return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
341
375
  }
342
376
 
343
- MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
377
+ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
344
378
  {
345
379
  bitD->bitsConsumed += nbBits;
346
380
  }
@@ -348,9 +382,8 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
348
382
  /*! BIT_readBits() :
349
383
  * Read (consume) next n bits from local register and update.
350
384
  * Take care not to read more bits than the local register contains.
351
- * @return : extracted value.
352
- */
353
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
385
+ * @return : extracted value. */
386
+ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
354
387
  {
355
388
  size_t const value = BIT_lookBits(bitD, nbBits);
356
389
  BIT_skipBits(bitD, nbBits);
@@ -358,34 +391,50 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
358
391
  }
359
392
 
360
393
  /*! BIT_readBitsFast() :
361
- * unsafe version; only works only if nbBits >= 1 */
362
- MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
394
+ * unsafe version; only works if nbBits >= 1 */
395
+ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
363
396
  {
364
397
  size_t const value = BIT_lookBitsFast(bitD, nbBits);
398
+ assert(nbBits >= 1);
365
399
  BIT_skipBits(bitD, nbBits);
366
400
  return value;
367
401
  }
368
402
 
403
+ /*! BIT_reloadDStreamFast() :
404
+ * Similar to BIT_reloadDStream(), but with two differences:
405
+ * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
406
+ * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
407
+ * point you must use BIT_reloadDStream() to reload.
408
+ */
409
+ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
410
+ {
411
+ if (UNLIKELY(bitD->ptr < bitD->limitPtr))
412
+ return BIT_DStream_overflow;
413
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
414
+ bitD->ptr -= bitD->bitsConsumed >> 3;
415
+ bitD->bitsConsumed &= 7;
416
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
417
+ return BIT_DStream_unfinished;
418
+ }
419
+
369
420
  /*! BIT_reloadDStream() :
370
- * Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ).
371
- * This function is safe, it guarantees it will not read beyond src buffer.
372
- * @return : status of `BIT_DStream_t` internal register.
373
- if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
421
+ * Refill `bitD` from buffer previously set in BIT_initDStream() .
422
+ * This function is safe, it guarantees it will not read beyond src buffer.
423
+ * @return : status of `BIT_DStream_t` internal register.
424
+ * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
374
425
  MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
375
426
  {
376
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should not happen => corruption detected */
377
- return BIT_DStream_overflow;
427
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
428
+ return BIT_DStream_overflow;
378
429
 
379
- if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
380
- bitD->ptr -= bitD->bitsConsumed >> 3;
381
- bitD->bitsConsumed &= 7;
382
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
383
- return BIT_DStream_unfinished;
430
+ if (bitD->ptr >= bitD->limitPtr) {
431
+ return BIT_reloadDStreamFast(bitD);
384
432
  }
385
433
  if (bitD->ptr == bitD->start) {
386
434
  if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
387
435
  return BIT_DStream_completed;
388
436
  }
437
+ /* start < ptr < limitPtr */
389
438
  { U32 nbBytes = bitD->bitsConsumed >> 3;
390
439
  BIT_DStream_status result = BIT_DStream_unfinished;
391
440
  if (bitD->ptr - nbBytes < bitD->start) {
@@ -394,14 +443,14 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
394
443
  }
395
444
  bitD->ptr -= nbBytes;
396
445
  bitD->bitsConsumed -= nbBytes*8;
397
- bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
446
+ bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
398
447
  return result;
399
448
  }
400
449
  }
401
450
 
402
451
  /*! BIT_endOfDStream() :
403
- * @return Tells if DStream has exactly reached its end (all bits consumed).
404
- */
452
+ * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
453
+ */
405
454
  MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
406
455
  {
407
456
  return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));