extzstd 0.3.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
@@ -0,0 +1,200 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef ZSTD_BITS_H
|
12
|
+
#define ZSTD_BITS_H
|
13
|
+
|
14
|
+
#include "mem.h"
|
15
|
+
|
16
|
+
/* ZSTD_countTrailingZeros32_fallback() :
 * Portable (no intrinsic) count of the trailing zero bits of a 32-bit value,
 * implemented as a De Bruijn multiply followed by a table lookup.
 * @val : must be non-zero (enforced by assert()).
 * @return : the entry of DeBruijnBytePos selected by the lowest set bit of @val. */
MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
{
    assert(val != 0);
    {
        static const U32 DeBruijnBytePos[32] = { 0,  1, 28,  2, 29, 14, 24,  3,
                                                30, 22, 20, 15, 25, 17,  4,  8,
                                                31, 27, 13, 23, 21, 19, 16,  7,
                                                26, 12, 18,  6, 11,  5, 10,  9};
        /* Isolate the lowest set bit using unsigned arithmetic only.
         * Negating through a signed cast, -(S32)val, overflows (undefined
         * behavior) when val == 0x80000000; unsigned subtraction wraps
         * and yields the same bit pattern for every input. */
        U32 const lowestSetBit = val & (U32)(0U - val);
        return DeBruijnBytePos[((U32)(lowestSetBit * 0x077CB531U)) >> 27];
    }
}
|
27
|
+
|
28
|
+
/* ZSTD_countTrailingZeros32() :
 * Count of trailing zero bits of a 32-bit value.
 * The implementation is chosen at compile time : MSVC intrinsics,
 * the GCC-compatible builtin, or the software fallback.
 * @val : must be non-zero (enforced by assert()). */
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
    assert(val != 0);
#if defined(_MSC_VER)
#  if STATIC_BMI2 == 1
    return (unsigned)_tzcnt_u32(val);
#  else
    if (val == 0) {
        /* Should not reach this code path */
        __assume(0);
    } else {
        unsigned long bitIndex;
        _BitScanForward(&bitIndex, val);
        return (unsigned)bitIndex;
    }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)__builtin_ctz(val);
#else
    return ZSTD_countTrailingZeros32_fallback(val);
#endif
}
|
50
|
+
|
51
|
+
/* ZSTD_countLeadingZeros32_fallback() :
 * Portable (no intrinsic) count of the leading zero bits of a 32-bit value.
 * Every bit below the highest set bit is first turned on, then a De Bruijn
 * multiply + table lookup gives the value that is subtracted from 31.
 * @val : must be non-zero (enforced by assert()). */
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
{
    assert(val != 0);
    {
        static const U32 DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
                                            11, 14, 16, 18, 22, 25,  3, 30,
                                             8, 12, 20, 28, 15, 17, 24,  7,
                                            19, 27, 23,  6, 26,  5,  4, 31};
        U32 smeared = val;
        unsigned shift;
        /* propagate the highest set bit downwards : shifts of 1, 2, 4, 8, 16 */
        for (shift = 1; shift <= 16; shift <<= 1) {
            smeared |= smeared >> shift;
        }
        return 31 - DeBruijnClz[(smeared * 0x07C4ACDDU) >> 27];
    }
}
|
66
|
+
|
67
|
+
/* ZSTD_countLeadingZeros32() :
 * Count of leading zero bits of a 32-bit value.
 * The implementation is chosen at compile time : MSVC intrinsics,
 * the GCC-compatible builtin, or the software fallback.
 * @val : must be non-zero (enforced by assert()). */
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
{
    assert(val != 0);
#if defined(_MSC_VER)
#  if STATIC_BMI2 == 1
    return (unsigned)_lzcnt_u32(val);
#  else
    if (val == 0) {
        /* Should not reach this code path */
        __assume(0);
    } else {
        unsigned long bitIndex;
        _BitScanReverse(&bitIndex, val);
        /* _BitScanReverse stores a bit index; convert it to a zero count */
        return (unsigned)(31 - bitIndex);
    }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)__builtin_clz(val);
#else
    return ZSTD_countLeadingZeros32_fallback(val);
#endif
}
|
89
|
+
|
90
|
+
/* ZSTD_countTrailingZeros64() :
 * Count of trailing zero bits of a 64-bit value.
 * Uses 64-bit MSVC intrinsics or the GCC-compatible builtin when the
 * preprocessor conditions below allow it; otherwise the value is split
 * into two 32-bit halves handled by ZSTD_countTrailingZeros32().
 * @val : must be non-zero (enforced by assert()). */
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
    assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
#  if STATIC_BMI2 == 1
    return (unsigned)_tzcnt_u64(val);
#  else
    if (val == 0) {
        /* Should not reach this code path */
        __assume(0);
    } else {
        unsigned long bitIndex;
        _BitScanForward64(&bitIndex, val);
        return (unsigned)bitIndex;
    }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
    return (unsigned)__builtin_ctzll(val);
#else
    {   U32 const upperHalf = (U32)(val >> 32);
        U32 const lowerHalf = (U32)val;
        if (lowerHalf != 0) {
            return ZSTD_countTrailingZeros32(lowerHalf);
        }
        /* low half is all zeros : the first set bit lies in the upper half */
        return 32 + ZSTD_countTrailingZeros32(upperHalf);
    }
#endif
}
|
120
|
+
|
121
|
+
/* ZSTD_countLeadingZeros64() :
 * Count of leading zero bits of a 64-bit value.
 * Uses 64-bit MSVC intrinsics or the GCC-compatible builtin when the
 * preprocessor conditions below allow it; otherwise the value is split
 * into two 32-bit halves handled by ZSTD_countLeadingZeros32().
 * @val : must be non-zero (enforced by assert()). */
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
{
    assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
#  if STATIC_BMI2 == 1
    return (unsigned)_lzcnt_u64(val);
#  else
    if (val == 0) {
        /* Should not reach this code path */
        __assume(0);
    } else {
        unsigned long bitIndex;
        _BitScanReverse64(&bitIndex, val);
        /* _BitScanReverse64 stores a bit index; convert it to a zero count */
        return (unsigned)(63 - bitIndex);
    }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)(__builtin_clzll(val));
#else
    {   U32 const upperHalf = (U32)(val >> 32);
        U32 const lowerHalf = (U32)val;
        if (upperHalf != 0) {
            return ZSTD_countLeadingZeros32(upperHalf);
        }
        /* upper half is all zeros : the first set bit lies in the low half */
        return 32 + ZSTD_countLeadingZeros32(lowerHalf);
    }
#endif
}
|
151
|
+
|
152
|
+
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
|
153
|
+
{
|
154
|
+
if (MEM_isLittleEndian()) {
|
155
|
+
if (MEM_64bits()) {
|
156
|
+
return ZSTD_countTrailingZeros64((U64)val) >> 3;
|
157
|
+
} else {
|
158
|
+
return ZSTD_countTrailingZeros32((U32)val) >> 3;
|
159
|
+
}
|
160
|
+
} else { /* Big Endian CPU */
|
161
|
+
if (MEM_64bits()) {
|
162
|
+
return ZSTD_countLeadingZeros64((U64)val) >> 3;
|
163
|
+
} else {
|
164
|
+
return ZSTD_countLeadingZeros32((U32)val) >> 3;
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
/* ZSTD_highbit32() :
 * @return : 31 minus the number of leading zero bits of @val,
 *           i.e. the index of its highest set bit.
 * @val : must be non-zero (enforced by assert()). */
MEM_STATIC unsigned ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
{
    assert(val != 0);
    {   unsigned const leadingZeros = ZSTD_countLeadingZeros32(val);
        return 31 - leadingZeros;
    }
}
|
174
|
+
|
175
|
+
/* ZSTD_rotateRight_*():
|
176
|
+
* Rotates a bitfield to the right by "count" bits.
|
177
|
+
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
|
178
|
+
*/
|
179
|
+
MEM_STATIC
|
180
|
+
U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
|
181
|
+
assert(count < 64);
|
182
|
+
count &= 0x3F; /* for fickle pattern recognition */
|
183
|
+
return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
|
184
|
+
}
|
185
|
+
|
186
|
+
MEM_STATIC
|
187
|
+
U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
|
188
|
+
assert(count < 32);
|
189
|
+
count &= 0x1F; /* for fickle pattern recognition */
|
190
|
+
return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
|
191
|
+
}
|
192
|
+
|
193
|
+
MEM_STATIC
|
194
|
+
U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
|
195
|
+
assert(count < 16);
|
196
|
+
count &= 0x0F; /* for fickle pattern recognition */
|
197
|
+
return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
|
198
|
+
}
|
199
|
+
|
200
|
+
#endif /* ZSTD_BITS_H */
|
@@ -1,7 +1,7 @@
|
|
1
1
|
/* ******************************************************************
|
2
2
|
* bitstream
|
3
3
|
* Part of FSE library
|
4
|
-
* Copyright (c)
|
4
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
5
5
|
*
|
6
6
|
* You can contact the author at :
|
7
7
|
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
@@ -30,14 +30,15 @@ extern "C" {
|
|
30
30
|
#include "compiler.h" /* UNLIKELY() */
|
31
31
|
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
|
32
32
|
#include "error_private.h" /* error codes and messages */
|
33
|
+
#include "bits.h" /* ZSTD_highbit32 */
|
33
34
|
|
34
35
|
|
35
36
|
/*=========================================
|
36
37
|
* Target specific
|
37
38
|
=========================================*/
|
38
39
|
#ifndef ZSTD_NO_INTRINSICS
|
39
|
-
# if defined(__BMI__) && defined(__GNUC__)
|
40
|
-
# include <immintrin.h> /* support for bextr (experimental) */
|
40
|
+
# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
|
41
|
+
# include <immintrin.h> /* support for bextr (experimental)/bzhi */
|
41
42
|
# elif defined(__ICCARM__)
|
42
43
|
# include <intrinsics.h>
|
43
44
|
# endif
|
@@ -89,19 +90,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
|
|
89
90
|
/*-********************************************
|
90
91
|
* bitStream decoding API (read backward)
|
91
92
|
**********************************************/
|
93
|
+
typedef size_t BitContainerType;
|
92
94
|
typedef struct {
|
93
|
-
|
95
|
+
BitContainerType bitContainer;
|
94
96
|
unsigned bitsConsumed;
|
95
97
|
const char* ptr;
|
96
98
|
const char* start;
|
97
99
|
const char* limitPtr;
|
98
100
|
} BIT_DStream_t;
|
99
101
|
|
100
|
-
typedef enum { BIT_DStream_unfinished = 0,
|
101
|
-
BIT_DStream_endOfBuffer = 1,
|
102
|
-
BIT_DStream_completed = 2,
|
103
|
-
BIT_DStream_overflow = 3
|
104
|
-
|
102
|
+
/* result of BIT_reloadDStream() */
typedef enum {
    BIT_DStream_unfinished  = 0,  /* fully refilled */
    BIT_DStream_endOfBuffer = 1,  /* still some bits left in bitstream */
    BIT_DStream_completed   = 2,  /* bitstream entirely consumed, bit-exact */
    BIT_DStream_overflow    = 3   /* user requested more bits than present in bitstream */
} BIT_DStream_status;
|
105
107
|
|
106
108
|
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
|
107
109
|
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
|
@@ -111,7 +113,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
|
|
111
113
|
|
112
114
|
/* Start by invoking BIT_initDStream().
|
113
115
|
* A chunk of the bitStream is then stored into a local register.
|
114
|
-
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (
|
116
|
+
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
|
115
117
|
* You can then retrieve bitFields stored into the local register, **in reverse order**.
|
116
118
|
* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
|
117
119
|
* A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
|
@@ -132,42 +134,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
|
|
132
134
|
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
|
133
135
|
/* faster, but works only if nbBits >= 1 */
|
134
136
|
|
135
|
-
|
136
|
-
|
137
|
-
/*-**************************************************************
|
138
|
-
* Internal functions
|
139
|
-
****************************************************************/
|
140
|
-
MEM_STATIC unsigned BIT_highbit32 (U32 val)
|
141
|
-
{
|
142
|
-
assert(val != 0);
|
143
|
-
{
|
144
|
-
# if defined(_MSC_VER) /* Visual */
|
145
|
-
# if STATIC_BMI2 == 1
|
146
|
-
return _lzcnt_u32(val) ^ 31;
|
147
|
-
# else
|
148
|
-
unsigned long r = 0;
|
149
|
-
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
150
|
-
# endif
|
151
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
152
|
-
return __builtin_clz (val) ^ 31;
|
153
|
-
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
154
|
-
return 31 - __CLZ(val);
|
155
|
-
# else /* Software version */
|
156
|
-
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
|
157
|
-
11, 14, 16, 18, 22, 25, 3, 30,
|
158
|
-
8, 12, 20, 28, 15, 17, 24, 7,
|
159
|
-
19, 27, 23, 6, 26, 5, 4, 31 };
|
160
|
-
U32 v = val;
|
161
|
-
v |= v >> 1;
|
162
|
-
v |= v >> 2;
|
163
|
-
v |= v >> 4;
|
164
|
-
v |= v >> 8;
|
165
|
-
v |= v >> 16;
|
166
|
-
return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
|
167
|
-
# endif
|
168
|
-
}
|
169
|
-
}
|
170
|
-
|
171
137
|
/*===== Local Constants =====*/
|
172
138
|
static const unsigned BIT_mask[] = {
|
173
139
|
0, 1, 3, 7, 0xF, 0x1F,
|
@@ -197,6 +163,16 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
|
|
197
163
|
return 0;
|
198
164
|
}
|
199
165
|
|
166
|
+
/*! BIT_getLowerBits() :
 *  @return : the @nbBits lowest bits of @bitContainer.
 *  Uses the bzhi intrinsic when STATIC_BMI2 is enabled and intrinsics are
 *  allowed; otherwise masks with the BIT_mask[] table, in which case
 *  @nbBits must be < BIT_MASK_SIZE (enforced by assert()). */
FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
    return _bzhi_u64(bitContainer, nbBits);
#else
    assert(nbBits < BIT_MASK_SIZE);
    {   size_t const lowMask = BIT_mask[nbBits];
        return bitContainer & lowMask;
    }
#endif
}
|
175
|
+
|
200
176
|
/*! BIT_addBits() :
|
201
177
|
* can add up to 31 bits into `bitC`.
|
202
178
|
* Note : does not check for register overflow ! */
|
@@ -206,7 +182,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
|
206
182
|
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
|
207
183
|
assert(nbBits < BIT_MASK_SIZE);
|
208
184
|
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
209
|
-
bitC->bitContainer |= (value
|
185
|
+
bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
|
210
186
|
bitC->bitPos += nbBits;
|
211
187
|
}
|
212
188
|
|
@@ -285,35 +261,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
|
285
261
|
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
|
286
262
|
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
287
263
|
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
|
288
|
-
bitD->bitsConsumed = lastByte ? 8 -
|
264
|
+
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
|
289
265
|
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
|
290
266
|
} else {
|
291
267
|
bitD->ptr = bitD->start;
|
292
268
|
bitD->bitContainer = *(const BYTE*)(bitD->start);
|
293
269
|
switch(srcSize)
|
294
270
|
{
|
295
|
-
case 7: bitD->bitContainer += (
|
296
|
-
|
271
|
+
case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
|
272
|
+
ZSTD_FALLTHROUGH;
|
297
273
|
|
298
|
-
case 6: bitD->bitContainer += (
|
299
|
-
|
274
|
+
case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
|
275
|
+
ZSTD_FALLTHROUGH;
|
300
276
|
|
301
|
-
case 5: bitD->bitContainer += (
|
302
|
-
|
277
|
+
case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
|
278
|
+
ZSTD_FALLTHROUGH;
|
303
279
|
|
304
|
-
case 4: bitD->bitContainer += (
|
305
|
-
|
280
|
+
case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
|
281
|
+
ZSTD_FALLTHROUGH;
|
306
282
|
|
307
|
-
case 3: bitD->bitContainer += (
|
308
|
-
|
283
|
+
case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
|
284
|
+
ZSTD_FALLTHROUGH;
|
309
285
|
|
310
|
-
case 2: bitD->bitContainer += (
|
311
|
-
|
286
|
+
case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
|
287
|
+
ZSTD_FALLTHROUGH;
|
312
288
|
|
313
289
|
default: break;
|
314
290
|
}
|
315
291
|
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
|
316
|
-
bitD->bitsConsumed = lastByte ? 8 -
|
292
|
+
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
|
317
293
|
if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
|
318
294
|
}
|
319
295
|
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
|
@@ -322,26 +298,25 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
|
322
298
|
return srcSize;
|
323
299
|
}
|
324
300
|
|
325
|
-
|
301
|
+
/*! BIT_getUpperBits() :
 *  @return : @bitContainer shifted right by @start bits,
 *            i.e. everything located at bit position @start and above. */
FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
{
    BitContainerType const upperBits = bitContainer >> start;
    return upperBits;
}
|
329
305
|
|
330
|
-
|
306
|
+
/*! BIT_getMiddleBits() :
 *  @return : @nbBits bits of @bitContainer, starting at bit position @start.
 *  @start is wrapped into the register width before shifting.
 *  @nbBits must be < BIT_MASK_SIZE (enforced by assert()). */
FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
{
    U32 const regMask = sizeof(bitContainer)*8 - 1;
    /* if start > regMask, bitstream is corrupted, and result is undefined */
    assert(nbBits < BIT_MASK_SIZE);
    /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
     * than accessing memory. When bmi2 instruction is not present, we consider
     * such cpus old (pre-Haswell, 2013) and their performance is not of that
     * importance.
     */
    /* note : MSVC identifies 64-bit x86 targets with _M_X64;
     * _M_X86 is not a predefined macro, so testing it never selected this path.
     * Both paths return the same value. */
#if defined(__x86_64__) || defined(_M_X64)
    return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
#else
    return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
#endif
}
|
347
322
|
|
@@ -351,7 +326,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 co
|
|
351
326
|
* On 32-bits, maxNbBits==24.
|
352
327
|
* On 64-bits, maxNbBits==56.
|
353
328
|
* @return : value extracted */
|
354
|
-
|
329
|
+
FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
355
330
|
{
|
356
331
|
/* arbitrate between double-shift and shift+mask */
|
357
332
|
#if 1
|
@@ -374,7 +349,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
|
|
374
349
|
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
|
375
350
|
}
|
376
351
|
|
377
|
-
|
352
|
+
/*! BIT_skipBits() :
 *  Marks @nbBits additional bits of the local register as consumed.
 *  Only the counter is updated; the register is not reloaded here. */
FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{
    bitD->bitsConsumed = bitD->bitsConsumed + nbBits;
}
|
@@ -383,7 +358,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
|
383
358
|
* Read (consume) next n bits from local register and update.
|
384
359
|
* Pay attention to not read more than nbBits contained into local register.
|
385
360
|
* @return : extracted value. */
|
386
|
-
|
361
|
+
FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
387
362
|
{
|
388
363
|
size_t const value = BIT_lookBits(bitD, nbBits);
|
389
364
|
BIT_skipBits(bitD, nbBits);
|
@@ -391,7 +366,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n
|
|
391
366
|
}
|
392
367
|
|
393
368
|
/*! BIT_readBitsFast() :
|
394
|
-
* unsafe version; only works
|
369
|
+
* unsafe version; only works if nbBits >= 1 */
|
395
370
|
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
396
371
|
{
|
397
372
|
size_t const value = BIT_lookBitsFast(bitD, nbBits);
|
@@ -400,6 +375,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
|
400
375
|
return value;
|
401
376
|
}
|
402
377
|
|
378
|
+
/*! BIT_reloadDStream_internal() :
|
379
|
+
* Simple variant of BIT_reloadDStream(), with two conditions:
|
380
|
+
* 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
|
381
|
+
* 2. look window is valid after shifted down : bitD->ptr >= bitD->start
|
382
|
+
*/
|
383
|
+
MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
{
    /* precondition 1 : no more bits consumed than the register holds */
    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
    {   unsigned const wholeBytesConsumed = bitD->bitsConsumed >> 3;
        /* the stream is read backward : step down by the fully consumed bytes */
        bitD->ptr -= wholeBytesConsumed;
    }
    /* precondition 2 : the new read position is still inside the buffer */
    assert(bitD->ptr >= bitD->start);
    bitD->bitsConsumed &= 7;   /* only the partially consumed byte remains */
    bitD->bitContainer = MEM_readLEST(bitD->ptr);
    return BIT_DStream_unfinished;
}
|
392
|
+
|
403
393
|
/*! BIT_reloadDStreamFast() :
|
404
394
|
* Similar to BIT_reloadDStream(), but with two differences:
|
405
395
|
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
|
@@ -410,31 +400,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
|
|
410
400
|
{
|
411
401
|
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
|
412
402
|
return BIT_DStream_overflow;
|
413
|
-
|
414
|
-
bitD->ptr -= bitD->bitsConsumed >> 3;
|
415
|
-
bitD->bitsConsumed &= 7;
|
416
|
-
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
417
|
-
return BIT_DStream_unfinished;
|
403
|
+
return BIT_reloadDStream_internal(bitD);
|
418
404
|
}
|
419
405
|
|
420
406
|
/*! BIT_reloadDStream() :
|
421
407
|
* Refill `bitD` from buffer previously set in BIT_initDStream() .
|
422
|
-
* This function is safe, it guarantees it will not
|
408
|
+
* This function is safe, it guarantees it will never read beyond src buffer.
|
423
409
|
* @return : status of `BIT_DStream_t` internal register.
|
424
410
|
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
|
425
|
-
|
411
|
+
FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
426
412
|
{
|
427
|
-
|
413
|
+
/* note : once in overflow mode, a bitstream remains in this mode until it's reset */
|
414
|
+
if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
|
415
|
+
static const BitContainerType zeroFilled = 0;
|
416
|
+
bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
|
417
|
+
/* overflow detected, erroneous scenario or end of stream: no update */
|
428
418
|
return BIT_DStream_overflow;
|
419
|
+
}
|
420
|
+
|
421
|
+
assert(bitD->ptr >= bitD->start);
|
429
422
|
|
430
423
|
if (bitD->ptr >= bitD->limitPtr) {
|
431
|
-
return
|
424
|
+
return BIT_reloadDStream_internal(bitD);
|
432
425
|
}
|
433
426
|
if (bitD->ptr == bitD->start) {
|
427
|
+
/* reached end of bitStream => no update */
|
434
428
|
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
|
435
429
|
return BIT_DStream_completed;
|
436
430
|
}
|
437
|
-
/* start < ptr < limitPtr */
|
431
|
+
/* start < ptr < limitPtr => cautious update */
|
438
432
|
{ U32 nbBytes = bitD->bitsConsumed >> 3;
|
439
433
|
BIT_DStream_status result = BIT_DStream_unfinished;
|
440
434
|
if (bitD->ptr - nbBytes < bitD->start) {
|