extzstd 0.3.2 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
@@ -0,0 +1,200 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef ZSTD_BITS_H
|
12
|
+
#define ZSTD_BITS_H
|
13
|
+
|
14
|
+
#include "mem.h"
|
15
|
+
|
16
|
+
/* ZSTD_countTrailingZeros32_fallback() :
 * Portable (intrinsic-free) count of trailing zero bits of a 32-bit value.
 * The lowest set bit is isolated, multiplied by a De Bruijn constant, and the
 * top 5 bits of the product index a lookup table giving the bit position.
 * @val : must be non-zero (asserted).
 * @return : position of the lowest set bit, in [0, 31]. */
MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
{
    assert(val != 0);
    {
        static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
                                                30, 22, 20, 15, 25, 17, 4, 8,
                                                31, 27, 13, 23, 21, 19, 16, 7,
                                                26, 12, 18, 6, 11, 5, 10, 9};
        /* Two's-complement negation is done in unsigned arithmetic:
         * negating (S32)val would overflow a signed int when val == 0x80000000,
         * which is undefined behavior. */
        U32 const lowestSetBit = val & (0U - val);
        return DeBruijnBytePos[((U32) (lowestSetBit * 0x077CB531U)) >> 27];
    }
}
|
27
|
+
|
28
|
+
/* ZSTD_countTrailingZeros32() :
 * Count of trailing zero bits of a 32-bit value, using a compiler intrinsic
 * when one is available (MSVC, GCC-compatible >= 4) and the portable
 * De Bruijn fallback otherwise.
 * @val : must be non-zero (asserted).
 * @return : number of zero bits below the lowest set bit. */
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
    assert(val != 0);
#if defined(_MSC_VER)
#  if STATIC_BMI2 == 1
    return (unsigned)_tzcnt_u32(val);
#  else
    if (val == 0) {
        /* Should not reach this code path */
        __assume(0);
    } else {
        unsigned long lowestBitIdx;
        _BitScanForward(&lowestBitIdx, val);
        return (unsigned)lowestBitIdx;
    }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)__builtin_ctz(val);
#else
    return ZSTD_countTrailingZeros32_fallback(val);
#endif
}
|
50
|
+
|
51
|
+
/* ZSTD_countLeadingZeros32_fallback() :
 * Portable (intrinsic-free) count of leading zero bits of a 32-bit value.
 * Every bit below the highest set bit is first set to 1, then a De Bruijn
 * multiply-and-lookup yields the position of that highest bit.
 * @val : must be non-zero (asserted).
 * @return : 31 minus the position of the highest set bit. */
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
    assert(val != 0);
    {
        static const U32 highBitPos[32] = {0, 9, 1, 10, 13, 21, 2, 29,
                                           11, 14, 16, 18, 22, 25, 3, 30,
                                           8, 12, 20, 28, 15, 17, 24, 7,
                                           19, 27, 23, 6, 26, 5, 4, 31};
        U32 smeared = val;
        /* propagate the highest set bit into all lower positions */
        smeared |= smeared >> 1;
        smeared |= smeared >> 2;
        smeared |= smeared >> 4;
        smeared |= smeared >> 8;
        smeared |= smeared >> 16;
        return 31 - highBitPos[(smeared * 0x07C4ACDDU) >> 27];
    }
}
|
66
|
+
|
67
|
+
/* ZSTD_countLeadingZeros32() :
 * Count of leading zero bits of a 32-bit value, using a compiler intrinsic
 * when one is available (MSVC, GCC-compatible >= 4) and the portable
 * De Bruijn fallback otherwise.
 * @val : must be non-zero (asserted).
 * @return : number of zero bits above the highest set bit. */
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
{
    assert(val != 0);
#if defined(_MSC_VER)
#  if STATIC_BMI2 == 1
    return (unsigned)_lzcnt_u32(val);
#  else
    if (val == 0) {
        /* Should not reach this code path */
        __assume(0);
    } else {
        unsigned long highestBitIdx;
        _BitScanReverse(&highestBitIdx, val);
        /* _BitScanReverse reports a bit index; convert it to a leading-zero count */
        return (unsigned)(31 - highestBitIdx);
    }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)__builtin_clz(val);
#else
    return ZSTD_countLeadingZeros32_fallback(val);
#endif
}
|
89
|
+
|
90
|
+
/* ZSTD_countTrailingZeros64() :
 * Count of trailing zero bits of a 64-bit value.
 * Uses a 64-bit intrinsic on MSVC/Win64 and on GCC-compatible LP64 targets;
 * elsewhere the value is split into two 32-bit halves.
 * @val : must be non-zero (asserted).
 * @return : number of zero bits below the lowest set bit. */
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
    assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
#  if STATIC_BMI2 == 1
    return (unsigned)_tzcnt_u64(val);
#  else
    if (val == 0) {
        /* Should not reach this code path */
        __assume(0);
    } else {
        unsigned long lowestBitIdx;
        _BitScanForward64(&lowestBitIdx, val);
        return (unsigned)lowestBitIdx;
    }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
    return (unsigned)__builtin_ctzll(val);
#else
    {
        U32 const lowWord = (U32)val;
        U32 const highWord = (U32)(val >> 32);
        /* the lowest set bit lives in the low half unless that half is empty */
        if (lowWord != 0) {
            return ZSTD_countTrailingZeros32(lowWord);
        }
        return 32 + ZSTD_countTrailingZeros32(highWord);
    }
#endif
}
|
120
|
+
|
121
|
+
/* ZSTD_countLeadingZeros64() :
 * Count of leading zero bits of a 64-bit value.
 * Uses a 64-bit intrinsic on MSVC/Win64 and on GCC-compatible compilers;
 * elsewhere the value is split into two 32-bit halves.
 * @val : must be non-zero (asserted).
 * @return : number of zero bits above the highest set bit. */
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
{
    assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
#  if STATIC_BMI2 == 1
    return (unsigned)_lzcnt_u64(val);
#  else
    if (val == 0) {
        /* Should not reach this code path */
        __assume(0);
    } else {
        unsigned long highestBitIdx;
        _BitScanReverse64(&highestBitIdx, val);
        /* _BitScanReverse64 reports a bit index; convert it to a leading-zero count */
        return (unsigned)(63 - highestBitIdx);
    }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)(__builtin_clzll(val));
#else
    {
        U32 const highWord = (U32)(val >> 32);
        U32 const lowWord = (U32)val;
        /* the highest set bit lives in the high half unless that half is empty */
        if (highWord != 0) {
            return ZSTD_countLeadingZeros32(highWord);
        }
        return 32 + ZSTD_countLeadingZeros32(lowWord);
    }
#endif
}
|
151
|
+
|
152
|
+
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
|
153
|
+
{
|
154
|
+
if (MEM_isLittleEndian()) {
|
155
|
+
if (MEM_64bits()) {
|
156
|
+
return ZSTD_countTrailingZeros64((U64)val) >> 3;
|
157
|
+
} else {
|
158
|
+
return ZSTD_countTrailingZeros32((U32)val) >> 3;
|
159
|
+
}
|
160
|
+
} else { /* Big Endian CPU */
|
161
|
+
if (MEM_64bits()) {
|
162
|
+
return ZSTD_countLeadingZeros64((U64)val) >> 3;
|
163
|
+
} else {
|
164
|
+
return ZSTD_countLeadingZeros32((U32)val) >> 3;
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
/* ZSTD_highbit32() :
 * Position of the highest set bit of a 32-bit value.
 * @val : must be non-zero (asserted).
 * @return : bit index in [0, 31], i.e. 31 minus the leading-zero count. */
MEM_STATIC unsigned ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
{
    unsigned leadingZeros;
    assert(val != 0);
    leadingZeros = ZSTD_countLeadingZeros32(val);
    return 31 - leadingZeros;
}
|
174
|
+
|
175
|
+
/* ZSTD_rotateRight_*():
|
176
|
+
* Rotates a bitfield to the right by "count" bits.
|
177
|
+
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
|
178
|
+
*/
|
179
|
+
MEM_STATIC
|
180
|
+
U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
|
181
|
+
assert(count < 64);
|
182
|
+
count &= 0x3F; /* for fickle pattern recognition */
|
183
|
+
return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
|
184
|
+
}
|
185
|
+
|
186
|
+
MEM_STATIC
|
187
|
+
U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
|
188
|
+
assert(count < 32);
|
189
|
+
count &= 0x1F; /* for fickle pattern recognition */
|
190
|
+
return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
|
191
|
+
}
|
192
|
+
|
193
|
+
MEM_STATIC
|
194
|
+
U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
|
195
|
+
assert(count < 16);
|
196
|
+
count &= 0x0F; /* for fickle pattern recognition */
|
197
|
+
return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
|
198
|
+
}
|
199
|
+
|
200
|
+
#endif /* ZSTD_BITS_H */
|
@@ -1,7 +1,7 @@
|
|
1
1
|
/* ******************************************************************
|
2
2
|
* bitstream
|
3
3
|
* Part of FSE library
|
4
|
-
* Copyright (c)
|
4
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
5
5
|
*
|
6
6
|
* You can contact the author at :
|
7
7
|
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
@@ -30,14 +30,15 @@ extern "C" {
|
|
30
30
|
#include "compiler.h" /* UNLIKELY() */
|
31
31
|
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
|
32
32
|
#include "error_private.h" /* error codes and messages */
|
33
|
+
#include "bits.h" /* ZSTD_highbit32 */
|
33
34
|
|
34
35
|
|
35
36
|
/*=========================================
|
36
37
|
* Target specific
|
37
38
|
=========================================*/
|
38
39
|
#ifndef ZSTD_NO_INTRINSICS
|
39
|
-
# if defined(__BMI__) && defined(__GNUC__)
|
40
|
-
# include <immintrin.h> /* support for bextr (experimental) */
|
40
|
+
# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
|
41
|
+
# include <immintrin.h> /* support for bextr (experimental)/bzhi */
|
41
42
|
# elif defined(__ICCARM__)
|
42
43
|
# include <intrinsics.h>
|
43
44
|
# endif
|
@@ -89,19 +90,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
|
|
89
90
|
/*-********************************************
|
90
91
|
* bitStream decoding API (read backward)
|
91
92
|
**********************************************/
|
93
|
+
typedef size_t BitContainerType;
|
92
94
|
/* Decoding state of a bitstream that is read backward (see BIT_initDStream() / BIT_reloadDStream()). */
typedef struct {
|
93
|
-
|
95
|
+
BitContainerType bitContainer; /* local register, loaded from `ptr` with MEM_readLEST() */
|
94
96
|
unsigned bitsConsumed; /* nb of bits of bitContainer already consumed; advanced by BIT_skipBits() */
|
95
97
|
const char* ptr; /* current load position; reloads move it backward by the nb of whole bytes consumed */
|
96
98
|
const char* start; /* lower bound for `ptr` (reload asserts ptr >= start) */
|
97
99
|
const char* limitPtr; /* while ptr >= limitPtr, BIT_reloadDStream() takes the simple full reload path */
|
98
100
|
} BIT_DStream_t;
|
99
101
|
|
100
|
-
typedef enum { BIT_DStream_unfinished = 0,
|
101
|
-
BIT_DStream_endOfBuffer = 1,
|
102
|
-
BIT_DStream_completed = 2,
|
103
|
-
BIT_DStream_overflow = 3
|
104
|
-
|
102
|
+
typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
|
103
|
+
BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
|
104
|
+
BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
|
105
|
+
BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
|
106
|
+
} BIT_DStream_status; /* result of BIT_reloadDStream() */
|
105
107
|
|
106
108
|
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
|
107
109
|
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
|
@@ -111,7 +113,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
|
|
111
113
|
|
112
114
|
/* Start by invoking BIT_initDStream().
|
113
115
|
* A chunk of the bitStream is then stored into a local register.
|
114
|
-
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (
|
116
|
+
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
|
115
117
|
* You can then retrieve bitFields stored into the local register, **in reverse order**.
|
116
118
|
* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
|
117
119
|
* A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
|
@@ -132,42 +134,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
|
|
132
134
|
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
|
133
135
|
/* faster, but works only if nbBits >= 1 */
|
134
136
|
|
135
|
-
|
136
|
-
|
137
|
-
/*-**************************************************************
|
138
|
-
* Internal functions
|
139
|
-
****************************************************************/
|
140
|
-
MEM_STATIC unsigned BIT_highbit32 (U32 val)
|
141
|
-
{
|
142
|
-
assert(val != 0);
|
143
|
-
{
|
144
|
-
# if defined(_MSC_VER) /* Visual */
|
145
|
-
# if STATIC_BMI2 == 1
|
146
|
-
return _lzcnt_u32(val) ^ 31;
|
147
|
-
# else
|
148
|
-
unsigned long r = 0;
|
149
|
-
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
150
|
-
# endif
|
151
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
152
|
-
return __builtin_clz (val) ^ 31;
|
153
|
-
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
154
|
-
return 31 - __CLZ(val);
|
155
|
-
# else /* Software version */
|
156
|
-
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
|
157
|
-
11, 14, 16, 18, 22, 25, 3, 30,
|
158
|
-
8, 12, 20, 28, 15, 17, 24, 7,
|
159
|
-
19, 27, 23, 6, 26, 5, 4, 31 };
|
160
|
-
U32 v = val;
|
161
|
-
v |= v >> 1;
|
162
|
-
v |= v >> 2;
|
163
|
-
v |= v >> 4;
|
164
|
-
v |= v >> 8;
|
165
|
-
v |= v >> 16;
|
166
|
-
return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
|
167
|
-
# endif
|
168
|
-
}
|
169
|
-
}
|
170
|
-
|
171
137
|
/*===== Local Constants =====*/
|
172
138
|
static const unsigned BIT_mask[] = {
|
173
139
|
0, 1, 3, 7, 0xF, 0x1F,
|
@@ -197,6 +163,16 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
|
|
197
163
|
return 0;
|
198
164
|
}
|
199
165
|
|
166
|
+
/*! BIT_getLowerBits() :
 *  Keeps the `nbBits` lowest bits of `bitContainer` and clears the others.
 *  Uses the bzhi intrinsic when STATIC_BMI2 is enabled and intrinsics are
 *  allowed; otherwise masks through the BIT_mask table, which requires
 *  nbBits < BIT_MASK_SIZE (asserted).
 * @return : the masked value */
FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
    return _bzhi_u64(bitContainer, nbBits);
#else
    size_t lowMask;
    assert(nbBits < BIT_MASK_SIZE);
    lowMask = BIT_mask[nbBits];
    return bitContainer & lowMask;
#endif
}
|
175
|
+
|
200
176
|
/*! BIT_addBits() :
|
201
177
|
* can add up to 31 bits into `bitC`.
|
202
178
|
* Note : does not check for register overflow ! */
|
@@ -206,7 +182,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
|
206
182
|
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
|
207
183
|
assert(nbBits < BIT_MASK_SIZE);
|
208
184
|
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
209
|
-
bitC->bitContainer |= (value
|
185
|
+
bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
|
210
186
|
bitC->bitPos += nbBits;
|
211
187
|
}
|
212
188
|
|
@@ -285,35 +261,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
|
285
261
|
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
|
286
262
|
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
287
263
|
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
|
288
|
-
bitD->bitsConsumed = lastByte ? 8 -
|
264
|
+
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
|
289
265
|
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
|
290
266
|
} else {
|
291
267
|
bitD->ptr = bitD->start;
|
292
268
|
bitD->bitContainer = *(const BYTE*)(bitD->start);
|
293
269
|
switch(srcSize)
|
294
270
|
{
|
295
|
-
case 7: bitD->bitContainer += (
|
296
|
-
|
271
|
+
case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
|
272
|
+
ZSTD_FALLTHROUGH;
|
297
273
|
|
298
|
-
case 6: bitD->bitContainer += (
|
299
|
-
|
274
|
+
case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
|
275
|
+
ZSTD_FALLTHROUGH;
|
300
276
|
|
301
|
-
case 5: bitD->bitContainer += (
|
302
|
-
|
277
|
+
case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
|
278
|
+
ZSTD_FALLTHROUGH;
|
303
279
|
|
304
|
-
case 4: bitD->bitContainer += (
|
305
|
-
|
280
|
+
case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
|
281
|
+
ZSTD_FALLTHROUGH;
|
306
282
|
|
307
|
-
case 3: bitD->bitContainer += (
|
308
|
-
|
283
|
+
case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
|
284
|
+
ZSTD_FALLTHROUGH;
|
309
285
|
|
310
|
-
case 2: bitD->bitContainer += (
|
311
|
-
|
286
|
+
case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
|
287
|
+
ZSTD_FALLTHROUGH;
|
312
288
|
|
313
289
|
default: break;
|
314
290
|
}
|
315
291
|
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
|
316
|
-
bitD->bitsConsumed = lastByte ? 8 -
|
292
|
+
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
|
317
293
|
if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
|
318
294
|
}
|
319
295
|
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
|
@@ -322,26 +298,25 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
|
322
298
|
return srcSize;
|
323
299
|
}
|
324
300
|
|
325
|
-
|
301
|
+
/*! BIT_getUpperBits() :
 *  Discards the `start` lowest bits of `bitContainer`.
 * @return : bitContainer shifted right by `start` */
FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
{
    BitContainerType const upperBits = bitContainer >> start;
    return upperBits;
}
|
329
305
|
|
330
|
-
|
306
|
+
/*! BIT_getMiddleBits() :
 *  Extracts `nbBits` bits of `bitContainer`, starting at bit position `start`.
 *  `start` is reduced modulo the register width, so the shift itself is
 *  always well defined; `nbBits` must be < BIT_MASK_SIZE (asserted).
 * @return : the extracted bit field, right-aligned */
FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
{
    U32 const regMask = sizeof(bitContainer)*8 - 1;
    /* if start > regMask, bitstream is corrupted, and result is undefined */
    assert(nbBits < BIT_MASK_SIZE);
    /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
     * than accessing memory. When bmi2 instruction is not present, we consider
     * such cpus old (pre-Haswell, 2013) and their performance is not of that
     * importance.
     */
    /* MSVC identifies x64 targets with _M_X64 (it defines no _M_X86 macro),
     * so test that macro to let MSVC x64 builds use the computed mask too.
     * Both branches produce the same value. */
#if defined(__x86_64__) || defined(_M_X64)
    return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
#else
    return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
#endif
}
|
347
322
|
|
@@ -351,7 +326,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 co
|
|
351
326
|
* On 32-bits, maxNbBits==24.
|
352
327
|
* On 64-bits, maxNbBits==56.
|
353
328
|
* @return : value extracted */
|
354
|
-
|
329
|
+
FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
355
330
|
{
|
356
331
|
/* arbitrate between double-shift and shift+mask */
|
357
332
|
#if 1
|
@@ -374,7 +349,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
|
|
374
349
|
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
|
375
350
|
}
|
376
351
|
|
377
|
-
|
352
|
+
/*! BIT_skipBits() :
 *  Marks `nbBits` more bits of the local register as consumed.
 *  Only the bitsConsumed counter is updated; no reload is performed here. */
FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{
    bitD->bitsConsumed = bitD->bitsConsumed + nbBits;
}
|
@@ -383,7 +358,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
|
383
358
|
* Read (consume) next n bits from local register and update.
|
384
359
|
* Pay attention to not read more than nbBits contained into local register.
|
385
360
|
* @return : extracted value. */
|
386
|
-
|
361
|
+
FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
387
362
|
{
|
388
363
|
size_t const value = BIT_lookBits(bitD, nbBits);
|
389
364
|
BIT_skipBits(bitD, nbBits);
|
@@ -391,7 +366,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n
|
|
391
366
|
}
|
392
367
|
|
393
368
|
/*! BIT_readBitsFast() :
|
394
|
-
* unsafe version; only works
|
369
|
+
* unsafe version; only works if nbBits >= 1 */
|
395
370
|
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
396
371
|
{
|
397
372
|
size_t const value = BIT_lookBitsFast(bitD, nbBits);
|
@@ -400,6 +375,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
|
400
375
|
return value;
|
401
376
|
}
|
402
377
|
|
378
|
+
/*! BIT_reloadDStream_internal() :
|
379
|
+
* Simple variant of BIT_reloadDStream(), with two conditions:
|
380
|
+
* 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
|
381
|
+
* 2. look window is valid after shifted down : bitD->ptr >= bitD->start
|
382
|
+
*/
|
383
|
+
/* Unchecked reload: steps `ptr` back by the number of whole bytes consumed,
 * keeps the leftover (< 8) bit count, and reloads the register from the new
 * position. Both preconditions are only asserted, never handled.
 * @return : always BIT_DStream_unfinished */
MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
{
    unsigned wholeBytes;
    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
    wholeBytes = bitD->bitsConsumed >> 3;
    bitD->ptr -= wholeBytes;
    assert(bitD->ptr >= bitD->start);
    bitD->bitsConsumed &= 7;   /* bits consumed inside the first reloaded byte */
    bitD->bitContainer = MEM_readLEST(bitD->ptr);
    return BIT_DStream_unfinished;
}
|
392
|
+
|
403
393
|
/*! BIT_reloadDStreamFast() :
|
404
394
|
* Similar to BIT_reloadDStream(), but with two differences:
|
405
395
|
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
|
@@ -410,31 +400,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
|
|
410
400
|
{
|
411
401
|
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
|
412
402
|
return BIT_DStream_overflow;
|
413
|
-
|
414
|
-
bitD->ptr -= bitD->bitsConsumed >> 3;
|
415
|
-
bitD->bitsConsumed &= 7;
|
416
|
-
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
417
|
-
return BIT_DStream_unfinished;
|
403
|
+
return BIT_reloadDStream_internal(bitD);
|
418
404
|
}
|
419
405
|
|
420
406
|
/*! BIT_reloadDStream() :
|
421
407
|
* Refill `bitD` from buffer previously set in BIT_initDStream() .
|
422
|
-
* This function is safe, it guarantees it will not
|
408
|
+
* This function is safe: it guarantees it will never read beyond src buffer.
|
423
409
|
* @return : status of `BIT_DStream_t` internal register.
|
424
410
|
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
|
425
|
-
|
411
|
+
FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
426
412
|
{
|
427
|
-
|
413
|
+
/* note : once in overflow mode, a bitstream remains in this mode until it's reset */
|
414
|
+
if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
|
415
|
+
static const BitContainerType zeroFilled = 0;
|
416
|
+
bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
|
417
|
+
/* overflow detected, erroneous scenario or end of stream: no update */
|
428
418
|
return BIT_DStream_overflow;
|
419
|
+
}
|
420
|
+
|
421
|
+
assert(bitD->ptr >= bitD->start);
|
429
422
|
|
430
423
|
if (bitD->ptr >= bitD->limitPtr) {
|
431
|
-
return
|
424
|
+
return BIT_reloadDStream_internal(bitD);
|
432
425
|
}
|
433
426
|
if (bitD->ptr == bitD->start) {
|
427
|
+
/* reached end of bitStream => no update */
|
434
428
|
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
|
435
429
|
return BIT_DStream_completed;
|
436
430
|
}
|
437
|
-
/* start < ptr < limitPtr */
|
431
|
+
/* start < ptr < limitPtr => cautious update */
|
438
432
|
{ U32 nbBytes = bitD->bitsConsumed >> 3;
|
439
433
|
BIT_DStream_status result = BIT_DStream_unfinished;
|
440
434
|
if (bitD->ptr - nbBytes < bitD->start) {
|