libdeflate 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/test.yml +34 -0
- data/README.md +1 -6
- data/ext/libdeflate/extconf.rb +18 -7
- data/ext/libdeflate/libdeflate_ext.c +17 -17
- data/lib/libdeflate/version.rb +1 -1
- data/libdeflate.gemspec +2 -1
- metadata +13 -84
- data/.gitmodules +0 -3
- data/.travis.yml +0 -5
- data/ext/libdeflate/libdeflate/.gitignore +0 -19
- data/ext/libdeflate/libdeflate/COPYING +0 -21
- data/ext/libdeflate/libdeflate/Makefile +0 -231
- data/ext/libdeflate/libdeflate/Makefile.msc +0 -64
- data/ext/libdeflate/libdeflate/NEWS +0 -57
- data/ext/libdeflate/libdeflate/README.md +0 -170
- data/ext/libdeflate/libdeflate/common/common_defs.h +0 -351
- data/ext/libdeflate/libdeflate/common/compiler_gcc.h +0 -134
- data/ext/libdeflate/libdeflate/common/compiler_msc.h +0 -95
- data/ext/libdeflate/libdeflate/lib/adler32.c +0 -213
- data/ext/libdeflate/libdeflate/lib/adler32_impl.h +0 -281
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +0 -57
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +0 -13
- data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +0 -357
- data/ext/libdeflate/libdeflate/lib/crc32.c +0 -368
- data/ext/libdeflate/libdeflate/lib/crc32_impl.h +0 -286
- data/ext/libdeflate/libdeflate/lib/crc32_table.h +0 -526
- data/ext/libdeflate/libdeflate/lib/decompress_impl.h +0 -404
- data/ext/libdeflate/libdeflate/lib/deflate_compress.c +0 -2817
- data/ext/libdeflate/libdeflate/lib/deflate_compress.h +0 -14
- data/ext/libdeflate/libdeflate/lib/deflate_constants.h +0 -66
- data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +0 -889
- data/ext/libdeflate/libdeflate/lib/gzip_compress.c +0 -95
- data/ext/libdeflate/libdeflate/lib/gzip_constants.h +0 -45
- data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +0 -130
- data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +0 -405
- data/ext/libdeflate/libdeflate/lib/lib_common.h +0 -35
- data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +0 -53
- data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +0 -205
- data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +0 -61
- data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +0 -53
- data/ext/libdeflate/libdeflate/lib/unaligned.h +0 -202
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +0 -169
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +0 -48
- data/ext/libdeflate/libdeflate/lib/zlib_compress.c +0 -87
- data/ext/libdeflate/libdeflate/lib/zlib_constants.h +0 -21
- data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +0 -91
- data/ext/libdeflate/libdeflate/libdeflate.h +0 -274
- data/ext/libdeflate/libdeflate/programs/benchmark.c +0 -558
- data/ext/libdeflate/libdeflate/programs/checksum.c +0 -197
- data/ext/libdeflate/libdeflate/programs/detect.sh +0 -62
- data/ext/libdeflate/libdeflate/programs/gzip.c +0 -603
- data/ext/libdeflate/libdeflate/programs/prog_util.c +0 -530
- data/ext/libdeflate/libdeflate/programs/prog_util.h +0 -162
- data/ext/libdeflate/libdeflate/programs/test_checksums.c +0 -135
- data/ext/libdeflate/libdeflate/programs/tgetopt.c +0 -118
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +0 -12
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +0 -40
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +0 -28
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +0 -3
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +0 -28
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +0 -14
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +0 -28
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +0 -3
- data/ext/libdeflate/libdeflate/tools/android_build.sh +0 -104
- data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +0 -76
- data/ext/libdeflate/libdeflate/tools/exec_tests.sh +0 -30
- data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +0 -108
- data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +0 -100
- data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +0 -412
- data/ext/libdeflate/libdeflate/tools/make-windows-releases +0 -21
- data/ext/libdeflate/libdeflate/tools/mips_build.sh +0 -9
- data/ext/libdeflate/libdeflate/tools/msc_test.bat +0 -3
- data/ext/libdeflate/libdeflate/tools/pgo_build.sh +0 -23
- data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +0 -37
- data/ext/libdeflate/libdeflate/tools/run_tests.sh +0 -305
- data/ext/libdeflate/libdeflate/tools/windows_build.sh +0 -10
@@ -1,889 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* deflate_decompress.c - a decompressor for DEFLATE
|
3
|
-
*
|
4
|
-
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
5
|
-
*
|
6
|
-
* Copyright 2016 Eric Biggers
|
7
|
-
*
|
8
|
-
* Permission is hereby granted, free of charge, to any person
|
9
|
-
* obtaining a copy of this software and associated documentation
|
10
|
-
* files (the "Software"), to deal in the Software without
|
11
|
-
* restriction, including without limitation the rights to use,
|
12
|
-
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
13
|
-
* copies of the Software, and to permit persons to whom the
|
14
|
-
* Software is furnished to do so, subject to the following
|
15
|
-
* conditions:
|
16
|
-
*
|
17
|
-
* The above copyright notice and this permission notice shall be
|
18
|
-
* included in all copies or substantial portions of the Software.
|
19
|
-
*
|
20
|
-
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
22
|
-
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
-
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
24
|
-
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
25
|
-
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
26
|
-
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
27
|
-
* OTHER DEALINGS IN THE SOFTWARE.
|
28
|
-
*
|
29
|
-
* ---------------------------------------------------------------------------
|
30
|
-
*
|
31
|
-
* This is a highly optimized DEFLATE decompressor. When compiled with gcc on
|
32
|
-
* x86_64, it decompresses data in about 52% of the time of zlib (48% if BMI2
|
33
|
-
* instructions are available). On other architectures it should still be
|
34
|
-
* significantly faster than zlib, but the difference may be smaller.
|
35
|
-
*
|
36
|
-
* Why this is faster than zlib's implementation:
|
37
|
-
*
|
38
|
-
* - Word accesses rather than byte accesses when reading input
|
39
|
-
* - Word accesses rather than byte accesses when copying matches
|
40
|
-
* - Faster Huffman decoding combined with various DEFLATE-specific tricks
|
41
|
-
* - Larger bitbuffer variable that doesn't need to be filled as often
|
42
|
-
* - Other optimizations to remove unnecessary branches
|
43
|
-
* - Only full-buffer decompression is supported, so the code doesn't need to
|
44
|
-
* support stopping and resuming decompression.
|
45
|
-
* - On x86_64, compile a version of the decompression routine using BMI2
|
46
|
-
* instructions and use it automatically at runtime when supported.
|
47
|
-
*/
|
48
|
-
|
49
|
-
#include <stdlib.h>
|
50
|
-
#include <string.h>
|
51
|
-
|
52
|
-
#include "deflate_constants.h"
|
53
|
-
#include "unaligned.h"
|
54
|
-
#include "x86_cpu_features.h"
|
55
|
-
|
56
|
-
#include "libdeflate.h"
|
57
|
-
|
58
|
-
/*
|
59
|
-
* If the expression passed to SAFETY_CHECK() evaluates to false, then the
|
60
|
-
* decompression routine immediately returns LIBDEFLATE_BAD_DATA, indicating the
|
61
|
-
* compressed data is invalid.
|
62
|
-
*
|
63
|
-
* Theoretically, these checks could be disabled for specialized applications
|
64
|
-
* where all input to the decompressor will be trusted.
|
65
|
-
*/
|
66
|
-
#if 0
|
67
|
-
# pragma message("UNSAFE DECOMPRESSION IS ENABLED. THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!")
|
68
|
-
# define SAFETY_CHECK(expr) (void)(expr)
|
69
|
-
#else
|
70
|
-
# define SAFETY_CHECK(expr) if (unlikely(!(expr))) return LIBDEFLATE_BAD_DATA
|
71
|
-
#endif
|
72
|
-
|
73
|
-
/*
|
74
|
-
* Each TABLEBITS number is the base-2 logarithm of the number of entries in the
|
75
|
-
* main portion of the corresponding decode table. Each number should be large
|
76
|
-
* enough to ensure that for typical data, the vast majority of symbols can be
|
77
|
-
* decoded by a direct lookup of the next TABLEBITS bits of compressed data.
|
78
|
-
* However, this must be balanced against the fact that a larger table requires
|
79
|
-
* more memory and requires more time to fill.
|
80
|
-
*
|
81
|
-
* Note: you cannot change a TABLEBITS number without also changing the
|
82
|
-
* corresponding ENOUGH number!
|
83
|
-
*/
|
84
|
-
#define PRECODE_TABLEBITS 7
|
85
|
-
#define LITLEN_TABLEBITS 10
|
86
|
-
#define OFFSET_TABLEBITS 8
|
87
|
-
|
88
|
-
/*
|
89
|
-
* Each ENOUGH number is the maximum number of decode table entries that may be
|
90
|
-
* required for the corresponding Huffman code, including the main table and all
|
91
|
-
* subtables. Each number depends on three parameters:
|
92
|
-
*
|
93
|
-
* (1) the maximum number of symbols in the code (DEFLATE_NUM_*_SYMBOLS)
|
94
|
-
* (2) the number of main table bits (the TABLEBITS numbers defined above)
|
95
|
-
* (3) the maximum allowed codeword length (DEFLATE_MAX_*_CODEWORD_LEN)
|
96
|
-
*
|
97
|
-
* The ENOUGH numbers were computed using the utility program 'enough' from
|
98
|
-
* zlib. This program enumerates all possible relevant Huffman codes to find
|
99
|
-
* the worst-case usage of decode table entries.
|
100
|
-
*/
|
101
|
-
#define PRECODE_ENOUGH 128 /* enough 19 7 7 */
|
102
|
-
#define LITLEN_ENOUGH 1334 /* enough 288 10 15 */
|
103
|
-
#define OFFSET_ENOUGH 402 /* enough 32 8 15 */
|
104
|
-
|
105
|
-
/*
|
106
|
-
* Type for codeword lengths.
|
107
|
-
*/
|
108
|
-
typedef u8 len_t;
|
109
|
-
|
110
|
-
/*
|
111
|
-
* The main DEFLATE decompressor structure. Since this implementation only
|
112
|
-
* supports full buffer decompression, this structure does not store the entire
|
113
|
-
* decompression state, but rather only some arrays that are too large to
|
114
|
-
* comfortably allocate on the stack.
|
115
|
-
*/
|
116
|
-
struct libdeflate_decompressor {
|
117
|
-
|
118
|
-
/*
|
119
|
-
* The arrays aren't all needed at the same time. 'precode_lens' and
|
120
|
-
* 'precode_decode_table' are unneeded after 'lens' has been filled.
|
121
|
-
* Furthermore, 'lens' need not be retained after building the litlen
|
122
|
-
* and offset decode tables. In fact, 'lens' can be in union with
|
123
|
-
* 'litlen_decode_table' provided that 'offset_decode_table' is separate
|
124
|
-
* and is built first.
|
125
|
-
*/
|
126
|
-
|
127
|
-
union {
|
128
|
-
len_t precode_lens[DEFLATE_NUM_PRECODE_SYMS];
|
129
|
-
|
130
|
-
struct {
|
131
|
-
len_t lens[DEFLATE_NUM_LITLEN_SYMS +
|
132
|
-
DEFLATE_NUM_OFFSET_SYMS +
|
133
|
-
DEFLATE_MAX_LENS_OVERRUN];
|
134
|
-
|
135
|
-
u32 precode_decode_table[PRECODE_ENOUGH];
|
136
|
-
} l;
|
137
|
-
|
138
|
-
u32 litlen_decode_table[LITLEN_ENOUGH];
|
139
|
-
} u;
|
140
|
-
|
141
|
-
u32 offset_decode_table[OFFSET_ENOUGH];
|
142
|
-
|
143
|
-
u16 working_space[2 * (DEFLATE_MAX_CODEWORD_LEN + 1) +
|
144
|
-
DEFLATE_MAX_NUM_SYMS];
|
145
|
-
};
|
146
|
-
|
147
|
-
/*****************************************************************************
|
148
|
-
* Input bitstream *
|
149
|
-
*****************************************************************************/
|
150
|
-
|
151
|
-
/*
|
152
|
-
* The state of the "input bitstream" consists of the following variables:
|
153
|
-
*
|
154
|
-
* - in_next: pointer to the next unread byte in the input buffer
|
155
|
-
*
|
156
|
-
* - in_end: pointer just past the end of the input buffer
|
157
|
-
*
|
158
|
-
* - bitbuf: a word-sized variable containing bits that have been read from
|
159
|
-
* the input buffer. The buffered bits are right-aligned
|
160
|
-
* (they're the low-order bits).
|
161
|
-
*
|
162
|
-
* - bitsleft: number of bits in 'bitbuf' that are valid.
|
163
|
-
*
|
164
|
-
* To make it easier for the compiler to optimize the code by keeping variables
|
165
|
-
* in registers, these are declared as normal variables and manipulated using
|
166
|
-
* macros.
|
167
|
-
*/
|
168
|
-
|
169
|
-
/*
|
170
|
-
* The type for the bitbuffer variable ('bitbuf' described above). For best
|
171
|
-
* performance, this should have size equal to a machine word.
|
172
|
-
*
|
173
|
-
* 64-bit platforms have a significant advantage: they get a bigger bitbuffer
|
174
|
-
* which they have to fill less often.
|
175
|
-
*/
|
176
|
-
typedef machine_word_t bitbuf_t;
|
177
|
-
|
178
|
-
/*
|
179
|
-
* Number of bits the bitbuffer variable can hold.
|
180
|
-
*/
|
181
|
-
#define BITBUF_NBITS (8 * sizeof(bitbuf_t))
|
182
|
-
|
183
|
-
/*
|
184
|
-
* The maximum number of bits that can be requested to be in the bitbuffer
|
185
|
-
* variable. This is the maximum value of 'n' that can be passed
|
186
|
-
* to ENSURE_BITS(n).
|
187
|
-
*
|
188
|
-
* This is not equal to BITBUF_NBITS because we never read less than one byte at a
|
189
|
-
* time. If the bitbuffer variable contains more than (BITBUF_NBITS - 8) bits,
|
190
|
-
* then we can't read another byte without first consuming some bits. So the
|
191
|
-
* maximum count we can ensure is (BITBUF_NBITS - 7).
|
192
|
-
*/
|
193
|
-
#define MAX_ENSURE (BITBUF_NBITS - 7)
|
194
|
-
|
195
|
-
/*
|
196
|
-
* Evaluates to true if 'n' is a valid argument to ENSURE_BITS(n), or false if
|
197
|
-
* 'n' is too large to be passed to ENSURE_BITS(n). Note: if 'n' is a compile
|
198
|
-
* time constant, then this expression will be a compile-time constant.
|
199
|
-
* Therefore, CAN_ENSURE() can be used to choose between alternative
|
200
|
-
* implementations at compile time.
|
201
|
-
*/
|
202
|
-
#define CAN_ENSURE(n) ((n) <= MAX_ENSURE)
|
203
|
-
|
204
|
-
/*
|
205
|
-
* Fill the bitbuffer variable, reading one byte at a time.
|
206
|
-
*
|
207
|
-
* Note: if we would overrun the input buffer, we just don't read anything,
|
208
|
-
* leaving the bits as 0 but marking them as filled. This makes the
|
209
|
-
* implementation simpler because this removes the need to distinguish between
|
210
|
-
* "real" overruns and overruns that occur because of our own lookahead during
|
211
|
-
* Huffman decoding. The disadvantage is that a "real" overrun can go
|
212
|
-
* undetected, and libdeflate_deflate_decompress() may return a success status
|
213
|
-
* rather than the expected failure status if one occurs. However, this is
|
214
|
-
* irrelevant because even if this specific case were to be handled "correctly",
|
215
|
-
* one could easily come up with a different case where the compressed data
|
216
|
-
* would be corrupted in such a way that it fully retains its validity. Users
|
217
|
-
* should run a checksum against the uncompressed data if they wish to detect
|
218
|
-
* corruptions.
|
219
|
-
*/
|
220
|
-
#define FILL_BITS_BYTEWISE() \
|
221
|
-
do { \
|
222
|
-
if (likely(in_next != in_end)) \
|
223
|
-
bitbuf |= (bitbuf_t)*in_next++ << bitsleft; \
|
224
|
-
else \
|
225
|
-
overrun_count++; \
|
226
|
-
bitsleft += 8; \
|
227
|
-
} while (bitsleft <= BITBUF_NBITS - 8)
|
228
|
-
|
229
|
-
/*
|
230
|
-
* Fill the bitbuffer variable by reading the next word from the input buffer.
|
231
|
-
* This can be significantly faster than FILL_BITS_BYTEWISE(). However, for
|
232
|
-
* this to work correctly, the word must be interpreted in little-endian format.
|
233
|
-
* In addition, the memory access may be unaligned. Therefore, this method is
|
234
|
-
* most efficient on little-endian architectures that support fast unaligned
|
235
|
-
* access, such as x86 and x86_64.
|
236
|
-
*/
|
237
|
-
#define FILL_BITS_WORDWISE() \
|
238
|
-
do { \
|
239
|
-
bitbuf |= get_unaligned_leword(in_next) << bitsleft; \
|
240
|
-
in_next += (BITBUF_NBITS - bitsleft) >> 3; \
|
241
|
-
bitsleft += (BITBUF_NBITS - bitsleft) & ~7; \
|
242
|
-
} while (0)
|
243
|
-
|
244
|
-
/*
|
245
|
-
* Does the bitbuffer variable currently contain at least 'n' bits?
|
246
|
-
*/
|
247
|
-
#define HAVE_BITS(n) (bitsleft >= (n))
|
248
|
-
|
249
|
-
/*
|
250
|
-
* Load more bits from the input buffer until the specified number of bits is
|
251
|
-
* present in the bitbuffer variable. 'n' cannot be too large; see MAX_ENSURE
|
252
|
-
* and CAN_ENSURE().
|
253
|
-
*/
|
254
|
-
#define ENSURE_BITS(n) \
|
255
|
-
if (!HAVE_BITS(n)) { \
|
256
|
-
if (CPU_IS_LITTLE_ENDIAN() && \
|
257
|
-
UNALIGNED_ACCESS_IS_FAST && \
|
258
|
-
likely(in_end - in_next >= sizeof(bitbuf_t))) \
|
259
|
-
FILL_BITS_WORDWISE(); \
|
260
|
-
else \
|
261
|
-
FILL_BITS_BYTEWISE(); \
|
262
|
-
}
|
263
|
-
|
264
|
-
/*
|
265
|
-
* Return the next 'n' bits from the bitbuffer variable without removing them.
|
266
|
-
*/
|
267
|
-
#define BITS(n) ((u32)bitbuf & (((u32)1 << (n)) - 1))
|
268
|
-
|
269
|
-
/*
|
270
|
-
* Remove the next 'n' bits from the bitbuffer variable.
|
271
|
-
*/
|
272
|
-
#define REMOVE_BITS(n) (bitbuf >>= (n), bitsleft -= (n))
|
273
|
-
|
274
|
-
/*
|
275
|
-
* Remove and return the next 'n' bits from the bitbuffer variable.
|
276
|
-
*/
|
277
|
-
#define POP_BITS(n) (tmp32 = BITS(n), REMOVE_BITS(n), tmp32)
|
278
|
-
|
279
|
-
/*
|
280
|
-
* Align the input to the next byte boundary, discarding any remaining bits in
|
281
|
-
* the current byte.
|
282
|
-
*
|
283
|
-
* Note that if the bitbuffer variable currently contains more than 8 bits, then
|
284
|
-
* we must rewind 'in_next', effectively putting those bits back. Only the bits
|
285
|
-
* in what would be the "current" byte if we were reading one byte at a time can
|
286
|
-
* be actually discarded.
|
287
|
-
*/
|
288
|
-
#define ALIGN_INPUT() \
|
289
|
-
do { \
|
290
|
-
in_next -= (bitsleft >> 3) - MIN(overrun_count, bitsleft >> 3); \
|
291
|
-
bitbuf = 0; \
|
292
|
-
bitsleft = 0; \
|
293
|
-
} while(0)
|
294
|
-
|
295
|
-
/*
|
296
|
-
* Read a 16-bit value from the input. This must have been preceded by a call
|
297
|
-
* to ALIGN_INPUT(), and the caller must have already checked for overrun.
|
298
|
-
*/
|
299
|
-
#define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16)
|
300
|
-
|
301
|
-
/*****************************************************************************
|
302
|
-
* Huffman decoding *
|
303
|
-
*****************************************************************************/
|
304
|
-
|
305
|
-
/*
|
306
|
-
* A decode table for order TABLEBITS consists of a main table of (1 <<
|
307
|
-
* TABLEBITS) entries followed by a variable number of subtables.
|
308
|
-
*
|
309
|
-
* The decoding algorithm takes the next TABLEBITS bits of compressed data and
|
310
|
-
* uses them as an index into the decode table. The resulting entry is either a
|
311
|
-
* "direct entry", meaning that it contains the value desired, or a "subtable
|
312
|
-
* pointer", meaning that the entry references a subtable that must be indexed
|
313
|
-
* using more bits of the compressed data to decode the symbol.
|
314
|
-
*
|
315
|
-
* Each decode table (a main table along with its subtables, if any) is
|
316
|
-
* associated with a Huffman code. Logically, the result of a decode table
|
317
|
-
* lookup is a symbol from the alphabet from which the corresponding Huffman
|
318
|
-
* code was constructed. A symbol with codeword length n <= TABLEBITS is
|
319
|
-
* associated with 2**(TABLEBITS - n) direct entries in the table, whereas a
|
320
|
-
* symbol with codeword length n > TABLEBITS is associated with one or more
|
321
|
-
* subtable entries.
|
322
|
-
*
|
323
|
-
* On top of this basic design, we implement several optimizations:
|
324
|
-
*
|
325
|
-
* - We store the length of each codeword directly in each of its decode table
|
326
|
-
* entries. This allows the codeword length to be produced without indexing
|
327
|
-
* an additional table.
|
328
|
-
*
|
329
|
-
* - When beneficial, we don't store the Huffman symbol itself, but instead data
|
330
|
-
* generated from it. For example, when decoding an offset symbol in DEFLATE,
|
331
|
-
* it's more efficient if we can decode the offset base and number of extra
|
332
|
-
* offset bits directly rather than decoding the offset symbol and then
|
333
|
-
* looking up both of those values in an additional table or tables.
|
334
|
-
*
|
335
|
-
* The size of each decode table entry is 32 bits, which provides slightly
|
336
|
-
* better performance than 16-bit entries on 32 and 64 bit processors, provided
|
337
|
-
* that the table doesn't get so large that it takes up too much memory and
|
338
|
-
* starts generating cache misses. The bits of each decode table entry are
|
339
|
-
* defined as follows:
|
340
|
-
*
|
341
|
-
* - Bits 30 -- 31: flags (see below)
|
342
|
-
* - Bits 8 -- 29: decode result: a Huffman symbol or related data
|
343
|
-
* - Bits 0 -- 7: codeword length
|
344
|
-
*/
|
345
|
-
|
346
|
-
/*
|
347
|
-
* This flag is set in all main decode table entries that represent subtable
|
348
|
-
* pointers.
|
349
|
-
*/
|
350
|
-
#define HUFFDEC_SUBTABLE_POINTER 0x80000000
|
351
|
-
|
352
|
-
/*
|
353
|
-
* This flag is set in all entries in the litlen decode table that represent
|
354
|
-
* literals.
|
355
|
-
*/
|
356
|
-
#define HUFFDEC_LITERAL 0x40000000
|
357
|
-
|
358
|
-
/* Mask for extracting the codeword length from a decode table entry. */
|
359
|
-
#define HUFFDEC_LENGTH_MASK 0xFF
|
360
|
-
|
361
|
-
/* Shift to extract the decode result from a decode table entry. */
|
362
|
-
#define HUFFDEC_RESULT_SHIFT 8
|
363
|
-
|
364
|
-
/* The decode result for each precode symbol. There is no special optimization
|
365
|
-
* for the precode; the decode result is simply the symbol value. */
|
366
|
-
static const u32 precode_decode_results[DEFLATE_NUM_PRECODE_SYMS] = {
|
367
|
-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
|
368
|
-
};
|
369
|
-
|
370
|
-
/* The decode result for each litlen symbol. For literals, this is the literal
|
371
|
-
* value itself and the HUFFDEC_LITERAL flag. For lengths, this is the length
|
372
|
-
* base and the number of extra length bits. */
|
373
|
-
static const u32 litlen_decode_results[DEFLATE_NUM_LITLEN_SYMS] = {
|
374
|
-
#define ENTRY(literal) ((HUFFDEC_LITERAL >> HUFFDEC_RESULT_SHIFT) | (literal))
|
375
|
-
|
376
|
-
/* Literals */
|
377
|
-
ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) ,
|
378
|
-
ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) ,
|
379
|
-
ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) ,
|
380
|
-
ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) ,
|
381
|
-
ENTRY(16) , ENTRY(17) , ENTRY(18) , ENTRY(19) ,
|
382
|
-
ENTRY(20) , ENTRY(21) , ENTRY(22) , ENTRY(23) ,
|
383
|
-
ENTRY(24) , ENTRY(25) , ENTRY(26) , ENTRY(27) ,
|
384
|
-
ENTRY(28) , ENTRY(29) , ENTRY(30) , ENTRY(31) ,
|
385
|
-
ENTRY(32) , ENTRY(33) , ENTRY(34) , ENTRY(35) ,
|
386
|
-
ENTRY(36) , ENTRY(37) , ENTRY(38) , ENTRY(39) ,
|
387
|
-
ENTRY(40) , ENTRY(41) , ENTRY(42) , ENTRY(43) ,
|
388
|
-
ENTRY(44) , ENTRY(45) , ENTRY(46) , ENTRY(47) ,
|
389
|
-
ENTRY(48) , ENTRY(49) , ENTRY(50) , ENTRY(51) ,
|
390
|
-
ENTRY(52) , ENTRY(53) , ENTRY(54) , ENTRY(55) ,
|
391
|
-
ENTRY(56) , ENTRY(57) , ENTRY(58) , ENTRY(59) ,
|
392
|
-
ENTRY(60) , ENTRY(61) , ENTRY(62) , ENTRY(63) ,
|
393
|
-
ENTRY(64) , ENTRY(65) , ENTRY(66) , ENTRY(67) ,
|
394
|
-
ENTRY(68) , ENTRY(69) , ENTRY(70) , ENTRY(71) ,
|
395
|
-
ENTRY(72) , ENTRY(73) , ENTRY(74) , ENTRY(75) ,
|
396
|
-
ENTRY(76) , ENTRY(77) , ENTRY(78) , ENTRY(79) ,
|
397
|
-
ENTRY(80) , ENTRY(81) , ENTRY(82) , ENTRY(83) ,
|
398
|
-
ENTRY(84) , ENTRY(85) , ENTRY(86) , ENTRY(87) ,
|
399
|
-
ENTRY(88) , ENTRY(89) , ENTRY(90) , ENTRY(91) ,
|
400
|
-
ENTRY(92) , ENTRY(93) , ENTRY(94) , ENTRY(95) ,
|
401
|
-
ENTRY(96) , ENTRY(97) , ENTRY(98) , ENTRY(99) ,
|
402
|
-
ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) ,
|
403
|
-
ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) ,
|
404
|
-
ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) ,
|
405
|
-
ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) ,
|
406
|
-
ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) ,
|
407
|
-
ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) ,
|
408
|
-
ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) ,
|
409
|
-
ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) ,
|
410
|
-
ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) ,
|
411
|
-
ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) ,
|
412
|
-
ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) ,
|
413
|
-
ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) ,
|
414
|
-
ENTRY(148) , ENTRY(149) , ENTRY(150) , ENTRY(151) ,
|
415
|
-
ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) ,
|
416
|
-
ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) ,
|
417
|
-
ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) ,
|
418
|
-
ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) ,
|
419
|
-
ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) ,
|
420
|
-
ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) ,
|
421
|
-
ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) ,
|
422
|
-
ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) ,
|
423
|
-
ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) ,
|
424
|
-
ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) ,
|
425
|
-
ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) ,
|
426
|
-
ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) ,
|
427
|
-
ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) ,
|
428
|
-
ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) ,
|
429
|
-
ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) ,
|
430
|
-
ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) ,
|
431
|
-
ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) ,
|
432
|
-
ENTRY(220) , ENTRY(221) , ENTRY(222) , ENTRY(223) ,
|
433
|
-
ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) ,
|
434
|
-
ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) ,
|
435
|
-
ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) ,
|
436
|
-
ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) ,
|
437
|
-
ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) ,
|
438
|
-
ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) ,
|
439
|
-
ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) ,
|
440
|
-
ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) ,
|
441
|
-
#undef ENTRY
|
442
|
-
|
443
|
-
#define HUFFDEC_EXTRA_LENGTH_BITS_MASK 0xFF
|
444
|
-
#define HUFFDEC_LENGTH_BASE_SHIFT 8
|
445
|
-
#define HUFFDEC_END_OF_BLOCK_LENGTH 0
|
446
|
-
|
447
|
-
#define ENTRY(length_base, num_extra_bits) \
|
448
|
-
(((u32)(length_base) << HUFFDEC_LENGTH_BASE_SHIFT) | (num_extra_bits))
|
449
|
-
|
450
|
-
/* End of block */
|
451
|
-
ENTRY(HUFFDEC_END_OF_BLOCK_LENGTH, 0),
|
452
|
-
|
453
|
-
/* Lengths */
|
454
|
-
ENTRY(3 , 0) , ENTRY(4 , 0) , ENTRY(5 , 0) , ENTRY(6 , 0),
|
455
|
-
ENTRY(7 , 0) , ENTRY(8 , 0) , ENTRY(9 , 0) , ENTRY(10 , 0),
|
456
|
-
ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1),
|
457
|
-
ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2),
|
458
|
-
ENTRY(35 , 3) , ENTRY(43 , 3) , ENTRY(51 , 3) , ENTRY(59 , 3),
|
459
|
-
ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4),
|
460
|
-
ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5),
|
461
|
-
ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) ,
|
462
|
-
#undef ENTRY
|
463
|
-
};
|
464
|
-
|
465
|
-
/* The decode result for each offset symbol. This is the offset base and the
|
466
|
-
* number of extra offset bits. */
|
467
|
-
static const u32 offset_decode_results[DEFLATE_NUM_OFFSET_SYMS] = {
|
468
|
-
|
469
|
-
#define HUFFDEC_EXTRA_OFFSET_BITS_SHIFT 16
|
470
|
-
#define HUFFDEC_OFFSET_BASE_MASK (((u32)1 << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT) - 1)
|
471
|
-
|
472
|
-
#define ENTRY(offset_base, num_extra_bits) \
|
473
|
-
((offset_base) | ((u32)(num_extra_bits) << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT))
|
474
|
-
ENTRY(1 , 0) , ENTRY(2 , 0) , ENTRY(3 , 0) , ENTRY(4 , 0) ,
|
475
|
-
ENTRY(5 , 1) , ENTRY(7 , 1) , ENTRY(9 , 2) , ENTRY(13 , 2) ,
|
476
|
-
ENTRY(17 , 3) , ENTRY(25 , 3) , ENTRY(33 , 4) , ENTRY(49 , 4) ,
|
477
|
-
ENTRY(65 , 5) , ENTRY(97 , 5) , ENTRY(129 , 6) , ENTRY(193 , 6) ,
|
478
|
-
ENTRY(257 , 7) , ENTRY(385 , 7) , ENTRY(513 , 8) , ENTRY(769 , 8) ,
|
479
|
-
ENTRY(1025 , 9) , ENTRY(1537 , 9) , ENTRY(2049 , 10) , ENTRY(3073 , 10) ,
|
480
|
-
ENTRY(4097 , 11) , ENTRY(6145 , 11) , ENTRY(8193 , 12) , ENTRY(12289 , 12) ,
|
481
|
-
ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(32769 , 14) , ENTRY(49153 , 14) ,
|
482
|
-
#undef ENTRY
|
483
|
-
};
|
484
|
-
|
485
|
-
/* Construct a decode table entry from a decode result and codeword length. */
|
486
|
-
static forceinline u32
|
487
|
-
make_decode_table_entry(u32 result, u32 length)
|
488
|
-
{
|
489
|
-
return (result << HUFFDEC_RESULT_SHIFT) | length;
|
490
|
-
}
|
491
|
-
|
492
|
-
/*
|
493
|
-
* Build a table for fast decoding of symbols from a Huffman code. As input,
|
494
|
-
* this function takes the codeword length of each symbol which may be used in
|
495
|
-
* the code. As output, it produces a decode table for the canonical Huffman
|
496
|
-
* code described by the codeword lengths. The decode table is built with the
|
497
|
-
* assumption that it will be indexed with "bit-reversed" codewords, where the
|
498
|
-
* low-order bit is the first bit of the codeword. This format is used for all
|
499
|
-
* Huffman codes in DEFLATE.
|
500
|
-
*
|
501
|
-
* @decode_table
|
502
|
-
* The array in which the decode table will be generated. This array must
|
503
|
-
* have sufficient length; see the definition of the ENOUGH numbers.
|
504
|
-
* @lens
|
505
|
-
* An array which provides, for each symbol, the length of the
|
506
|
-
* corresponding codeword in bits, or 0 if the symbol is unused. This may
|
507
|
-
* alias @decode_table, since nothing is written to @decode_table until all
|
508
|
-
* @lens have been consumed. All codeword lengths are assumed to be <=
|
509
|
-
* @max_codeword_len but are otherwise considered untrusted. If they do
|
510
|
-
* not form a valid Huffman code, then the decode table is not built and
|
511
|
-
* %false is returned.
|
512
|
-
* @num_syms
|
513
|
-
* The number of symbols in the code, including all unused symbols.
|
514
|
-
* @decode_results
|
515
|
-
* An array which provides, for each symbol, the actual value to store into
|
516
|
-
* the decode table. This value will be directly produced as the result of
|
517
|
-
* decoding that symbol, thereby moving the indirection out of the decode
|
518
|
-
* loop and into the table initialization.
|
519
|
-
* @table_bits
|
520
|
-
* The log base-2 of the number of main table entries to use.
|
521
|
-
* @max_codeword_len
|
522
|
-
* The maximum allowed codeword length for this Huffman code.
|
523
|
-
* @working_space
|
524
|
-
* A temporary array of length '2 * (@max_codeword_len + 1) + @num_syms'.
|
525
|
-
*
|
526
|
-
* Returns %true if successful; %false if the codeword lengths do not form a
|
527
|
-
* valid Huffman code.
|
528
|
-
*/
|
529
|
-
static bool
|
530
|
-
build_decode_table(u32 decode_table[],
|
531
|
-
const len_t lens[],
|
532
|
-
const unsigned num_syms,
|
533
|
-
const u32 decode_results[],
|
534
|
-
const unsigned table_bits,
|
535
|
-
const unsigned max_codeword_len,
|
536
|
-
u16 working_space[])
|
537
|
-
{
|
538
|
-
u16 * const len_counts = &working_space[0];
|
539
|
-
u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
|
540
|
-
u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
|
541
|
-
unsigned len;
|
542
|
-
unsigned sym;
|
543
|
-
s32 remainder;
|
544
|
-
unsigned sym_idx;
|
545
|
-
unsigned codeword_len;
|
546
|
-
unsigned codeword_reversed = 0;
|
547
|
-
unsigned cur_codeword_prefix = -1;
|
548
|
-
unsigned cur_table_start = 0;
|
549
|
-
unsigned cur_table_bits = table_bits;
|
550
|
-
unsigned num_dropped_bits = 0;
|
551
|
-
const unsigned table_mask = (1U << table_bits) - 1;
|
552
|
-
|
553
|
-
/* Count how many symbols have each codeword length, including 0. */
|
554
|
-
for (len = 0; len <= max_codeword_len; len++)
|
555
|
-
len_counts[len] = 0;
|
556
|
-
for (sym = 0; sym < num_syms; sym++)
|
557
|
-
len_counts[lens[sym]]++;
|
558
|
-
|
559
|
-
/* Sort the symbols primarily by increasing codeword length and
|
560
|
-
* secondarily by increasing symbol value. */
|
561
|
-
|
562
|
-
/* Initialize 'offsets' so that offsets[len] is the number of codewords
|
563
|
-
* shorter than 'len' bits, including length 0. */
|
564
|
-
offsets[0] = 0;
|
565
|
-
for (len = 0; len < max_codeword_len; len++)
|
566
|
-
offsets[len + 1] = offsets[len] + len_counts[len];
|
567
|
-
|
568
|
-
/* Use the 'offsets' array to sort the symbols. */
|
569
|
-
for (sym = 0; sym < num_syms; sym++)
|
570
|
-
sorted_syms[offsets[lens[sym]]++] = sym;
|
571
|
-
|
572
|
-
/* It is already guaranteed that all lengths are <= max_codeword_len,
|
573
|
-
* but it cannot be assumed they form a complete prefix code. A
|
574
|
-
* codeword of length n should require a proportion of the codespace
|
575
|
-
* equaling (1/2)^n. The code is complete if and only if, by this
|
576
|
-
* measure, the codespace is exactly filled by the lengths. */
|
577
|
-
remainder = 1;
|
578
|
-
for (len = 1; len <= max_codeword_len; len++) {
|
579
|
-
remainder <<= 1;
|
580
|
-
remainder -= len_counts[len];
|
581
|
-
if (unlikely(remainder < 0)) {
|
582
|
-
/* The lengths overflow the codespace; that is, the code
|
583
|
-
* is over-subscribed. */
|
584
|
-
return false;
|
585
|
-
}
|
586
|
-
}
|
587
|
-
|
588
|
-
if (unlikely(remainder != 0)) {
|
589
|
-
/* The lengths do not fill the codespace; that is, they form an
|
590
|
-
* incomplete code. */
|
591
|
-
|
592
|
-
/* Initialize the table entries to default values. When
|
593
|
-
* decompressing a well-formed stream, these default values will
|
594
|
-
* never be used. But since a malformed stream might contain
|
595
|
-
* any bits at all, these entries need to be set anyway. */
|
596
|
-
u32 entry = make_decode_table_entry(decode_results[0], 1);
|
597
|
-
for (sym = 0; sym < (1U << table_bits); sym++)
|
598
|
-
decode_table[sym] = entry;
|
599
|
-
|
600
|
-
/* A completely empty code is permitted. */
|
601
|
-
if (remainder == (1U << max_codeword_len))
|
602
|
-
return true;
|
603
|
-
|
604
|
-
/* The code is nonempty and incomplete. Proceed only if there
|
605
|
-
* is a single used symbol and its codeword has length 1. The
|
606
|
-
* DEFLATE RFC is somewhat unclear regarding this case. What
|
607
|
-
* zlib's decompressor does is permit this case for
|
608
|
-
* literal/length and offset codes and assume the codeword is 0
|
609
|
-
* rather than 1. We do the same except we allow this case for
|
610
|
-
* precodes too. */
|
611
|
-
if (remainder != (1U << (max_codeword_len - 1)) ||
|
612
|
-
len_counts[1] != 1)
|
613
|
-
return false;
|
614
|
-
}
|
615
|
-
|
616
|
-
/* Generate the decode table entries. Since we process codewords from
|
617
|
-
* shortest to longest, the main portion of the decode table is filled
|
618
|
-
* first; then the subtables are filled. Note that it's already been
|
619
|
-
* verified that the code is nonempty and not over-subscribed. */
|
620
|
-
|
621
|
-
/* Start with the smallest codeword length and the smallest-valued
|
622
|
-
* symbol which has that codeword length. */
|
623
|
-
sym_idx = offsets[0];
|
624
|
-
codeword_len = 1;
|
625
|
-
while (len_counts[codeword_len] == 0)
|
626
|
-
codeword_len++;
|
627
|
-
|
628
|
-
for (;;) { /* For each used symbol and its codeword... */
|
629
|
-
unsigned sym;
|
630
|
-
u32 entry;
|
631
|
-
unsigned i;
|
632
|
-
unsigned end;
|
633
|
-
unsigned increment;
|
634
|
-
unsigned bit;
|
635
|
-
|
636
|
-
/* Get the next symbol. */
|
637
|
-
sym = sorted_syms[sym_idx];
|
638
|
-
|
639
|
-
/* Start a new subtable if the codeword is long enough to
|
640
|
-
* require a subtable, *and* the first 'table_bits' bits of the
|
641
|
-
* codeword don't match the prefix for the previous subtable if
|
642
|
-
* any. */
|
643
|
-
if (codeword_len > table_bits &&
|
644
|
-
(codeword_reversed & table_mask) != cur_codeword_prefix) {
|
645
|
-
|
646
|
-
cur_codeword_prefix = (codeword_reversed & table_mask);
|
647
|
-
|
648
|
-
cur_table_start += 1U << cur_table_bits;
|
649
|
-
|
650
|
-
/* Calculate the subtable length. If the codeword
|
651
|
-
* length exceeds 'table_bits' by n, the subtable needs
|
652
|
-
* at least 2**n entries. But it may need more; if
|
653
|
-
* there are fewer than 2**n codewords of length
|
654
|
-
* 'table_bits + n' remaining, then n will need to be
|
655
|
-
* incremented to bring in longer codewords until the
|
656
|
-
* subtable can be filled completely. Note that it
|
657
|
-
* always will, eventually, be possible to fill the
|
658
|
-
* subtable, since the only case where we may have an
|
659
|
-
* incomplete code is a single codeword of length 1,
|
660
|
-
* and that never requires any subtables. */
|
661
|
-
cur_table_bits = codeword_len - table_bits;
|
662
|
-
remainder = (s32)1 << cur_table_bits;
|
663
|
-
for (;;) {
|
664
|
-
remainder -= len_counts[table_bits +
|
665
|
-
cur_table_bits];
|
666
|
-
if (remainder <= 0)
|
667
|
-
break;
|
668
|
-
cur_table_bits++;
|
669
|
-
remainder <<= 1;
|
670
|
-
}
|
671
|
-
|
672
|
-
/* Create the entry that points from the main table to
|
673
|
-
* the subtable. This entry contains the index of the
|
674
|
-
* start of the subtable and the number of bits with
|
675
|
-
* which the subtable is indexed (the log base 2 of the
|
676
|
-
* number of entries it contains). */
|
677
|
-
decode_table[cur_codeword_prefix] =
|
678
|
-
HUFFDEC_SUBTABLE_POINTER |
|
679
|
-
make_decode_table_entry(cur_table_start,
|
680
|
-
cur_table_bits);
|
681
|
-
|
682
|
-
/* Now that we're filling a subtable, we need to drop
|
683
|
-
* the first 'table_bits' bits of the codewords. */
|
684
|
-
num_dropped_bits = table_bits;
|
685
|
-
}
|
686
|
-
|
687
|
-
/* Create the decode table entry, which packs the decode result
|
688
|
-
* and the codeword length (minus 'table_bits' for subtables)
|
689
|
-
* together. */
|
690
|
-
entry = make_decode_table_entry(decode_results[sym],
|
691
|
-
codeword_len - num_dropped_bits);
|
692
|
-
|
693
|
-
/* Fill in as many copies of the decode table entry as are
|
694
|
-
* needed. The number of entries to fill is a power of 2 and
|
695
|
-
* depends on the codeword length; it could be as few as 1 or as
|
696
|
-
* large as half the size of the table. Since the codewords are
|
697
|
-
* bit-reversed, the indices to fill are those with the codeword
|
698
|
-
* in its low bits; it's the high bits that vary. */
|
699
|
-
i = cur_table_start + (codeword_reversed >> num_dropped_bits);
|
700
|
-
end = cur_table_start + (1U << cur_table_bits);
|
701
|
-
increment = 1U << (codeword_len - num_dropped_bits);
|
702
|
-
do {
|
703
|
-
decode_table[i] = entry;
|
704
|
-
i += increment;
|
705
|
-
} while (i < end);
|
706
|
-
|
707
|
-
/* Advance to the next codeword by incrementing it. But since
|
708
|
-
* our codewords are bit-reversed, we must manipulate the bits
|
709
|
-
* ourselves rather than simply adding 1. */
|
710
|
-
bit = 1U << (codeword_len - 1);
|
711
|
-
while (codeword_reversed & bit)
|
712
|
-
bit >>= 1;
|
713
|
-
codeword_reversed &= bit - 1;
|
714
|
-
codeword_reversed |= bit;
|
715
|
-
|
716
|
-
/* Advance to the next symbol. This will either increase the
|
717
|
-
* codeword length, or keep the same codeword length but
|
718
|
-
* increase the symbol value. Note: since we are using
|
719
|
-
* bit-reversed codewords, we don't need to explicitly append
|
720
|
-
* zeroes to the codeword when the codeword length increases. */
|
721
|
-
if (++sym_idx == num_syms)
|
722
|
-
return true;
|
723
|
-
len_counts[codeword_len]--;
|
724
|
-
while (len_counts[codeword_len] == 0)
|
725
|
-
codeword_len++;
|
726
|
-
}
|
727
|
-
}
|
728
|
-
|
729
|
-
/* Build the decode table for the precode. */
|
730
|
-
static bool
|
731
|
-
build_precode_decode_table(struct libdeflate_decompressor *d)
|
732
|
-
{
|
733
|
-
/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
|
734
|
-
STATIC_ASSERT(PRECODE_TABLEBITS == 7 && PRECODE_ENOUGH == 128);
|
735
|
-
|
736
|
-
return build_decode_table(d->u.l.precode_decode_table,
|
737
|
-
d->u.precode_lens,
|
738
|
-
DEFLATE_NUM_PRECODE_SYMS,
|
739
|
-
precode_decode_results,
|
740
|
-
PRECODE_TABLEBITS,
|
741
|
-
DEFLATE_MAX_PRE_CODEWORD_LEN,
|
742
|
-
d->working_space);
|
743
|
-
}
|
744
|
-
|
745
|
-
/* Build the decode table for the literal/length code. */
|
746
|
-
static bool
|
747
|
-
build_litlen_decode_table(struct libdeflate_decompressor *d,
|
748
|
-
unsigned num_litlen_syms, unsigned num_offset_syms)
|
749
|
-
{
|
750
|
-
/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
|
751
|
-
STATIC_ASSERT(LITLEN_TABLEBITS == 10 && LITLEN_ENOUGH == 1334);
|
752
|
-
|
753
|
-
return build_decode_table(d->u.litlen_decode_table,
|
754
|
-
d->u.l.lens,
|
755
|
-
num_litlen_syms,
|
756
|
-
litlen_decode_results,
|
757
|
-
LITLEN_TABLEBITS,
|
758
|
-
DEFLATE_MAX_LITLEN_CODEWORD_LEN,
|
759
|
-
d->working_space);
|
760
|
-
}
|
761
|
-
|
762
|
-
/* Build the decode table for the offset code. */
|
763
|
-
static bool
|
764
|
-
build_offset_decode_table(struct libdeflate_decompressor *d,
|
765
|
-
unsigned num_litlen_syms, unsigned num_offset_syms)
|
766
|
-
{
|
767
|
-
/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
|
768
|
-
STATIC_ASSERT(OFFSET_TABLEBITS == 8 && OFFSET_ENOUGH == 402);
|
769
|
-
|
770
|
-
return build_decode_table(d->offset_decode_table,
|
771
|
-
d->u.l.lens + num_litlen_syms,
|
772
|
-
num_offset_syms,
|
773
|
-
offset_decode_results,
|
774
|
-
OFFSET_TABLEBITS,
|
775
|
-
DEFLATE_MAX_OFFSET_CODEWORD_LEN,
|
776
|
-
d->working_space);
|
777
|
-
}
|
778
|
-
|
779
|
-
static forceinline machine_word_t
|
780
|
-
repeat_byte(u8 b)
|
781
|
-
{
|
782
|
-
machine_word_t v;
|
783
|
-
|
784
|
-
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
785
|
-
|
786
|
-
v = b;
|
787
|
-
v |= v << 8;
|
788
|
-
v |= v << 16;
|
789
|
-
v |= v << ((WORDBITS == 64) ? 32 : 0);
|
790
|
-
return v;
|
791
|
-
}
|
792
|
-
|
793
|
-
static forceinline void
|
794
|
-
copy_word_unaligned(const void *src, void *dst)
|
795
|
-
{
|
796
|
-
store_word_unaligned(load_word_unaligned(src), dst);
|
797
|
-
}
|
798
|
-
|
799
|
-
/*****************************************************************************
|
800
|
-
* Main decompression routine
|
801
|
-
*****************************************************************************/
|
802
|
-
|
803
|
-
/*
 * Instantiate the decompression routine from decompress_impl.h.  The header
 * is parameterized by FUNCNAME (the name of the generated function) and
 * ATTRIBUTES (extra function attributes, empty for the default build).
 */
#define FUNCNAME	deflate_decompress_default
#define ATTRIBUTES
#include "decompress_impl.h"
#undef FUNCNAME
#undef ATTRIBUTES

/*
 * When x86 CPU feature detection is enabled, the compiler supports the "bmi2"
 * target attribute, and BMI2 is not already enabled for the whole build,
 * instantiate a second copy compiled for BMI2 and turn on runtime dispatch.
 */
#if X86_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_BMI2_TARGET && !defined(__BMI2__)
#  define FUNCNAME	deflate_decompress_bmi2
#  define ATTRIBUTES	__attribute__((target("bmi2")))
#  include "decompress_impl.h"
#  undef FUNCNAME
#  undef ATTRIBUTES
#  define DISPATCH_ENABLED	1
#else
#  define DISPATCH_ENABLED	0
#endif
|
820
|
-
|
821
|
-
#if DISPATCH_ENABLED

/* Pointer to a function with the signature of the decompression routines. */
typedef enum libdeflate_result (*decompress_func_t)
	(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_out_nbytes_ret);

static enum libdeflate_result
dispatch(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_out_nbytes_ret);

/*
 * The implementation that libdeflate_deflate_decompress() calls.  It starts
 * out as dispatch(), which replaces it with the selected implementation the
 * first time it runs.
 * NOTE(review): the pointer is written without any synchronization; confirm
 * this is acceptable for concurrent first calls.
 */
static decompress_func_t decompress_impl = dispatch;

/*
 * Select the decompression implementation to use on this CPU, remember it in
 * 'decompress_impl', and forward the current call to it.
 */
static enum libdeflate_result
dispatch(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_out_nbytes_ret)
{
	decompress_func_t chosen;

#if X86_CPU_FEATURES_ENABLED
	chosen = x86_have_cpu_features(X86_CPU_FEATURE_BMI2) ?
			deflate_decompress_bmi2 : deflate_decompress_default;
#else
	chosen = deflate_decompress_default;
#endif
	decompress_impl = chosen;
	return chosen(d, in, in_nbytes, out, out_nbytes_avail,
		      actual_out_nbytes_ret);
}

#endif /* DISPATCH_ENABLED */
|
853
|
-
|
854
|
-
|
855
|
-
/*
|
856
|
-
* This is the main DEFLATE decompression routine. See libdeflate.h for the
|
857
|
-
* documentation.
|
858
|
-
*
|
859
|
-
* Note that the real code is in decompress_impl.h. The part here just handles
|
860
|
-
* calling the appropriate implementation depending on the CPU features at
|
861
|
-
* runtime.
|
862
|
-
*/
|
863
|
-
LIBDEFLATEAPI enum libdeflate_result
|
864
|
-
libdeflate_deflate_decompress(struct libdeflate_decompressor * restrict d,
|
865
|
-
const void * restrict in, size_t in_nbytes,
|
866
|
-
void * restrict out, size_t out_nbytes_avail,
|
867
|
-
size_t *actual_out_nbytes_ret)
|
868
|
-
{
|
869
|
-
#if DISPATCH_ENABLED
|
870
|
-
return (*decompress_impl)(d, in, in_nbytes, out, out_nbytes_avail,
|
871
|
-
actual_out_nbytes_ret);
|
872
|
-
#else
|
873
|
-
return deflate_decompress_default(d, in, in_nbytes, out,
|
874
|
-
out_nbytes_avail,
|
875
|
-
actual_out_nbytes_ret);
|
876
|
-
#endif
|
877
|
-
}
|
878
|
-
|
879
|
-
LIBDEFLATEAPI struct libdeflate_decompressor *
|
880
|
-
libdeflate_alloc_decompressor(void)
|
881
|
-
{
|
882
|
-
return malloc(sizeof(struct libdeflate_decompressor));
|
883
|
-
}
|
884
|
-
|
885
|
-
/*
 * Release a decompressor by passing it to free().  Since free(NULL) is a
 * no-op, @d may be NULL.
 */
LIBDEFLATEAPI void
libdeflate_free_decompressor(struct libdeflate_decompressor *d)
{
	free(d);
}
|