libdeflate 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +9 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +52 -0
- data/Rakefile +15 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/libdeflate/extconf.rb +14 -0
- data/ext/libdeflate/libdeflate/.gitignore +19 -0
- data/ext/libdeflate/libdeflate/COPYING +21 -0
- data/ext/libdeflate/libdeflate/Makefile +231 -0
- data/ext/libdeflate/libdeflate/Makefile.msc +64 -0
- data/ext/libdeflate/libdeflate/NEWS +57 -0
- data/ext/libdeflate/libdeflate/README.md +170 -0
- data/ext/libdeflate/libdeflate/common/common_defs.h +351 -0
- data/ext/libdeflate/libdeflate/common/compiler_gcc.h +134 -0
- data/ext/libdeflate/libdeflate/common/compiler_msc.h +95 -0
- data/ext/libdeflate/libdeflate/lib/adler32.c +213 -0
- data/ext/libdeflate/libdeflate/lib/adler32_impl.h +281 -0
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +57 -0
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +13 -0
- data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +357 -0
- data/ext/libdeflate/libdeflate/lib/crc32.c +368 -0
- data/ext/libdeflate/libdeflate/lib/crc32_impl.h +286 -0
- data/ext/libdeflate/libdeflate/lib/crc32_table.h +526 -0
- data/ext/libdeflate/libdeflate/lib/decompress_impl.h +404 -0
- data/ext/libdeflate/libdeflate/lib/deflate_compress.c +2817 -0
- data/ext/libdeflate/libdeflate/lib/deflate_compress.h +14 -0
- data/ext/libdeflate/libdeflate/lib/deflate_constants.h +66 -0
- data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +889 -0
- data/ext/libdeflate/libdeflate/lib/gzip_compress.c +95 -0
- data/ext/libdeflate/libdeflate/lib/gzip_constants.h +45 -0
- data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +130 -0
- data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +405 -0
- data/ext/libdeflate/libdeflate/lib/lib_common.h +35 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +53 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +205 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +61 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +53 -0
- data/ext/libdeflate/libdeflate/lib/unaligned.h +202 -0
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +169 -0
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +48 -0
- data/ext/libdeflate/libdeflate/lib/zlib_compress.c +87 -0
- data/ext/libdeflate/libdeflate/lib/zlib_constants.h +21 -0
- data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +91 -0
- data/ext/libdeflate/libdeflate/libdeflate.h +274 -0
- data/ext/libdeflate/libdeflate/programs/benchmark.c +558 -0
- data/ext/libdeflate/libdeflate/programs/checksum.c +197 -0
- data/ext/libdeflate/libdeflate/programs/detect.sh +62 -0
- data/ext/libdeflate/libdeflate/programs/gzip.c +603 -0
- data/ext/libdeflate/libdeflate/programs/prog_util.c +530 -0
- data/ext/libdeflate/libdeflate/programs/prog_util.h +162 -0
- data/ext/libdeflate/libdeflate/programs/test_checksums.c +135 -0
- data/ext/libdeflate/libdeflate/programs/tgetopt.c +118 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +12 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +40 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +3 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +14 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +3 -0
- data/ext/libdeflate/libdeflate/tools/android_build.sh +104 -0
- data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +76 -0
- data/ext/libdeflate/libdeflate/tools/exec_tests.sh +30 -0
- data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +108 -0
- data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +100 -0
- data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +412 -0
- data/ext/libdeflate/libdeflate/tools/make-windows-releases +21 -0
- data/ext/libdeflate/libdeflate/tools/mips_build.sh +9 -0
- data/ext/libdeflate/libdeflate/tools/msc_test.bat +3 -0
- data/ext/libdeflate/libdeflate/tools/pgo_build.sh +23 -0
- data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +37 -0
- data/ext/libdeflate/libdeflate/tools/run_tests.sh +305 -0
- data/ext/libdeflate/libdeflate/tools/windows_build.sh +10 -0
- data/ext/libdeflate/libdeflate_ext.c +389 -0
- data/ext/libdeflate/libdeflate_ext.h +8 -0
- data/lib/libdeflate.rb +2 -0
- data/lib/libdeflate/version.rb +3 -0
- data/libdeflate.gemspec +33 -0
- metadata +230 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#ifndef LIB_DEFLATE_COMPRESS_H
#define LIB_DEFLATE_COMPRESS_H

#include "lib_common.h"

/*
 * DEFLATE compression itself is private to deflate_compress.c.  However, the
 * zlib and gzip wrappers need to know the compression level in order to
 * generate their headers, so a query function for it is exposed here.
 */

struct libdeflate_compressor;

extern unsigned int
deflate_get_compression_level(struct libdeflate_compressor *c);

#endif /* LIB_DEFLATE_COMPRESS_H */
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/*
 * deflate_constants.h - constants for the DEFLATE compression format
 */

#ifndef LIB_DEFLATE_CONSTANTS_H
#define LIB_DEFLATE_CONSTANTS_H

/* Valid block types */
#define DEFLATE_BLOCKTYPE_UNCOMPRESSED		0
#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN	1
#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN	2

/* Minimum and maximum supported match lengths (in bytes) */
#define DEFLATE_MIN_MATCH_LEN			3
#define DEFLATE_MAX_MATCH_LEN			258

/* Minimum and maximum supported match offsets (in bytes) */
#define DEFLATE_MIN_MATCH_OFFSET		1
#define DEFLATE_MAX_MATCH_OFFSET		32768

#define DEFLATE_MAX_WINDOW_SIZE			32768

/*
 * Number of symbols in each Huffman code.  For the literal/length and offset
 * codes these are upper bounds; a given block may use fewer symbols.
 */
#define DEFLATE_NUM_PRECODE_SYMS		19
#define DEFLATE_NUM_LITLEN_SYMS			288
#define DEFLATE_NUM_OFFSET_SYMS			32

/* The maximum number of symbols across all codes */
#define DEFLATE_MAX_NUM_SYMS			288

/* Division of symbols in the literal/length code */
#define DEFLATE_NUM_LITERALS			256
#define DEFLATE_END_OF_BLOCK			256
#define DEFLATE_NUM_LEN_SYMS			31

/* Maximum codeword length, in bits, within each Huffman code */
#define DEFLATE_MAX_PRE_CODEWORD_LEN		7
#define DEFLATE_MAX_LITLEN_CODEWORD_LEN		15
#define DEFLATE_MAX_OFFSET_CODEWORD_LEN		15

/* The maximum codeword length across all codes */
#define DEFLATE_MAX_CODEWORD_LEN		15

/* Maximum possible overrun when decoding codeword lengths */
#define DEFLATE_MAX_LENS_OVERRUN		137

/*
 * Maximum number of extra bits that may be required to represent a match
 * length or offset.
 *
 * TODO: are we going to have full DEFLATE64 support?  If so, up to 16
 * length bits must be supported.
 */
#define DEFLATE_MAX_EXTRA_LENGTH_BITS		5
#define DEFLATE_MAX_EXTRA_OFFSET_BITS		14

/*
 * The maximum number of bits in which a match can be represented.  This is the
 * absolute worst case: the longest possible Huffman codewords combined with
 * the maximum numbers of extra bits.
 */
#define DEFLATE_MAX_MATCH_BITS	\
	(DEFLATE_MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS + \
	DEFLATE_MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS)

#endif /* LIB_DEFLATE_CONSTANTS_H */
|
|
@@ -0,0 +1,889 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* deflate_decompress.c - a decompressor for DEFLATE
|
|
3
|
+
*
|
|
4
|
+
* Originally public domain; changes after 2016-09-07 are copyrighted.
|
|
5
|
+
*
|
|
6
|
+
* Copyright 2016 Eric Biggers
|
|
7
|
+
*
|
|
8
|
+
* Permission is hereby granted, free of charge, to any person
|
|
9
|
+
* obtaining a copy of this software and associated documentation
|
|
10
|
+
* files (the "Software"), to deal in the Software without
|
|
11
|
+
* restriction, including without limitation the rights to use,
|
|
12
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
* copies of the Software, and to permit persons to whom the
|
|
14
|
+
* Software is furnished to do so, subject to the following
|
|
15
|
+
* conditions:
|
|
16
|
+
*
|
|
17
|
+
* The above copyright notice and this permission notice shall be
|
|
18
|
+
* included in all copies or substantial portions of the Software.
|
|
19
|
+
*
|
|
20
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
21
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
22
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
23
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
24
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
25
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
26
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
27
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
|
28
|
+
*
|
|
29
|
+
* ---------------------------------------------------------------------------
|
|
30
|
+
*
|
|
31
|
+
* This is a highly optimized DEFLATE decompressor. When compiled with gcc on
|
|
32
|
+
* x86_64, it decompresses data in about 52% of the time of zlib (48% if BMI2
|
|
33
|
+
* instructions are available). On other architectures it should still be
|
|
34
|
+
* significantly faster than zlib, but the difference may be smaller.
|
|
35
|
+
*
|
|
36
|
+
* Why this is faster than zlib's implementation:
|
|
37
|
+
*
|
|
38
|
+
* - Word accesses rather than byte accesses when reading input
|
|
39
|
+
* - Word accesses rather than byte accesses when copying matches
|
|
40
|
+
* - Faster Huffman decoding combined with various DEFLATE-specific tricks
|
|
41
|
+
* - Larger bitbuffer variable that doesn't need to be filled as often
|
|
42
|
+
* - Other optimizations to remove unnecessary branches
|
|
43
|
+
* - Only full-buffer decompression is supported, so the code doesn't need to
|
|
44
|
+
* support stopping and resuming decompression.
|
|
45
|
+
* - On x86_64, compile a version of the decompression routine using BMI2
|
|
46
|
+
* instructions and use it automatically at runtime when supported.
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
#include <stdlib.h>
|
|
50
|
+
#include <string.h>
|
|
51
|
+
|
|
52
|
+
#include "deflate_constants.h"
|
|
53
|
+
#include "unaligned.h"
|
|
54
|
+
#include "x86_cpu_features.h"
|
|
55
|
+
|
|
56
|
+
#include "libdeflate.h"
|
|
57
|
+
|
|
58
|
+
/*
 * If the expression passed to SAFETY_CHECK() evaluates to false, then the
 * decompression routine immediately returns LIBDEFLATE_BAD_DATA, indicating the
 * compressed data is invalid.
 *
 * Theoretically, these checks could be disabled for specialized applications
 * where all input to the decompressor will be trusted.
 */
#if 0
#  pragma message("UNSAFE DECOMPRESSION IS ENABLED. THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!")
#  define SAFETY_CHECK(expr)	(void)(expr)
#else
/*
 * Wrapped in do { } while (0) so that the macro expands to a single statement:
 * a bare 'if' here could otherwise capture a following 'else' at a use site
 * (dangling-else hazard).  Behavior at existing call sites is unchanged.
 */
#  define SAFETY_CHECK(expr)				\
	do {						\
		if (unlikely(!(expr)))			\
			return LIBDEFLATE_BAD_DATA;	\
	} while (0)
#endif
|
|
72
|
+
|
|
73
|
+
/*
|
|
74
|
+
* Each TABLEBITS number is the base-2 logarithm of the number of entries in the
|
|
75
|
+
* main portion of the corresponding decode table. Each number should be large
|
|
76
|
+
* enough to ensure that for typical data, the vast majority of symbols can be
|
|
77
|
+
* decoded by a direct lookup of the next TABLEBITS bits of compressed data.
|
|
78
|
+
* However, this must be balanced against the fact that a larger table requires
|
|
79
|
+
* more memory and requires more time to fill.
|
|
80
|
+
*
|
|
81
|
+
* Note: you cannot change a TABLEBITS number without also changing the
|
|
82
|
+
* corresponding ENOUGH number!
|
|
83
|
+
*/
|
|
84
|
+
#define PRECODE_TABLEBITS 7
|
|
85
|
+
#define LITLEN_TABLEBITS 10
|
|
86
|
+
#define OFFSET_TABLEBITS 8
|
|
87
|
+
|
|
88
|
+
/*
|
|
89
|
+
* Each ENOUGH number is the maximum number of decode table entries that may be
|
|
90
|
+
* required for the corresponding Huffman code, including the main table and all
|
|
91
|
+
* subtables. Each number depends on three parameters:
|
|
92
|
+
*
|
|
93
|
+
* (1) the maximum number of symbols in the code (DEFLATE_NUM_*_SYMBOLS)
|
|
94
|
+
* (2) the number of main table bits (the TABLEBITS numbers defined above)
|
|
95
|
+
* (3) the maximum allowed codeword length (DEFLATE_MAX_*_CODEWORD_LEN)
|
|
96
|
+
*
|
|
97
|
+
* The ENOUGH numbers were computed using the utility program 'enough' from
|
|
98
|
+
* zlib. This program enumerates all possible relevant Huffman codes to find
|
|
99
|
+
* the worst-case usage of decode table entries.
|
|
100
|
+
*/
|
|
101
|
+
#define PRECODE_ENOUGH 128 /* enough 19 7 7 */
|
|
102
|
+
#define LITLEN_ENOUGH 1334 /* enough 288 10 15 */
|
|
103
|
+
#define OFFSET_ENOUGH 402 /* enough 32 8 15 */
|
|
104
|
+
|
|
105
|
+
/*
|
|
106
|
+
* Type for codeword lengths.
|
|
107
|
+
*/
|
|
108
|
+
typedef u8 len_t;
|
|
109
|
+
|
|
110
|
+
/*
|
|
111
|
+
* The main DEFLATE decompressor structure. Since this implementation only
|
|
112
|
+
* supports full buffer decompression, this structure does not store the entire
|
|
113
|
+
* decompression state, but rather only some arrays that are too large to
|
|
114
|
+
* comfortably allocate on the stack.
|
|
115
|
+
*/
|
|
116
|
+
struct libdeflate_decompressor {
|
|
117
|
+
|
|
118
|
+
/*
|
|
119
|
+
* The arrays aren't all needed at the same time. 'precode_lens' and
|
|
120
|
+
* 'precode_decode_table' are unneeded after 'lens' has been filled.
|
|
121
|
+
* Furthermore, 'lens' need not be retained after building the litlen
|
|
122
|
+
* and offset decode tables. In fact, 'lens' can be in union with
|
|
123
|
+
* 'litlen_decode_table' provided that 'offset_decode_table' is separate
|
|
124
|
+
* and is built first.
|
|
125
|
+
*/
|
|
126
|
+
|
|
127
|
+
union {
|
|
128
|
+
len_t precode_lens[DEFLATE_NUM_PRECODE_SYMS];
|
|
129
|
+
|
|
130
|
+
struct {
|
|
131
|
+
len_t lens[DEFLATE_NUM_LITLEN_SYMS +
|
|
132
|
+
DEFLATE_NUM_OFFSET_SYMS +
|
|
133
|
+
DEFLATE_MAX_LENS_OVERRUN];
|
|
134
|
+
|
|
135
|
+
u32 precode_decode_table[PRECODE_ENOUGH];
|
|
136
|
+
} l;
|
|
137
|
+
|
|
138
|
+
u32 litlen_decode_table[LITLEN_ENOUGH];
|
|
139
|
+
} u;
|
|
140
|
+
|
|
141
|
+
u32 offset_decode_table[OFFSET_ENOUGH];
|
|
142
|
+
|
|
143
|
+
u16 working_space[2 * (DEFLATE_MAX_CODEWORD_LEN + 1) +
|
|
144
|
+
DEFLATE_MAX_NUM_SYMS];
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
/*****************************************************************************
|
|
148
|
+
* Input bitstream *
|
|
149
|
+
*****************************************************************************/
|
|
150
|
+
|
|
151
|
+
/*
|
|
152
|
+
* The state of the "input bitstream" consists of the following variables:
|
|
153
|
+
*
|
|
154
|
+
* - in_next: pointer to the next unread byte in the input buffer
|
|
155
|
+
*
|
|
156
|
+
* - in_end: pointer just past the end of the input buffer
|
|
157
|
+
*
|
|
158
|
+
* - bitbuf: a word-sized variable containing bits that have been read from
|
|
159
|
+
* the input buffer. The buffered bits are right-aligned
|
|
160
|
+
* (they're the low-order bits).
|
|
161
|
+
*
|
|
162
|
+
* - bitsleft: number of bits in 'bitbuf' that are valid.
|
|
163
|
+
*
|
|
164
|
+
* To make it easier for the compiler to optimize the code by keeping variables
|
|
165
|
+
* in registers, these are declared as normal variables and manipulated using
|
|
166
|
+
* macros.
|
|
167
|
+
*/
|
|
168
|
+
|
|
169
|
+
/*
|
|
170
|
+
* The type for the bitbuffer variable ('bitbuf' described above). For best
|
|
171
|
+
* performance, this should have size equal to a machine word.
|
|
172
|
+
*
|
|
173
|
+
* 64-bit platforms have a significant advantage: they get a bigger bitbuffer
|
|
174
|
+
* which they have to fill less often.
|
|
175
|
+
*/
|
|
176
|
+
typedef machine_word_t bitbuf_t;
|
|
177
|
+
|
|
178
|
+
/*
|
|
179
|
+
* Number of bits the bitbuffer variable can hold.
|
|
180
|
+
*/
|
|
181
|
+
#define BITBUF_NBITS (8 * sizeof(bitbuf_t))
|
|
182
|
+
|
|
183
|
+
/*
|
|
184
|
+
* The maximum number of bits that can be requested to be in the bitbuffer
|
|
185
|
+
* variable. This is the maximum value of 'n' that can be passed
|
|
186
|
+
* ENSURE_BITS(n).
|
|
187
|
+
*
|
|
188
|
+
* This not equal to BITBUF_NBITS because we never read less than one byte at a
|
|
189
|
+
* time. If the bitbuffer variable contains more than (BITBUF_NBITS - 8) bits,
|
|
190
|
+
* then we can't read another byte without first consuming some bits. So the
|
|
191
|
+
* maximum count we can ensure is (BITBUF_NBITS - 7).
|
|
192
|
+
*/
|
|
193
|
+
#define MAX_ENSURE (BITBUF_NBITS - 7)
|
|
194
|
+
|
|
195
|
+
/*
|
|
196
|
+
* Evaluates to true if 'n' is a valid argument to ENSURE_BITS(n), or false if
|
|
197
|
+
* 'n' is too large to be passed to ENSURE_BITS(n). Note: if 'n' is a compile
|
|
198
|
+
* time constant, then this expression will be a compile-type constant.
|
|
199
|
+
* Therefore, CAN_ENSURE() can be used choose between alternative
|
|
200
|
+
* implementations at compile time.
|
|
201
|
+
*/
|
|
202
|
+
#define CAN_ENSURE(n) ((n) <= MAX_ENSURE)
|
|
203
|
+
|
|
204
|
+
/*
|
|
205
|
+
* Fill the bitbuffer variable, reading one byte at a time.
|
|
206
|
+
*
|
|
207
|
+
* Note: if we would overrun the input buffer, we just don't read anything,
|
|
208
|
+
* leaving the bits as 0 but marking them as filled. This makes the
|
|
209
|
+
* implementation simpler because this removes the need to distinguish between
|
|
210
|
+
* "real" overruns and overruns that occur because of our own lookahead during
|
|
211
|
+
* Huffman decoding. The disadvantage is that a "real" overrun can go
|
|
212
|
+
* undetected, and libdeflate_deflate_decompress() may return a success status
|
|
213
|
+
* rather than the expected failure status if one occurs. However, this is
|
|
214
|
+
* irrelevant because even if this specific case were to be handled "correctly",
|
|
215
|
+
* one could easily come up with a different case where the compressed data
|
|
216
|
+
* would be corrupted in such a way that fully retains its validity. Users
|
|
217
|
+
* should run a checksum against the uncompressed data if they wish to detect
|
|
218
|
+
* corruptions.
|
|
219
|
+
*/
|
|
220
|
+
#define FILL_BITS_BYTEWISE() \
|
|
221
|
+
do { \
|
|
222
|
+
if (likely(in_next != in_end)) \
|
|
223
|
+
bitbuf |= (bitbuf_t)*in_next++ << bitsleft; \
|
|
224
|
+
else \
|
|
225
|
+
overrun_count++; \
|
|
226
|
+
bitsleft += 8; \
|
|
227
|
+
} while (bitsleft <= BITBUF_NBITS - 8)
|
|
228
|
+
|
|
229
|
+
/*
|
|
230
|
+
* Fill the bitbuffer variable by reading the next word from the input buffer.
|
|
231
|
+
* This can be significantly faster than FILL_BITS_BYTEWISE(). However, for
|
|
232
|
+
* this to work correctly, the word must be interpreted in little-endian format.
|
|
233
|
+
* In addition, the memory access may be unaligned. Therefore, this method is
|
|
234
|
+
* most efficient on little-endian architectures that support fast unaligned
|
|
235
|
+
* access, such as x86 and x86_64.
|
|
236
|
+
*/
|
|
237
|
+
#define FILL_BITS_WORDWISE() \
|
|
238
|
+
do { \
|
|
239
|
+
bitbuf |= get_unaligned_leword(in_next) << bitsleft; \
|
|
240
|
+
in_next += (BITBUF_NBITS - bitsleft) >> 3; \
|
|
241
|
+
bitsleft += (BITBUF_NBITS - bitsleft) & ~7; \
|
|
242
|
+
} while (0)
|
|
243
|
+
|
|
244
|
+
/*
|
|
245
|
+
* Does the bitbuffer variable currently contain at least 'n' bits?
|
|
246
|
+
*/
|
|
247
|
+
#define HAVE_BITS(n) (bitsleft >= (n))
|
|
248
|
+
|
|
249
|
+
/*
|
|
250
|
+
* Load more bits from the input buffer until the specified number of bits is
|
|
251
|
+
* present in the bitbuffer variable. 'n' cannot be too large; see MAX_ENSURE
|
|
252
|
+
* and CAN_ENSURE().
|
|
253
|
+
*/
|
|
254
|
+
#define ENSURE_BITS(n) \
|
|
255
|
+
if (!HAVE_BITS(n)) { \
|
|
256
|
+
if (CPU_IS_LITTLE_ENDIAN() && \
|
|
257
|
+
UNALIGNED_ACCESS_IS_FAST && \
|
|
258
|
+
likely(in_end - in_next >= sizeof(bitbuf_t))) \
|
|
259
|
+
FILL_BITS_WORDWISE(); \
|
|
260
|
+
else \
|
|
261
|
+
FILL_BITS_BYTEWISE(); \
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
/*
|
|
265
|
+
* Return the next 'n' bits from the bitbuffer variable without removing them.
|
|
266
|
+
*/
|
|
267
|
+
#define BITS(n) ((u32)bitbuf & (((u32)1 << (n)) - 1))
|
|
268
|
+
|
|
269
|
+
/*
|
|
270
|
+
* Remove the next 'n' bits from the bitbuffer variable.
|
|
271
|
+
*/
|
|
272
|
+
#define REMOVE_BITS(n) (bitbuf >>= (n), bitsleft -= (n))
|
|
273
|
+
|
|
274
|
+
/*
|
|
275
|
+
* Remove and return the next 'n' bits from the bitbuffer variable.
|
|
276
|
+
*/
|
|
277
|
+
#define POP_BITS(n) (tmp32 = BITS(n), REMOVE_BITS(n), tmp32)
|
|
278
|
+
|
|
279
|
+
/*
|
|
280
|
+
* Align the input to the next byte boundary, discarding any remaining bits in
|
|
281
|
+
* the current byte.
|
|
282
|
+
*
|
|
283
|
+
* Note that if the bitbuffer variable currently contains more than 8 bits, then
|
|
284
|
+
* we must rewind 'in_next', effectively putting those bits back. Only the bits
|
|
285
|
+
* in what would be the "current" byte if we were reading one byte at a time can
|
|
286
|
+
* be actually discarded.
|
|
287
|
+
*/
|
|
288
|
+
#define ALIGN_INPUT() \
|
|
289
|
+
do { \
|
|
290
|
+
in_next -= (bitsleft >> 3) - MIN(overrun_count, bitsleft >> 3); \
|
|
291
|
+
bitbuf = 0; \
|
|
292
|
+
bitsleft = 0; \
|
|
293
|
+
} while(0)
|
|
294
|
+
|
|
295
|
+
/*
|
|
296
|
+
* Read a 16-bit value from the input. This must have been preceded by a call
|
|
297
|
+
* to ALIGN_INPUT(), and the caller must have already checked for overrun.
|
|
298
|
+
*/
|
|
299
|
+
#define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16)
|
|
300
|
+
|
|
301
|
+
/*****************************************************************************
 *				Huffman decoding                             *
 *****************************************************************************/

/*
 * A decode table for order TABLEBITS consists of a main table of
 * (1 << TABLEBITS) entries followed by a variable number of subtables.
 *
 * Decoding takes the next TABLEBITS bits of compressed data and uses them to
 * index the table.  The entry found is either a "direct entry" containing the
 * desired value, or a "subtable pointer" referencing a subtable that must be
 * indexed with further bits of compressed data to decode the symbol.
 *
 * Each decode table (main table plus its subtables, if any) corresponds to
 * one Huffman code, and a lookup logically yields a symbol of that code's
 * alphabet.  A symbol whose codeword length n <= TABLEBITS occupies
 * 2**(TABLEBITS - n) direct entries; one with n > TABLEBITS occupies one or
 * more subtable entries.
 *
 * Two optimizations are layered on this basic design:
 *
 *	- Each entry stores the codeword length directly, so producing the
 *	  length needs no extra table lookup.
 *
 *	- Where beneficial, an entry stores data derived from the Huffman
 *	  symbol rather than the symbol itself.  E.g. for DEFLATE offset
 *	  symbols it is faster to decode the offset base and extra-bit count
 *	  directly than to decode the symbol and then look those values up.
 *
 * Entries are 32 bits, which measures slightly faster than 16-bit entries on
 * 32- and 64-bit processors, as long as the table stays small enough to avoid
 * cache misses.  Entry layout:
 *
 *	- Bits 30 -- 31: flags (see below)
 *	- Bits 8  -- 29: decode result: a Huffman symbol or related data
 *	- Bits 0  -- 7:  codeword length
 */

/* Flag set in every main-table entry that is a subtable pointer. */
#define HUFFDEC_SUBTABLE_POINTER 0x80000000

/* Flag set in every litlen-table entry that represents a literal. */
#define HUFFDEC_LITERAL 0x40000000

/* Mask for extracting the codeword length from a decode table entry. */
#define HUFFDEC_LENGTH_MASK 0xFF

/* Shift to extract the decode result from a decode table entry. */
#define HUFFDEC_RESULT_SHIFT 8
|
|
363
|
+
|
|
364
|
+
/* The decode result for each precode symbol. There is no special optimization
|
|
365
|
+
* for the precode; the decode result is simply the symbol value. */
|
|
366
|
+
static const u32 precode_decode_results[DEFLATE_NUM_PRECODE_SYMS] = {
|
|
367
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
|
|
368
|
+
};
|
|
369
|
+
|
|
370
|
+
/* The decode result for each litlen symbol. For literals, this is the literal
|
|
371
|
+
* value itself and the HUFFDEC_LITERAL flag. For lengths, this is the length
|
|
372
|
+
* base and the number of extra length bits. */
|
|
373
|
+
static const u32 litlen_decode_results[DEFLATE_NUM_LITLEN_SYMS] = {
|
|
374
|
+
#define ENTRY(literal) ((HUFFDEC_LITERAL >> HUFFDEC_RESULT_SHIFT) | (literal))
|
|
375
|
+
|
|
376
|
+
/* Literals */
|
|
377
|
+
ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) ,
|
|
378
|
+
ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) ,
|
|
379
|
+
ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) ,
|
|
380
|
+
ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) ,
|
|
381
|
+
ENTRY(16) , ENTRY(17) , ENTRY(18) , ENTRY(19) ,
|
|
382
|
+
ENTRY(20) , ENTRY(21) , ENTRY(22) , ENTRY(23) ,
|
|
383
|
+
ENTRY(24) , ENTRY(25) , ENTRY(26) , ENTRY(27) ,
|
|
384
|
+
ENTRY(28) , ENTRY(29) , ENTRY(30) , ENTRY(31) ,
|
|
385
|
+
ENTRY(32) , ENTRY(33) , ENTRY(34) , ENTRY(35) ,
|
|
386
|
+
ENTRY(36) , ENTRY(37) , ENTRY(38) , ENTRY(39) ,
|
|
387
|
+
ENTRY(40) , ENTRY(41) , ENTRY(42) , ENTRY(43) ,
|
|
388
|
+
ENTRY(44) , ENTRY(45) , ENTRY(46) , ENTRY(47) ,
|
|
389
|
+
ENTRY(48) , ENTRY(49) , ENTRY(50) , ENTRY(51) ,
|
|
390
|
+
ENTRY(52) , ENTRY(53) , ENTRY(54) , ENTRY(55) ,
|
|
391
|
+
ENTRY(56) , ENTRY(57) , ENTRY(58) , ENTRY(59) ,
|
|
392
|
+
ENTRY(60) , ENTRY(61) , ENTRY(62) , ENTRY(63) ,
|
|
393
|
+
ENTRY(64) , ENTRY(65) , ENTRY(66) , ENTRY(67) ,
|
|
394
|
+
ENTRY(68) , ENTRY(69) , ENTRY(70) , ENTRY(71) ,
|
|
395
|
+
ENTRY(72) , ENTRY(73) , ENTRY(74) , ENTRY(75) ,
|
|
396
|
+
ENTRY(76) , ENTRY(77) , ENTRY(78) , ENTRY(79) ,
|
|
397
|
+
ENTRY(80) , ENTRY(81) , ENTRY(82) , ENTRY(83) ,
|
|
398
|
+
ENTRY(84) , ENTRY(85) , ENTRY(86) , ENTRY(87) ,
|
|
399
|
+
ENTRY(88) , ENTRY(89) , ENTRY(90) , ENTRY(91) ,
|
|
400
|
+
ENTRY(92) , ENTRY(93) , ENTRY(94) , ENTRY(95) ,
|
|
401
|
+
ENTRY(96) , ENTRY(97) , ENTRY(98) , ENTRY(99) ,
|
|
402
|
+
ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) ,
|
|
403
|
+
ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) ,
|
|
404
|
+
ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) ,
|
|
405
|
+
ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) ,
|
|
406
|
+
ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) ,
|
|
407
|
+
ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) ,
|
|
408
|
+
ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) ,
|
|
409
|
+
ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) ,
|
|
410
|
+
ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) ,
|
|
411
|
+
ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) ,
|
|
412
|
+
ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) ,
|
|
413
|
+
ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) ,
|
|
414
|
+
ENTRY(148) , ENTRY(149) , ENTRY(150) , ENTRY(151) ,
|
|
415
|
+
ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) ,
|
|
416
|
+
ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) ,
|
|
417
|
+
ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) ,
|
|
418
|
+
ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) ,
|
|
419
|
+
ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) ,
|
|
420
|
+
ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) ,
|
|
421
|
+
ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) ,
|
|
422
|
+
ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) ,
|
|
423
|
+
ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) ,
|
|
424
|
+
ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) ,
|
|
425
|
+
ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) ,
|
|
426
|
+
ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) ,
|
|
427
|
+
ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) ,
|
|
428
|
+
ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) ,
|
|
429
|
+
ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) ,
|
|
430
|
+
ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) ,
|
|
431
|
+
ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) ,
|
|
432
|
+
ENTRY(220) , ENTRY(221) , ENTRY(222) , ENTRY(223) ,
|
|
433
|
+
ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) ,
|
|
434
|
+
ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) ,
|
|
435
|
+
ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) ,
|
|
436
|
+
ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) ,
|
|
437
|
+
ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) ,
|
|
438
|
+
ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) ,
|
|
439
|
+
ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) ,
|
|
440
|
+
ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) ,
|
|
441
|
+
#undef ENTRY
|
|
442
|
+
|
|
443
|
+
#define HUFFDEC_EXTRA_LENGTH_BITS_MASK 0xFF
|
|
444
|
+
#define HUFFDEC_LENGTH_BASE_SHIFT 8
|
|
445
|
+
#define HUFFDEC_END_OF_BLOCK_LENGTH 0
|
|
446
|
+
|
|
447
|
+
#define ENTRY(length_base, num_extra_bits) \
|
|
448
|
+
(((u32)(length_base) << HUFFDEC_LENGTH_BASE_SHIFT) | (num_extra_bits))
|
|
449
|
+
|
|
450
|
+
/* End of block */
|
|
451
|
+
ENTRY(HUFFDEC_END_OF_BLOCK_LENGTH, 0),
|
|
452
|
+
|
|
453
|
+
/* Lengths */
|
|
454
|
+
ENTRY(3 , 0) , ENTRY(4 , 0) , ENTRY(5 , 0) , ENTRY(6 , 0),
|
|
455
|
+
ENTRY(7 , 0) , ENTRY(8 , 0) , ENTRY(9 , 0) , ENTRY(10 , 0),
|
|
456
|
+
ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1),
|
|
457
|
+
ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2),
|
|
458
|
+
ENTRY(35 , 3) , ENTRY(43 , 3) , ENTRY(51 , 3) , ENTRY(59 , 3),
|
|
459
|
+
ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4),
|
|
460
|
+
ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5),
|
|
461
|
+
ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) ,
|
|
462
|
+
#undef ENTRY
|
|
463
|
+
};
|
|
464
|
+
|
|
465
|
+
/* The decode result for each offset symbol.  This is the offset base and the
 * number of extra offset bits. */
static const u32 offset_decode_results[DEFLATE_NUM_OFFSET_SYMS] = {

/* Entry bit layout: the offset base occupies the low 16 bits; the count of
 * extra bits to read from the bitstream sits above it.  The base/extra-bits
 * pairs below follow the DEFLATE offset table (RFC 1951, section 3.2.5). */
#define HUFFDEC_EXTRA_OFFSET_BITS_SHIFT 16
#define HUFFDEC_OFFSET_BASE_MASK (((u32)1 << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT) - 1)

#define ENTRY(offset_base, num_extra_bits) \
	((offset_base) | ((u32)(num_extra_bits) << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT))
	ENTRY(1     , 0)  , ENTRY(2     , 0)  , ENTRY(3     , 0)  , ENTRY(4     , 0)  ,
	ENTRY(5     , 1)  , ENTRY(7     , 1)  , ENTRY(9     , 2)  , ENTRY(13    , 2)  ,
	ENTRY(17    , 3)  , ENTRY(25    , 3)  , ENTRY(33    , 4)  , ENTRY(49    , 4)  ,
	ENTRY(65    , 5)  , ENTRY(97    , 5)  , ENTRY(129   , 6)  , ENTRY(193   , 6)  ,
	ENTRY(257   , 7)  , ENTRY(385   , 7)  , ENTRY(513   , 8)  , ENTRY(769   , 8)  ,
	ENTRY(1025  , 9)  , ENTRY(1537  , 9)  , ENTRY(2049  , 10) , ENTRY(3073  , 10) ,
	ENTRY(4097  , 11) , ENTRY(6145  , 11) , ENTRY(8193  , 12) , ENTRY(12289 , 12) ,
	ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(32769 , 14) , ENTRY(49153 , 14) ,
#undef ENTRY
};
|
|
484
|
+
|
|
485
|
+
/* Construct a decode table entry from a decode result and codeword length. */
|
|
486
|
+
static forceinline u32
|
|
487
|
+
make_decode_table_entry(u32 result, u32 length)
|
|
488
|
+
{
|
|
489
|
+
return (result << HUFFDEC_RESULT_SHIFT) | length;
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
/*
 * Build a table for fast decoding of symbols from a Huffman code.  As input,
 * this function takes the codeword length of each symbol which may be used in
 * the code.  As output, it produces a decode table for the canonical Huffman
 * code described by the codeword lengths.  The decode table is built with the
 * assumption that it will be indexed with "bit-reversed" codewords, where the
 * low-order bit is the first bit of the codeword.  This format is used for all
 * Huffman codes in DEFLATE.
 *
 * @decode_table
 *	The array in which the decode table will be generated.  This array must
 *	have sufficient length; see the definition of the ENOUGH numbers.
 * @lens
 *	An array which provides, for each symbol, the length of the
 *	corresponding codeword in bits, or 0 if the symbol is unused.  This may
 *	alias @decode_table, since nothing is written to @decode_table until all
 *	@lens have been consumed.  All codeword lengths are assumed to be <=
 *	@max_codeword_len but are otherwise considered untrusted.  If they do
 *	not form a valid Huffman code, then the decode table is not built and
 *	%false is returned.
 * @num_syms
 *	The number of symbols in the code, including all unused symbols.
 * @decode_results
 *	An array which provides, for each symbol, the actual value to store into
 *	the decode table.  This value will be directly produced as the result of
 *	decoding that symbol, thereby moving the indirection out of the decode
 *	loop and into the table initialization.
 * @table_bits
 *	The log base-2 of the number of main table entries to use.
 * @max_codeword_len
 *	The maximum allowed codeword length for this Huffman code.
 * @working_space
 *	A temporary array of length '2 * (@max_codeword_len + 1) + @num_syms'.
 *
 * Returns %true if successful; %false if the codeword lengths do not form a
 * valid Huffman code.
 */
static bool
build_decode_table(u32 decode_table[],
		   const len_t lens[],
		   const unsigned num_syms,
		   const u32 decode_results[],
		   const unsigned table_bits,
		   const unsigned max_codeword_len,
		   u16 working_space[])
{
	/* Carve @working_space into three arrays: per-length counts, prefix
	 * sums for the counting sort, and the sorted symbol list. */
	u16 * const len_counts = &working_space[0];
	u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
	u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
	unsigned len;
	unsigned sym;
	s32 remainder;
	unsigned sym_idx;
	unsigned codeword_len;
	unsigned codeword_reversed = 0;
	/* (unsigned)-1 wraps to UINT_MAX, which can never equal a masked
	 * codeword prefix, so the first long codeword always starts a
	 * subtable. */
	unsigned cur_codeword_prefix = -1;
	unsigned cur_table_start = 0;
	unsigned cur_table_bits = table_bits;
	unsigned num_dropped_bits = 0;
	const unsigned table_mask = (1U << table_bits) - 1;

	/* Count how many symbols have each codeword length, including 0. */
	for (len = 0; len <= max_codeword_len; len++)
		len_counts[len] = 0;
	for (sym = 0; sym < num_syms; sym++)
		len_counts[lens[sym]]++;

	/* Sort the symbols primarily by increasing codeword length and
	 * secondarily by increasing symbol value (counting sort). */

	/* Initialize 'offsets' so that offsets[len] is the number of codewords
	 * shorter than 'len' bits, including length 0. */
	offsets[0] = 0;
	for (len = 0; len < max_codeword_len; len++)
		offsets[len + 1] = offsets[len] + len_counts[len];

	/* Use the 'offsets' array to sort the symbols. */
	for (sym = 0; sym < num_syms; sym++)
		sorted_syms[offsets[lens[sym]]++] = sym;

	/* It is already guaranteed that all lengths are <= max_codeword_len,
	 * but it cannot be assumed they form a complete prefix code.  A
	 * codeword of length n should require a proportion of the codespace
	 * equaling (1/2)^n.  The code is complete if and only if, by this
	 * measure, the codespace is exactly filled by the lengths. */
	remainder = 1;
	for (len = 1; len <= max_codeword_len; len++) {
		remainder <<= 1;
		remainder -= len_counts[len];
		if (unlikely(remainder < 0)) {
			/* The lengths overflow the codespace; that is, the code
			 * is over-subscribed. */
			return false;
		}
	}

	if (unlikely(remainder != 0)) {
		/* The lengths do not fill the codespace; that is, they form an
		 * incomplete code. */

		/* Initialize the table entries to default values.  When
		 * decompressing a well-formed stream, these default values will
		 * never be used.  But since a malformed stream might contain
		 * any bits at all, these entries need to be set anyway. */
		u32 entry = make_decode_table_entry(decode_results[0], 1);
		for (sym = 0; sym < (1U << table_bits); sym++)
			decode_table[sym] = entry;

		/* A completely empty code is permitted. */
		if (remainder == (1U << max_codeword_len))
			return true;

		/* The code is nonempty and incomplete.  Proceed only if there
		 * is a single used symbol and its codeword has length 1.  The
		 * DEFLATE RFC is somewhat unclear regarding this case.  What
		 * zlib's decompressor does is permit this case for
		 * literal/length and offset codes and assume the codeword is 0
		 * rather than 1.  We do the same except we allow this case for
		 * precodes too. */
		if (remainder != (1U << (max_codeword_len - 1)) ||
		    len_counts[1] != 1)
			return false;
	}

	/* Generate the decode table entries.  Since we process codewords from
	 * shortest to longest, the main portion of the decode table is filled
	 * first; then the subtables are filled.  Note that it's already been
	 * verified that the code is nonempty and not over-subscribed. */

	/* Start with the smallest codeword length and the smallest-valued
	 * symbol which has that codeword length. */
	sym_idx = offsets[0];
	codeword_len = 1;
	while (len_counts[codeword_len] == 0)
		codeword_len++;

	for (;;) { /* For each used symbol and its codeword... */
		unsigned sym;  /* intentionally shadows the outer 'sym' */
		u32 entry;
		unsigned i;
		unsigned end;
		unsigned increment;
		unsigned bit;

		/* Get the next symbol. */
		sym = sorted_syms[sym_idx];

		/* Start a new subtable if the codeword is long enough to
		 * require a subtable, *and* the first 'table_bits' bits of the
		 * codeword don't match the prefix for the previous subtable if
		 * any. */
		if (codeword_len > table_bits &&
		    (codeword_reversed & table_mask) != cur_codeword_prefix) {

			cur_codeword_prefix = (codeword_reversed & table_mask);

			cur_table_start += 1U << cur_table_bits;

			/* Calculate the subtable length.  If the codeword
			 * length exceeds 'table_bits' by n, the subtable needs
			 * at least 2**n entries.  But it may need more; if
			 * there are fewer than 2**n codewords of length
			 * 'table_bits + n' remaining, then n will need to be
			 * incremented to bring in longer codewords until the
			 * subtable can be filled completely.  Note that it
			 * always will, eventually, be possible to fill the
			 * subtable, since the only case where we may have an
			 * incomplete code is a single codeword of length 1,
			 * and that never requires any subtables. */
			cur_table_bits = codeword_len - table_bits;
			remainder = (s32)1 << cur_table_bits;
			for (;;) {
				remainder -= len_counts[table_bits +
							cur_table_bits];
				if (remainder <= 0)
					break;
				cur_table_bits++;
				remainder <<= 1;
			}

			/* Create the entry that points from the main table to
			 * the subtable.  This entry contains the index of the
			 * start of the subtable and the number of bits with
			 * which the subtable is indexed (the log base 2 of the
			 * number of entries it contains). */
			decode_table[cur_codeword_prefix] =
				HUFFDEC_SUBTABLE_POINTER |
				make_decode_table_entry(cur_table_start,
							cur_table_bits);

			/* Now that we're filling a subtable, we need to drop
			 * the first 'table_bits' bits of the codewords. */
			num_dropped_bits = table_bits;
		}

		/* Create the decode table entry, which packs the decode result
		 * and the codeword length (minus 'table_bits' for subtables)
		 * together. */
		entry = make_decode_table_entry(decode_results[sym],
						codeword_len - num_dropped_bits);

		/* Fill in as many copies of the decode table entry as are
		 * needed.  The number of entries to fill is a power of 2 and
		 * depends on the codeword length; it could be as few as 1 or as
		 * large as half the size of the table.  Since the codewords are
		 * bit-reversed, the indices to fill are those with the codeword
		 * in its low bits; it's the high bits that vary. */
		i = cur_table_start + (codeword_reversed >> num_dropped_bits);
		end = cur_table_start + (1U << cur_table_bits);
		increment = 1U << (codeword_len - num_dropped_bits);
		do {
			decode_table[i] = entry;
			i += increment;
		} while (i < end);

		/* Advance to the next codeword by incrementing it.  But since
		 * our codewords are bit-reversed, we must manipulate the bits
		 * ourselves rather than simply adding 1. */
		bit = 1U << (codeword_len - 1);
		while (codeword_reversed & bit)
			bit >>= 1;
		codeword_reversed &= bit - 1;
		codeword_reversed |= bit;

		/* Advance to the next symbol.  This will either increase the
		 * codeword length, or keep the same codeword length but
		 * increase the symbol value.  Note: since we are using
		 * bit-reversed codewords, we don't need to explicitly append
		 * zeroes to the codeword when the codeword length increases. */
		if (++sym_idx == num_syms)
			return true;
		len_counts[codeword_len]--;
		while (len_counts[codeword_len] == 0)
			codeword_len++;
	}
}
|
|
728
|
+
|
|
729
|
+
/* Build the decode table for the precode. */
|
|
730
|
+
static bool
|
|
731
|
+
build_precode_decode_table(struct libdeflate_decompressor *d)
|
|
732
|
+
{
|
|
733
|
+
/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
|
|
734
|
+
STATIC_ASSERT(PRECODE_TABLEBITS == 7 && PRECODE_ENOUGH == 128);
|
|
735
|
+
|
|
736
|
+
return build_decode_table(d->u.l.precode_decode_table,
|
|
737
|
+
d->u.precode_lens,
|
|
738
|
+
DEFLATE_NUM_PRECODE_SYMS,
|
|
739
|
+
precode_decode_results,
|
|
740
|
+
PRECODE_TABLEBITS,
|
|
741
|
+
DEFLATE_MAX_PRE_CODEWORD_LEN,
|
|
742
|
+
d->working_space);
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
/* Build the decode table for the literal/length code. */
|
|
746
|
+
static bool
|
|
747
|
+
build_litlen_decode_table(struct libdeflate_decompressor *d,
|
|
748
|
+
unsigned num_litlen_syms, unsigned num_offset_syms)
|
|
749
|
+
{
|
|
750
|
+
/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
|
|
751
|
+
STATIC_ASSERT(LITLEN_TABLEBITS == 10 && LITLEN_ENOUGH == 1334);
|
|
752
|
+
|
|
753
|
+
return build_decode_table(d->u.litlen_decode_table,
|
|
754
|
+
d->u.l.lens,
|
|
755
|
+
num_litlen_syms,
|
|
756
|
+
litlen_decode_results,
|
|
757
|
+
LITLEN_TABLEBITS,
|
|
758
|
+
DEFLATE_MAX_LITLEN_CODEWORD_LEN,
|
|
759
|
+
d->working_space);
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
/* Build the decode table for the offset code. */
|
|
763
|
+
static bool
|
|
764
|
+
build_offset_decode_table(struct libdeflate_decompressor *d,
|
|
765
|
+
unsigned num_litlen_syms, unsigned num_offset_syms)
|
|
766
|
+
{
|
|
767
|
+
/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
|
|
768
|
+
STATIC_ASSERT(OFFSET_TABLEBITS == 8 && OFFSET_ENOUGH == 402);
|
|
769
|
+
|
|
770
|
+
return build_decode_table(d->offset_decode_table,
|
|
771
|
+
d->u.l.lens + num_litlen_syms,
|
|
772
|
+
num_offset_syms,
|
|
773
|
+
offset_decode_results,
|
|
774
|
+
OFFSET_TABLEBITS,
|
|
775
|
+
DEFLATE_MAX_OFFSET_CODEWORD_LEN,
|
|
776
|
+
d->working_space);
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
static forceinline machine_word_t
|
|
780
|
+
repeat_byte(u8 b)
|
|
781
|
+
{
|
|
782
|
+
machine_word_t v;
|
|
783
|
+
|
|
784
|
+
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
|
785
|
+
|
|
786
|
+
v = b;
|
|
787
|
+
v |= v << 8;
|
|
788
|
+
v |= v << 16;
|
|
789
|
+
v |= v << ((WORDBITS == 64) ? 32 : 0);
|
|
790
|
+
return v;
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
static forceinline void
|
|
794
|
+
copy_word_unaligned(const void *src, void *dst)
|
|
795
|
+
{
|
|
796
|
+
store_word_unaligned(load_word_unaligned(src), dst);
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
/*****************************************************************************
|
|
800
|
+
* Main decompression routine
|
|
801
|
+
*****************************************************************************/
|
|
802
|
+
|
|
803
|
+
#define FUNCNAME deflate_decompress_default
|
|
804
|
+
#define ATTRIBUTES
|
|
805
|
+
#include "decompress_impl.h"
|
|
806
|
+
#undef FUNCNAME
|
|
807
|
+
#undef ATTRIBUTES
|
|
808
|
+
|
|
809
|
+
#if X86_CPU_FEATURES_ENABLED && \
|
|
810
|
+
COMPILER_SUPPORTS_BMI2_TARGET && !defined(__BMI2__)
|
|
811
|
+
# define FUNCNAME deflate_decompress_bmi2
|
|
812
|
+
# define ATTRIBUTES __attribute__((target("bmi2")))
|
|
813
|
+
# include "decompress_impl.h"
|
|
814
|
+
# undef FUNCNAME
|
|
815
|
+
# undef ATTRIBUTES
|
|
816
|
+
# define DISPATCH_ENABLED 1
|
|
817
|
+
#else
|
|
818
|
+
# define DISPATCH_ENABLED 0
|
|
819
|
+
#endif
|
|
820
|
+
|
|
821
|
+
#if DISPATCH_ENABLED
|
|
822
|
+
|
|
823
|
+
static enum libdeflate_result
|
|
824
|
+
dispatch(struct libdeflate_decompressor * restrict d,
|
|
825
|
+
const void * restrict in, size_t in_nbytes,
|
|
826
|
+
void * restrict out, size_t out_nbytes_avail,
|
|
827
|
+
size_t *actual_out_nbytes_ret);
|
|
828
|
+
|
|
829
|
+
typedef enum libdeflate_result (*decompress_func_t)
|
|
830
|
+
(struct libdeflate_decompressor * restrict d,
|
|
831
|
+
const void * restrict in, size_t in_nbytes,
|
|
832
|
+
void * restrict out, size_t out_nbytes_avail,
|
|
833
|
+
size_t *actual_out_nbytes_ret);
|
|
834
|
+
|
|
835
|
+
static decompress_func_t decompress_impl = dispatch;
|
|
836
|
+
|
|
837
|
+
static enum libdeflate_result
|
|
838
|
+
dispatch(struct libdeflate_decompressor * restrict d,
|
|
839
|
+
const void * restrict in, size_t in_nbytes,
|
|
840
|
+
void * restrict out, size_t out_nbytes_avail,
|
|
841
|
+
size_t *actual_out_nbytes_ret)
|
|
842
|
+
{
|
|
843
|
+
decompress_func_t f = deflate_decompress_default;
|
|
844
|
+
#if X86_CPU_FEATURES_ENABLED
|
|
845
|
+
if (x86_have_cpu_features(X86_CPU_FEATURE_BMI2))
|
|
846
|
+
f = deflate_decompress_bmi2;
|
|
847
|
+
#endif
|
|
848
|
+
decompress_impl = f;
|
|
849
|
+
return (*f)(d, in, in_nbytes, out, out_nbytes_avail,
|
|
850
|
+
actual_out_nbytes_ret);
|
|
851
|
+
}
|
|
852
|
+
#endif /* DISPATCH_ENABLED */
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
/*
 * This is the main DEFLATE decompression routine.  See libdeflate.h for the
 * documentation.
 *
 * Note that the real code is in decompress_impl.h.  The part here just handles
 * calling the appropriate implementation depending on the CPU features at
 * runtime.
 */
LIBDEFLATEAPI enum libdeflate_result
libdeflate_deflate_decompress(struct libdeflate_decompressor * restrict d,
			      const void * restrict in, size_t in_nbytes,
			      void * restrict out, size_t out_nbytes_avail,
			      size_t *actual_out_nbytes_ret)
{
#if DISPATCH_ENABLED
	/* Indirect call: resolves to dispatch() on the first call, then to
	 * whichever implementation dispatch() cached in 'decompress_impl'. */
	return (*decompress_impl)(d, in, in_nbytes, out, out_nbytes_avail,
				  actual_out_nbytes_ret);
#else
	/* Only one implementation was compiled in; call it directly. */
	return deflate_decompress_default(d, in, in_nbytes, out,
					  out_nbytes_avail,
					  actual_out_nbytes_ret);
#endif
}
|
|
878
|
+
|
|
879
|
+
LIBDEFLATEAPI struct libdeflate_decompressor *
|
|
880
|
+
libdeflate_alloc_decompressor(void)
|
|
881
|
+
{
|
|
882
|
+
return malloc(sizeof(struct libdeflate_decompressor));
|
|
883
|
+
}
|
|
884
|
+
|
|
885
|
+
LIBDEFLATEAPI void
|
|
886
|
+
libdeflate_free_decompressor(struct libdeflate_decompressor *d)
|
|
887
|
+
{
|
|
888
|
+
free(d);
|
|
889
|
+
}
|