libdeflate 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.rubocop.yml +1 -0
  6. data/.rubocop_todo.yml +9 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +52 -0
  11. data/Rakefile +15 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/ext/libdeflate/extconf.rb +14 -0
  15. data/ext/libdeflate/libdeflate/.gitignore +19 -0
  16. data/ext/libdeflate/libdeflate/COPYING +21 -0
  17. data/ext/libdeflate/libdeflate/Makefile +231 -0
  18. data/ext/libdeflate/libdeflate/Makefile.msc +64 -0
  19. data/ext/libdeflate/libdeflate/NEWS +57 -0
  20. data/ext/libdeflate/libdeflate/README.md +170 -0
  21. data/ext/libdeflate/libdeflate/common/common_defs.h +351 -0
  22. data/ext/libdeflate/libdeflate/common/compiler_gcc.h +134 -0
  23. data/ext/libdeflate/libdeflate/common/compiler_msc.h +95 -0
  24. data/ext/libdeflate/libdeflate/lib/adler32.c +213 -0
  25. data/ext/libdeflate/libdeflate/lib/adler32_impl.h +281 -0
  26. data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +57 -0
  27. data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +13 -0
  28. data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +357 -0
  29. data/ext/libdeflate/libdeflate/lib/crc32.c +368 -0
  30. data/ext/libdeflate/libdeflate/lib/crc32_impl.h +286 -0
  31. data/ext/libdeflate/libdeflate/lib/crc32_table.h +526 -0
  32. data/ext/libdeflate/libdeflate/lib/decompress_impl.h +404 -0
  33. data/ext/libdeflate/libdeflate/lib/deflate_compress.c +2817 -0
  34. data/ext/libdeflate/libdeflate/lib/deflate_compress.h +14 -0
  35. data/ext/libdeflate/libdeflate/lib/deflate_constants.h +66 -0
  36. data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +889 -0
  37. data/ext/libdeflate/libdeflate/lib/gzip_compress.c +95 -0
  38. data/ext/libdeflate/libdeflate/lib/gzip_constants.h +45 -0
  39. data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +130 -0
  40. data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +405 -0
  41. data/ext/libdeflate/libdeflate/lib/lib_common.h +35 -0
  42. data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +53 -0
  43. data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +205 -0
  44. data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +61 -0
  45. data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +53 -0
  46. data/ext/libdeflate/libdeflate/lib/unaligned.h +202 -0
  47. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +169 -0
  48. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +48 -0
  49. data/ext/libdeflate/libdeflate/lib/zlib_compress.c +87 -0
  50. data/ext/libdeflate/libdeflate/lib/zlib_constants.h +21 -0
  51. data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +91 -0
  52. data/ext/libdeflate/libdeflate/libdeflate.h +274 -0
  53. data/ext/libdeflate/libdeflate/programs/benchmark.c +558 -0
  54. data/ext/libdeflate/libdeflate/programs/checksum.c +197 -0
  55. data/ext/libdeflate/libdeflate/programs/detect.sh +62 -0
  56. data/ext/libdeflate/libdeflate/programs/gzip.c +603 -0
  57. data/ext/libdeflate/libdeflate/programs/prog_util.c +530 -0
  58. data/ext/libdeflate/libdeflate/programs/prog_util.h +162 -0
  59. data/ext/libdeflate/libdeflate/programs/test_checksums.c +135 -0
  60. data/ext/libdeflate/libdeflate/programs/tgetopt.c +118 -0
  61. data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +12 -0
  62. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +40 -0
  63. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
  64. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +28 -0
  65. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +3 -0
  66. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +28 -0
  67. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
  68. data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +14 -0
  69. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +28 -0
  70. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +3 -0
  71. data/ext/libdeflate/libdeflate/tools/android_build.sh +104 -0
  72. data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +76 -0
  73. data/ext/libdeflate/libdeflate/tools/exec_tests.sh +30 -0
  74. data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +108 -0
  75. data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +100 -0
  76. data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +412 -0
  77. data/ext/libdeflate/libdeflate/tools/make-windows-releases +21 -0
  78. data/ext/libdeflate/libdeflate/tools/mips_build.sh +9 -0
  79. data/ext/libdeflate/libdeflate/tools/msc_test.bat +3 -0
  80. data/ext/libdeflate/libdeflate/tools/pgo_build.sh +23 -0
  81. data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +37 -0
  82. data/ext/libdeflate/libdeflate/tools/run_tests.sh +305 -0
  83. data/ext/libdeflate/libdeflate/tools/windows_build.sh +10 -0
  84. data/ext/libdeflate/libdeflate_ext.c +389 -0
  85. data/ext/libdeflate/libdeflate_ext.h +8 -0
  86. data/lib/libdeflate.rb +2 -0
  87. data/lib/libdeflate/version.rb +3 -0
  88. data/libdeflate.gemspec +33 -0
  89. metadata +230 -0
@@ -0,0 +1,14 @@
1
+ #ifndef LIB_DEFLATE_COMPRESS_H
2
+ #define LIB_DEFLATE_COMPRESS_H
3
+
4
+ #include "lib_common.h"
5
+
6
+ /* DEFLATE compression is private to deflate_compress.c, but we do need to be
7
+ * able to query the compression level for zlib and gzip header generation. */
8
+
9
+ struct libdeflate_compressor;
10
+
11
+ extern unsigned int
12
+ deflate_get_compression_level(struct libdeflate_compressor *c);
13
+
14
+ #endif /* LIB_DEFLATE_COMPRESS_H */
/*
 * deflate_constants.h - constants for the DEFLATE compression format
 */

#ifndef LIB_DEFLATE_CONSTANTS_H
#define LIB_DEFLATE_CONSTANTS_H

/* Valid block types */
#define DEFLATE_BLOCKTYPE_UNCOMPRESSED		0
#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN	1
#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN	2

/* Minimum and maximum supported match lengths (in bytes) */
#define DEFLATE_MIN_MATCH_LEN			3
#define DEFLATE_MAX_MATCH_LEN			258

/* Minimum and maximum supported match offsets (in bytes) */
#define DEFLATE_MIN_MATCH_OFFSET		1
#define DEFLATE_MAX_MATCH_OFFSET		32768

#define DEFLATE_MAX_WINDOW_SIZE			32768

/*
 * Number of symbols in each Huffman code.  For the literal/length and offset
 * codes these are maximums; a given block might use fewer symbols.
 */
#define DEFLATE_NUM_PRECODE_SYMS		19
#define DEFLATE_NUM_LITLEN_SYMS			288
#define DEFLATE_NUM_OFFSET_SYMS			32

/* The maximum number of symbols across all codes */
#define DEFLATE_MAX_NUM_SYMS			288

/* Division of symbols in the literal/length code */
#define DEFLATE_NUM_LITERALS			256
#define DEFLATE_END_OF_BLOCK			256
#define DEFLATE_NUM_LEN_SYMS			31

/* Maximum codeword length, in bits, within each Huffman code */
#define DEFLATE_MAX_PRE_CODEWORD_LEN		7
#define DEFLATE_MAX_LITLEN_CODEWORD_LEN		15
#define DEFLATE_MAX_OFFSET_CODEWORD_LEN		15

/* The maximum codeword length across all codes */
#define DEFLATE_MAX_CODEWORD_LEN		15

/* Maximum possible overrun when decoding codeword lengths */
#define DEFLATE_MAX_LENS_OVERRUN		137

/*
 * Maximum number of extra bits that may be required to represent a match
 * length or offset.
 *
 * TODO: are we going to have full DEFLATE64 support?  If so, up to 16
 * length bits must be supported.
 */
#define DEFLATE_MAX_EXTRA_LENGTH_BITS		5
#define DEFLATE_MAX_EXTRA_OFFSET_BITS		14

/*
 * The maximum number of bits in which a match can be represented.  This is
 * the absolute worst case, assuming the longest possible Huffman codewords
 * and the maximum numbers of extra bits.
 */
#define DEFLATE_MAX_MATCH_BITS	\
	(DEFLATE_MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS + \
	 DEFLATE_MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS)

#endif /* LIB_DEFLATE_CONSTANTS_H */
@@ -0,0 +1,889 @@
1
+ /*
2
+ * deflate_decompress.c - a decompressor for DEFLATE
3
+ *
4
+ * Originally public domain; changes after 2016-09-07 are copyrighted.
5
+ *
6
+ * Copyright 2016 Eric Biggers
7
+ *
8
+ * Permission is hereby granted, free of charge, to any person
9
+ * obtaining a copy of this software and associated documentation
10
+ * files (the "Software"), to deal in the Software without
11
+ * restriction, including without limitation the rights to use,
12
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ * copies of the Software, and to permit persons to whom the
14
+ * Software is furnished to do so, subject to the following
15
+ * conditions:
16
+ *
17
+ * The above copyright notice and this permission notice shall be
18
+ * included in all copies or substantial portions of the Software.
19
+ *
20
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27
+ * OTHER DEALINGS IN THE SOFTWARE.
28
+ *
29
+ * ---------------------------------------------------------------------------
30
+ *
31
+ * This is a highly optimized DEFLATE decompressor. When compiled with gcc on
32
+ * x86_64, it decompresses data in about 52% of the time of zlib (48% if BMI2
33
+ * instructions are available). On other architectures it should still be
34
+ * significantly faster than zlib, but the difference may be smaller.
35
+ *
36
+ * Why this is faster than zlib's implementation:
37
+ *
38
+ * - Word accesses rather than byte accesses when reading input
39
+ * - Word accesses rather than byte accesses when copying matches
40
+ * - Faster Huffman decoding combined with various DEFLATE-specific tricks
41
+ * - Larger bitbuffer variable that doesn't need to be filled as often
42
+ * - Other optimizations to remove unnecessary branches
43
+ * - Only full-buffer decompression is supported, so the code doesn't need to
44
+ * support stopping and resuming decompression.
45
+ * - On x86_64, compile a version of the decompression routine using BMI2
46
+ * instructions and use it automatically at runtime when supported.
47
+ */
48
+
49
+ #include <stdlib.h>
50
+ #include <string.h>
51
+
52
+ #include "deflate_constants.h"
53
+ #include "unaligned.h"
54
+ #include "x86_cpu_features.h"
55
+
56
+ #include "libdeflate.h"
57
+
58
+ /*
59
+ * If the expression passed to SAFETY_CHECK() evaluates to false, then the
60
+ * decompression routine immediately returns LIBDEFLATE_BAD_DATA, indicating the
61
+ * compressed data is invalid.
62
+ *
63
+ * Theoretically, these checks could be disabled for specialized applications
64
+ * where all input to the decompressor will be trusted.
65
+ */
66
+ #if 0
67
+ # pragma message("UNSAFE DECOMPRESSION IS ENABLED. THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!")
68
+ # define SAFETY_CHECK(expr) (void)(expr)
69
+ #else
70
+ # define SAFETY_CHECK(expr) if (unlikely(!(expr))) return LIBDEFLATE_BAD_DATA
71
+ #endif
72
+
73
+ /*
74
+ * Each TABLEBITS number is the base-2 logarithm of the number of entries in the
75
+ * main portion of the corresponding decode table. Each number should be large
76
+ * enough to ensure that for typical data, the vast majority of symbols can be
77
+ * decoded by a direct lookup of the next TABLEBITS bits of compressed data.
78
+ * However, this must be balanced against the fact that a larger table requires
79
+ * more memory and requires more time to fill.
80
+ *
81
+ * Note: you cannot change a TABLEBITS number without also changing the
82
+ * corresponding ENOUGH number!
83
+ */
84
+ #define PRECODE_TABLEBITS 7
85
+ #define LITLEN_TABLEBITS 10
86
+ #define OFFSET_TABLEBITS 8
87
+
88
+ /*
89
+ * Each ENOUGH number is the maximum number of decode table entries that may be
90
+ * required for the corresponding Huffman code, including the main table and all
91
+ * subtables. Each number depends on three parameters:
92
+ *
93
+ * (1) the maximum number of symbols in the code (DEFLATE_NUM_*_SYMBOLS)
94
+ * (2) the number of main table bits (the TABLEBITS numbers defined above)
95
+ * (3) the maximum allowed codeword length (DEFLATE_MAX_*_CODEWORD_LEN)
96
+ *
97
+ * The ENOUGH numbers were computed using the utility program 'enough' from
98
+ * zlib. This program enumerates all possible relevant Huffman codes to find
99
+ * the worst-case usage of decode table entries.
100
+ */
101
+ #define PRECODE_ENOUGH 128 /* enough 19 7 7 */
102
+ #define LITLEN_ENOUGH 1334 /* enough 288 10 15 */
103
+ #define OFFSET_ENOUGH 402 /* enough 32 8 15 */
104
+
105
+ /*
106
+ * Type for codeword lengths.
107
+ */
108
+ typedef u8 len_t;
109
+
110
+ /*
111
+ * The main DEFLATE decompressor structure. Since this implementation only
112
+ * supports full buffer decompression, this structure does not store the entire
113
+ * decompression state, but rather only some arrays that are too large to
114
+ * comfortably allocate on the stack.
115
+ */
116
+ struct libdeflate_decompressor {
117
+
118
+ /*
119
+ * The arrays aren't all needed at the same time. 'precode_lens' and
120
+ * 'precode_decode_table' are unneeded after 'lens' has been filled.
121
+ * Furthermore, 'lens' need not be retained after building the litlen
122
+ * and offset decode tables. In fact, 'lens' can be in union with
123
+ * 'litlen_decode_table' provided that 'offset_decode_table' is separate
124
+ * and is built first.
125
+ */
126
+
127
+ union {
128
+ len_t precode_lens[DEFLATE_NUM_PRECODE_SYMS];
129
+
130
+ struct {
131
+ len_t lens[DEFLATE_NUM_LITLEN_SYMS +
132
+ DEFLATE_NUM_OFFSET_SYMS +
133
+ DEFLATE_MAX_LENS_OVERRUN];
134
+
135
+ u32 precode_decode_table[PRECODE_ENOUGH];
136
+ } l;
137
+
138
+ u32 litlen_decode_table[LITLEN_ENOUGH];
139
+ } u;
140
+
141
+ u32 offset_decode_table[OFFSET_ENOUGH];
142
+
143
+ u16 working_space[2 * (DEFLATE_MAX_CODEWORD_LEN + 1) +
144
+ DEFLATE_MAX_NUM_SYMS];
145
+ };
146
+
147
+ /*****************************************************************************
148
+ * Input bitstream *
149
+ *****************************************************************************/
150
+
151
+ /*
152
+ * The state of the "input bitstream" consists of the following variables:
153
+ *
154
+ * - in_next: pointer to the next unread byte in the input buffer
155
+ *
156
+ * - in_end: pointer just past the end of the input buffer
157
+ *
158
+ * - bitbuf: a word-sized variable containing bits that have been read from
159
+ * the input buffer. The buffered bits are right-aligned
160
+ * (they're the low-order bits).
161
+ *
162
+ * - bitsleft: number of bits in 'bitbuf' that are valid.
163
+ *
164
+ * To make it easier for the compiler to optimize the code by keeping variables
165
+ * in registers, these are declared as normal variables and manipulated using
166
+ * macros.
167
+ */
168
+
169
+ /*
170
+ * The type for the bitbuffer variable ('bitbuf' described above). For best
171
+ * performance, this should have size equal to a machine word.
172
+ *
173
+ * 64-bit platforms have a significant advantage: they get a bigger bitbuffer
174
+ * which they have to fill less often.
175
+ */
176
+ typedef machine_word_t bitbuf_t;
177
+
178
+ /*
179
+ * Number of bits the bitbuffer variable can hold.
180
+ */
181
+ #define BITBUF_NBITS (8 * sizeof(bitbuf_t))
182
+
183
+ /*
184
+ * The maximum number of bits that can be requested to be in the bitbuffer
185
+ * variable. This is the maximum value of 'n' that can be passed
186
+ * ENSURE_BITS(n).
187
+ *
188
+ * This not equal to BITBUF_NBITS because we never read less than one byte at a
189
+ * time. If the bitbuffer variable contains more than (BITBUF_NBITS - 8) bits,
190
+ * then we can't read another byte without first consuming some bits. So the
191
+ * maximum count we can ensure is (BITBUF_NBITS - 7).
192
+ */
193
+ #define MAX_ENSURE (BITBUF_NBITS - 7)
194
+
195
+ /*
196
+ * Evaluates to true if 'n' is a valid argument to ENSURE_BITS(n), or false if
197
+ * 'n' is too large to be passed to ENSURE_BITS(n). Note: if 'n' is a compile
198
+ * time constant, then this expression will be a compile-type constant.
199
+ * Therefore, CAN_ENSURE() can be used choose between alternative
200
+ * implementations at compile time.
201
+ */
202
+ #define CAN_ENSURE(n) ((n) <= MAX_ENSURE)
203
+
204
+ /*
205
+ * Fill the bitbuffer variable, reading one byte at a time.
206
+ *
207
+ * Note: if we would overrun the input buffer, we just don't read anything,
208
+ * leaving the bits as 0 but marking them as filled. This makes the
209
+ * implementation simpler because this removes the need to distinguish between
210
+ * "real" overruns and overruns that occur because of our own lookahead during
211
+ * Huffman decoding. The disadvantage is that a "real" overrun can go
212
+ * undetected, and libdeflate_deflate_decompress() may return a success status
213
+ * rather than the expected failure status if one occurs. However, this is
214
+ * irrelevant because even if this specific case were to be handled "correctly",
215
+ * one could easily come up with a different case where the compressed data
216
+ * would be corrupted in such a way that fully retains its validity. Users
217
+ * should run a checksum against the uncompressed data if they wish to detect
218
+ * corruptions.
219
+ */
220
+ #define FILL_BITS_BYTEWISE() \
221
+ do { \
222
+ if (likely(in_next != in_end)) \
223
+ bitbuf |= (bitbuf_t)*in_next++ << bitsleft; \
224
+ else \
225
+ overrun_count++; \
226
+ bitsleft += 8; \
227
+ } while (bitsleft <= BITBUF_NBITS - 8)
228
+
229
+ /*
230
+ * Fill the bitbuffer variable by reading the next word from the input buffer.
231
+ * This can be significantly faster than FILL_BITS_BYTEWISE(). However, for
232
+ * this to work correctly, the word must be interpreted in little-endian format.
233
+ * In addition, the memory access may be unaligned. Therefore, this method is
234
+ * most efficient on little-endian architectures that support fast unaligned
235
+ * access, such as x86 and x86_64.
236
+ */
237
+ #define FILL_BITS_WORDWISE() \
238
+ do { \
239
+ bitbuf |= get_unaligned_leword(in_next) << bitsleft; \
240
+ in_next += (BITBUF_NBITS - bitsleft) >> 3; \
241
+ bitsleft += (BITBUF_NBITS - bitsleft) & ~7; \
242
+ } while (0)
243
+
244
+ /*
245
+ * Does the bitbuffer variable currently contain at least 'n' bits?
246
+ */
247
+ #define HAVE_BITS(n) (bitsleft >= (n))
248
+
249
+ /*
250
+ * Load more bits from the input buffer until the specified number of bits is
251
+ * present in the bitbuffer variable. 'n' cannot be too large; see MAX_ENSURE
252
+ * and CAN_ENSURE().
253
+ */
254
+ #define ENSURE_BITS(n) \
255
+ if (!HAVE_BITS(n)) { \
256
+ if (CPU_IS_LITTLE_ENDIAN() && \
257
+ UNALIGNED_ACCESS_IS_FAST && \
258
+ likely(in_end - in_next >= sizeof(bitbuf_t))) \
259
+ FILL_BITS_WORDWISE(); \
260
+ else \
261
+ FILL_BITS_BYTEWISE(); \
262
+ }
263
+
264
+ /*
265
+ * Return the next 'n' bits from the bitbuffer variable without removing them.
266
+ */
267
+ #define BITS(n) ((u32)bitbuf & (((u32)1 << (n)) - 1))
268
+
269
+ /*
270
+ * Remove the next 'n' bits from the bitbuffer variable.
271
+ */
272
+ #define REMOVE_BITS(n) (bitbuf >>= (n), bitsleft -= (n))
273
+
274
+ /*
275
+ * Remove and return the next 'n' bits from the bitbuffer variable.
276
+ */
277
+ #define POP_BITS(n) (tmp32 = BITS(n), REMOVE_BITS(n), tmp32)
278
+
279
+ /*
280
+ * Align the input to the next byte boundary, discarding any remaining bits in
281
+ * the current byte.
282
+ *
283
+ * Note that if the bitbuffer variable currently contains more than 8 bits, then
284
+ * we must rewind 'in_next', effectively putting those bits back. Only the bits
285
+ * in what would be the "current" byte if we were reading one byte at a time can
286
+ * be actually discarded.
287
+ */
288
+ #define ALIGN_INPUT() \
289
+ do { \
290
+ in_next -= (bitsleft >> 3) - MIN(overrun_count, bitsleft >> 3); \
291
+ bitbuf = 0; \
292
+ bitsleft = 0; \
293
+ } while(0)
294
+
295
+ /*
296
+ * Read a 16-bit value from the input. This must have been preceded by a call
297
+ * to ALIGN_INPUT(), and the caller must have already checked for overrun.
298
+ */
299
+ #define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16)
300
+
301
+ /*****************************************************************************
302
+ * Huffman decoding *
303
+ *****************************************************************************/
304
+
305
+ /*
306
+ * A decode table for order TABLEBITS consists of a main table of (1 <<
307
+ * TABLEBITS) entries followed by a variable number of subtables.
308
+ *
309
+ * The decoding algorithm takes the next TABLEBITS bits of compressed data and
310
+ * uses them as an index into the decode table. The resulting entry is either a
311
+ * "direct entry", meaning that it contains the value desired, or a "subtable
312
+ * pointer", meaning that the entry references a subtable that must be indexed
313
+ * using more bits of the compressed data to decode the symbol.
314
+ *
315
+ * Each decode table (a main table along with with its subtables, if any) is
316
+ * associated with a Huffman code. Logically, the result of a decode table
317
+ * lookup is a symbol from the alphabet from which the corresponding Huffman
318
+ * code was constructed. A symbol with codeword length n <= TABLEBITS is
319
+ * associated with 2**(TABLEBITS - n) direct entries in the table, whereas a
320
+ * symbol with codeword length n > TABLEBITS is associated with one or more
321
+ * subtable entries.
322
+ *
323
+ * On top of this basic design, we implement several optimizations:
324
+ *
325
+ * - We store the length of each codeword directly in each of its decode table
326
+ * entries. This allows the codeword length to be produced without indexing
327
+ * an additional table.
328
+ *
329
+ * - When beneficial, we don't store the Huffman symbol itself, but instead data
330
+ * generated from it. For example, when decoding an offset symbol in DEFLATE,
331
+ * it's more efficient if we can decode the offset base and number of extra
332
+ * offset bits directly rather than decoding the offset symbol and then
333
+ * looking up both of those values in an additional table or tables.
334
+ *
335
+ * The size of each decode table entry is 32 bits, which provides slightly
336
+ * better performance than 16-bit entries on 32 and 64 bit processers, provided
337
+ * that the table doesn't get so large that it takes up too much memory and
338
+ * starts generating cache misses. The bits of each decode table entry are
339
+ * defined as follows:
340
+ *
341
+ * - Bits 30 -- 31: flags (see below)
342
+ * - Bits 8 -- 29: decode result: a Huffman symbol or related data
343
+ * - Bits 0 -- 7: codeword length
344
+ */
345
+
346
+ /*
347
+ * This flag is set in all main decode table entries that represent subtable
348
+ * pointers.
349
+ */
350
+ #define HUFFDEC_SUBTABLE_POINTER 0x80000000
351
+
352
+ /*
353
+ * This flag is set in all entries in the litlen decode table that represent
354
+ * literals.
355
+ */
356
+ #define HUFFDEC_LITERAL 0x40000000
357
+
358
+ /* Mask for extracting the codeword length from a decode table entry. */
359
+ #define HUFFDEC_LENGTH_MASK 0xFF
360
+
361
+ /* Shift to extract the decode result from a decode table entry. */
362
+ #define HUFFDEC_RESULT_SHIFT 8
363
+
364
+ /* The decode result for each precode symbol. There is no special optimization
365
+ * for the precode; the decode result is simply the symbol value. */
366
+ static const u32 precode_decode_results[DEFLATE_NUM_PRECODE_SYMS] = {
367
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
368
+ };
369
+
370
+ /* The decode result for each litlen symbol. For literals, this is the literal
371
+ * value itself and the HUFFDEC_LITERAL flag. For lengths, this is the length
372
+ * base and the number of extra length bits. */
373
+ static const u32 litlen_decode_results[DEFLATE_NUM_LITLEN_SYMS] = {
374
+ #define ENTRY(literal) ((HUFFDEC_LITERAL >> HUFFDEC_RESULT_SHIFT) | (literal))
375
+
376
+ /* Literals */
377
+ ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) ,
378
+ ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) ,
379
+ ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) ,
380
+ ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) ,
381
+ ENTRY(16) , ENTRY(17) , ENTRY(18) , ENTRY(19) ,
382
+ ENTRY(20) , ENTRY(21) , ENTRY(22) , ENTRY(23) ,
383
+ ENTRY(24) , ENTRY(25) , ENTRY(26) , ENTRY(27) ,
384
+ ENTRY(28) , ENTRY(29) , ENTRY(30) , ENTRY(31) ,
385
+ ENTRY(32) , ENTRY(33) , ENTRY(34) , ENTRY(35) ,
386
+ ENTRY(36) , ENTRY(37) , ENTRY(38) , ENTRY(39) ,
387
+ ENTRY(40) , ENTRY(41) , ENTRY(42) , ENTRY(43) ,
388
+ ENTRY(44) , ENTRY(45) , ENTRY(46) , ENTRY(47) ,
389
+ ENTRY(48) , ENTRY(49) , ENTRY(50) , ENTRY(51) ,
390
+ ENTRY(52) , ENTRY(53) , ENTRY(54) , ENTRY(55) ,
391
+ ENTRY(56) , ENTRY(57) , ENTRY(58) , ENTRY(59) ,
392
+ ENTRY(60) , ENTRY(61) , ENTRY(62) , ENTRY(63) ,
393
+ ENTRY(64) , ENTRY(65) , ENTRY(66) , ENTRY(67) ,
394
+ ENTRY(68) , ENTRY(69) , ENTRY(70) , ENTRY(71) ,
395
+ ENTRY(72) , ENTRY(73) , ENTRY(74) , ENTRY(75) ,
396
+ ENTRY(76) , ENTRY(77) , ENTRY(78) , ENTRY(79) ,
397
+ ENTRY(80) , ENTRY(81) , ENTRY(82) , ENTRY(83) ,
398
+ ENTRY(84) , ENTRY(85) , ENTRY(86) , ENTRY(87) ,
399
+ ENTRY(88) , ENTRY(89) , ENTRY(90) , ENTRY(91) ,
400
+ ENTRY(92) , ENTRY(93) , ENTRY(94) , ENTRY(95) ,
401
+ ENTRY(96) , ENTRY(97) , ENTRY(98) , ENTRY(99) ,
402
+ ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) ,
403
+ ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) ,
404
+ ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) ,
405
+ ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) ,
406
+ ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) ,
407
+ ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) ,
408
+ ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) ,
409
+ ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) ,
410
+ ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) ,
411
+ ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) ,
412
+ ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) ,
413
+ ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) ,
414
+ ENTRY(148) , ENTRY(149) , ENTRY(150) , ENTRY(151) ,
415
+ ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) ,
416
+ ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) ,
417
+ ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) ,
418
+ ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) ,
419
+ ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) ,
420
+ ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) ,
421
+ ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) ,
422
+ ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) ,
423
+ ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) ,
424
+ ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) ,
425
+ ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) ,
426
+ ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) ,
427
+ ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) ,
428
+ ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) ,
429
+ ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) ,
430
+ ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) ,
431
+ ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) ,
432
+ ENTRY(220) , ENTRY(221) , ENTRY(222) , ENTRY(223) ,
433
+ ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) ,
434
+ ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) ,
435
+ ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) ,
436
+ ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) ,
437
+ ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) ,
438
+ ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) ,
439
+ ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) ,
440
+ ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) ,
441
+ #undef ENTRY
442
+
443
+ #define HUFFDEC_EXTRA_LENGTH_BITS_MASK 0xFF
444
+ #define HUFFDEC_LENGTH_BASE_SHIFT 8
445
+ #define HUFFDEC_END_OF_BLOCK_LENGTH 0
446
+
447
+ #define ENTRY(length_base, num_extra_bits) \
448
+ (((u32)(length_base) << HUFFDEC_LENGTH_BASE_SHIFT) | (num_extra_bits))
449
+
450
+ /* End of block */
451
+ ENTRY(HUFFDEC_END_OF_BLOCK_LENGTH, 0),
452
+
453
+ /* Lengths */
454
+ ENTRY(3 , 0) , ENTRY(4 , 0) , ENTRY(5 , 0) , ENTRY(6 , 0),
455
+ ENTRY(7 , 0) , ENTRY(8 , 0) , ENTRY(9 , 0) , ENTRY(10 , 0),
456
+ ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1),
457
+ ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2),
458
+ ENTRY(35 , 3) , ENTRY(43 , 3) , ENTRY(51 , 3) , ENTRY(59 , 3),
459
+ ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4),
460
+ ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5),
461
+ ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) ,
462
+ #undef ENTRY
463
+ };
464
+
465
+ /* The decode result for each offset symbol. This is the offset base and the
466
+ * number of extra offset bits. */
467
+ static const u32 offset_decode_results[DEFLATE_NUM_OFFSET_SYMS] = {
468
+
469
+ #define HUFFDEC_EXTRA_OFFSET_BITS_SHIFT 16
470
+ #define HUFFDEC_OFFSET_BASE_MASK (((u32)1 << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT) - 1)
471
+
472
+ #define ENTRY(offset_base, num_extra_bits) \
473
+ ((offset_base) | ((u32)(num_extra_bits) << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT))
474
+ ENTRY(1 , 0) , ENTRY(2 , 0) , ENTRY(3 , 0) , ENTRY(4 , 0) ,
475
+ ENTRY(5 , 1) , ENTRY(7 , 1) , ENTRY(9 , 2) , ENTRY(13 , 2) ,
476
+ ENTRY(17 , 3) , ENTRY(25 , 3) , ENTRY(33 , 4) , ENTRY(49 , 4) ,
477
+ ENTRY(65 , 5) , ENTRY(97 , 5) , ENTRY(129 , 6) , ENTRY(193 , 6) ,
478
+ ENTRY(257 , 7) , ENTRY(385 , 7) , ENTRY(513 , 8) , ENTRY(769 , 8) ,
479
+ ENTRY(1025 , 9) , ENTRY(1537 , 9) , ENTRY(2049 , 10) , ENTRY(3073 , 10) ,
480
+ ENTRY(4097 , 11) , ENTRY(6145 , 11) , ENTRY(8193 , 12) , ENTRY(12289 , 12) ,
481
+ ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(32769 , 14) , ENTRY(49153 , 14) ,
482
+ #undef ENTRY
483
+ };
484
+
485
+ /* Construct a decode table entry from a decode result and codeword length. */
486
+ static forceinline u32
487
+ make_decode_table_entry(u32 result, u32 length)
488
+ {
489
+ return (result << HUFFDEC_RESULT_SHIFT) | length;
490
+ }
491
+
492
/*
 * Build a table for fast decoding of symbols from a Huffman code.  As input,
 * this function takes the codeword length of each symbol which may be used in
 * the code.  As output, it produces a decode table for the canonical Huffman
 * code described by the codeword lengths.  The decode table is built with the
 * assumption that it will be indexed with "bit-reversed" codewords, where the
 * low-order bit is the first bit of the codeword.  This format is used for all
 * Huffman codes in DEFLATE.
 *
 * @decode_table
 *	The array in which the decode table will be generated.  This array must
 *	have sufficient length; see the definition of the ENOUGH numbers.
 * @lens
 *	An array which provides, for each symbol, the length of the
 *	corresponding codeword in bits, or 0 if the symbol is unused.  This may
 *	alias @decode_table, since nothing is written to @decode_table until all
 *	@lens have been consumed.  All codeword lengths are assumed to be <=
 *	@max_codeword_len but are otherwise considered untrusted.  If they do
 *	not form a valid Huffman code, then the decode table is not built and
 *	%false is returned.
 * @num_syms
 *	The number of symbols in the code, including all unused symbols.
 * @decode_results
 *	An array which provides, for each symbol, the actual value to store into
 *	the decode table.  This value will be directly produced as the result of
 *	decoding that symbol, thereby moving the indirection out of the decode
 *	loop and into the table initialization.
 * @table_bits
 *	The log base-2 of the number of main table entries to use.
 * @max_codeword_len
 *	The maximum allowed codeword length for this Huffman code.
 * @working_space
 *	A temporary array of length '2 * (@max_codeword_len + 1) + @num_syms'.
 *
 * Returns %true if successful; %false if the codeword lengths do not form a
 * valid Huffman code.
 */
static bool
build_decode_table(u32 decode_table[],
		   const len_t lens[],
		   const unsigned num_syms,
		   const u32 decode_results[],
		   const unsigned table_bits,
		   const unsigned max_codeword_len,
		   u16 working_space[])
{
	/* Partition @working_space into three sub-arrays: per-length counts,
	 * sort offsets, and the symbols sorted by codeword length. */
	u16 * const len_counts = &working_space[0];
	u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
	u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
	unsigned len;
	unsigned sym;
	s32 remainder;
	unsigned sym_idx;
	unsigned codeword_len;
	unsigned codeword_reversed = 0;
	/* -1 wraps to UINT_MAX, which can never equal a real table_bits-bit
	 * prefix, so the first over-long codeword always starts a subtable. */
	unsigned cur_codeword_prefix = -1;
	unsigned cur_table_start = 0;
	unsigned cur_table_bits = table_bits;
	unsigned num_dropped_bits = 0;
	const unsigned table_mask = (1U << table_bits) - 1;

	/* Count how many symbols have each codeword length, including 0. */
	for (len = 0; len <= max_codeword_len; len++)
		len_counts[len] = 0;
	for (sym = 0; sym < num_syms; sym++)
		len_counts[lens[sym]]++;

	/* Sort the symbols primarily by increasing codeword length and
	 * secondarily by increasing symbol value. */

	/* Initialize 'offsets' so that offsets[len] is the number of codewords
	 * shorter than 'len' bits, including length 0. */
	offsets[0] = 0;
	for (len = 0; len < max_codeword_len; len++)
		offsets[len + 1] = offsets[len] + len_counts[len];

	/* Use the 'offsets' array to sort the symbols.  (Counting sort: each
	 * offsets[len] is advanced as symbols of that length are placed.) */
	for (sym = 0; sym < num_syms; sym++)
		sorted_syms[offsets[lens[sym]]++] = sym;

	/* It is already guaranteed that all lengths are <= max_codeword_len,
	 * but it cannot be assumed they form a complete prefix code.  A
	 * codeword of length n should require a proportion of the codespace
	 * equaling (1/2)^n.  The code is complete if and only if, by this
	 * measure, the codespace is exactly filled by the lengths. */
	remainder = 1;
	for (len = 1; len <= max_codeword_len; len++) {
		remainder <<= 1;
		remainder -= len_counts[len];
		if (unlikely(remainder < 0)) {
			/* The lengths overflow the codespace; that is, the code
			 * is over-subscribed. */
			return false;
		}
	}

	if (unlikely(remainder != 0)) {
		/* The lengths do not fill the codespace; that is, they form an
		 * incomplete code. */

		/* Initialize the table entries to default values.  When
		 * decompressing a well-formed stream, these default values will
		 * never be used.  But since a malformed stream might contain
		 * any bits at all, these entries need to be set anyway. */
		u32 entry = make_decode_table_entry(decode_results[0], 1);
		for (sym = 0; sym < (1U << table_bits); sym++)
			decode_table[sym] = entry;

		/* A completely empty code is permitted. */
		if (remainder == (1U << max_codeword_len))
			return true;

		/* The code is nonempty and incomplete.  Proceed only if there
		 * is a single used symbol and its codeword has length 1.  The
		 * DEFLATE RFC is somewhat unclear regarding this case.  What
		 * zlib's decompressor does is permit this case for
		 * literal/length and offset codes and assume the codeword is 0
		 * rather than 1.  We do the same except we allow this case for
		 * precodes too. */
		if (remainder != (1U << (max_codeword_len - 1)) ||
		    len_counts[1] != 1)
			return false;
	}

	/* Generate the decode table entries.  Since we process codewords from
	 * shortest to longest, the main portion of the decode table is filled
	 * first; then the subtables are filled.  Note that it's already been
	 * verified that the code is nonempty and not over-subscribed. */

	/* Start with the smallest codeword length and the smallest-valued
	 * symbol which has that codeword length.  After the sort above,
	 * offsets[0] equals the number of unused (zero-length) symbols, so
	 * this index skips past them in sorted_syms. */
	sym_idx = offsets[0];
	codeword_len = 1;
	while (len_counts[codeword_len] == 0)
		codeword_len++;

	for (;;) { /* For each used symbol and its codeword... */
		unsigned sym;
		u32 entry;
		unsigned i;
		unsigned end;
		unsigned increment;
		unsigned bit;

		/* Get the next symbol. */
		sym = sorted_syms[sym_idx];

		/* Start a new subtable if the codeword is long enough to
		 * require a subtable, *and* the first 'table_bits' bits of the
		 * codeword don't match the prefix for the previous subtable if
		 * any. */
		if (codeword_len > table_bits &&
		    (codeword_reversed & table_mask) != cur_codeword_prefix) {

			cur_codeword_prefix = (codeword_reversed & table_mask);

			cur_table_start += 1U << cur_table_bits;

			/* Calculate the subtable length.  If the codeword
			 * length exceeds 'table_bits' by n, the subtable needs
			 * at least 2**n entries.  But it may need more; if
			 * there are fewer than 2**n codewords of length
			 * 'table_bits + n' remaining, then n will need to be
			 * incremented to bring in longer codewords until the
			 * subtable can be filled completely.  Note that it
			 * always will, eventually, be possible to fill the
			 * subtable, since the only case where we may have an
			 * incomplete code is a single codeword of length 1,
			 * and that never requires any subtables. */
			cur_table_bits = codeword_len - table_bits;
			remainder = (s32)1 << cur_table_bits;
			for (;;) {
				remainder -= len_counts[table_bits +
							cur_table_bits];
				if (remainder <= 0)
					break;
				cur_table_bits++;
				remainder <<= 1;
			}

			/* Create the entry that points from the main table to
			 * the subtable.  This entry contains the index of the
			 * start of the subtable and the number of bits with
			 * which the subtable is indexed (the log base 2 of the
			 * number of entries it contains). */
			decode_table[cur_codeword_prefix] =
				HUFFDEC_SUBTABLE_POINTER |
				make_decode_table_entry(cur_table_start,
							cur_table_bits);

			/* Now that we're filling a subtable, we need to drop
			 * the first 'table_bits' bits of the codewords. */
			num_dropped_bits = table_bits;
		}

		/* Create the decode table entry, which packs the decode result
		 * and the codeword length (minus 'table_bits' for subtables)
		 * together. */
		entry = make_decode_table_entry(decode_results[sym],
						codeword_len - num_dropped_bits);

		/* Fill in as many copies of the decode table entry as are
		 * needed.  The number of entries to fill is a power of 2 and
		 * depends on the codeword length; it could be as few as 1 or as
		 * large as half the size of the table.  Since the codewords are
		 * bit-reversed, the indices to fill are those with the codeword
		 * in its low bits; it's the high bits that vary. */
		i = cur_table_start + (codeword_reversed >> num_dropped_bits);
		end = cur_table_start + (1U << cur_table_bits);
		increment = 1U << (codeword_len - num_dropped_bits);
		do {
			decode_table[i] = entry;
			i += increment;
		} while (i < end);

		/* Advance to the next codeword by incrementing it.  But since
		 * our codewords are bit-reversed, we must manipulate the bits
		 * ourselves rather than simply adding 1: find the highest
		 * (last-transmitted) 0 bit, set it, and clear the bits above
		 * it. */
		bit = 1U << (codeword_len - 1);
		while (codeword_reversed & bit)
			bit >>= 1;
		codeword_reversed &= bit - 1;
		codeword_reversed |= bit;

		/* Advance to the next symbol.  This will either increase the
		 * codeword length, or keep the same codeword length but
		 * increase the symbol value.  Note: since we are using
		 * bit-reversed codewords, we don't need to explicitly append
		 * zeroes to the codeword when the codeword length increases. */
		if (++sym_idx == num_syms)
			return true;
		len_counts[codeword_len]--;
		while (len_counts[codeword_len] == 0)
			codeword_len++;
	}
}
728
+
729
+ /* Build the decode table for the precode. */
730
+ static bool
731
+ build_precode_decode_table(struct libdeflate_decompressor *d)
732
+ {
733
+ /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
734
+ STATIC_ASSERT(PRECODE_TABLEBITS == 7 && PRECODE_ENOUGH == 128);
735
+
736
+ return build_decode_table(d->u.l.precode_decode_table,
737
+ d->u.precode_lens,
738
+ DEFLATE_NUM_PRECODE_SYMS,
739
+ precode_decode_results,
740
+ PRECODE_TABLEBITS,
741
+ DEFLATE_MAX_PRE_CODEWORD_LEN,
742
+ d->working_space);
743
+ }
744
+
745
+ /* Build the decode table for the literal/length code. */
746
+ static bool
747
+ build_litlen_decode_table(struct libdeflate_decompressor *d,
748
+ unsigned num_litlen_syms, unsigned num_offset_syms)
749
+ {
750
+ /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
751
+ STATIC_ASSERT(LITLEN_TABLEBITS == 10 && LITLEN_ENOUGH == 1334);
752
+
753
+ return build_decode_table(d->u.litlen_decode_table,
754
+ d->u.l.lens,
755
+ num_litlen_syms,
756
+ litlen_decode_results,
757
+ LITLEN_TABLEBITS,
758
+ DEFLATE_MAX_LITLEN_CODEWORD_LEN,
759
+ d->working_space);
760
+ }
761
+
762
+ /* Build the decode table for the offset code. */
763
+ static bool
764
+ build_offset_decode_table(struct libdeflate_decompressor *d,
765
+ unsigned num_litlen_syms, unsigned num_offset_syms)
766
+ {
767
+ /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
768
+ STATIC_ASSERT(OFFSET_TABLEBITS == 8 && OFFSET_ENOUGH == 402);
769
+
770
+ return build_decode_table(d->offset_decode_table,
771
+ d->u.l.lens + num_litlen_syms,
772
+ num_offset_syms,
773
+ offset_decode_results,
774
+ OFFSET_TABLEBITS,
775
+ DEFLATE_MAX_OFFSET_CODEWORD_LEN,
776
+ d->working_space);
777
+ }
778
+
779
+ static forceinline machine_word_t
780
+ repeat_byte(u8 b)
781
+ {
782
+ machine_word_t v;
783
+
784
+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
785
+
786
+ v = b;
787
+ v |= v << 8;
788
+ v |= v << 16;
789
+ v |= v << ((WORDBITS == 64) ? 32 : 0);
790
+ return v;
791
+ }
792
+
793
/* Copy one machine word from @src to @dst; neither pointer is required to be
 * word-aligned (the load/store helpers handle unaligned access). */
static forceinline void
copy_word_unaligned(const void *src, void *dst)
{
	store_word_unaligned(load_word_unaligned(src), dst);
}
798
+
799
/*****************************************************************************
 *				Main decompression routine
 *****************************************************************************/

/* Instantiate the decompressor "template" in decompress_impl.h as the
 * portable default implementation. */
#define FUNCNAME deflate_decompress_default
#define ATTRIBUTES
#include "decompress_impl.h"
#undef FUNCNAME
#undef ATTRIBUTES

/* If the compiler can target BMI2 but the build itself isn't already
 * BMI2-enabled, also instantiate a BMI2-targeted variant and enable runtime
 * dispatch between the two. */
#if X86_CPU_FEATURES_ENABLED && \
	COMPILER_SUPPORTS_BMI2_TARGET && !defined(__BMI2__)
#  define FUNCNAME deflate_decompress_bmi2
#  define ATTRIBUTES __attribute__((target("bmi2")))
#  include "decompress_impl.h"
#  undef FUNCNAME
#  undef ATTRIBUTES
#  define DISPATCH_ENABLED 1
#else
#  define DISPATCH_ENABLED 0
#endif
820
+
821
#if DISPATCH_ENABLED

/* Forward declaration: dispatch() is the initial value of 'decompress_impl'
 * below, and it replaces that pointer with the best implementation the first
 * time it is called. */
static enum libdeflate_result
dispatch(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_out_nbytes_ret);

/* Signature shared by all decompression implementations. */
typedef enum libdeflate_result (*decompress_func_t)
	(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_out_nbytes_ret);

/* Pointer to the implementation to use; initially the dispatcher itself. */
static decompress_func_t decompress_impl = dispatch;

/*
 * Select the fastest implementation supported by the running CPU, cache the
 * choice in 'decompress_impl', then forward this first call to it.
 *
 * NOTE(review): the store to 'decompress_impl' is a plain (non-atomic) write;
 * concurrent first calls from multiple threads would race on it.  The write
 * is idempotent (all threads compute the same value), but confirm this is
 * acceptable on the supported platforms.
 */
static enum libdeflate_result
dispatch(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_out_nbytes_ret)
{
	decompress_func_t f = deflate_decompress_default;
#if X86_CPU_FEATURES_ENABLED
	if (x86_have_cpu_features(X86_CPU_FEATURE_BMI2))
		f = deflate_decompress_bmi2;
#endif
	decompress_impl = f;
	return (*f)(d, in, in_nbytes, out, out_nbytes_avail,
		    actual_out_nbytes_ret);
}
#endif /* DISPATCH_ENABLED */
853
+
854
+
855
+ /*
856
+ * This is the main DEFLATE decompression routine. See libdeflate.h for the
857
+ * documentation.
858
+ *
859
+ * Note that the real code is in decompress_impl.h. The part here just handles
860
+ * calling the appropriate implementation depending on the CPU features at
861
+ * runtime.
862
+ */
863
+ LIBDEFLATEAPI enum libdeflate_result
864
+ libdeflate_deflate_decompress(struct libdeflate_decompressor * restrict d,
865
+ const void * restrict in, size_t in_nbytes,
866
+ void * restrict out, size_t out_nbytes_avail,
867
+ size_t *actual_out_nbytes_ret)
868
+ {
869
+ #if DISPATCH_ENABLED
870
+ return (*decompress_impl)(d, in, in_nbytes, out, out_nbytes_avail,
871
+ actual_out_nbytes_ret);
872
+ #else
873
+ return deflate_decompress_default(d, in, in_nbytes, out,
874
+ out_nbytes_avail,
875
+ actual_out_nbytes_ret);
876
+ #endif
877
+ }
878
+
879
+ LIBDEFLATEAPI struct libdeflate_decompressor *
880
+ libdeflate_alloc_decompressor(void)
881
+ {
882
+ return malloc(sizeof(struct libdeflate_decompressor));
883
+ }
884
+
885
/* Free a decompressor allocated by libdeflate_alloc_decompressor(). */
LIBDEFLATEAPI void
libdeflate_free_decompressor(struct libdeflate_decompressor *d)
{
	/* free(NULL) is a no-op, so a NULL @d is accepted. */
	free(d);
}