libdeflate 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.rubocop.yml +1 -0
  6. data/.rubocop_todo.yml +9 -0
  7. data/.travis.yml +5 -0
  8. data/Gemfile +4 -0
  9. data/LICENSE.txt +21 -0
  10. data/README.md +52 -0
  11. data/Rakefile +15 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/ext/libdeflate/extconf.rb +14 -0
  15. data/ext/libdeflate/libdeflate/.gitignore +19 -0
  16. data/ext/libdeflate/libdeflate/COPYING +21 -0
  17. data/ext/libdeflate/libdeflate/Makefile +231 -0
  18. data/ext/libdeflate/libdeflate/Makefile.msc +64 -0
  19. data/ext/libdeflate/libdeflate/NEWS +57 -0
  20. data/ext/libdeflate/libdeflate/README.md +170 -0
  21. data/ext/libdeflate/libdeflate/common/common_defs.h +351 -0
  22. data/ext/libdeflate/libdeflate/common/compiler_gcc.h +134 -0
  23. data/ext/libdeflate/libdeflate/common/compiler_msc.h +95 -0
  24. data/ext/libdeflate/libdeflate/lib/adler32.c +213 -0
  25. data/ext/libdeflate/libdeflate/lib/adler32_impl.h +281 -0
  26. data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +57 -0
  27. data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +13 -0
  28. data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +357 -0
  29. data/ext/libdeflate/libdeflate/lib/crc32.c +368 -0
  30. data/ext/libdeflate/libdeflate/lib/crc32_impl.h +286 -0
  31. data/ext/libdeflate/libdeflate/lib/crc32_table.h +526 -0
  32. data/ext/libdeflate/libdeflate/lib/decompress_impl.h +404 -0
  33. data/ext/libdeflate/libdeflate/lib/deflate_compress.c +2817 -0
  34. data/ext/libdeflate/libdeflate/lib/deflate_compress.h +14 -0
  35. data/ext/libdeflate/libdeflate/lib/deflate_constants.h +66 -0
  36. data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +889 -0
  37. data/ext/libdeflate/libdeflate/lib/gzip_compress.c +95 -0
  38. data/ext/libdeflate/libdeflate/lib/gzip_constants.h +45 -0
  39. data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +130 -0
  40. data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +405 -0
  41. data/ext/libdeflate/libdeflate/lib/lib_common.h +35 -0
  42. data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +53 -0
  43. data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +205 -0
  44. data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +61 -0
  45. data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +53 -0
  46. data/ext/libdeflate/libdeflate/lib/unaligned.h +202 -0
  47. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +169 -0
  48. data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +48 -0
  49. data/ext/libdeflate/libdeflate/lib/zlib_compress.c +87 -0
  50. data/ext/libdeflate/libdeflate/lib/zlib_constants.h +21 -0
  51. data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +91 -0
  52. data/ext/libdeflate/libdeflate/libdeflate.h +274 -0
  53. data/ext/libdeflate/libdeflate/programs/benchmark.c +558 -0
  54. data/ext/libdeflate/libdeflate/programs/checksum.c +197 -0
  55. data/ext/libdeflate/libdeflate/programs/detect.sh +62 -0
  56. data/ext/libdeflate/libdeflate/programs/gzip.c +603 -0
  57. data/ext/libdeflate/libdeflate/programs/prog_util.c +530 -0
  58. data/ext/libdeflate/libdeflate/programs/prog_util.h +162 -0
  59. data/ext/libdeflate/libdeflate/programs/test_checksums.c +135 -0
  60. data/ext/libdeflate/libdeflate/programs/tgetopt.c +118 -0
  61. data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +12 -0
  62. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +40 -0
  63. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
  64. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +28 -0
  65. data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +3 -0
  66. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +28 -0
  67. data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
  68. data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +14 -0
  69. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +28 -0
  70. data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +3 -0
  71. data/ext/libdeflate/libdeflate/tools/android_build.sh +104 -0
  72. data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +76 -0
  73. data/ext/libdeflate/libdeflate/tools/exec_tests.sh +30 -0
  74. data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +108 -0
  75. data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +100 -0
  76. data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +412 -0
  77. data/ext/libdeflate/libdeflate/tools/make-windows-releases +21 -0
  78. data/ext/libdeflate/libdeflate/tools/mips_build.sh +9 -0
  79. data/ext/libdeflate/libdeflate/tools/msc_test.bat +3 -0
  80. data/ext/libdeflate/libdeflate/tools/pgo_build.sh +23 -0
  81. data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +37 -0
  82. data/ext/libdeflate/libdeflate/tools/run_tests.sh +305 -0
  83. data/ext/libdeflate/libdeflate/tools/windows_build.sh +10 -0
  84. data/ext/libdeflate/libdeflate_ext.c +389 -0
  85. data/ext/libdeflate/libdeflate_ext.h +8 -0
  86. data/lib/libdeflate.rb +2 -0
  87. data/lib/libdeflate/version.rb +3 -0
  88. data/libdeflate.gemspec +33 -0
  89. metadata +230 -0
@@ -0,0 +1,14 @@
1
+ #ifndef LIB_DEFLATE_COMPRESS_H
2
+ #define LIB_DEFLATE_COMPRESS_H
3
+
4
+ #include "lib_common.h"
5
+
6
+ /* DEFLATE compression is private to deflate_compress.c, but we do need to be
7
+ * able to query the compression level for zlib and gzip header generation. */
8
+
9
+ struct libdeflate_compressor;
10
+
11
+ extern unsigned int
12
+ deflate_get_compression_level(struct libdeflate_compressor *c);
13
+
14
+ #endif /* LIB_DEFLATE_COMPRESS_H */
@@ -0,0 +1,66 @@
1
+ /*
2
+ * deflate_constants.h - constants for the DEFLATE compression format
3
+ */
4
+
5
+ #ifndef LIB_DEFLATE_CONSTANTS_H
6
+ #define LIB_DEFLATE_CONSTANTS_H
7
+
8
+ /* Valid block types */
9
+ #define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0
10
+ #define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1
11
+ #define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2
12
+
13
+ /* Minimum and maximum supported match lengths (in bytes) */
14
+ #define DEFLATE_MIN_MATCH_LEN 3
15
+ #define DEFLATE_MAX_MATCH_LEN 258
16
+
17
+ /* Minimum and maximum supported match offsets (in bytes) */
18
+ #define DEFLATE_MIN_MATCH_OFFSET 1
19
+ #define DEFLATE_MAX_MATCH_OFFSET 32768
20
+
21
+ #define DEFLATE_MAX_WINDOW_SIZE 32768
22
+
23
+ /* Number of symbols in each Huffman code. Note: for the literal/length
24
+ * and offset codes, these are actually the maximum values; a given block
25
+ * might use fewer symbols. */
26
+ #define DEFLATE_NUM_PRECODE_SYMS 19
27
+ #define DEFLATE_NUM_LITLEN_SYMS 288
28
+ #define DEFLATE_NUM_OFFSET_SYMS 32
29
+
30
+ /* The maximum number of symbols across all codes */
31
+ #define DEFLATE_MAX_NUM_SYMS 288
32
+
33
+ /* Division of symbols in the literal/length code */
34
+ #define DEFLATE_NUM_LITERALS 256
35
+ #define DEFLATE_END_OF_BLOCK 256
36
+ #define DEFLATE_NUM_LEN_SYMS 31
37
+
38
+ /* Maximum codeword length, in bits, within each Huffman code */
39
+ #define DEFLATE_MAX_PRE_CODEWORD_LEN 7
40
+ #define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15
41
+ #define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15
42
+
43
+ /* The maximum codeword length across all codes */
44
+ #define DEFLATE_MAX_CODEWORD_LEN 15
45
+
46
+ /* Maximum possible overrun when decoding codeword lengths */
47
+ #define DEFLATE_MAX_LENS_OVERRUN 137
48
+
49
+ /*
50
+ * Maximum number of extra bits that may be required to represent a match
51
+ * length or offset.
52
+ *
53
+ * TODO: are we going to have full DEFLATE64 support? If so, up to 16
54
+ * length bits must be supported.
55
+ */
56
+ #define DEFLATE_MAX_EXTRA_LENGTH_BITS 5
57
+ #define DEFLATE_MAX_EXTRA_OFFSET_BITS 14
58
+
59
+ /* The maximum number of bits in which a match can be represented. This
60
+ * is the absolute worst case, which assumes the longest possible Huffman
61
+ * codewords and the maximum numbers of extra bits. */
62
+ #define DEFLATE_MAX_MATCH_BITS \
63
+ (DEFLATE_MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS + \
64
+ DEFLATE_MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS)
65
+
66
+ #endif /* LIB_DEFLATE_CONSTANTS_H */
@@ -0,0 +1,889 @@
1
+ /*
2
+ * deflate_decompress.c - a decompressor for DEFLATE
3
+ *
4
+ * Originally public domain; changes after 2016-09-07 are copyrighted.
5
+ *
6
+ * Copyright 2016 Eric Biggers
7
+ *
8
+ * Permission is hereby granted, free of charge, to any person
9
+ * obtaining a copy of this software and associated documentation
10
+ * files (the "Software"), to deal in the Software without
11
+ * restriction, including without limitation the rights to use,
12
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ * copies of the Software, and to permit persons to whom the
14
+ * Software is furnished to do so, subject to the following
15
+ * conditions:
16
+ *
17
+ * The above copyright notice and this permission notice shall be
18
+ * included in all copies or substantial portions of the Software.
19
+ *
20
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27
+ * OTHER DEALINGS IN THE SOFTWARE.
28
+ *
29
+ * ---------------------------------------------------------------------------
30
+ *
31
+ * This is a highly optimized DEFLATE decompressor. When compiled with gcc on
32
+ * x86_64, it decompresses data in about 52% of the time of zlib (48% if BMI2
33
+ * instructions are available). On other architectures it should still be
34
+ * significantly faster than zlib, but the difference may be smaller.
35
+ *
36
+ * Why this is faster than zlib's implementation:
37
+ *
38
+ * - Word accesses rather than byte accesses when reading input
39
+ * - Word accesses rather than byte accesses when copying matches
40
+ * - Faster Huffman decoding combined with various DEFLATE-specific tricks
41
+ * - Larger bitbuffer variable that doesn't need to be filled as often
42
+ * - Other optimizations to remove unnecessary branches
43
+ * - Only full-buffer decompression is supported, so the code doesn't need to
44
+ * support stopping and resuming decompression.
45
+ * - On x86_64, compile a version of the decompression routine using BMI2
46
+ * instructions and use it automatically at runtime when supported.
47
+ */
48
+
49
+ #include <stdlib.h>
50
+ #include <string.h>
51
+
52
+ #include "deflate_constants.h"
53
+ #include "unaligned.h"
54
+ #include "x86_cpu_features.h"
55
+
56
+ #include "libdeflate.h"
57
+
58
+ /*
59
+ * If the expression passed to SAFETY_CHECK() evaluates to false, then the
60
+ * decompression routine immediately returns LIBDEFLATE_BAD_DATA, indicating the
61
+ * compressed data is invalid.
62
+ *
63
+ * Theoretically, these checks could be disabled for specialized applications
64
+ * where all input to the decompressor will be trusted.
65
+ */
66
+ #if 0
67
+ # pragma message("UNSAFE DECOMPRESSION IS ENABLED. THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!")
68
+ # define SAFETY_CHECK(expr) (void)(expr)
69
+ #else
70
+ # define SAFETY_CHECK(expr) if (unlikely(!(expr))) return LIBDEFLATE_BAD_DATA
71
+ #endif
72
+
73
+ /*
74
+ * Each TABLEBITS number is the base-2 logarithm of the number of entries in the
75
+ * main portion of the corresponding decode table. Each number should be large
76
+ * enough to ensure that for typical data, the vast majority of symbols can be
77
+ * decoded by a direct lookup of the next TABLEBITS bits of compressed data.
78
+ * However, this must be balanced against the fact that a larger table requires
79
+ * more memory and requires more time to fill.
80
+ *
81
+ * Note: you cannot change a TABLEBITS number without also changing the
82
+ * corresponding ENOUGH number!
83
+ */
84
+ #define PRECODE_TABLEBITS 7
85
+ #define LITLEN_TABLEBITS 10
86
+ #define OFFSET_TABLEBITS 8
87
+
88
+ /*
89
+ * Each ENOUGH number is the maximum number of decode table entries that may be
90
+ * required for the corresponding Huffman code, including the main table and all
91
+ * subtables. Each number depends on three parameters:
92
+ *
93
+ * (1) the maximum number of symbols in the code (DEFLATE_NUM_*_SYMBOLS)
94
+ * (2) the number of main table bits (the TABLEBITS numbers defined above)
95
+ * (3) the maximum allowed codeword length (DEFLATE_MAX_*_CODEWORD_LEN)
96
+ *
97
+ * The ENOUGH numbers were computed using the utility program 'enough' from
98
+ * zlib. This program enumerates all possible relevant Huffman codes to find
99
+ * the worst-case usage of decode table entries.
100
+ */
101
+ #define PRECODE_ENOUGH 128 /* enough 19 7 7 */
102
+ #define LITLEN_ENOUGH 1334 /* enough 288 10 15 */
103
+ #define OFFSET_ENOUGH 402 /* enough 32 8 15 */
104
+
105
+ /*
106
+ * Type for codeword lengths.
107
+ */
108
+ typedef u8 len_t;
109
+
110
+ /*
111
+ * The main DEFLATE decompressor structure. Since this implementation only
112
+ * supports full buffer decompression, this structure does not store the entire
113
+ * decompression state, but rather only some arrays that are too large to
114
+ * comfortably allocate on the stack.
115
+ */
116
+ struct libdeflate_decompressor {
117
+
118
+ /*
119
+ * The arrays aren't all needed at the same time. 'precode_lens' and
120
+ * 'precode_decode_table' are unneeded after 'lens' has been filled.
121
+ * Furthermore, 'lens' need not be retained after building the litlen
122
+ * and offset decode tables. In fact, 'lens' can be in union with
123
+ * 'litlen_decode_table' provided that 'offset_decode_table' is separate
124
+ * and is built first.
125
+ */
126
+
127
+ union {
128
+ len_t precode_lens[DEFLATE_NUM_PRECODE_SYMS];
129
+
130
+ struct {
131
+ len_t lens[DEFLATE_NUM_LITLEN_SYMS +
132
+ DEFLATE_NUM_OFFSET_SYMS +
133
+ DEFLATE_MAX_LENS_OVERRUN];
134
+
135
+ u32 precode_decode_table[PRECODE_ENOUGH];
136
+ } l;
137
+
138
+ u32 litlen_decode_table[LITLEN_ENOUGH];
139
+ } u;
140
+
141
+ u32 offset_decode_table[OFFSET_ENOUGH];
142
+
143
+ u16 working_space[2 * (DEFLATE_MAX_CODEWORD_LEN + 1) +
144
+ DEFLATE_MAX_NUM_SYMS];
145
+ };
146
+
147
+ /*****************************************************************************
148
+ * Input bitstream *
149
+ *****************************************************************************/
150
+
151
+ /*
152
+ * The state of the "input bitstream" consists of the following variables:
153
+ *
154
+ * - in_next: pointer to the next unread byte in the input buffer
155
+ *
156
+ * - in_end: pointer just past the end of the input buffer
157
+ *
158
+ * - bitbuf: a word-sized variable containing bits that have been read from
159
+ * the input buffer. The buffered bits are right-aligned
160
+ * (they're the low-order bits).
161
+ *
162
+ * - bitsleft: number of bits in 'bitbuf' that are valid.
163
+ *
164
+ * To make it easier for the compiler to optimize the code by keeping variables
165
+ * in registers, these are declared as normal variables and manipulated using
166
+ * macros.
167
+ */
168
+
169
+ /*
170
+ * The type for the bitbuffer variable ('bitbuf' described above). For best
171
+ * performance, this should have size equal to a machine word.
172
+ *
173
+ * 64-bit platforms have a significant advantage: they get a bigger bitbuffer
174
+ * which they have to fill less often.
175
+ */
176
+ typedef machine_word_t bitbuf_t;
177
+
178
+ /*
179
+ * Number of bits the bitbuffer variable can hold.
180
+ */
181
+ #define BITBUF_NBITS (8 * sizeof(bitbuf_t))
182
+
183
+ /*
184
+ * The maximum number of bits that can be requested to be in the bitbuffer
185
+ * variable. This is the maximum value of 'n' that can be passed
186
+ * ENSURE_BITS(n).
187
+ *
188
+ * This not equal to BITBUF_NBITS because we never read less than one byte at a
189
+ * time. If the bitbuffer variable contains more than (BITBUF_NBITS - 8) bits,
190
+ * then we can't read another byte without first consuming some bits. So the
191
+ * maximum count we can ensure is (BITBUF_NBITS - 7).
192
+ */
193
+ #define MAX_ENSURE (BITBUF_NBITS - 7)
194
+
195
+ /*
196
+ * Evaluates to true if 'n' is a valid argument to ENSURE_BITS(n), or false if
197
+ * 'n' is too large to be passed to ENSURE_BITS(n). Note: if 'n' is a compile
198
+ * time constant, then this expression will be a compile-type constant.
199
+ * Therefore, CAN_ENSURE() can be used choose between alternative
200
+ * implementations at compile time.
201
+ */
202
+ #define CAN_ENSURE(n) ((n) <= MAX_ENSURE)
203
+
204
+ /*
205
+ * Fill the bitbuffer variable, reading one byte at a time.
206
+ *
207
+ * Note: if we would overrun the input buffer, we just don't read anything,
208
+ * leaving the bits as 0 but marking them as filled. This makes the
209
+ * implementation simpler because this removes the need to distinguish between
210
+ * "real" overruns and overruns that occur because of our own lookahead during
211
+ * Huffman decoding. The disadvantage is that a "real" overrun can go
212
+ * undetected, and libdeflate_deflate_decompress() may return a success status
213
+ * rather than the expected failure status if one occurs. However, this is
214
+ * irrelevant because even if this specific case were to be handled "correctly",
215
+ * one could easily come up with a different case where the compressed data
216
+ * would be corrupted in such a way that fully retains its validity. Users
217
+ * should run a checksum against the uncompressed data if they wish to detect
218
+ * corruptions.
219
+ */
220
+ #define FILL_BITS_BYTEWISE() \
221
+ do { \
222
+ if (likely(in_next != in_end)) \
223
+ bitbuf |= (bitbuf_t)*in_next++ << bitsleft; \
224
+ else \
225
+ overrun_count++; \
226
+ bitsleft += 8; \
227
+ } while (bitsleft <= BITBUF_NBITS - 8)
228
+
229
+ /*
230
+ * Fill the bitbuffer variable by reading the next word from the input buffer.
231
+ * This can be significantly faster than FILL_BITS_BYTEWISE(). However, for
232
+ * this to work correctly, the word must be interpreted in little-endian format.
233
+ * In addition, the memory access may be unaligned. Therefore, this method is
234
+ * most efficient on little-endian architectures that support fast unaligned
235
+ * access, such as x86 and x86_64.
236
+ */
237
+ #define FILL_BITS_WORDWISE() \
238
+ do { \
239
+ bitbuf |= get_unaligned_leword(in_next) << bitsleft; \
240
+ in_next += (BITBUF_NBITS - bitsleft) >> 3; \
241
+ bitsleft += (BITBUF_NBITS - bitsleft) & ~7; \
242
+ } while (0)
243
+
244
+ /*
245
+ * Does the bitbuffer variable currently contain at least 'n' bits?
246
+ */
247
+ #define HAVE_BITS(n) (bitsleft >= (n))
248
+
249
+ /*
250
+ * Load more bits from the input buffer until the specified number of bits is
251
+ * present in the bitbuffer variable. 'n' cannot be too large; see MAX_ENSURE
252
+ * and CAN_ENSURE().
253
+ */
254
+ #define ENSURE_BITS(n) \
255
+ if (!HAVE_BITS(n)) { \
256
+ if (CPU_IS_LITTLE_ENDIAN() && \
257
+ UNALIGNED_ACCESS_IS_FAST && \
258
+ likely(in_end - in_next >= sizeof(bitbuf_t))) \
259
+ FILL_BITS_WORDWISE(); \
260
+ else \
261
+ FILL_BITS_BYTEWISE(); \
262
+ }
263
+
264
+ /*
265
+ * Return the next 'n' bits from the bitbuffer variable without removing them.
266
+ */
267
+ #define BITS(n) ((u32)bitbuf & (((u32)1 << (n)) - 1))
268
+
269
+ /*
270
+ * Remove the next 'n' bits from the bitbuffer variable.
271
+ */
272
+ #define REMOVE_BITS(n) (bitbuf >>= (n), bitsleft -= (n))
273
+
274
+ /*
275
+ * Remove and return the next 'n' bits from the bitbuffer variable.
276
+ */
277
+ #define POP_BITS(n) (tmp32 = BITS(n), REMOVE_BITS(n), tmp32)
278
+
279
+ /*
280
+ * Align the input to the next byte boundary, discarding any remaining bits in
281
+ * the current byte.
282
+ *
283
+ * Note that if the bitbuffer variable currently contains more than 8 bits, then
284
+ * we must rewind 'in_next', effectively putting those bits back. Only the bits
285
+ * in what would be the "current" byte if we were reading one byte at a time can
286
+ * be actually discarded.
287
+ */
288
+ #define ALIGN_INPUT() \
289
+ do { \
290
+ in_next -= (bitsleft >> 3) - MIN(overrun_count, bitsleft >> 3); \
291
+ bitbuf = 0; \
292
+ bitsleft = 0; \
293
+ } while(0)
294
+
295
+ /*
296
+ * Read a 16-bit value from the input. This must have been preceded by a call
297
+ * to ALIGN_INPUT(), and the caller must have already checked for overrun.
298
+ */
299
+ #define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16)
300
+
301
+ /*****************************************************************************
302
+ * Huffman decoding *
303
+ *****************************************************************************/
304
+
305
+ /*
306
+ * A decode table for order TABLEBITS consists of a main table of (1 <<
307
+ * TABLEBITS) entries followed by a variable number of subtables.
308
+ *
309
+ * The decoding algorithm takes the next TABLEBITS bits of compressed data and
310
+ * uses them as an index into the decode table. The resulting entry is either a
311
+ * "direct entry", meaning that it contains the value desired, or a "subtable
312
+ * pointer", meaning that the entry references a subtable that must be indexed
313
+ * using more bits of the compressed data to decode the symbol.
314
+ *
315
+ * Each decode table (a main table along with with its subtables, if any) is
316
+ * associated with a Huffman code. Logically, the result of a decode table
317
+ * lookup is a symbol from the alphabet from which the corresponding Huffman
318
+ * code was constructed. A symbol with codeword length n <= TABLEBITS is
319
+ * associated with 2**(TABLEBITS - n) direct entries in the table, whereas a
320
+ * symbol with codeword length n > TABLEBITS is associated with one or more
321
+ * subtable entries.
322
+ *
323
+ * On top of this basic design, we implement several optimizations:
324
+ *
325
+ * - We store the length of each codeword directly in each of its decode table
326
+ * entries. This allows the codeword length to be produced without indexing
327
+ * an additional table.
328
+ *
329
+ * - When beneficial, we don't store the Huffman symbol itself, but instead data
330
+ * generated from it. For example, when decoding an offset symbol in DEFLATE,
331
+ * it's more efficient if we can decode the offset base and number of extra
332
+ * offset bits directly rather than decoding the offset symbol and then
333
+ * looking up both of those values in an additional table or tables.
334
+ *
335
+ * The size of each decode table entry is 32 bits, which provides slightly
336
+ * better performance than 16-bit entries on 32 and 64 bit processers, provided
337
+ * that the table doesn't get so large that it takes up too much memory and
338
+ * starts generating cache misses. The bits of each decode table entry are
339
+ * defined as follows:
340
+ *
341
+ * - Bits 30 -- 31: flags (see below)
342
+ * - Bits 8 -- 29: decode result: a Huffman symbol or related data
343
+ * - Bits 0 -- 7: codeword length
344
+ */
345
+
346
+ /*
347
+ * This flag is set in all main decode table entries that represent subtable
348
+ * pointers.
349
+ */
350
+ #define HUFFDEC_SUBTABLE_POINTER 0x80000000
351
+
352
+ /*
353
+ * This flag is set in all entries in the litlen decode table that represent
354
+ * literals.
355
+ */
356
+ #define HUFFDEC_LITERAL 0x40000000
357
+
358
+ /* Mask for extracting the codeword length from a decode table entry. */
359
+ #define HUFFDEC_LENGTH_MASK 0xFF
360
+
361
+ /* Shift to extract the decode result from a decode table entry. */
362
+ #define HUFFDEC_RESULT_SHIFT 8
363
+
364
+ /* The decode result for each precode symbol. There is no special optimization
365
+ * for the precode; the decode result is simply the symbol value. */
366
+ static const u32 precode_decode_results[DEFLATE_NUM_PRECODE_SYMS] = {
367
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
368
+ };
369
+
370
+ /* The decode result for each litlen symbol. For literals, this is the literal
371
+ * value itself and the HUFFDEC_LITERAL flag. For lengths, this is the length
372
+ * base and the number of extra length bits. */
373
+ static const u32 litlen_decode_results[DEFLATE_NUM_LITLEN_SYMS] = {
374
+ #define ENTRY(literal) ((HUFFDEC_LITERAL >> HUFFDEC_RESULT_SHIFT) | (literal))
375
+
376
+ /* Literals */
377
+ ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) ,
378
+ ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) ,
379
+ ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) ,
380
+ ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) ,
381
+ ENTRY(16) , ENTRY(17) , ENTRY(18) , ENTRY(19) ,
382
+ ENTRY(20) , ENTRY(21) , ENTRY(22) , ENTRY(23) ,
383
+ ENTRY(24) , ENTRY(25) , ENTRY(26) , ENTRY(27) ,
384
+ ENTRY(28) , ENTRY(29) , ENTRY(30) , ENTRY(31) ,
385
+ ENTRY(32) , ENTRY(33) , ENTRY(34) , ENTRY(35) ,
386
+ ENTRY(36) , ENTRY(37) , ENTRY(38) , ENTRY(39) ,
387
+ ENTRY(40) , ENTRY(41) , ENTRY(42) , ENTRY(43) ,
388
+ ENTRY(44) , ENTRY(45) , ENTRY(46) , ENTRY(47) ,
389
+ ENTRY(48) , ENTRY(49) , ENTRY(50) , ENTRY(51) ,
390
+ ENTRY(52) , ENTRY(53) , ENTRY(54) , ENTRY(55) ,
391
+ ENTRY(56) , ENTRY(57) , ENTRY(58) , ENTRY(59) ,
392
+ ENTRY(60) , ENTRY(61) , ENTRY(62) , ENTRY(63) ,
393
+ ENTRY(64) , ENTRY(65) , ENTRY(66) , ENTRY(67) ,
394
+ ENTRY(68) , ENTRY(69) , ENTRY(70) , ENTRY(71) ,
395
+ ENTRY(72) , ENTRY(73) , ENTRY(74) , ENTRY(75) ,
396
+ ENTRY(76) , ENTRY(77) , ENTRY(78) , ENTRY(79) ,
397
+ ENTRY(80) , ENTRY(81) , ENTRY(82) , ENTRY(83) ,
398
+ ENTRY(84) , ENTRY(85) , ENTRY(86) , ENTRY(87) ,
399
+ ENTRY(88) , ENTRY(89) , ENTRY(90) , ENTRY(91) ,
400
+ ENTRY(92) , ENTRY(93) , ENTRY(94) , ENTRY(95) ,
401
+ ENTRY(96) , ENTRY(97) , ENTRY(98) , ENTRY(99) ,
402
+ ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) ,
403
+ ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) ,
404
+ ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) ,
405
+ ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) ,
406
+ ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) ,
407
+ ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) ,
408
+ ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) ,
409
+ ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) ,
410
+ ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) ,
411
+ ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) ,
412
+ ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) ,
413
+ ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) ,
414
+ ENTRY(148) , ENTRY(149) , ENTRY(150) , ENTRY(151) ,
415
+ ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) ,
416
+ ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) ,
417
+ ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) ,
418
+ ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) ,
419
+ ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) ,
420
+ ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) ,
421
+ ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) ,
422
+ ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) ,
423
+ ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) ,
424
+ ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) ,
425
+ ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) ,
426
+ ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) ,
427
+ ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) ,
428
+ ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) ,
429
+ ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) ,
430
+ ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) ,
431
+ ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) ,
432
+ ENTRY(220) , ENTRY(221) , ENTRY(222) , ENTRY(223) ,
433
+ ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) ,
434
+ ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) ,
435
+ ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) ,
436
+ ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) ,
437
+ ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) ,
438
+ ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) ,
439
+ ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) ,
440
+ ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) ,
441
+ #undef ENTRY
442
+
443
+ #define HUFFDEC_EXTRA_LENGTH_BITS_MASK 0xFF
444
+ #define HUFFDEC_LENGTH_BASE_SHIFT 8
445
+ #define HUFFDEC_END_OF_BLOCK_LENGTH 0
446
+
447
+ #define ENTRY(length_base, num_extra_bits) \
448
+ (((u32)(length_base) << HUFFDEC_LENGTH_BASE_SHIFT) | (num_extra_bits))
449
+
450
+ /* End of block */
451
+ ENTRY(HUFFDEC_END_OF_BLOCK_LENGTH, 0),
452
+
453
+ /* Lengths */
454
+ ENTRY(3 , 0) , ENTRY(4 , 0) , ENTRY(5 , 0) , ENTRY(6 , 0),
455
+ ENTRY(7 , 0) , ENTRY(8 , 0) , ENTRY(9 , 0) , ENTRY(10 , 0),
456
+ ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1),
457
+ ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2),
458
+ ENTRY(35 , 3) , ENTRY(43 , 3) , ENTRY(51 , 3) , ENTRY(59 , 3),
459
+ ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4),
460
+ ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5),
461
+ ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) ,
462
+ #undef ENTRY
463
+ };
464
+
465
+ /* The decode result for each offset symbol. This is the offset base and the
466
+ * number of extra offset bits. */
467
+ static const u32 offset_decode_results[DEFLATE_NUM_OFFSET_SYMS] = {
468
+
469
+ #define HUFFDEC_EXTRA_OFFSET_BITS_SHIFT 16
470
+ #define HUFFDEC_OFFSET_BASE_MASK (((u32)1 << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT) - 1)
471
+
472
+ #define ENTRY(offset_base, num_extra_bits) \
473
+ ((offset_base) | ((u32)(num_extra_bits) << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT))
474
+ ENTRY(1 , 0) , ENTRY(2 , 0) , ENTRY(3 , 0) , ENTRY(4 , 0) ,
475
+ ENTRY(5 , 1) , ENTRY(7 , 1) , ENTRY(9 , 2) , ENTRY(13 , 2) ,
476
+ ENTRY(17 , 3) , ENTRY(25 , 3) , ENTRY(33 , 4) , ENTRY(49 , 4) ,
477
+ ENTRY(65 , 5) , ENTRY(97 , 5) , ENTRY(129 , 6) , ENTRY(193 , 6) ,
478
+ ENTRY(257 , 7) , ENTRY(385 , 7) , ENTRY(513 , 8) , ENTRY(769 , 8) ,
479
+ ENTRY(1025 , 9) , ENTRY(1537 , 9) , ENTRY(2049 , 10) , ENTRY(3073 , 10) ,
480
+ ENTRY(4097 , 11) , ENTRY(6145 , 11) , ENTRY(8193 , 12) , ENTRY(12289 , 12) ,
481
+ ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(32769 , 14) , ENTRY(49153 , 14) ,
482
+ #undef ENTRY
483
+ };
484
+
485
+ /* Construct a decode table entry from a decode result and codeword length. */
486
+ static forceinline u32
487
+ make_decode_table_entry(u32 result, u32 length)
488
+ {
489
+ return (result << HUFFDEC_RESULT_SHIFT) | length;
490
+ }
491
+
492
/*
 * Build a table for fast decoding of symbols from a Huffman code.  As input,
 * this function takes the codeword length of each symbol which may be used in
 * the code.  As output, it produces a decode table for the canonical Huffman
 * code described by the codeword lengths.  The decode table is built with the
 * assumption that it will be indexed with "bit-reversed" codewords, where the
 * low-order bit is the first bit of the codeword.  This format is used for all
 * Huffman codes in DEFLATE.
 *
 * @decode_table
 *	The array in which the decode table will be generated.  This array must
 *	have sufficient length; see the definition of the ENOUGH numbers.
 * @lens
 *	An array which provides, for each symbol, the length of the
 *	corresponding codeword in bits, or 0 if the symbol is unused.  This may
 *	alias @decode_table, since nothing is written to @decode_table until all
 *	@lens have been consumed.  All codeword lengths are assumed to be <=
 *	@max_codeword_len but are otherwise considered untrusted.  If they do
 *	not form a valid Huffman code, then the decode table is not built and
 *	%false is returned.
 * @num_syms
 *	The number of symbols in the code, including all unused symbols.
 * @decode_results
 *	An array which provides, for each symbol, the actual value to store into
 *	the decode table.  This value will be directly produced as the result of
 *	decoding that symbol, thereby moving the indirection out of the decode
 *	loop and into the table initialization.
 * @table_bits
 *	The log base-2 of the number of main table entries to use.
 * @max_codeword_len
 *	The maximum allowed codeword length for this Huffman code.
 * @working_space
 *	A temporary array of length '2 * (@max_codeword_len + 1) + @num_syms'.
 *
 * Returns %true if successful; %false if the codeword lengths do not form a
 * valid Huffman code.
 */
static bool
build_decode_table(u32 decode_table[],
		   const len_t lens[],
		   const unsigned num_syms,
		   const u32 decode_results[],
		   const unsigned table_bits,
		   const unsigned max_codeword_len,
		   u16 working_space[])
{
	/* The working space is carved into three sub-arrays:
	 * per-length counts, per-length offsets, and the sorted symbols. */
	u16 * const len_counts = &working_space[0];
	u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
	u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
	unsigned len;
	unsigned sym;
	s32 remainder;
	unsigned sym_idx;
	unsigned codeword_len;
	unsigned codeword_reversed = 0;
	/* Intentional unsigned wraparound: -1 is a value that can never match
	 * '(codeword_reversed & table_mask)', forcing the first long codeword
	 * to start a new subtable. */
	unsigned cur_codeword_prefix = -1;
	unsigned cur_table_start = 0;
	unsigned cur_table_bits = table_bits;
	unsigned num_dropped_bits = 0;
	const unsigned table_mask = (1U << table_bits) - 1;

	/* Count how many symbols have each codeword length, including 0. */
	for (len = 0; len <= max_codeword_len; len++)
		len_counts[len] = 0;
	for (sym = 0; sym < num_syms; sym++)
		len_counts[lens[sym]]++;

	/* Sort the symbols primarily by increasing codeword length and
	 * secondarily by increasing symbol value (a counting sort). */

	/* Initialize 'offsets' so that offsets[len] is the number of codewords
	 * shorter than 'len' bits, including length 0. */
	offsets[0] = 0;
	for (len = 0; len < max_codeword_len; len++)
		offsets[len + 1] = offsets[len] + len_counts[len];

	/* Use the 'offsets' array to sort the symbols. */
	for (sym = 0; sym < num_syms; sym++)
		sorted_syms[offsets[lens[sym]]++] = sym;

	/* It is already guaranteed that all lengths are <= max_codeword_len,
	 * but it cannot be assumed they form a complete prefix code.  A
	 * codeword of length n should require a proportion of the codespace
	 * equaling (1/2)^n.  The code is complete if and only if, by this
	 * measure, the codespace is exactly filled by the lengths. */
	remainder = 1;
	for (len = 1; len <= max_codeword_len; len++) {
		remainder <<= 1;
		remainder -= len_counts[len];
		if (unlikely(remainder < 0)) {
			/* The lengths overflow the codespace; that is, the code
			 * is over-subscribed. */
			return false;
		}
	}

	if (unlikely(remainder != 0)) {
		/* The lengths do not fill the codespace; that is, they form an
		 * incomplete code. */

		/* Initialize the table entries to default values.  When
		 * decompressing a well-formed stream, these default values will
		 * never be used.  But since a malformed stream might contain
		 * any bits at all, these entries need to be set anyway. */
		u32 entry = make_decode_table_entry(decode_results[0], 1);
		for (sym = 0; sym < (1U << table_bits); sym++)
			decode_table[sym] = entry;

		/* A completely empty code is permitted. */
		if (remainder == (1U << max_codeword_len))
			return true;

		/* The code is nonempty and incomplete.  Proceed only if there
		 * is a single used symbol and its codeword has length 1.  The
		 * DEFLATE RFC is somewhat unclear regarding this case.  What
		 * zlib's decompressor does is permit this case for
		 * literal/length and offset codes and assume the codeword is 0
		 * rather than 1.  We do the same except we allow this case for
		 * precodes too. */
		if (remainder != (1U << (max_codeword_len - 1)) ||
		    len_counts[1] != 1)
			return false;
	}

	/* Generate the decode table entries.  Since we process codewords from
	 * shortest to longest, the main portion of the decode table is filled
	 * first; then the subtables are filled.  Note that it's already been
	 * verified that the code is nonempty and not over-subscribed. */

	/* Start with the smallest codeword length and the smallest-valued
	 * symbol which has that codeword length. */
	sym_idx = offsets[0];
	codeword_len = 1;
	while (len_counts[codeword_len] == 0)
		codeword_len++;

	for (;;) {  /* For each used symbol and its codeword... */
		/* NOTE(review): this 'sym' intentionally shadows the
		 * function-scope 'sym' used by the earlier setup loops. */
		unsigned sym;
		u32 entry;
		unsigned i;
		unsigned end;
		unsigned increment;
		unsigned bit;

		/* Get the next symbol. */
		sym = sorted_syms[sym_idx];

		/* Start a new subtable if the codeword is long enough to
		 * require a subtable, *and* the first 'table_bits' bits of the
		 * codeword don't match the prefix for the previous subtable if
		 * any. */
		if (codeword_len > table_bits &&
		    (codeword_reversed & table_mask) != cur_codeword_prefix) {

			cur_codeword_prefix = (codeword_reversed & table_mask);

			cur_table_start += 1U << cur_table_bits;

			/* Calculate the subtable length.  If the codeword
			 * length exceeds 'table_bits' by n, the subtable needs
			 * at least 2**n entries.  But it may need more; if
			 * there are fewer than 2**n codewords of length
			 * 'table_bits + n' remaining, then n will need to be
			 * incremented to bring in longer codewords until the
			 * subtable can be filled completely.  Note that it
			 * always will, eventually, be possible to fill the
			 * subtable, since the only case where we may have an
			 * incomplete code is a single codeword of length 1,
			 * and that never requires any subtables. */
			cur_table_bits = codeword_len - table_bits;
			remainder = (s32)1 << cur_table_bits;
			for (;;) {
				remainder -= len_counts[table_bits +
							cur_table_bits];
				if (remainder <= 0)
					break;
				cur_table_bits++;
				remainder <<= 1;
			}

			/* Create the entry that points from the main table to
			 * the subtable.  This entry contains the index of the
			 * start of the subtable and the number of bits with
			 * which the subtable is indexed (the log base 2 of the
			 * number of entries it contains). */
			decode_table[cur_codeword_prefix] =
				HUFFDEC_SUBTABLE_POINTER |
				make_decode_table_entry(cur_table_start,
							cur_table_bits);

			/* Now that we're filling a subtable, we need to drop
			 * the first 'table_bits' bits of the codewords. */
			num_dropped_bits = table_bits;
		}

		/* Create the decode table entry, which packs the decode result
		 * and the codeword length (minus 'table_bits' for subtables)
		 * together. */
		entry = make_decode_table_entry(decode_results[sym],
						codeword_len - num_dropped_bits);

		/* Fill in as many copies of the decode table entry as are
		 * needed.  The number of entries to fill is a power of 2 and
		 * depends on the codeword length; it could be as few as 1 or as
		 * large as half the size of the table.  Since the codewords are
		 * bit-reversed, the indices to fill are those with the codeword
		 * in its low bits; it's the high bits that vary. */
		i = cur_table_start + (codeword_reversed >> num_dropped_bits);
		end = cur_table_start + (1U << cur_table_bits);
		increment = 1U << (codeword_len - num_dropped_bits);
		do {
			decode_table[i] = entry;
			i += increment;
		} while (i < end);

		/* Advance to the next codeword by incrementing it.  But since
		 * our codewords are bit-reversed, we must manipulate the bits
		 * ourselves rather than simply adding 1: find the highest set
		 * trailing-one run, clear it, and set the next bit up. */
		bit = 1U << (codeword_len - 1);
		while (codeword_reversed & bit)
			bit >>= 1;
		codeword_reversed &= bit - 1;
		codeword_reversed |= bit;

		/* Advance to the next symbol.  This will either increase the
		 * codeword length, or keep the same codeword length but
		 * increase the symbol value.  Note: since we are using
		 * bit-reversed codewords, we don't need to explicitly append
		 * zeroes to the codeword when the codeword length increases. */
		if (++sym_idx == num_syms)
			return true;
		len_counts[codeword_len]--;
		while (len_counts[codeword_len] == 0)
			codeword_len++;
	}
}
728
+
729
+ /* Build the decode table for the precode. */
730
+ static bool
731
+ build_precode_decode_table(struct libdeflate_decompressor *d)
732
+ {
733
+ /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
734
+ STATIC_ASSERT(PRECODE_TABLEBITS == 7 && PRECODE_ENOUGH == 128);
735
+
736
+ return build_decode_table(d->u.l.precode_decode_table,
737
+ d->u.precode_lens,
738
+ DEFLATE_NUM_PRECODE_SYMS,
739
+ precode_decode_results,
740
+ PRECODE_TABLEBITS,
741
+ DEFLATE_MAX_PRE_CODEWORD_LEN,
742
+ d->working_space);
743
+ }
744
+
745
+ /* Build the decode table for the literal/length code. */
746
+ static bool
747
+ build_litlen_decode_table(struct libdeflate_decompressor *d,
748
+ unsigned num_litlen_syms, unsigned num_offset_syms)
749
+ {
750
+ /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
751
+ STATIC_ASSERT(LITLEN_TABLEBITS == 10 && LITLEN_ENOUGH == 1334);
752
+
753
+ return build_decode_table(d->u.litlen_decode_table,
754
+ d->u.l.lens,
755
+ num_litlen_syms,
756
+ litlen_decode_results,
757
+ LITLEN_TABLEBITS,
758
+ DEFLATE_MAX_LITLEN_CODEWORD_LEN,
759
+ d->working_space);
760
+ }
761
+
762
+ /* Build the decode table for the offset code. */
763
+ static bool
764
+ build_offset_decode_table(struct libdeflate_decompressor *d,
765
+ unsigned num_litlen_syms, unsigned num_offset_syms)
766
+ {
767
+ /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
768
+ STATIC_ASSERT(OFFSET_TABLEBITS == 8 && OFFSET_ENOUGH == 402);
769
+
770
+ return build_decode_table(d->offset_decode_table,
771
+ d->u.l.lens + num_litlen_syms,
772
+ num_offset_syms,
773
+ offset_decode_results,
774
+ OFFSET_TABLEBITS,
775
+ DEFLATE_MAX_OFFSET_CODEWORD_LEN,
776
+ d->working_space);
777
+ }
778
+
779
+ static forceinline machine_word_t
780
+ repeat_byte(u8 b)
781
+ {
782
+ machine_word_t v;
783
+
784
+ STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
785
+
786
+ v = b;
787
+ v |= v << 8;
788
+ v |= v << 16;
789
+ v |= v << ((WORDBITS == 64) ? 32 : 0);
790
+ return v;
791
+ }
792
+
793
+ static forceinline void
794
+ copy_word_unaligned(const void *src, void *dst)
795
+ {
796
+ store_word_unaligned(load_word_unaligned(src), dst);
797
+ }
798
+
799
/*****************************************************************************
 *			Main decompression routine
 *****************************************************************************/

/* Instantiate the default (portable) decompression implementation.  The
 * actual decompression loop lives in decompress_impl.h and is parameterized
 * by FUNCNAME and ATTRIBUTES so it can be compiled multiple times with
 * different target attributes. */
#define FUNCNAME deflate_decompress_default
#define ATTRIBUTES
#include "decompress_impl.h"
#undef FUNCNAME
#undef ATTRIBUTES

/* If the compiler supports targeting BMI2 but the whole build isn't already
 * BMI2-enabled, also instantiate a BMI2-accelerated implementation and enable
 * runtime dispatch between the two. */
#if X86_CPU_FEATURES_ENABLED && \
	COMPILER_SUPPORTS_BMI2_TARGET && !defined(__BMI2__)
#  define FUNCNAME deflate_decompress_bmi2
#  define ATTRIBUTES __attribute__((target("bmi2")))
#  include "decompress_impl.h"
#  undef FUNCNAME
#  undef ATTRIBUTES
#  define DISPATCH_ENABLED 1
#else
#  define DISPATCH_ENABLED 0
#endif

#if DISPATCH_ENABLED

static enum libdeflate_result
dispatch(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_out_nbytes_ret);

typedef enum libdeflate_result (*decompress_func_t)
	(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_out_nbytes_ret);

/* Starts out pointing at dispatch(); the first call replaces it with the
 * best implementation for the running CPU, so later calls skip the CPU
 * feature check. */
static decompress_func_t decompress_impl = dispatch;

/* One-time selector: probe CPU features, cache the chosen implementation in
 * 'decompress_impl', then forward this first call to it.
 *
 * NOTE(review): the store to 'decompress_impl' is unsynchronized.  Concurrent
 * first calls would each compute and store the same value, so this appears
 * benign in practice, but it is formally a data race under C11 — confirm this
 * matches the project's threading policy. */
static enum libdeflate_result
dispatch(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_out_nbytes_ret)
{
	decompress_func_t f = deflate_decompress_default;
#if X86_CPU_FEATURES_ENABLED
	if (x86_have_cpu_features(X86_CPU_FEATURE_BMI2))
		f = deflate_decompress_bmi2;
#endif
	decompress_impl = f;
	return (*f)(d, in, in_nbytes, out, out_nbytes_avail,
		    actual_out_nbytes_ret);
}
#endif /* DISPATCH_ENABLED */
853
+
854
+
855
/*
 * This is the main DEFLATE decompression routine.  See libdeflate.h for the
 * documentation.
 *
 * Note that the real code is in decompress_impl.h.  The part here just handles
 * calling the appropriate implementation depending on the CPU features at
 * runtime.
 */
LIBDEFLATEAPI enum libdeflate_result
libdeflate_deflate_decompress(struct libdeflate_decompressor * restrict d,
			      const void * restrict in, size_t in_nbytes,
			      void * restrict out, size_t out_nbytes_avail,
			      size_t *actual_out_nbytes_ret)
{
#if DISPATCH_ENABLED
	/* 'decompress_impl' initially points at dispatch(), which selects and
	 * caches the best implementation for this CPU on the first call. */
	return (*decompress_impl)(d, in, in_nbytes, out, out_nbytes_avail,
				  actual_out_nbytes_ret);
#else
	/* Only one implementation was compiled in; call it directly. */
	return deflate_decompress_default(d, in, in_nbytes, out,
					  out_nbytes_avail,
					  actual_out_nbytes_ret);
#endif
}
878
+
879
+ LIBDEFLATEAPI struct libdeflate_decompressor *
880
+ libdeflate_alloc_decompressor(void)
881
+ {
882
+ return malloc(sizeof(struct libdeflate_decompressor));
883
+ }
884
+
885
/* Free a decompressor allocated with libdeflate_alloc_decompressor().
 * Passing NULL is a no-op, since free(NULL) is defined to do nothing. */
LIBDEFLATEAPI void
libdeflate_free_decompressor(struct libdeflate_decompressor *d)
{
	free(d);
}