brotli 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.travis.yml +11 -3
- data/Gemfile +2 -0
- data/ext/brotli/brotli.c +279 -0
- data/ext/brotli/brotli.h +2 -0
- data/ext/brotli/buffer.c +95 -0
- data/ext/brotli/buffer.h +19 -0
- data/ext/brotli/extconf.rb +21 -81
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/dec/bit_reader.c +5 -5
- data/vendor/brotli/dec/bit_reader.h +15 -15
- data/vendor/brotli/dec/context.h +1 -1
- data/vendor/brotli/dec/decode.c +433 -348
- data/vendor/brotli/dec/decode.h +74 -48
- data/vendor/brotli/dec/huffman.c +5 -4
- data/vendor/brotli/dec/huffman.h +4 -4
- data/vendor/brotli/dec/port.h +2 -95
- data/vendor/brotli/dec/prefix.h +5 -3
- data/vendor/brotli/dec/state.c +15 -27
- data/vendor/brotli/dec/state.h +21 -17
- data/vendor/brotli/dec/transform.h +1 -1
- data/vendor/brotli/enc/backward_references.c +892 -0
- data/vendor/brotli/enc/backward_references.h +85 -102
- data/vendor/brotli/enc/backward_references_inc.h +147 -0
- data/vendor/brotli/enc/bit_cost.c +35 -0
- data/vendor/brotli/enc/bit_cost.h +23 -121
- data/vendor/brotli/enc/bit_cost_inc.h +127 -0
- data/vendor/brotli/enc/block_encoder_inc.h +33 -0
- data/vendor/brotli/enc/block_splitter.c +197 -0
- data/vendor/brotli/enc/block_splitter.h +40 -50
- data/vendor/brotli/enc/block_splitter_inc.h +432 -0
- data/vendor/brotli/enc/brotli_bit_stream.c +1334 -0
- data/vendor/brotli/enc/brotli_bit_stream.h +95 -167
- data/vendor/brotli/enc/cluster.c +56 -0
- data/vendor/brotli/enc/cluster.h +23 -305
- data/vendor/brotli/enc/cluster_inc.h +315 -0
- data/vendor/brotli/enc/command.h +83 -76
- data/vendor/brotli/enc/compress_fragment.c +747 -0
- data/vendor/brotli/enc/compress_fragment.h +48 -37
- data/vendor/brotli/enc/compress_fragment_two_pass.c +557 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.h +37 -26
- data/vendor/brotli/enc/compressor.cc +139 -0
- data/vendor/brotli/enc/compressor.h +146 -0
- data/vendor/brotli/enc/context.h +102 -96
- data/vendor/brotli/enc/dictionary_hash.h +9 -5
- data/vendor/brotli/enc/encode.c +1562 -0
- data/vendor/brotli/enc/encode.h +211 -199
- data/vendor/brotli/enc/encode_parallel.cc +161 -151
- data/vendor/brotli/enc/encode_parallel.h +7 -8
- data/vendor/brotli/enc/entropy_encode.c +501 -0
- data/vendor/brotli/enc/entropy_encode.h +107 -89
- data/vendor/brotli/enc/entropy_encode_static.h +29 -62
- data/vendor/brotli/enc/fast_log.h +26 -20
- data/vendor/brotli/enc/find_match_length.h +23 -20
- data/vendor/brotli/enc/hash.h +614 -871
- data/vendor/brotli/enc/hash_forgetful_chain_inc.h +249 -0
- data/vendor/brotli/enc/hash_longest_match_inc.h +241 -0
- data/vendor/brotli/enc/hash_longest_match_quickly_inc.h +230 -0
- data/vendor/brotli/enc/histogram.c +95 -0
- data/vendor/brotli/enc/histogram.h +49 -83
- data/vendor/brotli/enc/histogram_inc.h +51 -0
- data/vendor/brotli/enc/literal_cost.c +178 -0
- data/vendor/brotli/enc/literal_cost.h +16 -10
- data/vendor/brotli/enc/memory.c +181 -0
- data/vendor/brotli/enc/memory.h +62 -0
- data/vendor/brotli/enc/metablock.c +515 -0
- data/vendor/brotli/enc/metablock.h +87 -57
- data/vendor/brotli/enc/metablock_inc.h +183 -0
- data/vendor/brotli/enc/port.h +73 -47
- data/vendor/brotli/enc/prefix.h +34 -61
- data/vendor/brotli/enc/quality.h +130 -0
- data/vendor/brotli/enc/ringbuffer.h +137 -122
- data/vendor/brotli/enc/{static_dict.cc → static_dict.c} +162 -139
- data/vendor/brotli/enc/static_dict.h +23 -18
- data/vendor/brotli/enc/static_dict_lut.h +11223 -12037
- data/vendor/brotli/enc/streams.cc +7 -7
- data/vendor/brotli/enc/streams.h +32 -32
- data/vendor/brotli/enc/{utf8_util.cc → utf8_util.c} +22 -20
- data/vendor/brotli/enc/utf8_util.h +16 -9
- data/vendor/brotli/enc/write_bits.h +49 -43
- metadata +34 -25
- data/ext/brotli/brotli.cc +0 -181
- data/vendor/brotli/dec/Makefile +0 -12
- data/vendor/brotli/dec/dictionary.c +0 -9466
- data/vendor/brotli/dec/dictionary.h +0 -38
- data/vendor/brotli/dec/types.h +0 -38
- data/vendor/brotli/enc/Makefile +0 -14
- data/vendor/brotli/enc/backward_references.cc +0 -858
- data/vendor/brotli/enc/block_splitter.cc +0 -505
- data/vendor/brotli/enc/brotli_bit_stream.cc +0 -1181
- data/vendor/brotli/enc/compress_fragment.cc +0 -701
- data/vendor/brotli/enc/compress_fragment_two_pass.cc +0 -524
- data/vendor/brotli/enc/dictionary.cc +0 -9466
- data/vendor/brotli/enc/dictionary.h +0 -41
- data/vendor/brotli/enc/encode.cc +0 -1180
- data/vendor/brotli/enc/entropy_encode.cc +0 -480
- data/vendor/brotli/enc/histogram.cc +0 -67
- data/vendor/brotli/enc/literal_cost.cc +0 -165
- data/vendor/brotli/enc/metablock.cc +0 -539
- data/vendor/brotli/enc/transform.h +0 -248
- data/vendor/brotli/enc/types.h +0 -29
@@ -1,1181 +0,0 @@
|
|
1
|
-
/* Copyright 2014 Google Inc. All Rights Reserved.
|
2
|
-
|
3
|
-
Distributed under MIT license.
|
4
|
-
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
-
*/
|
6
|
-
|
7
|
-
// Brotli bit stream functions to support the low level format. There are no
|
8
|
-
// compression algorithms here, just the right ordering of bits to match the
|
9
|
-
// specs.
|
10
|
-
|
11
|
-
#include "./brotli_bit_stream.h"
|
12
|
-
|
13
|
-
#include <algorithm>
|
14
|
-
#include <cstdlib> /* free, malloc */
|
15
|
-
#include <cstring>
|
16
|
-
#include <limits>
|
17
|
-
#include <vector>
|
18
|
-
|
19
|
-
#include "./bit_cost.h"
|
20
|
-
#include "./context.h"
|
21
|
-
#include "./entropy_encode.h"
|
22
|
-
#include "./entropy_encode_static.h"
|
23
|
-
#include "./fast_log.h"
|
24
|
-
#include "./prefix.h"
|
25
|
-
#include "./write_bits.h"
|
26
|
-
|
27
|
-
namespace brotli {
|
28
|
-
|
29
|
-
namespace {
|
30
|
-
|
31
|
-
static const size_t kMaxHuffmanTreeSize = 2 * kNumCommandPrefixes + 1;
|
32
|
-
// Context map alphabet has 256 context id symbols plus max 16 rle symbols.
|
33
|
-
static const size_t kContextMapAlphabetSize = 256 + 16;
|
34
|
-
// Block type alphabet has 256 block id symbols plus 2 special symbols.
|
35
|
-
static const size_t kBlockTypeAlphabetSize = 256 + 2;
|
36
|
-
|
37
|
-
// nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
|
38
|
-
// REQUIRES: length > 0
|
39
|
-
// REQUIRES: length <= (1 << 24)
|
40
|
-
void EncodeMlen(size_t length, uint64_t* bits,
|
41
|
-
size_t* numbits, uint64_t* nibblesbits) {
|
42
|
-
assert(length > 0);
|
43
|
-
assert(length <= (1 << 24));
|
44
|
-
length--; // MLEN - 1 is encoded
|
45
|
-
size_t lg = length == 0 ? 1 : Log2FloorNonZero(
|
46
|
-
static_cast<uint32_t>(length)) + 1;
|
47
|
-
assert(lg <= 24);
|
48
|
-
size_t mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;
|
49
|
-
*nibblesbits = mnibbles - 4;
|
50
|
-
*numbits = mnibbles * 4;
|
51
|
-
*bits = length;
|
52
|
-
}
|
53
|
-
|
54
|
-
static inline void StoreCommandExtra(
|
55
|
-
const Command& cmd, size_t* storage_ix, uint8_t* storage) {
|
56
|
-
uint32_t copylen_code = cmd.copy_len_code();
|
57
|
-
uint16_t inscode = GetInsertLengthCode(cmd.insert_len_);
|
58
|
-
uint16_t copycode = GetCopyLengthCode(copylen_code);
|
59
|
-
uint32_t insnumextra = GetInsertExtra(inscode);
|
60
|
-
uint64_t insextraval = cmd.insert_len_ - GetInsertBase(inscode);
|
61
|
-
uint64_t copyextraval = copylen_code - GetCopyBase(copycode);
|
62
|
-
uint64_t bits = (copyextraval << insnumextra) | insextraval;
|
63
|
-
WriteBits(insnumextra + GetCopyExtra(copycode), bits, storage_ix, storage);
|
64
|
-
}
|
65
|
-
|
66
|
-
} // namespace
|
67
|
-
|
68
|
-
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
|
69
|
-
if (n == 0) {
|
70
|
-
WriteBits(1, 0, storage_ix, storage);
|
71
|
-
} else {
|
72
|
-
WriteBits(1, 1, storage_ix, storage);
|
73
|
-
size_t nbits = Log2FloorNonZero(n);
|
74
|
-
WriteBits(3, nbits, storage_ix, storage);
|
75
|
-
WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
|
76
|
-
}
|
77
|
-
}
|
78
|
-
|
79
|
-
void StoreCompressedMetaBlockHeader(bool final_block,
|
80
|
-
size_t length,
|
81
|
-
size_t* storage_ix,
|
82
|
-
uint8_t* storage) {
|
83
|
-
// Write ISLAST bit.
|
84
|
-
WriteBits(1, final_block, storage_ix, storage);
|
85
|
-
// Write ISEMPTY bit.
|
86
|
-
if (final_block) {
|
87
|
-
WriteBits(1, 0, storage_ix, storage);
|
88
|
-
}
|
89
|
-
|
90
|
-
uint64_t lenbits;
|
91
|
-
size_t nlenbits;
|
92
|
-
uint64_t nibblesbits;
|
93
|
-
EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
|
94
|
-
WriteBits(2, nibblesbits, storage_ix, storage);
|
95
|
-
WriteBits(nlenbits, lenbits, storage_ix, storage);
|
96
|
-
|
97
|
-
if (!final_block) {
|
98
|
-
// Write ISUNCOMPRESSED bit.
|
99
|
-
WriteBits(1, 0, storage_ix, storage);
|
100
|
-
}
|
101
|
-
}
|
102
|
-
|
103
|
-
void StoreUncompressedMetaBlockHeader(size_t length,
|
104
|
-
size_t* storage_ix,
|
105
|
-
uint8_t* storage) {
|
106
|
-
// Write ISLAST bit. Uncompressed block cannot be the last one, so set to 0.
|
107
|
-
WriteBits(1, 0, storage_ix, storage);
|
108
|
-
uint64_t lenbits;
|
109
|
-
size_t nlenbits;
|
110
|
-
uint64_t nibblesbits;
|
111
|
-
EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
|
112
|
-
WriteBits(2, nibblesbits, storage_ix, storage);
|
113
|
-
WriteBits(nlenbits, lenbits, storage_ix, storage);
|
114
|
-
// Write ISUNCOMPRESSED bit.
|
115
|
-
WriteBits(1, 1, storage_ix, storage);
|
116
|
-
}
|
117
|
-
|
118
|
-
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
119
|
-
const int num_codes,
|
120
|
-
const uint8_t *code_length_bitdepth,
|
121
|
-
size_t *storage_ix,
|
122
|
-
uint8_t *storage) {
|
123
|
-
static const uint8_t kStorageOrder[kCodeLengthCodes] = {
|
124
|
-
1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
125
|
-
};
|
126
|
-
// The bit lengths of the Huffman code over the code length alphabet
|
127
|
-
// are compressed with the following static Huffman code:
|
128
|
-
// Symbol Code
|
129
|
-
// ------ ----
|
130
|
-
// 0 00
|
131
|
-
// 1 1110
|
132
|
-
// 2 110
|
133
|
-
// 3 01
|
134
|
-
// 4 10
|
135
|
-
// 5 1111
|
136
|
-
static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {
|
137
|
-
0, 7, 3, 2, 1, 15
|
138
|
-
};
|
139
|
-
static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {
|
140
|
-
2, 4, 3, 2, 2, 4
|
141
|
-
};
|
142
|
-
|
143
|
-
// Throw away trailing zeros:
|
144
|
-
size_t codes_to_store = kCodeLengthCodes;
|
145
|
-
if (num_codes > 1) {
|
146
|
-
for (; codes_to_store > 0; --codes_to_store) {
|
147
|
-
if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
|
148
|
-
break;
|
149
|
-
}
|
150
|
-
}
|
151
|
-
}
|
152
|
-
size_t skip_some = 0; // skips none.
|
153
|
-
if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
|
154
|
-
code_length_bitdepth[kStorageOrder[1]] == 0) {
|
155
|
-
skip_some = 2; // skips two.
|
156
|
-
if (code_length_bitdepth[kStorageOrder[2]] == 0) {
|
157
|
-
skip_some = 3; // skips three.
|
158
|
-
}
|
159
|
-
}
|
160
|
-
WriteBits(2, skip_some, storage_ix, storage);
|
161
|
-
for (size_t i = skip_some; i < codes_to_store; ++i) {
|
162
|
-
size_t l = code_length_bitdepth[kStorageOrder[i]];
|
163
|
-
WriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
|
164
|
-
kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage);
|
165
|
-
}
|
166
|
-
}
|
167
|
-
|
168
|
-
static void StoreHuffmanTreeToBitMask(
|
169
|
-
const size_t huffman_tree_size,
|
170
|
-
const uint8_t* huffman_tree,
|
171
|
-
const uint8_t* huffman_tree_extra_bits,
|
172
|
-
const uint8_t* code_length_bitdepth,
|
173
|
-
const uint16_t* code_length_bitdepth_symbols,
|
174
|
-
size_t * __restrict storage_ix,
|
175
|
-
uint8_t * __restrict storage) {
|
176
|
-
for (size_t i = 0; i < huffman_tree_size; ++i) {
|
177
|
-
size_t ix = huffman_tree[i];
|
178
|
-
WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
|
179
|
-
storage_ix, storage);
|
180
|
-
// Extra bits
|
181
|
-
switch (ix) {
|
182
|
-
case 16:
|
183
|
-
WriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage);
|
184
|
-
break;
|
185
|
-
case 17:
|
186
|
-
WriteBits(3, huffman_tree_extra_bits[i], storage_ix, storage);
|
187
|
-
break;
|
188
|
-
}
|
189
|
-
}
|
190
|
-
}
|
191
|
-
|
192
|
-
static void StoreSimpleHuffmanTree(const uint8_t* depths,
|
193
|
-
size_t symbols[4],
|
194
|
-
size_t num_symbols,
|
195
|
-
size_t max_bits,
|
196
|
-
size_t *storage_ix, uint8_t *storage) {
|
197
|
-
// value of 1 indicates a simple Huffman code
|
198
|
-
WriteBits(2, 1, storage_ix, storage);
|
199
|
-
WriteBits(2, num_symbols - 1, storage_ix, storage); // NSYM - 1
|
200
|
-
|
201
|
-
// Sort
|
202
|
-
for (size_t i = 0; i < num_symbols; i++) {
|
203
|
-
for (size_t j = i + 1; j < num_symbols; j++) {
|
204
|
-
if (depths[symbols[j]] < depths[symbols[i]]) {
|
205
|
-
std::swap(symbols[j], symbols[i]);
|
206
|
-
}
|
207
|
-
}
|
208
|
-
}
|
209
|
-
|
210
|
-
if (num_symbols == 2) {
|
211
|
-
WriteBits(max_bits, symbols[0], storage_ix, storage);
|
212
|
-
WriteBits(max_bits, symbols[1], storage_ix, storage);
|
213
|
-
} else if (num_symbols == 3) {
|
214
|
-
WriteBits(max_bits, symbols[0], storage_ix, storage);
|
215
|
-
WriteBits(max_bits, symbols[1], storage_ix, storage);
|
216
|
-
WriteBits(max_bits, symbols[2], storage_ix, storage);
|
217
|
-
} else {
|
218
|
-
WriteBits(max_bits, symbols[0], storage_ix, storage);
|
219
|
-
WriteBits(max_bits, symbols[1], storage_ix, storage);
|
220
|
-
WriteBits(max_bits, symbols[2], storage_ix, storage);
|
221
|
-
WriteBits(max_bits, symbols[3], storage_ix, storage);
|
222
|
-
// tree-select
|
223
|
-
WriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
|
224
|
-
}
|
225
|
-
}
|
226
|
-
|
227
|
-
// num = alphabet size
|
228
|
-
// depths = symbol depths
|
229
|
-
void StoreHuffmanTree(const uint8_t* depths, size_t num,
|
230
|
-
HuffmanTree* tree,
|
231
|
-
size_t *storage_ix, uint8_t *storage) {
|
232
|
-
// Write the Huffman tree into the brotli-representation.
|
233
|
-
// The command alphabet is the largest, so this allocation will fit all
|
234
|
-
// alphabets.
|
235
|
-
assert(num <= kNumCommandPrefixes);
|
236
|
-
uint8_t huffman_tree[kNumCommandPrefixes];
|
237
|
-
uint8_t huffman_tree_extra_bits[kNumCommandPrefixes];
|
238
|
-
size_t huffman_tree_size = 0;
|
239
|
-
WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
|
240
|
-
huffman_tree_extra_bits);
|
241
|
-
|
242
|
-
// Calculate the statistics of the Huffman tree in brotli-representation.
|
243
|
-
uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 };
|
244
|
-
for (size_t i = 0; i < huffman_tree_size; ++i) {
|
245
|
-
++huffman_tree_histogram[huffman_tree[i]];
|
246
|
-
}
|
247
|
-
|
248
|
-
int num_codes = 0;
|
249
|
-
int code = 0;
|
250
|
-
for (int i = 0; i < kCodeLengthCodes; ++i) {
|
251
|
-
if (huffman_tree_histogram[i]) {
|
252
|
-
if (num_codes == 0) {
|
253
|
-
code = i;
|
254
|
-
num_codes = 1;
|
255
|
-
} else if (num_codes == 1) {
|
256
|
-
num_codes = 2;
|
257
|
-
break;
|
258
|
-
}
|
259
|
-
}
|
260
|
-
}
|
261
|
-
|
262
|
-
// Calculate another Huffman tree to use for compressing both the
|
263
|
-
// earlier Huffman tree with.
|
264
|
-
uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
|
265
|
-
uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = { 0 };
|
266
|
-
CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
|
267
|
-
5, tree, &code_length_bitdepth[0]);
|
268
|
-
ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
|
269
|
-
&code_length_bitdepth_symbols[0]);
|
270
|
-
|
271
|
-
// Now, we have all the data, let's start storing it
|
272
|
-
StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
|
273
|
-
storage_ix, storage);
|
274
|
-
|
275
|
-
if (num_codes == 1) {
|
276
|
-
code_length_bitdepth[code] = 0;
|
277
|
-
}
|
278
|
-
|
279
|
-
// Store the real huffman tree now.
|
280
|
-
StoreHuffmanTreeToBitMask(huffman_tree_size,
|
281
|
-
huffman_tree,
|
282
|
-
huffman_tree_extra_bits,
|
283
|
-
&code_length_bitdepth[0],
|
284
|
-
code_length_bitdepth_symbols,
|
285
|
-
storage_ix, storage);
|
286
|
-
}
|
287
|
-
|
288
|
-
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
|
289
|
-
const size_t length,
|
290
|
-
HuffmanTree* tree,
|
291
|
-
uint8_t* depth,
|
292
|
-
uint16_t* bits,
|
293
|
-
size_t* storage_ix,
|
294
|
-
uint8_t* storage) {
|
295
|
-
size_t count = 0;
|
296
|
-
size_t s4[4] = { 0 };
|
297
|
-
for (size_t i = 0; i < length; i++) {
|
298
|
-
if (histogram[i]) {
|
299
|
-
if (count < 4) {
|
300
|
-
s4[count] = i;
|
301
|
-
} else if (count > 4) {
|
302
|
-
break;
|
303
|
-
}
|
304
|
-
count++;
|
305
|
-
}
|
306
|
-
}
|
307
|
-
|
308
|
-
size_t max_bits_counter = length - 1;
|
309
|
-
size_t max_bits = 0;
|
310
|
-
while (max_bits_counter) {
|
311
|
-
max_bits_counter >>= 1;
|
312
|
-
++max_bits;
|
313
|
-
}
|
314
|
-
|
315
|
-
if (count <= 1) {
|
316
|
-
WriteBits(4, 1, storage_ix, storage);
|
317
|
-
WriteBits(max_bits, s4[0], storage_ix, storage);
|
318
|
-
return;
|
319
|
-
}
|
320
|
-
|
321
|
-
CreateHuffmanTree(histogram, length, 15, tree, depth);
|
322
|
-
ConvertBitDepthsToSymbols(depth, length, bits);
|
323
|
-
|
324
|
-
if (count <= 4) {
|
325
|
-
StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
|
326
|
-
} else {
|
327
|
-
StoreHuffmanTree(depth, length, tree, storage_ix, storage);
|
328
|
-
}
|
329
|
-
}
|
330
|
-
|
331
|
-
static inline bool SortHuffmanTree(const HuffmanTree& v0,
|
332
|
-
const HuffmanTree& v1) {
|
333
|
-
return v0.total_count_ < v1.total_count_;
|
334
|
-
}
|
335
|
-
|
336
|
-
void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
|
337
|
-
const size_t histogram_total,
|
338
|
-
const size_t max_bits,
|
339
|
-
uint8_t* depth,
|
340
|
-
uint16_t* bits,
|
341
|
-
size_t* storage_ix,
|
342
|
-
uint8_t* storage) {
|
343
|
-
size_t count = 0;
|
344
|
-
size_t symbols[4] = { 0 };
|
345
|
-
size_t length = 0;
|
346
|
-
size_t total = histogram_total;
|
347
|
-
while (total != 0) {
|
348
|
-
if (histogram[length]) {
|
349
|
-
if (count < 4) {
|
350
|
-
symbols[count] = length;
|
351
|
-
}
|
352
|
-
++count;
|
353
|
-
total -= histogram[length];
|
354
|
-
}
|
355
|
-
++length;
|
356
|
-
}
|
357
|
-
|
358
|
-
if (count <= 1) {
|
359
|
-
WriteBits(4, 1, storage_ix, storage);
|
360
|
-
WriteBits(max_bits, symbols[0], storage_ix, storage);
|
361
|
-
return;
|
362
|
-
}
|
363
|
-
|
364
|
-
const size_t max_tree_size = 2 * length + 1;
|
365
|
-
HuffmanTree* const tree =
|
366
|
-
static_cast<HuffmanTree*>(malloc(max_tree_size * sizeof(HuffmanTree)));
|
367
|
-
for (uint32_t count_limit = 1; ; count_limit *= 2) {
|
368
|
-
HuffmanTree* node = tree;
|
369
|
-
for (size_t i = length; i != 0;) {
|
370
|
-
--i;
|
371
|
-
if (histogram[i]) {
|
372
|
-
if (PREDICT_TRUE(histogram[i] >= count_limit)) {
|
373
|
-
*node = HuffmanTree(histogram[i], -1, static_cast<int16_t>(i));
|
374
|
-
} else {
|
375
|
-
*node = HuffmanTree(count_limit, -1, static_cast<int16_t>(i));
|
376
|
-
}
|
377
|
-
++node;
|
378
|
-
}
|
379
|
-
}
|
380
|
-
const int n = static_cast<int>(node - tree);
|
381
|
-
std::sort(tree, node, SortHuffmanTree);
|
382
|
-
// The nodes are:
|
383
|
-
// [0, n): the sorted leaf nodes that we start with.
|
384
|
-
// [n]: we add a sentinel here.
|
385
|
-
// [n + 1, 2n): new parent nodes are added here, starting from
|
386
|
-
// (n+1). These are naturally in ascending order.
|
387
|
-
// [2n]: we add a sentinel at the end as well.
|
388
|
-
// There will be (2n+1) elements at the end.
|
389
|
-
const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
|
390
|
-
*node++ = sentinel;
|
391
|
-
*node++ = sentinel;
|
392
|
-
|
393
|
-
int i = 0; // Points to the next leaf node.
|
394
|
-
int j = n + 1; // Points to the next non-leaf node.
|
395
|
-
for (int k = n - 1; k > 0; --k) {
|
396
|
-
int left, right;
|
397
|
-
if (tree[i].total_count_ <= tree[j].total_count_) {
|
398
|
-
left = i;
|
399
|
-
++i;
|
400
|
-
} else {
|
401
|
-
left = j;
|
402
|
-
++j;
|
403
|
-
}
|
404
|
-
if (tree[i].total_count_ <= tree[j].total_count_) {
|
405
|
-
right = i;
|
406
|
-
++i;
|
407
|
-
} else {
|
408
|
-
right = j;
|
409
|
-
++j;
|
410
|
-
}
|
411
|
-
// The sentinel node becomes the parent node.
|
412
|
-
node[-1].total_count_ =
|
413
|
-
tree[left].total_count_ + tree[right].total_count_;
|
414
|
-
node[-1].index_left_ = static_cast<int16_t>(left);
|
415
|
-
node[-1].index_right_or_value_ = static_cast<int16_t>(right);
|
416
|
-
// Add back the last sentinel node.
|
417
|
-
*node++ = sentinel;
|
418
|
-
}
|
419
|
-
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
|
420
|
-
// We need to pack the Huffman tree in 14 bits.
|
421
|
-
// If this was not successful, add fake entities to the lowest values
|
422
|
-
// and retry.
|
423
|
-
if (PREDICT_TRUE(*std::max_element(&depth[0], &depth[length]) <= 14)) {
|
424
|
-
break;
|
425
|
-
}
|
426
|
-
}
|
427
|
-
free(tree);
|
428
|
-
ConvertBitDepthsToSymbols(depth, length, bits);
|
429
|
-
if (count <= 4) {
|
430
|
-
// value of 1 indicates a simple Huffman code
|
431
|
-
WriteBits(2, 1, storage_ix, storage);
|
432
|
-
WriteBits(2, count - 1, storage_ix, storage); // NSYM - 1
|
433
|
-
|
434
|
-
// Sort
|
435
|
-
for (size_t i = 0; i < count; i++) {
|
436
|
-
for (size_t j = i + 1; j < count; j++) {
|
437
|
-
if (depth[symbols[j]] < depth[symbols[i]]) {
|
438
|
-
std::swap(symbols[j], symbols[i]);
|
439
|
-
}
|
440
|
-
}
|
441
|
-
}
|
442
|
-
|
443
|
-
if (count == 2) {
|
444
|
-
WriteBits(max_bits, symbols[0], storage_ix, storage);
|
445
|
-
WriteBits(max_bits, symbols[1], storage_ix, storage);
|
446
|
-
} else if (count == 3) {
|
447
|
-
WriteBits(max_bits, symbols[0], storage_ix, storage);
|
448
|
-
WriteBits(max_bits, symbols[1], storage_ix, storage);
|
449
|
-
WriteBits(max_bits, symbols[2], storage_ix, storage);
|
450
|
-
} else {
|
451
|
-
WriteBits(max_bits, symbols[0], storage_ix, storage);
|
452
|
-
WriteBits(max_bits, symbols[1], storage_ix, storage);
|
453
|
-
WriteBits(max_bits, symbols[2], storage_ix, storage);
|
454
|
-
WriteBits(max_bits, symbols[3], storage_ix, storage);
|
455
|
-
// tree-select
|
456
|
-
WriteBits(1, depth[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
|
457
|
-
}
|
458
|
-
} else {
|
459
|
-
// Complex Huffman Tree
|
460
|
-
StoreStaticCodeLengthCode(storage_ix, storage);
|
461
|
-
|
462
|
-
// Actual rle coding.
|
463
|
-
uint8_t previous_value = 8;
|
464
|
-
for (size_t i = 0; i < length;) {
|
465
|
-
const uint8_t value = depth[i];
|
466
|
-
size_t reps = 1;
|
467
|
-
for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
|
468
|
-
++reps;
|
469
|
-
}
|
470
|
-
i += reps;
|
471
|
-
if (value == 0) {
|
472
|
-
WriteBits(kZeroRepsDepth[reps], kZeroRepsBits[reps],
|
473
|
-
storage_ix, storage);
|
474
|
-
} else {
|
475
|
-
if (previous_value != value) {
|
476
|
-
WriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
|
477
|
-
storage_ix, storage);
|
478
|
-
--reps;
|
479
|
-
}
|
480
|
-
if (reps < 3) {
|
481
|
-
while (reps != 0) {
|
482
|
-
reps--;
|
483
|
-
WriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
|
484
|
-
storage_ix, storage);
|
485
|
-
}
|
486
|
-
} else {
|
487
|
-
reps -= 3;
|
488
|
-
WriteBits(kNonZeroRepsDepth[reps], kNonZeroRepsBits[reps],
|
489
|
-
storage_ix, storage);
|
490
|
-
}
|
491
|
-
previous_value = value;
|
492
|
-
}
|
493
|
-
}
|
494
|
-
}
|
495
|
-
}
|
496
|
-
|
497
|
-
static size_t IndexOf(const uint8_t* v, size_t v_size, uint8_t value) {
|
498
|
-
size_t i = 0;
|
499
|
-
for (; i < v_size; ++i) {
|
500
|
-
if (v[i] == value) return i;
|
501
|
-
}
|
502
|
-
return i;
|
503
|
-
}
|
504
|
-
|
505
|
-
static void MoveToFront(uint8_t* v, size_t index) {
|
506
|
-
uint8_t value = v[index];
|
507
|
-
for (size_t i = index; i != 0; --i) {
|
508
|
-
v[i] = v[i - 1];
|
509
|
-
}
|
510
|
-
v[0] = value;
|
511
|
-
}
|
512
|
-
|
513
|
-
static void MoveToFrontTransform(const uint32_t* __restrict v_in,
|
514
|
-
const size_t v_size,
|
515
|
-
uint32_t* v_out) {
|
516
|
-
if (v_size == 0) {
|
517
|
-
return;
|
518
|
-
}
|
519
|
-
uint32_t max_value = *std::max_element(v_in, v_in + v_size);
|
520
|
-
assert(max_value < 256u);
|
521
|
-
uint8_t mtf[256];
|
522
|
-
size_t mtf_size = max_value + 1;
|
523
|
-
for (uint32_t i = 0; i <= max_value; ++i) {
|
524
|
-
mtf[i] = static_cast<uint8_t>(i);
|
525
|
-
}
|
526
|
-
for (size_t i = 0; i < v_size; ++i) {
|
527
|
-
size_t index = IndexOf(mtf, mtf_size, static_cast<uint8_t>(v_in[i]));
|
528
|
-
assert(index < mtf_size);
|
529
|
-
v_out[i] = static_cast<uint32_t>(index);
|
530
|
-
MoveToFront(mtf, index);
|
531
|
-
}
|
532
|
-
}
|
533
|
-
|
534
|
-
// Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
|
535
|
-
// the run length plus extra bits (lower 9 bits is the prefix code and the rest
|
536
|
-
// are the extra bits). Non-zero values in v[] are shifted by
|
537
|
-
// *max_length_prefix. Will not create prefix codes bigger than the initial
|
538
|
-
// value of *max_run_length_prefix. The prefix code of run length L is simply
|
539
|
-
// Log2Floor(L) and the number of extra bits is the same as the prefix code.
|
540
|
-
static void RunLengthCodeZeros(const size_t in_size,
|
541
|
-
uint32_t* __restrict v,
|
542
|
-
size_t* __restrict out_size,
|
543
|
-
uint32_t* __restrict max_run_length_prefix) {
|
544
|
-
uint32_t max_reps = 0;
|
545
|
-
for (size_t i = 0; i < in_size;) {
|
546
|
-
for (; i < in_size && v[i] != 0; ++i) ;
|
547
|
-
uint32_t reps = 0;
|
548
|
-
for (; i < in_size && v[i] == 0; ++i) {
|
549
|
-
++reps;
|
550
|
-
}
|
551
|
-
max_reps = std::max(reps, max_reps);
|
552
|
-
}
|
553
|
-
uint32_t max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
|
554
|
-
max_prefix = std::min(max_prefix, *max_run_length_prefix);
|
555
|
-
*max_run_length_prefix = max_prefix;
|
556
|
-
*out_size = 0;
|
557
|
-
for (size_t i = 0; i < in_size;) {
|
558
|
-
assert(*out_size <= i);
|
559
|
-
if (v[i] != 0) {
|
560
|
-
v[*out_size] = v[i] + *max_run_length_prefix;
|
561
|
-
++i;
|
562
|
-
++(*out_size);
|
563
|
-
} else {
|
564
|
-
uint32_t reps = 1;
|
565
|
-
for (size_t k = i + 1; k < in_size && v[k] == 0; ++k) {
|
566
|
-
++reps;
|
567
|
-
}
|
568
|
-
i += reps;
|
569
|
-
while (reps != 0) {
|
570
|
-
if (reps < (2u << max_prefix)) {
|
571
|
-
uint32_t run_length_prefix = Log2FloorNonZero(reps);
|
572
|
-
const uint32_t extra_bits = reps - (1u << run_length_prefix);
|
573
|
-
v[*out_size] = run_length_prefix + (extra_bits << 9);
|
574
|
-
++(*out_size);
|
575
|
-
break;
|
576
|
-
} else {
|
577
|
-
const uint32_t extra_bits = (1u << max_prefix) - 1u;
|
578
|
-
v[*out_size] = max_prefix + (extra_bits << 9);
|
579
|
-
reps -= (2u << max_prefix) - 1u;
|
580
|
-
++(*out_size);
|
581
|
-
}
|
582
|
-
}
|
583
|
-
}
|
584
|
-
}
|
585
|
-
}
|
586
|
-
|
587
|
-
void EncodeContextMap(const std::vector<uint32_t>& context_map,
|
588
|
-
size_t num_clusters,
|
589
|
-
HuffmanTree* tree,
|
590
|
-
size_t* storage_ix, uint8_t* storage) {
|
591
|
-
StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
|
592
|
-
|
593
|
-
if (num_clusters == 1) {
|
594
|
-
return;
|
595
|
-
}
|
596
|
-
|
597
|
-
uint32_t* rle_symbols = new uint32_t[context_map.size()];
|
598
|
-
MoveToFrontTransform(&context_map[0], context_map.size(), rle_symbols);
|
599
|
-
uint32_t max_run_length_prefix = 6;
|
600
|
-
size_t num_rle_symbols = 0;
|
601
|
-
RunLengthCodeZeros(context_map.size(), rle_symbols,
|
602
|
-
&num_rle_symbols, &max_run_length_prefix);
|
603
|
-
uint32_t histogram[kContextMapAlphabetSize];
|
604
|
-
memset(histogram, 0, sizeof(histogram));
|
605
|
-
static const int kSymbolBits = 9;
|
606
|
-
static const uint32_t kSymbolMask = (1u << kSymbolBits) - 1u;
|
607
|
-
for (size_t i = 0; i < num_rle_symbols; ++i) {
|
608
|
-
++histogram[rle_symbols[i] & kSymbolMask];
|
609
|
-
}
|
610
|
-
bool use_rle = max_run_length_prefix > 0;
|
611
|
-
WriteBits(1, use_rle, storage_ix, storage);
|
612
|
-
if (use_rle) {
|
613
|
-
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
|
614
|
-
}
|
615
|
-
uint8_t depths[kContextMapAlphabetSize];
|
616
|
-
uint16_t bits[kContextMapAlphabetSize];
|
617
|
-
memset(depths, 0, sizeof(depths));
|
618
|
-
memset(bits, 0, sizeof(bits));
|
619
|
-
BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
|
620
|
-
tree, depths, bits, storage_ix, storage);
|
621
|
-
for (size_t i = 0; i < num_rle_symbols; ++i) {
|
622
|
-
const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask;
|
623
|
-
const uint32_t extra_bits_val = rle_symbols[i] >> kSymbolBits;
|
624
|
-
WriteBits(depths[rle_symbol], bits[rle_symbol], storage_ix, storage);
|
625
|
-
if (rle_symbol > 0 && rle_symbol <= max_run_length_prefix) {
|
626
|
-
WriteBits(rle_symbol, extra_bits_val, storage_ix, storage);
|
627
|
-
}
|
628
|
-
}
|
629
|
-
WriteBits(1, 1, storage_ix, storage); // use move-to-front
|
630
|
-
delete[] rle_symbols;
|
631
|
-
}
|
632
|
-
|
633
|
-
void StoreBlockSwitch(const BlockSplitCode& code,
|
634
|
-
const size_t block_ix,
|
635
|
-
size_t* storage_ix,
|
636
|
-
uint8_t* storage) {
|
637
|
-
if (block_ix > 0) {
|
638
|
-
size_t typecode = code.type_code[block_ix];
|
639
|
-
WriteBits(code.type_depths[typecode], code.type_bits[typecode],
|
640
|
-
storage_ix, storage);
|
641
|
-
}
|
642
|
-
size_t lencode = code.length_prefix[block_ix];
|
643
|
-
WriteBits(code.length_depths[lencode], code.length_bits[lencode],
|
644
|
-
storage_ix, storage);
|
645
|
-
WriteBits(code.length_nextra[block_ix], code.length_extra[block_ix],
|
646
|
-
storage_ix, storage);
|
647
|
-
}
|
648
|
-
|
649
|
-
static void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
|
650
|
-
const std::vector<uint32_t>& lengths,
|
651
|
-
const size_t num_types,
|
652
|
-
HuffmanTree* tree,
|
653
|
-
BlockSplitCode* code,
|
654
|
-
size_t* storage_ix,
|
655
|
-
uint8_t* storage) {
|
656
|
-
const size_t num_blocks = types.size();
|
657
|
-
uint32_t type_histo[kBlockTypeAlphabetSize];
|
658
|
-
uint32_t length_histo[kNumBlockLenPrefixes];
|
659
|
-
memset(type_histo, 0, (num_types + 2) * sizeof(type_histo[0]));
|
660
|
-
memset(length_histo, 0, sizeof(length_histo));
|
661
|
-
size_t last_type = 1;
|
662
|
-
size_t second_last_type = 0;
|
663
|
-
code->type_code.resize(num_blocks);
|
664
|
-
code->length_prefix.resize(num_blocks);
|
665
|
-
code->length_nextra.resize(num_blocks);
|
666
|
-
code->length_extra.resize(num_blocks);
|
667
|
-
code->type_depths.resize(num_types + 2);
|
668
|
-
code->type_bits.resize(num_types + 2);
|
669
|
-
memset(code->length_depths, 0, sizeof(code->length_depths));
|
670
|
-
memset(code->length_bits, 0, sizeof(code->length_bits));
|
671
|
-
for (size_t i = 0; i < num_blocks; ++i) {
|
672
|
-
size_t type = types[i];
|
673
|
-
size_t type_code = (type == last_type + 1 ? 1 :
|
674
|
-
type == second_last_type ? 0 :
|
675
|
-
type + 2);
|
676
|
-
second_last_type = last_type;
|
677
|
-
last_type = type;
|
678
|
-
code->type_code[i] = static_cast<uint32_t>(type_code);
|
679
|
-
if (i != 0) ++type_histo[type_code];
|
680
|
-
GetBlockLengthPrefixCode(lengths[i],
|
681
|
-
&code->length_prefix[i],
|
682
|
-
&code->length_nextra[i],
|
683
|
-
&code->length_extra[i]);
|
684
|
-
++length_histo[code->length_prefix[i]];
|
685
|
-
}
|
686
|
-
StoreVarLenUint8(num_types - 1, storage_ix, storage);
|
687
|
-
if (num_types > 1) {
|
688
|
-
BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree,
|
689
|
-
&code->type_depths[0], &code->type_bits[0],
|
690
|
-
storage_ix, storage);
|
691
|
-
BuildAndStoreHuffmanTree(&length_histo[0], kNumBlockLenPrefixes, tree,
|
692
|
-
&code->length_depths[0], &code->length_bits[0],
|
693
|
-
storage_ix, storage);
|
694
|
-
StoreBlockSwitch(*code, 0, storage_ix, storage);
|
695
|
-
}
|
696
|
-
}
|
697
|
-
|
698
|
-
void StoreTrivialContextMap(size_t num_types,
|
699
|
-
size_t context_bits,
|
700
|
-
HuffmanTree* tree,
|
701
|
-
size_t* storage_ix,
|
702
|
-
uint8_t* storage) {
|
703
|
-
StoreVarLenUint8(num_types - 1, storage_ix, storage);
|
704
|
-
if (num_types > 1) {
|
705
|
-
size_t repeat_code = context_bits - 1u;
|
706
|
-
size_t repeat_bits = (1u << repeat_code) - 1u;
|
707
|
-
size_t alphabet_size = num_types + repeat_code;
|
708
|
-
uint32_t histogram[kContextMapAlphabetSize];
|
709
|
-
uint8_t depths[kContextMapAlphabetSize];
|
710
|
-
uint16_t bits[kContextMapAlphabetSize];
|
711
|
-
memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
|
712
|
-
memset(depths, 0, alphabet_size * sizeof(depths[0]));
|
713
|
-
memset(bits, 0, alphabet_size * sizeof(bits[0]));
|
714
|
-
// Write RLEMAX.
|
715
|
-
WriteBits(1, 1, storage_ix, storage);
|
716
|
-
WriteBits(4, repeat_code - 1, storage_ix, storage);
|
717
|
-
histogram[repeat_code] = static_cast<uint32_t>(num_types);
|
718
|
-
histogram[0] = 1;
|
719
|
-
for (size_t i = context_bits; i < alphabet_size; ++i) {
|
720
|
-
histogram[i] = 1;
|
721
|
-
}
|
722
|
-
BuildAndStoreHuffmanTree(&histogram[0], alphabet_size, tree,
|
723
|
-
&depths[0], &bits[0],
|
724
|
-
storage_ix, storage);
|
725
|
-
for (size_t i = 0; i < num_types; ++i) {
|
726
|
-
size_t code = (i == 0 ? 0 : i + context_bits - 1);
|
727
|
-
WriteBits(depths[code], bits[code], storage_ix, storage);
|
728
|
-
WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage);
|
729
|
-
WriteBits(repeat_code, repeat_bits, storage_ix, storage);
|
730
|
-
}
|
731
|
-
// Write IMTF (inverse-move-to-front) bit.
|
732
|
-
WriteBits(1, 1, storage_ix, storage);
|
733
|
-
}
|
734
|
-
}
|
735
|
-
|
736
|
-
// Manages the encoding of one block category (literal, command or distance).
|
737
|
-
class BlockEncoder {
|
738
|
-
public:
|
739
|
-
BlockEncoder(size_t alphabet_size,
|
740
|
-
size_t num_block_types,
|
741
|
-
const std::vector<uint8_t>& block_types,
|
742
|
-
const std::vector<uint32_t>& block_lengths)
|
743
|
-
: alphabet_size_(alphabet_size),
|
744
|
-
num_block_types_(num_block_types),
|
745
|
-
block_types_(block_types),
|
746
|
-
block_lengths_(block_lengths),
|
747
|
-
block_ix_(0),
|
748
|
-
block_len_(block_lengths.empty() ? 0 : block_lengths[0]),
|
749
|
-
entropy_ix_(0) {}
|
750
|
-
|
751
|
-
// Creates entropy codes of block lengths and block types and stores them
|
752
|
-
// to the bit stream.
|
753
|
-
void BuildAndStoreBlockSwitchEntropyCodes(HuffmanTree* tree,
|
754
|
-
size_t* storage_ix,
|
755
|
-
uint8_t* storage) {
|
756
|
-
BuildAndStoreBlockSplitCode(
|
757
|
-
block_types_, block_lengths_, num_block_types_,
|
758
|
-
tree, &block_split_code_, storage_ix, storage);
|
759
|
-
}
|
760
|
-
|
761
|
-
// Creates entropy codes for all block types and stores them to the bit
|
762
|
-
// stream.
|
763
|
-
template<int kSize>
|
764
|
-
void BuildAndStoreEntropyCodes(
|
765
|
-
const std::vector<Histogram<kSize> >& histograms,
|
766
|
-
HuffmanTree* tree,
|
767
|
-
size_t* storage_ix, uint8_t* storage) {
|
768
|
-
depths_.resize(histograms.size() * alphabet_size_);
|
769
|
-
bits_.resize(histograms.size() * alphabet_size_);
|
770
|
-
for (size_t i = 0; i < histograms.size(); ++i) {
|
771
|
-
size_t ix = i * alphabet_size_;
|
772
|
-
BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size_,
|
773
|
-
tree,
|
774
|
-
&depths_[ix], &bits_[ix],
|
775
|
-
storage_ix, storage);
|
776
|
-
}
|
777
|
-
}
|
778
|
-
|
779
|
-
// Stores the next symbol with the entropy code of the current block type.
|
780
|
-
// Updates the block type and block length at block boundaries.
|
781
|
-
void StoreSymbol(size_t symbol, size_t* storage_ix, uint8_t* storage) {
|
782
|
-
if (block_len_ == 0) {
|
783
|
-
++block_ix_;
|
784
|
-
block_len_ = block_lengths_[block_ix_];
|
785
|
-
entropy_ix_ = block_types_[block_ix_] * alphabet_size_;
|
786
|
-
StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
|
787
|
-
}
|
788
|
-
--block_len_;
|
789
|
-
size_t ix = entropy_ix_ + symbol;
|
790
|
-
WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
|
791
|
-
}
|
792
|
-
|
793
|
-
// Stores the next symbol with the entropy code of the current block type and
|
794
|
-
// context value.
|
795
|
-
// Updates the block type and block length at block boundaries.
|
796
|
-
template<int kContextBits>
|
797
|
-
void StoreSymbolWithContext(size_t symbol, size_t context,
|
798
|
-
const std::vector<uint32_t>& context_map,
|
799
|
-
size_t* storage_ix, uint8_t* storage) {
|
800
|
-
if (block_len_ == 0) {
|
801
|
-
++block_ix_;
|
802
|
-
block_len_ = block_lengths_[block_ix_];
|
803
|
-
size_t block_type = block_types_[block_ix_];
|
804
|
-
entropy_ix_ = block_type << kContextBits;
|
805
|
-
StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
|
806
|
-
}
|
807
|
-
--block_len_;
|
808
|
-
size_t histo_ix = context_map[entropy_ix_ + context];
|
809
|
-
size_t ix = histo_ix * alphabet_size_ + symbol;
|
810
|
-
WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
|
811
|
-
}
|
812
|
-
|
813
|
-
private:
|
814
|
-
const size_t alphabet_size_;
|
815
|
-
const size_t num_block_types_;
|
816
|
-
const std::vector<uint8_t>& block_types_;
|
817
|
-
const std::vector<uint32_t>& block_lengths_;
|
818
|
-
BlockSplitCode block_split_code_;
|
819
|
-
size_t block_ix_;
|
820
|
-
size_t block_len_;
|
821
|
-
size_t entropy_ix_;
|
822
|
-
std::vector<uint8_t> depths_;
|
823
|
-
std::vector<uint16_t> bits_;
|
824
|
-
};
|
825
|
-
|
826
|
-
static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) {
|
827
|
-
*storage_ix = (*storage_ix + 7u) & ~7u;
|
828
|
-
storage[*storage_ix >> 3] = 0;
|
829
|
-
}
|
830
|
-
|
831
|
-
void StoreMetaBlock(const uint8_t* input,
|
832
|
-
size_t start_pos,
|
833
|
-
size_t length,
|
834
|
-
size_t mask,
|
835
|
-
uint8_t prev_byte,
|
836
|
-
uint8_t prev_byte2,
|
837
|
-
bool is_last,
|
838
|
-
uint32_t num_direct_distance_codes,
|
839
|
-
uint32_t distance_postfix_bits,
|
840
|
-
ContextType literal_context_mode,
|
841
|
-
const brotli::Command *commands,
|
842
|
-
size_t n_commands,
|
843
|
-
const MetaBlockSplit& mb,
|
844
|
-
size_t *storage_ix,
|
845
|
-
uint8_t *storage) {
|
846
|
-
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
|
847
|
-
|
848
|
-
size_t num_distance_codes =
|
849
|
-
kNumDistanceShortCodes + num_direct_distance_codes +
|
850
|
-
(48u << distance_postfix_bits);
|
851
|
-
|
852
|
-
HuffmanTree* tree = static_cast<HuffmanTree*>(
|
853
|
-
malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
|
854
|
-
BlockEncoder literal_enc(256,
|
855
|
-
mb.literal_split.num_types,
|
856
|
-
mb.literal_split.types,
|
857
|
-
mb.literal_split.lengths);
|
858
|
-
BlockEncoder command_enc(kNumCommandPrefixes,
|
859
|
-
mb.command_split.num_types,
|
860
|
-
mb.command_split.types,
|
861
|
-
mb.command_split.lengths);
|
862
|
-
BlockEncoder distance_enc(num_distance_codes,
|
863
|
-
mb.distance_split.num_types,
|
864
|
-
mb.distance_split.types,
|
865
|
-
mb.distance_split.lengths);
|
866
|
-
|
867
|
-
literal_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
|
868
|
-
command_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
|
869
|
-
distance_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
|
870
|
-
|
871
|
-
WriteBits(2, distance_postfix_bits, storage_ix, storage);
|
872
|
-
WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
|
873
|
-
storage_ix, storage);
|
874
|
-
for (size_t i = 0; i < mb.literal_split.num_types; ++i) {
|
875
|
-
WriteBits(2, literal_context_mode, storage_ix, storage);
|
876
|
-
}
|
877
|
-
|
878
|
-
size_t num_literal_histograms = mb.literal_histograms.size();
|
879
|
-
if (mb.literal_context_map.empty()) {
|
880
|
-
StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits, tree,
|
881
|
-
storage_ix, storage);
|
882
|
-
} else {
|
883
|
-
EncodeContextMap(mb.literal_context_map, num_literal_histograms, tree,
|
884
|
-
storage_ix, storage);
|
885
|
-
}
|
886
|
-
|
887
|
-
size_t num_dist_histograms = mb.distance_histograms.size();
|
888
|
-
if (mb.distance_context_map.empty()) {
|
889
|
-
StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits, tree,
|
890
|
-
storage_ix, storage);
|
891
|
-
} else {
|
892
|
-
EncodeContextMap(mb.distance_context_map, num_dist_histograms, tree,
|
893
|
-
storage_ix, storage);
|
894
|
-
}
|
895
|
-
|
896
|
-
literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms, tree,
|
897
|
-
storage_ix, storage);
|
898
|
-
command_enc.BuildAndStoreEntropyCodes(mb.command_histograms, tree,
|
899
|
-
storage_ix, storage);
|
900
|
-
distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms, tree,
|
901
|
-
storage_ix, storage);
|
902
|
-
free(tree);
|
903
|
-
|
904
|
-
size_t pos = start_pos;
|
905
|
-
for (size_t i = 0; i < n_commands; ++i) {
|
906
|
-
const Command cmd = commands[i];
|
907
|
-
size_t cmd_code = cmd.cmd_prefix_;
|
908
|
-
command_enc.StoreSymbol(cmd_code, storage_ix, storage);
|
909
|
-
StoreCommandExtra(cmd, storage_ix, storage);
|
910
|
-
if (mb.literal_context_map.empty()) {
|
911
|
-
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
912
|
-
literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
|
913
|
-
++pos;
|
914
|
-
}
|
915
|
-
} else {
|
916
|
-
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
917
|
-
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
|
918
|
-
uint8_t literal = input[pos & mask];
|
919
|
-
literal_enc.StoreSymbolWithContext<kLiteralContextBits>(
|
920
|
-
literal, context, mb.literal_context_map, storage_ix, storage);
|
921
|
-
prev_byte2 = prev_byte;
|
922
|
-
prev_byte = literal;
|
923
|
-
++pos;
|
924
|
-
}
|
925
|
-
}
|
926
|
-
pos += cmd.copy_len();
|
927
|
-
if (cmd.copy_len()) {
|
928
|
-
prev_byte2 = input[(pos - 2) & mask];
|
929
|
-
prev_byte = input[(pos - 1) & mask];
|
930
|
-
if (cmd.cmd_prefix_ >= 128) {
|
931
|
-
size_t dist_code = cmd.dist_prefix_;
|
932
|
-
uint32_t distnumextra = cmd.dist_extra_ >> 24;
|
933
|
-
uint64_t distextra = cmd.dist_extra_ & 0xffffff;
|
934
|
-
if (mb.distance_context_map.empty()) {
|
935
|
-
distance_enc.StoreSymbol(dist_code, storage_ix, storage);
|
936
|
-
} else {
|
937
|
-
size_t context = cmd.DistanceContext();
|
938
|
-
distance_enc.StoreSymbolWithContext<kDistanceContextBits>(
|
939
|
-
dist_code, context, mb.distance_context_map, storage_ix, storage);
|
940
|
-
}
|
941
|
-
brotli::WriteBits(distnumextra, distextra, storage_ix, storage);
|
942
|
-
}
|
943
|
-
}
|
944
|
-
}
|
945
|
-
if (is_last) {
|
946
|
-
JumpToByteBoundary(storage_ix, storage);
|
947
|
-
}
|
948
|
-
}
|
949
|
-
|
950
|
-
static void BuildHistograms(const uint8_t* input,
|
951
|
-
size_t start_pos,
|
952
|
-
size_t mask,
|
953
|
-
const brotli::Command *commands,
|
954
|
-
size_t n_commands,
|
955
|
-
HistogramLiteral* lit_histo,
|
956
|
-
HistogramCommand* cmd_histo,
|
957
|
-
HistogramDistance* dist_histo) {
|
958
|
-
size_t pos = start_pos;
|
959
|
-
for (size_t i = 0; i < n_commands; ++i) {
|
960
|
-
const Command cmd = commands[i];
|
961
|
-
cmd_histo->Add(cmd.cmd_prefix_);
|
962
|
-
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
963
|
-
lit_histo->Add(input[pos & mask]);
|
964
|
-
++pos;
|
965
|
-
}
|
966
|
-
pos += cmd.copy_len();
|
967
|
-
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
|
968
|
-
dist_histo->Add(cmd.dist_prefix_);
|
969
|
-
}
|
970
|
-
}
|
971
|
-
}
|
972
|
-
|
973
|
-
static void StoreDataWithHuffmanCodes(const uint8_t* input,
|
974
|
-
size_t start_pos,
|
975
|
-
size_t mask,
|
976
|
-
const brotli::Command *commands,
|
977
|
-
size_t n_commands,
|
978
|
-
const uint8_t* lit_depth,
|
979
|
-
const uint16_t* lit_bits,
|
980
|
-
const uint8_t* cmd_depth,
|
981
|
-
const uint16_t* cmd_bits,
|
982
|
-
const uint8_t* dist_depth,
|
983
|
-
const uint16_t* dist_bits,
|
984
|
-
size_t* storage_ix,
|
985
|
-
uint8_t* storage) {
|
986
|
-
size_t pos = start_pos;
|
987
|
-
for (size_t i = 0; i < n_commands; ++i) {
|
988
|
-
const Command cmd = commands[i];
|
989
|
-
const size_t cmd_code = cmd.cmd_prefix_;
|
990
|
-
WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
|
991
|
-
StoreCommandExtra(cmd, storage_ix, storage);
|
992
|
-
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
993
|
-
const uint8_t literal = input[pos & mask];
|
994
|
-
WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
|
995
|
-
++pos;
|
996
|
-
}
|
997
|
-
pos += cmd.copy_len();
|
998
|
-
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
|
999
|
-
const size_t dist_code = cmd.dist_prefix_;
|
1000
|
-
const uint32_t distnumextra = cmd.dist_extra_ >> 24;
|
1001
|
-
const uint32_t distextra = cmd.dist_extra_ & 0xffffff;
|
1002
|
-
WriteBits(dist_depth[dist_code], dist_bits[dist_code],
|
1003
|
-
storage_ix, storage);
|
1004
|
-
WriteBits(distnumextra, distextra, storage_ix, storage);
|
1005
|
-
}
|
1006
|
-
}
|
1007
|
-
}
|
1008
|
-
|
1009
|
-
void StoreMetaBlockTrivial(const uint8_t* input,
|
1010
|
-
size_t start_pos,
|
1011
|
-
size_t length,
|
1012
|
-
size_t mask,
|
1013
|
-
bool is_last,
|
1014
|
-
const brotli::Command *commands,
|
1015
|
-
size_t n_commands,
|
1016
|
-
size_t *storage_ix,
|
1017
|
-
uint8_t *storage) {
|
1018
|
-
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
|
1019
|
-
|
1020
|
-
HistogramLiteral lit_histo;
|
1021
|
-
HistogramCommand cmd_histo;
|
1022
|
-
HistogramDistance dist_histo;
|
1023
|
-
|
1024
|
-
BuildHistograms(input, start_pos, mask, commands, n_commands,
|
1025
|
-
&lit_histo, &cmd_histo, &dist_histo);
|
1026
|
-
|
1027
|
-
WriteBits(13, 0, storage_ix, storage);
|
1028
|
-
|
1029
|
-
std::vector<uint8_t> lit_depth(256);
|
1030
|
-
std::vector<uint16_t> lit_bits(256);
|
1031
|
-
std::vector<uint8_t> cmd_depth(kNumCommandPrefixes);
|
1032
|
-
std::vector<uint16_t> cmd_bits(kNumCommandPrefixes);
|
1033
|
-
std::vector<uint8_t> dist_depth(64);
|
1034
|
-
std::vector<uint16_t> dist_bits(64);
|
1035
|
-
|
1036
|
-
HuffmanTree* tree = static_cast<HuffmanTree*>(
|
1037
|
-
malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
|
1038
|
-
BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256, tree,
|
1039
|
-
&lit_depth[0], &lit_bits[0],
|
1040
|
-
storage_ix, storage);
|
1041
|
-
BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes, tree,
|
1042
|
-
&cmd_depth[0], &cmd_bits[0],
|
1043
|
-
storage_ix, storage);
|
1044
|
-
BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64, tree,
|
1045
|
-
&dist_depth[0], &dist_bits[0],
|
1046
|
-
storage_ix, storage);
|
1047
|
-
free(tree);
|
1048
|
-
StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
|
1049
|
-
n_commands, &lit_depth[0], &lit_bits[0],
|
1050
|
-
&cmd_depth[0], &cmd_bits[0],
|
1051
|
-
&dist_depth[0], &dist_bits[0],
|
1052
|
-
storage_ix, storage);
|
1053
|
-
if (is_last) {
|
1054
|
-
JumpToByteBoundary(storage_ix, storage);
|
1055
|
-
}
|
1056
|
-
}
|
1057
|
-
|
1058
|
-
void StoreMetaBlockFast(const uint8_t* input,
|
1059
|
-
size_t start_pos,
|
1060
|
-
size_t length,
|
1061
|
-
size_t mask,
|
1062
|
-
bool is_last,
|
1063
|
-
const brotli::Command *commands,
|
1064
|
-
size_t n_commands,
|
1065
|
-
size_t *storage_ix,
|
1066
|
-
uint8_t *storage) {
|
1067
|
-
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
|
1068
|
-
|
1069
|
-
WriteBits(13, 0, storage_ix, storage);
|
1070
|
-
|
1071
|
-
if (n_commands <= 128) {
|
1072
|
-
uint32_t histogram[256] = { 0 };
|
1073
|
-
size_t pos = start_pos;
|
1074
|
-
size_t num_literals = 0;
|
1075
|
-
for (size_t i = 0; i < n_commands; ++i) {
|
1076
|
-
const Command cmd = commands[i];
|
1077
|
-
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
1078
|
-
++histogram[input[pos & mask]];
|
1079
|
-
++pos;
|
1080
|
-
}
|
1081
|
-
num_literals += cmd.insert_len_;
|
1082
|
-
pos += cmd.copy_len();
|
1083
|
-
}
|
1084
|
-
uint8_t lit_depth[256] = { 0 };
|
1085
|
-
uint16_t lit_bits[256] = { 0 };
|
1086
|
-
BuildAndStoreHuffmanTreeFast(histogram, num_literals,
|
1087
|
-
/* max_bits = */ 8,
|
1088
|
-
lit_depth, lit_bits,
|
1089
|
-
storage_ix, storage);
|
1090
|
-
StoreStaticCommandHuffmanTree(storage_ix, storage);
|
1091
|
-
StoreStaticDistanceHuffmanTree(storage_ix, storage);
|
1092
|
-
StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
|
1093
|
-
n_commands, &lit_depth[0], &lit_bits[0],
|
1094
|
-
kStaticCommandCodeDepth,
|
1095
|
-
kStaticCommandCodeBits,
|
1096
|
-
kStaticDistanceCodeDepth,
|
1097
|
-
kStaticDistanceCodeBits,
|
1098
|
-
storage_ix, storage);
|
1099
|
-
} else {
|
1100
|
-
HistogramLiteral lit_histo;
|
1101
|
-
HistogramCommand cmd_histo;
|
1102
|
-
HistogramDistance dist_histo;
|
1103
|
-
BuildHistograms(input, start_pos, mask, commands, n_commands,
|
1104
|
-
&lit_histo, &cmd_histo, &dist_histo);
|
1105
|
-
std::vector<uint8_t> lit_depth(256);
|
1106
|
-
std::vector<uint16_t> lit_bits(256);
|
1107
|
-
std::vector<uint8_t> cmd_depth(kNumCommandPrefixes);
|
1108
|
-
std::vector<uint16_t> cmd_bits(kNumCommandPrefixes);
|
1109
|
-
std::vector<uint8_t> dist_depth(64);
|
1110
|
-
std::vector<uint16_t> dist_bits(64);
|
1111
|
-
BuildAndStoreHuffmanTreeFast(&lit_histo.data_[0], lit_histo.total_count_,
|
1112
|
-
/* max_bits = */ 8,
|
1113
|
-
&lit_depth[0], &lit_bits[0],
|
1114
|
-
storage_ix, storage);
|
1115
|
-
BuildAndStoreHuffmanTreeFast(&cmd_histo.data_[0], cmd_histo.total_count_,
|
1116
|
-
/* max_bits = */ 10,
|
1117
|
-
&cmd_depth[0], &cmd_bits[0],
|
1118
|
-
storage_ix, storage);
|
1119
|
-
BuildAndStoreHuffmanTreeFast(&dist_histo.data_[0], dist_histo.total_count_,
|
1120
|
-
/* max_bits = */ 6,
|
1121
|
-
&dist_depth[0], &dist_bits[0],
|
1122
|
-
storage_ix, storage);
|
1123
|
-
StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
|
1124
|
-
n_commands, &lit_depth[0], &lit_bits[0],
|
1125
|
-
&cmd_depth[0], &cmd_bits[0],
|
1126
|
-
&dist_depth[0], &dist_bits[0],
|
1127
|
-
storage_ix, storage);
|
1128
|
-
}
|
1129
|
-
|
1130
|
-
if (is_last) {
|
1131
|
-
JumpToByteBoundary(storage_ix, storage);
|
1132
|
-
}
|
1133
|
-
}
|
1134
|
-
|
1135
|
-
// This is for storing uncompressed blocks (simple raw storage of
|
1136
|
-
// bytes-as-bytes).
|
1137
|
-
void StoreUncompressedMetaBlock(bool final_block,
|
1138
|
-
const uint8_t * __restrict input,
|
1139
|
-
size_t position, size_t mask,
|
1140
|
-
size_t len,
|
1141
|
-
size_t * __restrict storage_ix,
|
1142
|
-
uint8_t * __restrict storage) {
|
1143
|
-
StoreUncompressedMetaBlockHeader(len, storage_ix, storage);
|
1144
|
-
JumpToByteBoundary(storage_ix, storage);
|
1145
|
-
|
1146
|
-
size_t masked_pos = position & mask;
|
1147
|
-
if (masked_pos + len > mask + 1) {
|
1148
|
-
size_t len1 = mask + 1 - masked_pos;
|
1149
|
-
memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len1);
|
1150
|
-
*storage_ix += len1 << 3;
|
1151
|
-
len -= len1;
|
1152
|
-
masked_pos = 0;
|
1153
|
-
}
|
1154
|
-
memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len);
|
1155
|
-
*storage_ix += len << 3;
|
1156
|
-
|
1157
|
-
// We need to clear the next 4 bytes to continue to be
|
1158
|
-
// compatible with WriteBits.
|
1159
|
-
brotli::WriteBitsPrepareStorage(*storage_ix, storage);
|
1160
|
-
|
1161
|
-
// Since the uncompressed block itself may not be the final block, add an
|
1162
|
-
// empty one after this.
|
1163
|
-
if (final_block) {
|
1164
|
-
brotli::WriteBits(1, 1, storage_ix, storage); // islast
|
1165
|
-
brotli::WriteBits(1, 1, storage_ix, storage); // isempty
|
1166
|
-
JumpToByteBoundary(storage_ix, storage);
|
1167
|
-
}
|
1168
|
-
}
|
1169
|
-
|
1170
|
-
void StoreSyncMetaBlock(size_t * __restrict storage_ix,
|
1171
|
-
uint8_t * __restrict storage) {
|
1172
|
-
// Empty metadata meta-block bit pattern:
|
1173
|
-
// 1 bit: is_last (0)
|
1174
|
-
// 2 bits: num nibbles (3)
|
1175
|
-
// 1 bit: reserved (0)
|
1176
|
-
// 2 bits: metadata length bytes (0)
|
1177
|
-
WriteBits(6, 6, storage_ix, storage);
|
1178
|
-
JumpToByteBoundary(storage_ix, storage);
|
1179
|
-
}
|
1180
|
-
|
1181
|
-
} // namespace brotli
|