brotli 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.travis.yml +11 -3
- data/Gemfile +2 -0
- data/ext/brotli/brotli.c +279 -0
- data/ext/brotli/brotli.h +2 -0
- data/ext/brotli/buffer.c +95 -0
- data/ext/brotli/buffer.h +19 -0
- data/ext/brotli/extconf.rb +21 -81
- data/lib/brotli/version.rb +1 -1
- data/vendor/brotli/dec/bit_reader.c +5 -5
- data/vendor/brotli/dec/bit_reader.h +15 -15
- data/vendor/brotli/dec/context.h +1 -1
- data/vendor/brotli/dec/decode.c +433 -348
- data/vendor/brotli/dec/decode.h +74 -48
- data/vendor/brotli/dec/huffman.c +5 -4
- data/vendor/brotli/dec/huffman.h +4 -4
- data/vendor/brotli/dec/port.h +2 -95
- data/vendor/brotli/dec/prefix.h +5 -3
- data/vendor/brotli/dec/state.c +15 -27
- data/vendor/brotli/dec/state.h +21 -17
- data/vendor/brotli/dec/transform.h +1 -1
- data/vendor/brotli/enc/backward_references.c +892 -0
- data/vendor/brotli/enc/backward_references.h +85 -102
- data/vendor/brotli/enc/backward_references_inc.h +147 -0
- data/vendor/brotli/enc/bit_cost.c +35 -0
- data/vendor/brotli/enc/bit_cost.h +23 -121
- data/vendor/brotli/enc/bit_cost_inc.h +127 -0
- data/vendor/brotli/enc/block_encoder_inc.h +33 -0
- data/vendor/brotli/enc/block_splitter.c +197 -0
- data/vendor/brotli/enc/block_splitter.h +40 -50
- data/vendor/brotli/enc/block_splitter_inc.h +432 -0
- data/vendor/brotli/enc/brotli_bit_stream.c +1334 -0
- data/vendor/brotli/enc/brotli_bit_stream.h +95 -167
- data/vendor/brotli/enc/cluster.c +56 -0
- data/vendor/brotli/enc/cluster.h +23 -305
- data/vendor/brotli/enc/cluster_inc.h +315 -0
- data/vendor/brotli/enc/command.h +83 -76
- data/vendor/brotli/enc/compress_fragment.c +747 -0
- data/vendor/brotli/enc/compress_fragment.h +48 -37
- data/vendor/brotli/enc/compress_fragment_two_pass.c +557 -0
- data/vendor/brotli/enc/compress_fragment_two_pass.h +37 -26
- data/vendor/brotli/enc/compressor.cc +139 -0
- data/vendor/brotli/enc/compressor.h +146 -0
- data/vendor/brotli/enc/context.h +102 -96
- data/vendor/brotli/enc/dictionary_hash.h +9 -5
- data/vendor/brotli/enc/encode.c +1562 -0
- data/vendor/brotli/enc/encode.h +211 -199
- data/vendor/brotli/enc/encode_parallel.cc +161 -151
- data/vendor/brotli/enc/encode_parallel.h +7 -8
- data/vendor/brotli/enc/entropy_encode.c +501 -0
- data/vendor/brotli/enc/entropy_encode.h +107 -89
- data/vendor/brotli/enc/entropy_encode_static.h +29 -62
- data/vendor/brotli/enc/fast_log.h +26 -20
- data/vendor/brotli/enc/find_match_length.h +23 -20
- data/vendor/brotli/enc/hash.h +614 -871
- data/vendor/brotli/enc/hash_forgetful_chain_inc.h +249 -0
- data/vendor/brotli/enc/hash_longest_match_inc.h +241 -0
- data/vendor/brotli/enc/hash_longest_match_quickly_inc.h +230 -0
- data/vendor/brotli/enc/histogram.c +95 -0
- data/vendor/brotli/enc/histogram.h +49 -83
- data/vendor/brotli/enc/histogram_inc.h +51 -0
- data/vendor/brotli/enc/literal_cost.c +178 -0
- data/vendor/brotli/enc/literal_cost.h +16 -10
- data/vendor/brotli/enc/memory.c +181 -0
- data/vendor/brotli/enc/memory.h +62 -0
- data/vendor/brotli/enc/metablock.c +515 -0
- data/vendor/brotli/enc/metablock.h +87 -57
- data/vendor/brotli/enc/metablock_inc.h +183 -0
- data/vendor/brotli/enc/port.h +73 -47
- data/vendor/brotli/enc/prefix.h +34 -61
- data/vendor/brotli/enc/quality.h +130 -0
- data/vendor/brotli/enc/ringbuffer.h +137 -122
- data/vendor/brotli/enc/{static_dict.cc → static_dict.c} +162 -139
- data/vendor/brotli/enc/static_dict.h +23 -18
- data/vendor/brotli/enc/static_dict_lut.h +11223 -12037
- data/vendor/brotli/enc/streams.cc +7 -7
- data/vendor/brotli/enc/streams.h +32 -32
- data/vendor/brotli/enc/{utf8_util.cc → utf8_util.c} +22 -20
- data/vendor/brotli/enc/utf8_util.h +16 -9
- data/vendor/brotli/enc/write_bits.h +49 -43
- metadata +34 -25
- data/ext/brotli/brotli.cc +0 -181
- data/vendor/brotli/dec/Makefile +0 -12
- data/vendor/brotli/dec/dictionary.c +0 -9466
- data/vendor/brotli/dec/dictionary.h +0 -38
- data/vendor/brotli/dec/types.h +0 -38
- data/vendor/brotli/enc/Makefile +0 -14
- data/vendor/brotli/enc/backward_references.cc +0 -858
- data/vendor/brotli/enc/block_splitter.cc +0 -505
- data/vendor/brotli/enc/brotli_bit_stream.cc +0 -1181
- data/vendor/brotli/enc/compress_fragment.cc +0 -701
- data/vendor/brotli/enc/compress_fragment_two_pass.cc +0 -524
- data/vendor/brotli/enc/dictionary.cc +0 -9466
- data/vendor/brotli/enc/dictionary.h +0 -41
- data/vendor/brotli/enc/encode.cc +0 -1180
- data/vendor/brotli/enc/entropy_encode.cc +0 -480
- data/vendor/brotli/enc/histogram.cc +0 -67
- data/vendor/brotli/enc/literal_cost.cc +0 -165
- data/vendor/brotli/enc/metablock.cc +0 -539
- data/vendor/brotli/enc/transform.h +0 -248
- data/vendor/brotli/enc/types.h +0 -29
@@ -1,480 +0,0 @@
|
|
1
|
-
/* Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
-
|
3
|
-
Distributed under MIT license.
|
4
|
-
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
-
*/
|
6
|
-
|
7
|
-
// Entropy encoding (Huffman) utilities.
|
8
|
-
|
9
|
-
#include "./entropy_encode.h"
|
10
|
-
|
11
|
-
#include <algorithm>
|
12
|
-
#include <limits>
|
13
|
-
#include <cstdlib>
|
14
|
-
|
15
|
-
#include "./histogram.h"
|
16
|
-
#include "./port.h"
|
17
|
-
#include "./types.h"
|
18
|
-
|
19
|
-
namespace brotli {
|
20
|
-
|
21
|
-
void SetDepth(const HuffmanTree &p,
|
22
|
-
HuffmanTree *pool,
|
23
|
-
uint8_t *depth,
|
24
|
-
uint8_t level) {
|
25
|
-
if (p.index_left_ >= 0) {
|
26
|
-
++level;
|
27
|
-
SetDepth(pool[p.index_left_], pool, depth, level);
|
28
|
-
SetDepth(pool[p.index_right_or_value_], pool, depth, level);
|
29
|
-
} else {
|
30
|
-
depth[p.index_right_or_value_] = level;
|
31
|
-
}
|
32
|
-
}
|
33
|
-
|
34
|
-
// Sort the root nodes, least popular first.
|
35
|
-
static inline bool SortHuffmanTree(const HuffmanTree& v0,
|
36
|
-
const HuffmanTree& v1) {
|
37
|
-
if (v0.total_count_ != v1.total_count_) {
|
38
|
-
return v0.total_count_ < v1.total_count_;
|
39
|
-
}
|
40
|
-
return v0.index_right_or_value_ > v1.index_right_or_value_;
|
41
|
-
}
|
42
|
-
|
43
|
-
// This function will create a Huffman tree.
|
44
|
-
//
|
45
|
-
// The catch here is that the tree cannot be arbitrarily deep.
|
46
|
-
// Brotli specifies a maximum depth of 15 bits for "code trees"
|
47
|
-
// and 7 bits for "code length code trees."
|
48
|
-
//
|
49
|
-
// count_limit is the value that is to be faked as the minimum value
|
50
|
-
// and this minimum value is raised until the tree matches the
|
51
|
-
// maximum length requirement.
|
52
|
-
//
|
53
|
-
// This algorithm is not of excellent performance for very long data blocks,
|
54
|
-
// especially when population counts are longer than 2**tree_limit, but
|
55
|
-
// we are not planning to use this with extremely long blocks.
|
56
|
-
//
|
57
|
-
// See http://en.wikipedia.org/wiki/Huffman_coding
|
58
|
-
void CreateHuffmanTree(const uint32_t *data,
|
59
|
-
const size_t length,
|
60
|
-
const int tree_limit,
|
61
|
-
HuffmanTree* tree,
|
62
|
-
uint8_t *depth) {
|
63
|
-
// For block sizes below 64 kB, we never need to do a second iteration
|
64
|
-
// of this loop. Probably all of our block sizes will be smaller than
|
65
|
-
// that, so this loop is mostly of academic interest. If we actually
|
66
|
-
// would need this, we would be better off with the Katajainen algorithm.
|
67
|
-
for (uint32_t count_limit = 1; ; count_limit *= 2) {
|
68
|
-
size_t n = 0;
|
69
|
-
for (size_t i = length; i != 0;) {
|
70
|
-
--i;
|
71
|
-
if (data[i]) {
|
72
|
-
const uint32_t count = std::max(data[i], count_limit);
|
73
|
-
tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
|
74
|
-
}
|
75
|
-
}
|
76
|
-
|
77
|
-
if (n == 1) {
|
78
|
-
depth[tree[0].index_right_or_value_] = 1; // Only one element.
|
79
|
-
break;
|
80
|
-
}
|
81
|
-
|
82
|
-
std::sort(tree, tree + n, SortHuffmanTree);
|
83
|
-
|
84
|
-
// The nodes are:
|
85
|
-
// [0, n): the sorted leaf nodes that we start with.
|
86
|
-
// [n]: we add a sentinel here.
|
87
|
-
// [n + 1, 2n): new parent nodes are added here, starting from
|
88
|
-
// (n+1). These are naturally in ascending order.
|
89
|
-
// [2n]: we add a sentinel at the end as well.
|
90
|
-
// There will be (2n+1) elements at the end.
|
91
|
-
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
|
92
|
-
tree[n] = sentinel;
|
93
|
-
tree[n + 1] = sentinel;
|
94
|
-
|
95
|
-
size_t i = 0; // Points to the next leaf node.
|
96
|
-
size_t j = n + 1; // Points to the next non-leaf node.
|
97
|
-
for (size_t k = n - 1; k != 0; --k) {
|
98
|
-
size_t left, right;
|
99
|
-
if (tree[i].total_count_ <= tree[j].total_count_) {
|
100
|
-
left = i;
|
101
|
-
++i;
|
102
|
-
} else {
|
103
|
-
left = j;
|
104
|
-
++j;
|
105
|
-
}
|
106
|
-
if (tree[i].total_count_ <= tree[j].total_count_) {
|
107
|
-
right = i;
|
108
|
-
++i;
|
109
|
-
} else {
|
110
|
-
right = j;
|
111
|
-
++j;
|
112
|
-
}
|
113
|
-
|
114
|
-
// The sentinel node becomes the parent node.
|
115
|
-
size_t j_end = 2 * n - k;
|
116
|
-
tree[j_end].total_count_ =
|
117
|
-
tree[left].total_count_ + tree[right].total_count_;
|
118
|
-
tree[j_end].index_left_ = static_cast<int16_t>(left);
|
119
|
-
tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
|
120
|
-
|
121
|
-
// Add back the last sentinel node.
|
122
|
-
tree[j_end + 1] = sentinel;
|
123
|
-
}
|
124
|
-
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
|
125
|
-
|
126
|
-
// We need to pack the Huffman tree in tree_limit bits.
|
127
|
-
// If this was not successful, add fake entities to the lowest values
|
128
|
-
// and retry.
|
129
|
-
if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
|
130
|
-
break;
|
131
|
-
}
|
132
|
-
}
|
133
|
-
}
|
134
|
-
|
135
|
-
// Reverses, in place, the elements of `v` in the half-open range
// [start, end).
//
// An empty range (start >= end) is a no-op. This includes end == 0, which
// the hand-written loop used to turn into an unsigned underflow followed by
// out-of-bounds swaps.
static void Reverse(uint8_t* v, size_t start, size_t end) {
  if (start >= end) {
    return;
  }
  std::reverse(v + start, v + end);
}
|
145
|
-
|
146
|
-
static void WriteHuffmanTreeRepetitions(
|
147
|
-
const uint8_t previous_value,
|
148
|
-
const uint8_t value,
|
149
|
-
size_t repetitions,
|
150
|
-
size_t* tree_size,
|
151
|
-
uint8_t* tree,
|
152
|
-
uint8_t* extra_bits_data) {
|
153
|
-
assert(repetitions > 0);
|
154
|
-
if (previous_value != value) {
|
155
|
-
tree[*tree_size] = value;
|
156
|
-
extra_bits_data[*tree_size] = 0;
|
157
|
-
++(*tree_size);
|
158
|
-
--repetitions;
|
159
|
-
}
|
160
|
-
if (repetitions == 7) {
|
161
|
-
tree[*tree_size] = value;
|
162
|
-
extra_bits_data[*tree_size] = 0;
|
163
|
-
++(*tree_size);
|
164
|
-
--repetitions;
|
165
|
-
}
|
166
|
-
if (repetitions < 3) {
|
167
|
-
for (size_t i = 0; i < repetitions; ++i) {
|
168
|
-
tree[*tree_size] = value;
|
169
|
-
extra_bits_data[*tree_size] = 0;
|
170
|
-
++(*tree_size);
|
171
|
-
}
|
172
|
-
} else {
|
173
|
-
repetitions -= 3;
|
174
|
-
size_t start = *tree_size;
|
175
|
-
while (true) {
|
176
|
-
tree[*tree_size] = 16;
|
177
|
-
extra_bits_data[*tree_size] = repetitions & 0x3;
|
178
|
-
++(*tree_size);
|
179
|
-
repetitions >>= 2;
|
180
|
-
if (repetitions == 0) {
|
181
|
-
break;
|
182
|
-
}
|
183
|
-
--repetitions;
|
184
|
-
}
|
185
|
-
Reverse(tree, start, *tree_size);
|
186
|
-
Reverse(extra_bits_data, start, *tree_size);
|
187
|
-
}
|
188
|
-
}
|
189
|
-
|
190
|
-
static void WriteHuffmanTreeRepetitionsZeros(
|
191
|
-
size_t repetitions,
|
192
|
-
size_t* tree_size,
|
193
|
-
uint8_t* tree,
|
194
|
-
uint8_t* extra_bits_data) {
|
195
|
-
if (repetitions == 11) {
|
196
|
-
tree[*tree_size] = 0;
|
197
|
-
extra_bits_data[*tree_size] = 0;
|
198
|
-
++(*tree_size);
|
199
|
-
--repetitions;
|
200
|
-
}
|
201
|
-
if (repetitions < 3) {
|
202
|
-
for (size_t i = 0; i < repetitions; ++i) {
|
203
|
-
tree[*tree_size] = 0;
|
204
|
-
extra_bits_data[*tree_size] = 0;
|
205
|
-
++(*tree_size);
|
206
|
-
}
|
207
|
-
} else {
|
208
|
-
repetitions -= 3;
|
209
|
-
size_t start = *tree_size;
|
210
|
-
while (true) {
|
211
|
-
tree[*tree_size] = 17;
|
212
|
-
extra_bits_data[*tree_size] = repetitions & 0x7;
|
213
|
-
++(*tree_size);
|
214
|
-
repetitions >>= 3;
|
215
|
-
if (repetitions == 0) {
|
216
|
-
break;
|
217
|
-
}
|
218
|
-
--repetitions;
|
219
|
-
}
|
220
|
-
Reverse(tree, start, *tree_size);
|
221
|
-
Reverse(extra_bits_data, start, *tree_size);
|
222
|
-
}
|
223
|
-
}
|
224
|
-
|
225
|
-
// Adjusts the population counts in place so that the Huffman code built from
// them is more compatible with run-length encoding of its code lengths.
//
//   length        number of entries in counts.
//   counts        population counts; modified in place.
//   good_for_rle  scratch buffer with room for at least `length` bytes. It is
//                 only written when the function gets as far as step 2.
//
// Nothing is changed when fewer than 16 counts are non-zero. Between 16 and
// 27 non-zero counts only the hole filling of step 1 may be applied.
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
                                 uint8_t* good_for_rle) {
  const size_t kStreakLimit = 1240;
  size_t pos;

  size_t used_symbols = 0;
  for (pos = 0; pos != length; ++pos) {
    if (counts[pos] != 0) {
      ++used_symbols;
    }
  }
  if (used_symbols < 16) {
    return;
  }

  // Ignore trailing zeros from here on.
  while (length != 0 && counts[length - 1] == 0) {
    --length;
  }
  if (length == 0) {
    return;  // All zeros.
  }

  // 1) Fill isolated single-zero holes when the histogram is dense and
  //    contains very small counts anyway.
  {
    size_t populated = 0;
    uint32_t min_populated = 1 << 30;
    for (pos = 0; pos < length; ++pos) {
      const uint32_t c = counts[pos];
      if (c == 0) {
        continue;
      }
      ++populated;
      if (c < min_populated) {
        min_populated = c;
      }
    }
    if (populated < 5) {
      // Small histogram will model it well.
      return;
    }
    const size_t empty = length - populated;
    if (min_populated < 4 && empty < 6) {
      for (pos = 1; pos < length - 1; ++pos) {
        if (counts[pos - 1] != 0 && counts[pos] == 0 &&
            counts[pos + 1] != 0) {
          counts[pos] = 1;
        }
      }
    }
    if (populated < 28) {
      return;
    }
  }

  // 2) Mark all population counts that can already be encoded with an rle
  //    code, so that step 3 does not spoil them: runs of at least 5 equal
  //    zeros and runs of at least 7 equal non-zero values.
  std::fill(good_for_rle, good_for_rle + length, static_cast<uint8_t>(0));
  {
    uint32_t run_value = counts[0];
    size_t run_length = 0;
    for (pos = 0; pos <= length; ++pos) {
      if (pos != length && counts[pos] == run_value) {
        ++run_length;
        continue;
      }
      // The run [pos - run_length, pos) has just ended.
      const size_t needed = (run_value == 0) ? 5 : 7;
      if (run_length >= needed) {
        for (size_t m = pos - run_length; m < pos; ++m) {
          good_for_rle[m] = 1;
        }
      }
      run_length = 1;
      if (pos != length) {
        run_value = counts[pos];
      }
    }
  }

  // 3) Replace those population counts that lead to more rle codes.
  //    Math here is in 24.8 fixed point representation.
  size_t stride = 0;
  size_t sum = 0;
  size_t limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
  for (pos = 0; pos <= length; ++pos) {
    // The last term is an unsigned range check: it is true when
    // 256 * counts[pos] is not within kStreakLimit of `limit`.
    const bool stride_ends =
        pos == length || good_for_rle[pos] ||
        (pos != 0 && good_for_rle[pos - 1]) ||
        (256 * counts[pos] - limit + kStreakLimit) >= 2 * kStreakLimit;
    if (stride_ends) {
      if (stride >= 4 || (stride >= 3 && sum == 0)) {
        // The stride must end, collapse what we have, if we have enough (4).
        // An all-zero stride stays zero; any other stride gets its rounded
        // average, but never less than one.
        size_t flattened = 0;
        if (sum != 0) {
          flattened = (sum + stride / 2) / stride;
          if (flattened == 0) {
            flattened = 1;
          }
        }
        // counts[pos] already belongs to the next stride, so stop before it.
        for (size_t m = pos - stride; m < pos; ++m) {
          counts[m] = static_cast<uint32_t>(flattened);
        }
      }
      stride = 0;
      sum = 0;
      if (pos < length - 2) {
        // All interesting strides have a count of at least 4,
        // at least when non-zeros.
        limit =
            256 * (counts[pos] + counts[pos + 1] + counts[pos + 2]) / 3 + 420;
      } else if (pos < length) {
        limit = 256 * counts[pos];
      } else {
        limit = 0;
      }
    }
    ++stride;
    if (pos != length) {
      sum += counts[pos];
      if (stride >= 4) {
        limit = (256 * sum + stride / 2) / stride;
      }
      if (stride == 4) {
        limit += 120;
      }
    }
  }
}
|
355
|
-
|
356
|
-
// Scans the code lengths in `depth` and decides, separately for zero and
// non-zero values, whether run-length coding is worth using.
//
// Only runs of at least 3 zeros, or at least 4 equal non-zero values, are
// counted. Each decision is "symbols covered by such runs > 2 * (number of
// such runs + 1)".
//
//   depth                 code lengths to inspect.
//   length                number of entries in depth.
//   use_rle_for_non_zero  receives the decision for non-zero values.
//   use_rle_for_zero      receives the decision for zero values.
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
                             bool *use_rle_for_non_zero,
                             bool *use_rle_for_zero) {
  size_t zero_run_symbols = 0;
  size_t nonzero_run_symbols = 0;
  // Both run counters deliberately start at one.
  size_t zero_runs = 1;
  size_t nonzero_runs = 1;

  size_t run_start = 0;
  while (run_start < length) {
    const uint8_t value = depth[run_start];
    size_t run_end = run_start + 1;
    while (run_end < length && depth[run_end] == value) {
      ++run_end;
    }
    const size_t reps = run_end - run_start;
    if (value == 0) {
      if (reps >= 3) {
        zero_run_symbols += reps;
        ++zero_runs;
      }
    } else if (reps >= 4) {
      nonzero_run_symbols += reps;
      ++nonzero_runs;
    }
    run_start = run_end;
  }

  *use_rle_for_non_zero = nonzero_run_symbols > nonzero_runs * 2;
  *use_rle_for_zero = zero_run_symbols > zero_runs * 2;
}
|
382
|
-
|
383
|
-
void WriteHuffmanTree(const uint8_t* depth,
|
384
|
-
size_t length,
|
385
|
-
size_t* tree_size,
|
386
|
-
uint8_t* tree,
|
387
|
-
uint8_t* extra_bits_data) {
|
388
|
-
uint8_t previous_value = 8;
|
389
|
-
|
390
|
-
// Throw away trailing zeros.
|
391
|
-
size_t new_length = length;
|
392
|
-
for (size_t i = 0; i < length; ++i) {
|
393
|
-
if (depth[length - i - 1] == 0) {
|
394
|
-
--new_length;
|
395
|
-
} else {
|
396
|
-
break;
|
397
|
-
}
|
398
|
-
}
|
399
|
-
|
400
|
-
// First gather statistics on if it is a good idea to do rle.
|
401
|
-
bool use_rle_for_non_zero = false;
|
402
|
-
bool use_rle_for_zero = false;
|
403
|
-
if (length > 50) {
|
404
|
-
// Find rle coding for longer codes.
|
405
|
-
// Shorter codes seem not to benefit from rle.
|
406
|
-
DecideOverRleUse(depth, new_length,
|
407
|
-
&use_rle_for_non_zero, &use_rle_for_zero);
|
408
|
-
}
|
409
|
-
|
410
|
-
// Actual rle coding.
|
411
|
-
for (size_t i = 0; i < new_length;) {
|
412
|
-
const uint8_t value = depth[i];
|
413
|
-
size_t reps = 1;
|
414
|
-
if ((value != 0 && use_rle_for_non_zero) ||
|
415
|
-
(value == 0 && use_rle_for_zero)) {
|
416
|
-
for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
|
417
|
-
++reps;
|
418
|
-
}
|
419
|
-
}
|
420
|
-
if (value == 0) {
|
421
|
-
WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
|
422
|
-
} else {
|
423
|
-
WriteHuffmanTreeRepetitions(previous_value,
|
424
|
-
value, reps, tree_size,
|
425
|
-
tree, extra_bits_data);
|
426
|
-
previous_value = value;
|
427
|
-
}
|
428
|
-
i += reps;
|
429
|
-
}
|
430
|
-
}
|
431
|
-
|
432
|
-
namespace {
|
433
|
-
|
434
|
-
// Returns `bits` with its lowest `num_bits` bits in reversed order.
//
// The value is reversed one nibble at a time through a lookup table, which
// yields a result aligned to a multiple of four bits; the surplus low bits
// are then shifted out.
uint16_t ReverseBits(int num_bits, uint16_t bits) {
  static const size_t kNibbleReversed[16] = {  // Pre-reversed 4-bit values.
    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
  };
  size_t reversed = kNibbleReversed[bits & 0xf];
  int consumed = 4;
  while (consumed < num_bits) {
    bits = static_cast<uint16_t>(bits >> 4);
    reversed = (reversed << 4) | kNibbleReversed[bits & 0xf];
    consumed += 4;
  }
  // Number of bits by which the nibble-aligned width exceeds num_bits.
  const int padding = -num_bits & 0x3;
  return static_cast<uint16_t>(reversed >> padding);
}
|
448
|
-
|
449
|
-
} // namespace
|
450
|
-
|
451
|
-
void ConvertBitDepthsToSymbols(const uint8_t *depth,
|
452
|
-
size_t len,
|
453
|
-
uint16_t *bits) {
|
454
|
-
// In Brotli, all bit depths are [1..15]
|
455
|
-
// 0 bit depth means that the symbol does not exist.
|
456
|
-
const int kMaxBits = 16; // 0..15 are values for bits
|
457
|
-
uint16_t bl_count[kMaxBits] = { 0 };
|
458
|
-
{
|
459
|
-
for (size_t i = 0; i < len; ++i) {
|
460
|
-
++bl_count[depth[i]];
|
461
|
-
}
|
462
|
-
bl_count[0] = 0;
|
463
|
-
}
|
464
|
-
uint16_t next_code[kMaxBits];
|
465
|
-
next_code[0] = 0;
|
466
|
-
{
|
467
|
-
int code = 0;
|
468
|
-
for (int bits = 1; bits < kMaxBits; ++bits) {
|
469
|
-
code = (code + bl_count[bits - 1]) << 1;
|
470
|
-
next_code[bits] = static_cast<uint16_t>(code);
|
471
|
-
}
|
472
|
-
}
|
473
|
-
for (size_t i = 0; i < len; ++i) {
|
474
|
-
if (depth[i]) {
|
475
|
-
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
|
476
|
-
}
|
477
|
-
}
|
478
|
-
}
|
479
|
-
|
480
|
-
} // namespace brotli
|
@@ -1,67 +0,0 @@
|
|
1
|
-
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
-
|
3
|
-
Distributed under MIT license.
|
4
|
-
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
-
*/
|
6
|
-
|
7
|
-
// Build per-context histograms of literals, commands and distance codes.
|
8
|
-
|
9
|
-
#include "./histogram.h"
|
10
|
-
|
11
|
-
#include <cmath>
|
12
|
-
|
13
|
-
#include "./block_splitter.h"
|
14
|
-
#include "./command.h"
|
15
|
-
#include "./context.h"
|
16
|
-
#include "./prefix.h"
|
17
|
-
|
18
|
-
namespace brotli {
|
19
|
-
|
20
|
-
void BuildHistograms(
|
21
|
-
const Command* cmds,
|
22
|
-
const size_t num_commands,
|
23
|
-
const BlockSplit& literal_split,
|
24
|
-
const BlockSplit& insert_and_copy_split,
|
25
|
-
const BlockSplit& dist_split,
|
26
|
-
const uint8_t* ringbuffer,
|
27
|
-
size_t start_pos,
|
28
|
-
size_t mask,
|
29
|
-
uint8_t prev_byte,
|
30
|
-
uint8_t prev_byte2,
|
31
|
-
const std::vector<ContextType>& context_modes,
|
32
|
-
std::vector<HistogramLiteral>* literal_histograms,
|
33
|
-
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
34
|
-
std::vector<HistogramDistance>* copy_dist_histograms) {
|
35
|
-
size_t pos = start_pos;
|
36
|
-
BlockSplitIterator literal_it(literal_split);
|
37
|
-
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
|
38
|
-
BlockSplitIterator dist_it(dist_split);
|
39
|
-
for (size_t i = 0; i < num_commands; ++i) {
|
40
|
-
const Command &cmd = cmds[i];
|
41
|
-
insert_and_copy_it.Next();
|
42
|
-
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
|
43
|
-
cmd.cmd_prefix_);
|
44
|
-
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
45
|
-
literal_it.Next();
|
46
|
-
size_t context = (literal_it.type_ << kLiteralContextBits) +
|
47
|
-
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
|
48
|
-
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
|
49
|
-
prev_byte2 = prev_byte;
|
50
|
-
prev_byte = ringbuffer[pos & mask];
|
51
|
-
++pos;
|
52
|
-
}
|
53
|
-
pos += cmd.copy_len();
|
54
|
-
if (cmd.copy_len()) {
|
55
|
-
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
56
|
-
prev_byte = ringbuffer[(pos - 1) & mask];
|
57
|
-
if (cmd.cmd_prefix_ >= 128) {
|
58
|
-
dist_it.Next();
|
59
|
-
size_t context = (dist_it.type_ << kDistanceContextBits) +
|
60
|
-
cmd.DistanceContext();
|
61
|
-
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
|
62
|
-
}
|
63
|
-
}
|
64
|
-
}
|
65
|
-
}
|
66
|
-
|
67
|
-
} // namespace brotli
|
@@ -1,165 +0,0 @@
|
|
1
|
-
/* Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
-
|
3
|
-
Distributed under MIT license.
|
4
|
-
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
5
|
-
*/
|
6
|
-
|
7
|
-
// Literal cost model to allow backward reference replacement to be efficient.
|
8
|
-
|
9
|
-
#include "./literal_cost.h"
|
10
|
-
|
11
|
-
#include <math.h>
|
12
|
-
#include <algorithm>
|
13
|
-
|
14
|
-
#include "./fast_log.h"
|
15
|
-
#include "./types.h"
|
16
|
-
#include "./utf8_util.h"
|
17
|
-
|
18
|
-
namespace brotli {
|
19
|
-
|
20
|
-
// Returns which byte of a UTF-8 sequence is expected next (0 for 'Byte 1',
// 1 for 'Byte 2', 2 for 'Byte 3'), limited to `clamp`.
//
//   last   the byte before `c`.
//   c      the most recent byte.
//   clamp  upper bound for the returned position.
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
  if (c < 128) {
    return 0;  // Next one is the 'Byte 1' again.
  }
  const bool is_lead_byte = (c >= 192);
  if (!is_lead_byte && last < 0xe0) {
    return 0;  // Completed two or three byte coding.
  }
  // After a lead byte the 'Byte 2' follows; otherwise `last` decided that
  // the sequence continues with the 'Byte 3'.
  const size_t expected = is_lead_byte ? 1 : 2;
  return std::min<size_t>(expected, clamp);
}
|
34
|
-
|
35
|
-
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
|
36
|
-
const uint8_t *data) {
|
37
|
-
size_t counts[3] = { 0 };
|
38
|
-
size_t max_utf8 = 1; // should be 2, but 1 compresses better.
|
39
|
-
size_t last_c = 0;
|
40
|
-
size_t utf8_pos = 0;
|
41
|
-
for (size_t i = 0; i < len; ++i) {
|
42
|
-
size_t c = data[(pos + i) & mask];
|
43
|
-
utf8_pos = UTF8Position(last_c, c, 2);
|
44
|
-
++counts[utf8_pos];
|
45
|
-
last_c = c;
|
46
|
-
}
|
47
|
-
if (counts[2] < 500) {
|
48
|
-
max_utf8 = 1;
|
49
|
-
}
|
50
|
-
if (counts[1] + counts[2] < 25) {
|
51
|
-
max_utf8 = 0;
|
52
|
-
}
|
53
|
-
return max_utf8;
|
54
|
-
}
|
55
|
-
|
56
|
-
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
57
|
-
const uint8_t *data, float *cost) {
|
58
|
-
|
59
|
-
// max_utf8 is 0 (normal ascii single byte modeling),
|
60
|
-
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
|
61
|
-
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
|
62
|
-
size_t histogram[3][256] = { { 0 } };
|
63
|
-
size_t window_half = 495;
|
64
|
-
size_t in_window = std::min(window_half, len);
|
65
|
-
size_t in_window_utf8[3] = { 0 };
|
66
|
-
|
67
|
-
// Bootstrap histograms.
|
68
|
-
size_t last_c = 0;
|
69
|
-
size_t utf8_pos = 0;
|
70
|
-
for (size_t i = 0; i < in_window; ++i) {
|
71
|
-
size_t c = data[(pos + i) & mask];
|
72
|
-
++histogram[utf8_pos][c];
|
73
|
-
++in_window_utf8[utf8_pos];
|
74
|
-
utf8_pos = UTF8Position(last_c, c, max_utf8);
|
75
|
-
last_c = c;
|
76
|
-
}
|
77
|
-
|
78
|
-
// Compute bit costs with sliding window.
|
79
|
-
for (size_t i = 0; i < len; ++i) {
|
80
|
-
if (i >= window_half) {
|
81
|
-
// Remove a byte in the past.
|
82
|
-
size_t c = i < window_half + 1 ?
|
83
|
-
0 : data[(pos + i - window_half - 1) & mask];
|
84
|
-
size_t last_c = i < window_half + 2 ?
|
85
|
-
0 : data[(pos + i - window_half - 2) & mask];
|
86
|
-
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
87
|
-
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
|
88
|
-
--in_window_utf8[utf8_pos2];
|
89
|
-
}
|
90
|
-
if (i + window_half < len) {
|
91
|
-
// Add a byte in the future.
|
92
|
-
size_t c = data[(pos + i + window_half - 1) & mask];
|
93
|
-
size_t last_c = data[(pos + i + window_half - 2) & mask];
|
94
|
-
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
95
|
-
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
|
96
|
-
++in_window_utf8[utf8_pos2];
|
97
|
-
}
|
98
|
-
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
|
99
|
-
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
|
100
|
-
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
|
101
|
-
size_t masked_pos = (pos + i) & mask;
|
102
|
-
size_t histo = histogram[utf8_pos][data[masked_pos]];
|
103
|
-
if (histo == 0) {
|
104
|
-
histo = 1;
|
105
|
-
}
|
106
|
-
double lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
|
107
|
-
lit_cost += 0.02905;
|
108
|
-
if (lit_cost < 1.0) {
|
109
|
-
lit_cost *= 0.5;
|
110
|
-
lit_cost += 0.5;
|
111
|
-
}
|
112
|
-
// Make the first bytes more expensive -- seems to help, not sure why.
|
113
|
-
// Perhaps because the entropy source is changing its properties
|
114
|
-
// rapidly in the beginning of the file, perhaps because the beginning
|
115
|
-
// of the data is a statistical "anomaly".
|
116
|
-
if (i < 2000) {
|
117
|
-
lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
|
118
|
-
}
|
119
|
-
cost[i] = static_cast<float>(lit_cost);
|
120
|
-
}
|
121
|
-
}
|
122
|
-
|
123
|
-
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
124
|
-
const uint8_t *data, float *cost) {
|
125
|
-
if (IsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
|
126
|
-
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
|
127
|
-
return;
|
128
|
-
}
|
129
|
-
size_t histogram[256] = { 0 };
|
130
|
-
size_t window_half = 2000;
|
131
|
-
size_t in_window = std::min(window_half, len);
|
132
|
-
|
133
|
-
// Bootstrap histogram.
|
134
|
-
for (size_t i = 0; i < in_window; ++i) {
|
135
|
-
++histogram[data[(pos + i) & mask]];
|
136
|
-
}
|
137
|
-
|
138
|
-
// Compute bit costs with sliding window.
|
139
|
-
for (size_t i = 0; i < len; ++i) {
|
140
|
-
if (i >= window_half) {
|
141
|
-
// Remove a byte in the past.
|
142
|
-
--histogram[data[(pos + i - window_half) & mask]];
|
143
|
-
--in_window;
|
144
|
-
}
|
145
|
-
if (i + window_half < len) {
|
146
|
-
// Add a byte in the future.
|
147
|
-
++histogram[data[(pos + i + window_half) & mask]];
|
148
|
-
++in_window;
|
149
|
-
}
|
150
|
-
size_t histo = histogram[data[(pos + i) & mask]];
|
151
|
-
if (histo == 0) {
|
152
|
-
histo = 1;
|
153
|
-
}
|
154
|
-
double lit_cost = FastLog2(in_window) - FastLog2(histo);
|
155
|
-
lit_cost += 0.029;
|
156
|
-
if (lit_cost < 1.0) {
|
157
|
-
lit_cost *= 0.5;
|
158
|
-
lit_cost += 0.5;
|
159
|
-
}
|
160
|
-
cost[i] = static_cast<float>(lit_cost);
|
161
|
-
}
|
162
|
-
}
|
163
|
-
|
164
|
-
|
165
|
-
} // namespace brotli
|