brotli 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/ext/brotli/brotli.cc +114 -24
  3. data/ext/brotli/brotli.h +0 -1
  4. data/ext/brotli/extconf.rb +30 -23
  5. data/lib/brotli/version.rb +1 -1
  6. data/vendor/brotli/LICENSE +1 -1
  7. data/vendor/brotli/dec/Makefile +1 -1
  8. data/vendor/brotli/dec/bit_reader.c +3 -3
  9. data/vendor/brotli/dec/bit_reader.h +25 -27
  10. data/vendor/brotli/dec/context.h +4 -4
  11. data/vendor/brotli/dec/decode.c +410 -486
  12. data/vendor/brotli/dec/decode.h +101 -105
  13. data/vendor/brotli/dec/dictionary.c +1 -1
  14. data/vendor/brotli/dec/dictionary.h +7 -8
  15. data/vendor/brotli/dec/huffman.c +103 -105
  16. data/vendor/brotli/dec/huffman.h +18 -18
  17. data/vendor/brotli/dec/port.h +52 -40
  18. data/vendor/brotli/dec/prefix.h +2 -0
  19. data/vendor/brotli/dec/state.c +13 -19
  20. data/vendor/brotli/dec/state.h +25 -39
  21. data/vendor/brotli/dec/transform.h +38 -44
  22. data/vendor/brotli/dec/types.h +2 -2
  23. data/vendor/brotli/enc/Makefile +1 -1
  24. data/vendor/brotli/enc/backward_references.cc +455 -359
  25. data/vendor/brotli/enc/backward_references.h +79 -3
  26. data/vendor/brotli/enc/bit_cost.h +54 -32
  27. data/vendor/brotli/enc/block_splitter.cc +285 -193
  28. data/vendor/brotli/enc/block_splitter.h +4 -12
  29. data/vendor/brotli/enc/brotli_bit_stream.cc +623 -324
  30. data/vendor/brotli/enc/brotli_bit_stream.h +76 -37
  31. data/vendor/brotli/enc/cluster.h +161 -120
  32. data/vendor/brotli/enc/command.h +60 -37
  33. data/vendor/brotli/enc/compress_fragment.cc +701 -0
  34. data/vendor/brotli/enc/compress_fragment.h +47 -0
  35. data/vendor/brotli/enc/compress_fragment_two_pass.cc +524 -0
  36. data/vendor/brotli/enc/compress_fragment_two_pass.h +40 -0
  37. data/vendor/brotli/enc/compressor.h +15 -0
  38. data/vendor/brotli/enc/context.h +1 -1
  39. data/vendor/brotli/enc/dictionary.h +2 -2
  40. data/vendor/brotli/enc/encode.cc +819 -286
  41. data/vendor/brotli/enc/encode.h +38 -15
  42. data/vendor/brotli/enc/encode_parallel.cc +40 -42
  43. data/vendor/brotli/enc/entropy_encode.cc +144 -147
  44. data/vendor/brotli/enc/entropy_encode.h +32 -8
  45. data/vendor/brotli/enc/entropy_encode_static.h +572 -0
  46. data/vendor/brotli/enc/fast_log.h +7 -40
  47. data/vendor/brotli/enc/find_match_length.h +9 -9
  48. data/vendor/brotli/enc/hash.h +462 -154
  49. data/vendor/brotli/enc/histogram.cc +6 -6
  50. data/vendor/brotli/enc/histogram.h +13 -13
  51. data/vendor/brotli/enc/literal_cost.cc +45 -45
  52. data/vendor/brotli/enc/metablock.cc +92 -89
  53. data/vendor/brotli/enc/metablock.h +12 -12
  54. data/vendor/brotli/enc/port.h +7 -16
  55. data/vendor/brotli/enc/prefix.h +23 -22
  56. data/vendor/brotli/enc/ringbuffer.h +75 -29
  57. data/vendor/brotli/enc/static_dict.cc +56 -48
  58. data/vendor/brotli/enc/static_dict.h +5 -5
  59. data/vendor/brotli/enc/streams.cc +1 -1
  60. data/vendor/brotli/enc/streams.h +5 -5
  61. data/vendor/brotli/enc/transform.h +40 -35
  62. data/vendor/brotli/enc/types.h +2 -0
  63. data/vendor/brotli/enc/utf8_util.cc +3 -2
  64. data/vendor/brotli/enc/write_bits.h +6 -6
  65. metadata +9 -5
  66. data/vendor/brotli/dec/streams.c +0 -102
  67. data/vendor/brotli/dec/streams.h +0 -95
@@ -11,70 +11,86 @@
11
11
  #include "./brotli_bit_stream.h"
12
12
 
13
13
  #include <algorithm>
14
+ #include <cstdlib> /* free, malloc */
15
+ #include <cstring>
14
16
  #include <limits>
15
17
  #include <vector>
16
18
 
17
19
  #include "./bit_cost.h"
18
20
  #include "./context.h"
19
21
  #include "./entropy_encode.h"
22
+ #include "./entropy_encode_static.h"
20
23
  #include "./fast_log.h"
21
24
  #include "./prefix.h"
22
25
  #include "./write_bits.h"
26
+
23
27
  namespace brotli {
24
28
 
25
- // returns false if fail
29
+ namespace {
30
+
31
+ static const size_t kMaxHuffmanTreeSize = 2 * kNumCommandPrefixes + 1;
32
+ // Context map alphabet has 256 context id symbols plus max 16 rle symbols.
33
+ static const size_t kContextMapAlphabetSize = 256 + 16;
34
+ // Block type alphabet has 256 block id symbols plus 2 special symbols.
35
+ static const size_t kBlockTypeAlphabetSize = 256 + 2;
36
+
26
37
  // nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
27
- bool EncodeMlen(size_t length, int* bits, int* numbits, int* nibblesbits) {
28
- if (length > (1 << 24)) {
29
- return false;
30
- }
38
+ // REQUIRES: length > 0
39
+ // REQUIRES: length <= (1 << 24)
40
+ void EncodeMlen(size_t length, uint64_t* bits,
41
+ size_t* numbits, uint64_t* nibblesbits) {
42
+ assert(length > 0);
43
+ assert(length <= (1 << 24));
31
44
  length--; // MLEN - 1 is encoded
32
- int lg = length == 0 ? 1 : Log2Floor(static_cast<uint32_t>(length)) + 1;
45
+ size_t lg = length == 0 ? 1 : Log2FloorNonZero(
46
+ static_cast<uint32_t>(length)) + 1;
33
47
  assert(lg <= 24);
34
- int mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;
48
+ size_t mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;
35
49
  *nibblesbits = mnibbles - 4;
36
50
  *numbits = mnibbles * 4;
37
- *bits = static_cast<int>(length);
38
- return true;
51
+ *bits = length;
39
52
  }
40
53
 
41
- void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
54
+ static inline void StoreCommandExtra(
55
+ const Command& cmd, size_t* storage_ix, uint8_t* storage) {
56
+ uint32_t copylen_code = cmd.copy_len_code();
57
+ uint16_t inscode = GetInsertLengthCode(cmd.insert_len_);
58
+ uint16_t copycode = GetCopyLengthCode(copylen_code);
59
+ uint32_t insnumextra = GetInsertExtra(inscode);
60
+ uint64_t insextraval = cmd.insert_len_ - GetInsertBase(inscode);
61
+ uint64_t copyextraval = copylen_code - GetCopyBase(copycode);
62
+ uint64_t bits = (copyextraval << insnumextra) | insextraval;
63
+ WriteBits(insnumextra + GetCopyExtra(copycode), bits, storage_ix, storage);
64
+ }
65
+
66
+ } // namespace
67
+
68
+ void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
42
69
  if (n == 0) {
43
70
  WriteBits(1, 0, storage_ix, storage);
44
71
  } else {
45
72
  WriteBits(1, 1, storage_ix, storage);
46
- int nbits = Log2Floor(n);
73
+ size_t nbits = Log2FloorNonZero(n);
47
74
  WriteBits(3, nbits, storage_ix, storage);
48
75
  WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
49
76
  }
50
77
  }
51
78
 
52
- bool StoreCompressedMetaBlockHeader(bool final_block,
79
+ void StoreCompressedMetaBlockHeader(bool final_block,
53
80
  size_t length,
54
- int* storage_ix,
81
+ size_t* storage_ix,
55
82
  uint8_t* storage) {
56
83
  // Write ISLAST bit.
57
84
  WriteBits(1, final_block, storage_ix, storage);
58
85
  // Write ISEMPTY bit.
59
86
  if (final_block) {
60
- WriteBits(1, length == 0, storage_ix, storage);
61
- if (length == 0) {
62
- return true;
63
- }
64
- }
65
-
66
- if (length == 0) {
67
- // Only the last meta-block can be empty.
68
- return false;
69
- }
70
-
71
- int lenbits;
72
- int nlenbits;
73
- int nibblesbits;
74
- if (!EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits)) {
75
- return false;
87
+ WriteBits(1, 0, storage_ix, storage);
76
88
  }
77
89
 
90
+ uint64_t lenbits;
91
+ size_t nlenbits;
92
+ uint64_t nibblesbits;
93
+ EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
78
94
  WriteBits(2, nibblesbits, storage_ix, storage);
79
95
  WriteBits(nlenbits, lenbits, storage_ix, storage);
80
96
 
@@ -82,31 +98,27 @@ bool StoreCompressedMetaBlockHeader(bool final_block,
82
98
  // Write ISUNCOMPRESSED bit.
83
99
  WriteBits(1, 0, storage_ix, storage);
84
100
  }
85
- return true;
86
101
  }
87
102
 
88
- bool StoreUncompressedMetaBlockHeader(size_t length,
89
- int* storage_ix,
103
+ void StoreUncompressedMetaBlockHeader(size_t length,
104
+ size_t* storage_ix,
90
105
  uint8_t* storage) {
91
106
  // Write ISLAST bit. Uncompressed block cannot be the last one, so set to 0.
92
107
  WriteBits(1, 0, storage_ix, storage);
93
- int lenbits;
94
- int nlenbits;
95
- int nibblesbits;
96
- if (!EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits)) {
97
- return false;
98
- }
108
+ uint64_t lenbits;
109
+ size_t nlenbits;
110
+ uint64_t nibblesbits;
111
+ EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
99
112
  WriteBits(2, nibblesbits, storage_ix, storage);
100
113
  WriteBits(nlenbits, lenbits, storage_ix, storage);
101
114
  // Write ISUNCOMPRESSED bit.
102
115
  WriteBits(1, 1, storage_ix, storage);
103
- return true;
104
116
  }
105
117
 
106
118
  void StoreHuffmanTreeOfHuffmanTreeToBitMask(
107
119
  const int num_codes,
108
120
  const uint8_t *code_length_bitdepth,
109
- int *storage_ix,
121
+ size_t *storage_ix,
110
122
  uint8_t *storage) {
111
123
  static const uint8_t kStorageOrder[kCodeLengthCodes] = {
112
124
  1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
@@ -129,7 +141,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
129
141
  };
130
142
 
131
143
  // Throw away trailing zeros:
132
- int codes_to_store = kCodeLengthCodes;
144
+ size_t codes_to_store = kCodeLengthCodes;
133
145
  if (num_codes > 1) {
134
146
  for (; codes_to_store > 0; --codes_to_store) {
135
147
  if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
@@ -137,7 +149,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
137
149
  }
138
150
  }
139
151
  }
140
- int skip_some = 0; // skips none.
152
+ size_t skip_some = 0; // skips none.
141
153
  if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
142
154
  code_length_bitdepth[kStorageOrder[1]] == 0) {
143
155
  skip_some = 2; // skips two.
@@ -146,22 +158,23 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
146
158
  }
147
159
  }
148
160
  WriteBits(2, skip_some, storage_ix, storage);
149
- for (int i = skip_some; i < codes_to_store; ++i) {
150
- uint8_t l = code_length_bitdepth[kStorageOrder[i]];
161
+ for (size_t i = skip_some; i < codes_to_store; ++i) {
162
+ size_t l = code_length_bitdepth[kStorageOrder[i]];
151
163
  WriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
152
164
  kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage);
153
165
  }
154
166
  }
155
167
 
156
- void StoreHuffmanTreeToBitMask(
157
- const std::vector<uint8_t> &huffman_tree,
158
- const std::vector<uint8_t> &huffman_tree_extra_bits,
159
- const uint8_t *code_length_bitdepth,
160
- const std::vector<uint16_t> &code_length_bitdepth_symbols,
161
- int * __restrict storage_ix,
168
+ static void StoreHuffmanTreeToBitMask(
169
+ const size_t huffman_tree_size,
170
+ const uint8_t* huffman_tree,
171
+ const uint8_t* huffman_tree_extra_bits,
172
+ const uint8_t* code_length_bitdepth,
173
+ const uint16_t* code_length_bitdepth_symbols,
174
+ size_t * __restrict storage_ix,
162
175
  uint8_t * __restrict storage) {
163
- for (size_t i = 0; i < huffman_tree.size(); ++i) {
164
- int ix = huffman_tree[i];
176
+ for (size_t i = 0; i < huffman_tree_size; ++i) {
177
+ size_t ix = huffman_tree[i];
165
178
  WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
166
179
  storage_ix, storage);
167
180
  // Extra bits
@@ -176,18 +189,18 @@ void StoreHuffmanTreeToBitMask(
176
189
  }
177
190
  }
178
191
 
179
- void StoreSimpleHuffmanTree(const uint8_t* depths,
180
- int symbols[4],
181
- int num_symbols,
182
- int max_bits,
183
- int *storage_ix, uint8_t *storage) {
192
+ static void StoreSimpleHuffmanTree(const uint8_t* depths,
193
+ size_t symbols[4],
194
+ size_t num_symbols,
195
+ size_t max_bits,
196
+ size_t *storage_ix, uint8_t *storage) {
184
197
  // value of 1 indicates a simple Huffman code
185
198
  WriteBits(2, 1, storage_ix, storage);
186
199
  WriteBits(2, num_symbols - 1, storage_ix, storage); // NSYM - 1
187
200
 
188
201
  // Sort
189
- for (int i = 0; i < num_symbols; i++) {
190
- for (int j = i + 1; j < num_symbols; j++) {
202
+ for (size_t i = 0; i < num_symbols; i++) {
203
+ for (size_t j = i + 1; j < num_symbols; j++) {
191
204
  if (depths[symbols[j]] < depths[symbols[i]]) {
192
205
  std::swap(symbols[j], symbols[i]);
193
206
  }
@@ -213,19 +226,22 @@ void StoreSimpleHuffmanTree(const uint8_t* depths,
213
226
 
214
227
  // num = alphabet size
215
228
  // depths = symbol depths
216
- void StoreHuffmanTree(const uint8_t* depths, int num,
217
- int *storage_ix, uint8_t *storage) {
229
+ void StoreHuffmanTree(const uint8_t* depths, size_t num,
230
+ HuffmanTree* tree,
231
+ size_t *storage_ix, uint8_t *storage) {
218
232
  // Write the Huffman tree into the brotli-representation.
219
- std::vector<uint8_t> huffman_tree;
220
- std::vector<uint8_t> huffman_tree_extra_bits;
221
- // TODO: Consider allocating these from stack.
222
- huffman_tree.reserve(256);
223
- huffman_tree_extra_bits.reserve(256);
224
- WriteHuffmanTree(depths, num, &huffman_tree, &huffman_tree_extra_bits);
233
+ // The command alphabet is the largest, so this allocation will fit all
234
+ // alphabets.
235
+ assert(num <= kNumCommandPrefixes);
236
+ uint8_t huffman_tree[kNumCommandPrefixes];
237
+ uint8_t huffman_tree_extra_bits[kNumCommandPrefixes];
238
+ size_t huffman_tree_size = 0;
239
+ WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
240
+ huffman_tree_extra_bits);
225
241
 
226
242
  // Calculate the statistics of the Huffman tree in brotli-representation.
227
- int huffman_tree_histogram[kCodeLengthCodes] = { 0 };
228
- for (size_t i = 0; i < huffman_tree.size(); ++i) {
243
+ uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 };
244
+ for (size_t i = 0; i < huffman_tree_size; ++i) {
229
245
  ++huffman_tree_histogram[huffman_tree[i]];
230
246
  }
231
247
 
@@ -245,11 +261,10 @@ void StoreHuffmanTree(const uint8_t* depths, int num,
245
261
 
246
262
  // Calculate another Huffman tree to use for compressing the
247
263
  // earlier Huffman tree.
248
- // TODO: Consider allocating these from stack.
249
264
  uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
250
- std::vector<uint16_t> code_length_bitdepth_symbols(kCodeLengthCodes);
265
+ uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = { 0 };
251
266
  CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
252
- 5, &code_length_bitdepth[0]);
267
+ 5, tree, &code_length_bitdepth[0]);
253
268
  ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
254
269
  &code_length_bitdepth_symbols[0]);
255
270
 
@@ -262,23 +277,24 @@ void StoreHuffmanTree(const uint8_t* depths, int num,
262
277
  }
263
278
 
264
279
  // Store the real huffman tree now.
265
- StoreHuffmanTreeToBitMask(huffman_tree,
280
+ StoreHuffmanTreeToBitMask(huffman_tree_size,
281
+ huffman_tree,
266
282
  huffman_tree_extra_bits,
267
283
  &code_length_bitdepth[0],
268
284
  code_length_bitdepth_symbols,
269
285
  storage_ix, storage);
270
286
  }
271
287
 
272
-
273
- void BuildAndStoreHuffmanTree(const int *histogram,
274
- const int length,
288
+ void BuildAndStoreHuffmanTree(const uint32_t *histogram,
289
+ const size_t length,
290
+ HuffmanTree* tree,
275
291
  uint8_t* depth,
276
292
  uint16_t* bits,
277
- int* storage_ix,
293
+ size_t* storage_ix,
278
294
  uint8_t* storage) {
279
- int count = 0;
280
- int s4[4] = { 0 };
281
- for (int i = 0; i < length; i++) {
295
+ size_t count = 0;
296
+ size_t s4[4] = { 0 };
297
+ for (size_t i = 0; i < length; i++) {
282
298
  if (histogram[i]) {
283
299
  if (count < 4) {
284
300
  s4[count] = i;
@@ -289,8 +305,8 @@ void BuildAndStoreHuffmanTree(const int *histogram,
289
305
  }
290
306
  }
291
307
 
292
- int max_bits_counter = length - 1;
293
- int max_bits = 0;
308
+ size_t max_bits_counter = length - 1;
309
+ size_t max_bits = 0;
294
310
  while (max_bits_counter) {
295
311
  max_bits_counter >>= 1;
296
312
  ++max_bits;
@@ -302,179 +318,365 @@ void BuildAndStoreHuffmanTree(const int *histogram,
302
318
  return;
303
319
  }
304
320
 
305
- CreateHuffmanTree(histogram, length, 15, depth);
321
+ CreateHuffmanTree(histogram, length, 15, tree, depth);
306
322
  ConvertBitDepthsToSymbols(depth, length, bits);
307
323
 
308
324
  if (count <= 4) {
309
325
  StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
310
326
  } else {
311
- StoreHuffmanTree(depth, length, storage_ix, storage);
327
+ StoreHuffmanTree(depth, length, tree, storage_ix, storage);
328
+ }
329
+ }
330
+
331
+ static inline bool SortHuffmanTree(const HuffmanTree& v0,
332
+ const HuffmanTree& v1) {
333
+ return v0.total_count_ < v1.total_count_;
334
+ }
335
+
336
+ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
337
+ const size_t histogram_total,
338
+ const size_t max_bits,
339
+ uint8_t* depth,
340
+ uint16_t* bits,
341
+ size_t* storage_ix,
342
+ uint8_t* storage) {
343
+ size_t count = 0;
344
+ size_t symbols[4] = { 0 };
345
+ size_t length = 0;
346
+ size_t total = histogram_total;
347
+ while (total != 0) {
348
+ if (histogram[length]) {
349
+ if (count < 4) {
350
+ symbols[count] = length;
351
+ }
352
+ ++count;
353
+ total -= histogram[length];
354
+ }
355
+ ++length;
356
+ }
357
+
358
+ if (count <= 1) {
359
+ WriteBits(4, 1, storage_ix, storage);
360
+ WriteBits(max_bits, symbols[0], storage_ix, storage);
361
+ return;
362
+ }
363
+
364
+ const size_t max_tree_size = 2 * length + 1;
365
+ HuffmanTree* const tree =
366
+ static_cast<HuffmanTree*>(malloc(max_tree_size * sizeof(HuffmanTree)));
367
+ for (uint32_t count_limit = 1; ; count_limit *= 2) {
368
+ HuffmanTree* node = tree;
369
+ for (size_t i = length; i != 0;) {
370
+ --i;
371
+ if (histogram[i]) {
372
+ if (PREDICT_TRUE(histogram[i] >= count_limit)) {
373
+ *node = HuffmanTree(histogram[i], -1, static_cast<int16_t>(i));
374
+ } else {
375
+ *node = HuffmanTree(count_limit, -1, static_cast<int16_t>(i));
376
+ }
377
+ ++node;
378
+ }
379
+ }
380
+ const int n = static_cast<int>(node - tree);
381
+ std::sort(tree, node, SortHuffmanTree);
382
+ // The nodes are:
383
+ // [0, n): the sorted leaf nodes that we start with.
384
+ // [n]: we add a sentinel here.
385
+ // [n + 1, 2n): new parent nodes are added here, starting from
386
+ // (n+1). These are naturally in ascending order.
387
+ // [2n]: we add a sentinel at the end as well.
388
+ // There will be (2n+1) elements at the end.
389
+ const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
390
+ *node++ = sentinel;
391
+ *node++ = sentinel;
392
+
393
+ int i = 0; // Points to the next leaf node.
394
+ int j = n + 1; // Points to the next non-leaf node.
395
+ for (int k = n - 1; k > 0; --k) {
396
+ int left, right;
397
+ if (tree[i].total_count_ <= tree[j].total_count_) {
398
+ left = i;
399
+ ++i;
400
+ } else {
401
+ left = j;
402
+ ++j;
403
+ }
404
+ if (tree[i].total_count_ <= tree[j].total_count_) {
405
+ right = i;
406
+ ++i;
407
+ } else {
408
+ right = j;
409
+ ++j;
410
+ }
411
+ // The sentinel node becomes the parent node.
412
+ node[-1].total_count_ =
413
+ tree[left].total_count_ + tree[right].total_count_;
414
+ node[-1].index_left_ = static_cast<int16_t>(left);
415
+ node[-1].index_right_or_value_ = static_cast<int16_t>(right);
416
+ // Add back the last sentinel node.
417
+ *node++ = sentinel;
418
+ }
419
+ SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
420
+ // We need to pack the Huffman tree in 14 bits.
421
+ // If this was not successful, add fake entities to the lowest values
422
+ // and retry.
423
+ if (PREDICT_TRUE(*std::max_element(&depth[0], &depth[length]) <= 14)) {
424
+ break;
425
+ }
426
+ }
427
+ free(tree);
428
+ ConvertBitDepthsToSymbols(depth, length, bits);
429
+ if (count <= 4) {
430
+ // value of 1 indicates a simple Huffman code
431
+ WriteBits(2, 1, storage_ix, storage);
432
+ WriteBits(2, count - 1, storage_ix, storage); // NSYM - 1
433
+
434
+ // Sort
435
+ for (size_t i = 0; i < count; i++) {
436
+ for (size_t j = i + 1; j < count; j++) {
437
+ if (depth[symbols[j]] < depth[symbols[i]]) {
438
+ std::swap(symbols[j], symbols[i]);
439
+ }
440
+ }
441
+ }
442
+
443
+ if (count == 2) {
444
+ WriteBits(max_bits, symbols[0], storage_ix, storage);
445
+ WriteBits(max_bits, symbols[1], storage_ix, storage);
446
+ } else if (count == 3) {
447
+ WriteBits(max_bits, symbols[0], storage_ix, storage);
448
+ WriteBits(max_bits, symbols[1], storage_ix, storage);
449
+ WriteBits(max_bits, symbols[2], storage_ix, storage);
450
+ } else {
451
+ WriteBits(max_bits, symbols[0], storage_ix, storage);
452
+ WriteBits(max_bits, symbols[1], storage_ix, storage);
453
+ WriteBits(max_bits, symbols[2], storage_ix, storage);
454
+ WriteBits(max_bits, symbols[3], storage_ix, storage);
455
+ // tree-select
456
+ WriteBits(1, depth[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
457
+ }
458
+ } else {
459
+ // Complex Huffman Tree
460
+ StoreStaticCodeLengthCode(storage_ix, storage);
461
+
462
+ // Actual rle coding.
463
+ uint8_t previous_value = 8;
464
+ for (size_t i = 0; i < length;) {
465
+ const uint8_t value = depth[i];
466
+ size_t reps = 1;
467
+ for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
468
+ ++reps;
469
+ }
470
+ i += reps;
471
+ if (value == 0) {
472
+ WriteBits(kZeroRepsDepth[reps], kZeroRepsBits[reps],
473
+ storage_ix, storage);
474
+ } else {
475
+ if (previous_value != value) {
476
+ WriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
477
+ storage_ix, storage);
478
+ --reps;
479
+ }
480
+ if (reps < 3) {
481
+ while (reps != 0) {
482
+ reps--;
483
+ WriteBits(kCodeLengthDepth[value], kCodeLengthBits[value],
484
+ storage_ix, storage);
485
+ }
486
+ } else {
487
+ reps -= 3;
488
+ WriteBits(kNonZeroRepsDepth[reps], kNonZeroRepsBits[reps],
489
+ storage_ix, storage);
490
+ }
491
+ previous_value = value;
492
+ }
493
+ }
312
494
  }
313
495
  }
314
496
 
315
- int IndexOf(const std::vector<int>& v, int value) {
316
- for (int i = 0; i < static_cast<int>(v.size()); ++i) {
497
+ static size_t IndexOf(const uint8_t* v, size_t v_size, uint8_t value) {
498
+ size_t i = 0;
499
+ for (; i < v_size; ++i) {
317
500
  if (v[i] == value) return i;
318
501
  }
319
- return -1;
502
+ return i;
320
503
  }
321
504
 
322
- void MoveToFront(std::vector<int>* v, int index) {
323
- int value = (*v)[index];
324
- for (int i = index; i > 0; --i) {
325
- (*v)[i] = (*v)[i - 1];
505
+ static void MoveToFront(uint8_t* v, size_t index) {
506
+ uint8_t value = v[index];
507
+ for (size_t i = index; i != 0; --i) {
508
+ v[i] = v[i - 1];
326
509
  }
327
- (*v)[0] = value;
510
+ v[0] = value;
328
511
  }
329
512
 
330
- std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
331
- if (v.empty()) return v;
332
- std::vector<int> mtf(*std::max_element(v.begin(), v.end()) + 1);
333
- for (int i = 0; i < static_cast<int>(mtf.size()); ++i) mtf[i] = i;
334
- std::vector<int> result(v.size());
335
- for (size_t i = 0; i < v.size(); ++i) {
336
- int index = IndexOf(mtf, v[i]);
337
- assert(index >= 0);
338
- result[i] = index;
339
- MoveToFront(&mtf, index);
340
- }
341
- return result;
513
+ static void MoveToFrontTransform(const uint32_t* __restrict v_in,
514
+ const size_t v_size,
515
+ uint32_t* v_out) {
516
+ if (v_size == 0) {
517
+ return;
518
+ }
519
+ uint32_t max_value = *std::max_element(v_in, v_in + v_size);
520
+ assert(max_value < 256u);
521
+ uint8_t mtf[256];
522
+ size_t mtf_size = max_value + 1;
523
+ for (uint32_t i = 0; i <= max_value; ++i) {
524
+ mtf[i] = static_cast<uint8_t>(i);
525
+ }
526
+ for (size_t i = 0; i < v_size; ++i) {
527
+ size_t index = IndexOf(mtf, mtf_size, static_cast<uint8_t>(v_in[i]));
528
+ assert(index < mtf_size);
529
+ v_out[i] = static_cast<uint32_t>(index);
530
+ MoveToFront(mtf, index);
531
+ }
342
532
  }
343
533
 
344
- // Finds runs of zeros in v_in and replaces them with a prefix code of the run
345
- // length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
346
- // shifted by *max_length_prefix. Will not create prefix codes bigger than the
347
- // initial value of *max_run_length_prefix. The prefix code of run length L is
348
- // simply Log2Floor(L) and the number of extra bits is the same as the prefix
349
- // code.
350
- void RunLengthCodeZeros(const std::vector<int>& v_in,
351
- int* max_run_length_prefix,
352
- std::vector<int>* v_out,
353
- std::vector<int>* extra_bits) {
354
- int max_reps = 0;
355
- for (size_t i = 0; i < v_in.size();) {
356
- for (; i < v_in.size() && v_in[i] != 0; ++i) ;
357
- int reps = 0;
358
- for (; i < v_in.size() && v_in[i] == 0; ++i) {
534
+ // Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
535
+ // the run length plus extra bits (lower 9 bits is the prefix code and the rest
536
+ // are the extra bits). Non-zero values in v[] are shifted by
537
+ // *max_run_length_prefix. Will not create prefix codes bigger than the initial
538
+ // value of *max_run_length_prefix. The prefix code of run length L is simply
539
+ // Log2Floor(L) and the number of extra bits is the same as the prefix code.
540
+ static void RunLengthCodeZeros(const size_t in_size,
541
+ uint32_t* __restrict v,
542
+ size_t* __restrict out_size,
543
+ uint32_t* __restrict max_run_length_prefix) {
544
+ uint32_t max_reps = 0;
545
+ for (size_t i = 0; i < in_size;) {
546
+ for (; i < in_size && v[i] != 0; ++i) ;
547
+ uint32_t reps = 0;
548
+ for (; i < in_size && v[i] == 0; ++i) {
359
549
  ++reps;
360
550
  }
361
551
  max_reps = std::max(reps, max_reps);
362
552
  }
363
- int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
364
- *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
365
- for (size_t i = 0; i < v_in.size();) {
366
- if (v_in[i] != 0) {
367
- v_out->push_back(v_in[i] + *max_run_length_prefix);
368
- extra_bits->push_back(0);
553
+ uint32_t max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
554
+ max_prefix = std::min(max_prefix, *max_run_length_prefix);
555
+ *max_run_length_prefix = max_prefix;
556
+ *out_size = 0;
557
+ for (size_t i = 0; i < in_size;) {
558
+ assert(*out_size <= i);
559
+ if (v[i] != 0) {
560
+ v[*out_size] = v[i] + *max_run_length_prefix;
369
561
  ++i;
562
+ ++(*out_size);
370
563
  } else {
371
- int reps = 1;
372
- for (size_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
564
+ uint32_t reps = 1;
565
+ for (size_t k = i + 1; k < in_size && v[k] == 0; ++k) {
373
566
  ++reps;
374
567
  }
375
568
  i += reps;
376
- while (reps) {
377
- if (reps < (2 << *max_run_length_prefix)) {
378
- int run_length_prefix = Log2Floor(reps);
379
- v_out->push_back(run_length_prefix);
380
- extra_bits->push_back(reps - (1 << run_length_prefix));
569
+ while (reps != 0) {
570
+ if (reps < (2u << max_prefix)) {
571
+ uint32_t run_length_prefix = Log2FloorNonZero(reps);
572
+ const uint32_t extra_bits = reps - (1u << run_length_prefix);
573
+ v[*out_size] = run_length_prefix + (extra_bits << 9);
574
+ ++(*out_size);
381
575
  break;
382
576
  } else {
383
- v_out->push_back(*max_run_length_prefix);
384
- extra_bits->push_back((1 << *max_run_length_prefix) - 1);
385
- reps -= (2 << *max_run_length_prefix) - 1;
577
+ const uint32_t extra_bits = (1u << max_prefix) - 1u;
578
+ v[*out_size] = max_prefix + (extra_bits << 9);
579
+ reps -= (2u << max_prefix) - 1u;
580
+ ++(*out_size);
386
581
  }
387
582
  }
388
583
  }
389
584
  }
390
585
  }
391
586
 
392
- void EncodeContextMap(const std::vector<int>& context_map,
393
- int num_clusters,
394
- int* storage_ix, uint8_t* storage) {
587
+ void EncodeContextMap(const std::vector<uint32_t>& context_map,
588
+ size_t num_clusters,
589
+ HuffmanTree* tree,
590
+ size_t* storage_ix, uint8_t* storage) {
395
591
  StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
396
592
 
397
593
  if (num_clusters == 1) {
398
594
  return;
399
595
  }
400
596
 
401
- std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
402
- std::vector<int> rle_symbols;
403
- std::vector<int> extra_bits;
404
- int max_run_length_prefix = 6;
405
- RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
406
- &rle_symbols, &extra_bits);
407
- HistogramContextMap symbol_histogram;
408
- for (size_t i = 0; i < rle_symbols.size(); ++i) {
409
- symbol_histogram.Add(rle_symbols[i]);
597
+ uint32_t* rle_symbols = new uint32_t[context_map.size()];
598
+ MoveToFrontTransform(&context_map[0], context_map.size(), rle_symbols);
599
+ uint32_t max_run_length_prefix = 6;
600
+ size_t num_rle_symbols = 0;
601
+ RunLengthCodeZeros(context_map.size(), rle_symbols,
602
+ &num_rle_symbols, &max_run_length_prefix);
603
+ uint32_t histogram[kContextMapAlphabetSize];
604
+ memset(histogram, 0, sizeof(histogram));
605
+ static const int kSymbolBits = 9;
606
+ static const uint32_t kSymbolMask = (1u << kSymbolBits) - 1u;
607
+ for (size_t i = 0; i < num_rle_symbols; ++i) {
608
+ ++histogram[rle_symbols[i] & kSymbolMask];
410
609
  }
411
610
  bool use_rle = max_run_length_prefix > 0;
412
611
  WriteBits(1, use_rle, storage_ix, storage);
413
612
  if (use_rle) {
414
613
  WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
415
614
  }
416
- EntropyCodeContextMap symbol_code;
417
- memset(symbol_code.depth_, 0, sizeof(symbol_code.depth_));
418
- memset(symbol_code.bits_, 0, sizeof(symbol_code.bits_));
419
- BuildAndStoreHuffmanTree(symbol_histogram.data_,
420
- num_clusters + max_run_length_prefix,
421
- symbol_code.depth_, symbol_code.bits_,
422
- storage_ix, storage);
423
- for (size_t i = 0; i < rle_symbols.size(); ++i) {
424
- WriteBits(symbol_code.depth_[rle_symbols[i]],
425
- symbol_code.bits_[rle_symbols[i]],
426
- storage_ix, storage);
427
- if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
428
- WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
615
+ uint8_t depths[kContextMapAlphabetSize];
616
+ uint16_t bits[kContextMapAlphabetSize];
617
+ memset(depths, 0, sizeof(depths));
618
+ memset(bits, 0, sizeof(bits));
619
+ BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
620
+ tree, depths, bits, storage_ix, storage);
621
+ for (size_t i = 0; i < num_rle_symbols; ++i) {
622
+ const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask;
623
+ const uint32_t extra_bits_val = rle_symbols[i] >> kSymbolBits;
624
+ WriteBits(depths[rle_symbol], bits[rle_symbol], storage_ix, storage);
625
+ if (rle_symbol > 0 && rle_symbol <= max_run_length_prefix) {
626
+ WriteBits(rle_symbol, extra_bits_val, storage_ix, storage);
429
627
  }
430
628
  }
431
629
  WriteBits(1, 1, storage_ix, storage); // use move-to-front
630
+ delete[] rle_symbols;
432
631
  }
433
632
 
434
633
  void StoreBlockSwitch(const BlockSplitCode& code,
435
- const int block_ix,
436
- int* storage_ix,
634
+ const size_t block_ix,
635
+ size_t* storage_ix,
437
636
  uint8_t* storage) {
438
637
  if (block_ix > 0) {
439
- int typecode = code.type_code[block_ix];
638
+ size_t typecode = code.type_code[block_ix];
440
639
  WriteBits(code.type_depths[typecode], code.type_bits[typecode],
441
640
  storage_ix, storage);
442
641
  }
443
- int lencode = code.length_prefix[block_ix];
642
+ size_t lencode = code.length_prefix[block_ix];
444
643
  WriteBits(code.length_depths[lencode], code.length_bits[lencode],
445
644
  storage_ix, storage);
446
645
  WriteBits(code.length_nextra[block_ix], code.length_extra[block_ix],
447
646
  storage_ix, storage);
448
647
  }
449
648
 
450
- void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
451
- const std::vector<int>& lengths,
452
- const int num_types,
453
- BlockSplitCode* code,
454
- int* storage_ix,
455
- uint8_t* storage) {
456
- const int num_blocks = static_cast<int>(types.size());
457
- std::vector<int> type_histo(num_types + 2);
458
- std::vector<int> length_histo(26);
459
- int last_type = 1;
460
- int second_last_type = 0;
649
+ static void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
650
+ const std::vector<uint32_t>& lengths,
651
+ const size_t num_types,
652
+ HuffmanTree* tree,
653
+ BlockSplitCode* code,
654
+ size_t* storage_ix,
655
+ uint8_t* storage) {
656
+ const size_t num_blocks = types.size();
657
+ uint32_t type_histo[kBlockTypeAlphabetSize];
658
+ uint32_t length_histo[kNumBlockLenPrefixes];
659
+ memset(type_histo, 0, (num_types + 2) * sizeof(type_histo[0]));
660
+ memset(length_histo, 0, sizeof(length_histo));
661
+ size_t last_type = 1;
662
+ size_t second_last_type = 0;
461
663
  code->type_code.resize(num_blocks);
462
664
  code->length_prefix.resize(num_blocks);
463
665
  code->length_nextra.resize(num_blocks);
464
666
  code->length_extra.resize(num_blocks);
465
667
  code->type_depths.resize(num_types + 2);
466
668
  code->type_bits.resize(num_types + 2);
467
- code->length_depths.resize(26);
468
- code->length_bits.resize(26);
469
- for (int i = 0; i < num_blocks; ++i) {
470
- int type = types[i];
471
- int type_code = (type == last_type + 1 ? 1 :
669
+ memset(code->length_depths, 0, sizeof(code->length_depths));
670
+ memset(code->length_bits, 0, sizeof(code->length_bits));
671
+ for (size_t i = 0; i < num_blocks; ++i) {
672
+ size_t type = types[i];
673
+ size_t type_code = (type == last_type + 1 ? 1 :
472
674
  type == second_last_type ? 0 :
473
675
  type + 2);
474
676
  second_last_type = last_type;
475
677
  last_type = type;
476
- code->type_code[i] = type_code;
477
- if (i > 0) ++type_histo[type_code];
678
+ code->type_code[i] = static_cast<uint32_t>(type_code);
679
+ if (i != 0) ++type_histo[type_code];
478
680
  GetBlockLengthPrefixCode(lengths[i],
479
681
  &code->length_prefix[i],
480
682
  &code->length_nextra[i],
@@ -483,41 +685,45 @@ void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
483
685
  }
484
686
  StoreVarLenUint8(num_types - 1, storage_ix, storage);
485
687
  if (num_types > 1) {
486
- BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2,
688
+ BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree,
487
689
  &code->type_depths[0], &code->type_bits[0],
488
690
  storage_ix, storage);
489
- BuildAndStoreHuffmanTree(&length_histo[0], 26,
691
+ BuildAndStoreHuffmanTree(&length_histo[0], kNumBlockLenPrefixes, tree,
490
692
  &code->length_depths[0], &code->length_bits[0],
491
693
  storage_ix, storage);
492
694
  StoreBlockSwitch(*code, 0, storage_ix, storage);
493
695
  }
494
696
  }
495
697
 
496
- void StoreTrivialContextMap(int num_types,
497
- int context_bits,
498
- int* storage_ix,
698
+ void StoreTrivialContextMap(size_t num_types,
699
+ size_t context_bits,
700
+ HuffmanTree* tree,
701
+ size_t* storage_ix,
499
702
  uint8_t* storage) {
500
703
  StoreVarLenUint8(num_types - 1, storage_ix, storage);
501
704
  if (num_types > 1) {
502
- int repeat_code = context_bits - 1;
503
- int repeat_bits = (1 << repeat_code) - 1;
504
- int alphabet_size = num_types + repeat_code;
505
- std::vector<int> histogram(alphabet_size);
506
- std::vector<uint8_t> depths(alphabet_size);
507
- std::vector<uint16_t> bits(alphabet_size);
705
+ size_t repeat_code = context_bits - 1u;
706
+ size_t repeat_bits = (1u << repeat_code) - 1u;
707
+ size_t alphabet_size = num_types + repeat_code;
708
+ uint32_t histogram[kContextMapAlphabetSize];
709
+ uint8_t depths[kContextMapAlphabetSize];
710
+ uint16_t bits[kContextMapAlphabetSize];
711
+ memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
712
+ memset(depths, 0, alphabet_size * sizeof(depths[0]));
713
+ memset(bits, 0, alphabet_size * sizeof(bits[0]));
508
714
  // Write RLEMAX.
509
715
  WriteBits(1, 1, storage_ix, storage);
510
716
  WriteBits(4, repeat_code - 1, storage_ix, storage);
511
- histogram[repeat_code] = num_types;
717
+ histogram[repeat_code] = static_cast<uint32_t>(num_types);
512
718
  histogram[0] = 1;
513
- for (int i = context_bits; i < alphabet_size; ++i) {
719
+ for (size_t i = context_bits; i < alphabet_size; ++i) {
514
720
  histogram[i] = 1;
515
721
  }
516
- BuildAndStoreHuffmanTree(&histogram[0], alphabet_size,
722
+ BuildAndStoreHuffmanTree(&histogram[0], alphabet_size, tree,
517
723
  &depths[0], &bits[0],
518
724
  storage_ix, storage);
519
- for (int i = 0; i < num_types; ++i) {
520
- int code = (i == 0 ? 0 : i + context_bits - 1);
725
+ for (size_t i = 0; i < num_types; ++i) {
726
+ size_t code = (i == 0 ? 0 : i + context_bits - 1);
521
727
  WriteBits(depths[code], bits[code], storage_ix, storage);
522
728
  WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage);
523
729
  WriteBits(repeat_code, repeat_bits, storage_ix, storage);
@@ -530,10 +736,10 @@ void StoreTrivialContextMap(int num_types,
530
736
  // Manages the encoding of one block category (literal, command or distance).
531
737
  class BlockEncoder {
532
738
  public:
533
- BlockEncoder(int alphabet_size,
534
- int num_block_types,
535
- const std::vector<int>& block_types,
536
- const std::vector<int>& block_lengths)
739
+ BlockEncoder(size_t alphabet_size,
740
+ size_t num_block_types,
741
+ const std::vector<uint8_t>& block_types,
742
+ const std::vector<uint32_t>& block_lengths)
537
743
  : alphabet_size_(alphabet_size),
538
744
  num_block_types_(num_block_types),
539
745
  block_types_(block_types),
@@ -544,10 +750,12 @@ class BlockEncoder {
544
750
 
545
751
  // Creates entropy codes of block lengths and block types and stores them
546
752
  // to the bit stream.
547
- void BuildAndStoreBlockSwitchEntropyCodes(int* storage_ix, uint8_t* storage) {
753
+ void BuildAndStoreBlockSwitchEntropyCodes(HuffmanTree* tree,
754
+ size_t* storage_ix,
755
+ uint8_t* storage) {
548
756
  BuildAndStoreBlockSplitCode(
549
757
  block_types_, block_lengths_, num_block_types_,
550
- &block_split_code_, storage_ix, storage);
758
+ tree, &block_split_code_, storage_ix, storage);
551
759
  }
552
760
 
553
761
  // Creates entropy codes for all block types and stores them to the bit
@@ -555,12 +763,14 @@ class BlockEncoder {
555
763
  template<int kSize>
556
764
  void BuildAndStoreEntropyCodes(
557
765
  const std::vector<Histogram<kSize> >& histograms,
558
- int* storage_ix, uint8_t* storage) {
766
+ HuffmanTree* tree,
767
+ size_t* storage_ix, uint8_t* storage) {
559
768
  depths_.resize(histograms.size() * alphabet_size_);
560
769
  bits_.resize(histograms.size() * alphabet_size_);
561
770
  for (size_t i = 0; i < histograms.size(); ++i) {
562
771
  size_t ix = i * alphabet_size_;
563
772
  BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size_,
773
+ tree,
564
774
  &depths_[ix], &bits_[ix],
565
775
  storage_ix, storage);
566
776
  }
@@ -568,7 +778,7 @@ class BlockEncoder {
568
778
 
569
779
  // Stores the next symbol with the entropy code of the current block type.
570
780
  // Updates the block type and block length at block boundaries.
571
- void StoreSymbol(int symbol, int* storage_ix, uint8_t* storage) {
781
+ void StoreSymbol(size_t symbol, size_t* storage_ix, uint8_t* storage) {
572
782
  if (block_len_ == 0) {
573
783
  ++block_ix_;
574
784
  block_len_ = block_lengths_[block_ix_];
@@ -576,7 +786,7 @@ class BlockEncoder {
576
786
  StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
577
787
  }
578
788
  --block_len_;
579
- int ix = entropy_ix_ + symbol;
789
+ size_t ix = entropy_ix_ + symbol;
580
790
  WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
581
791
  }
582
792
 
@@ -584,68 +794,63 @@ class BlockEncoder {
584
794
  // context value.
585
795
  // Updates the block type and block length at block boundaries.
586
796
  template<int kContextBits>
587
- void StoreSymbolWithContext(int symbol, int context,
588
- const std::vector<int>& context_map,
589
- int* storage_ix, uint8_t* storage) {
797
+ void StoreSymbolWithContext(size_t symbol, size_t context,
798
+ const std::vector<uint32_t>& context_map,
799
+ size_t* storage_ix, uint8_t* storage) {
590
800
  if (block_len_ == 0) {
591
801
  ++block_ix_;
592
802
  block_len_ = block_lengths_[block_ix_];
593
- entropy_ix_ = block_types_[block_ix_] << kContextBits;
803
+ size_t block_type = block_types_[block_ix_];
804
+ entropy_ix_ = block_type << kContextBits;
594
805
  StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
595
806
  }
596
807
  --block_len_;
597
- int histo_ix = context_map[entropy_ix_ + context];
598
- int ix = histo_ix * alphabet_size_ + symbol;
808
+ size_t histo_ix = context_map[entropy_ix_ + context];
809
+ size_t ix = histo_ix * alphabet_size_ + symbol;
599
810
  WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
600
811
  }
601
812
 
602
813
  private:
603
- const int alphabet_size_;
604
- const int num_block_types_;
605
- const std::vector<int>& block_types_;
606
- const std::vector<int>& block_lengths_;
814
+ const size_t alphabet_size_;
815
+ const size_t num_block_types_;
816
+ const std::vector<uint8_t>& block_types_;
817
+ const std::vector<uint32_t>& block_lengths_;
607
818
  BlockSplitCode block_split_code_;
608
- int block_ix_;
609
- int block_len_;
610
- int entropy_ix_;
819
+ size_t block_ix_;
820
+ size_t block_len_;
821
+ size_t entropy_ix_;
611
822
  std::vector<uint8_t> depths_;
612
823
  std::vector<uint16_t> bits_;
613
824
  };
614
825
 
615
- void JumpToByteBoundary(int* storage_ix, uint8_t* storage) {
616
- *storage_ix = (*storage_ix + 7) & ~7;
826
+ static void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) {
827
+ *storage_ix = (*storage_ix + 7u) & ~7u;
617
828
  storage[*storage_ix >> 3] = 0;
618
829
  }
619
830
 
620
- bool StoreMetaBlock(const uint8_t* input,
831
+ void StoreMetaBlock(const uint8_t* input,
621
832
  size_t start_pos,
622
833
  size_t length,
623
834
  size_t mask,
624
835
  uint8_t prev_byte,
625
836
  uint8_t prev_byte2,
626
837
  bool is_last,
627
- int num_direct_distance_codes,
628
- int distance_postfix_bits,
629
- int literal_context_mode,
838
+ uint32_t num_direct_distance_codes,
839
+ uint32_t distance_postfix_bits,
840
+ ContextType literal_context_mode,
630
841
  const brotli::Command *commands,
631
842
  size_t n_commands,
632
843
  const MetaBlockSplit& mb,
633
- int *storage_ix,
844
+ size_t *storage_ix,
634
845
  uint8_t *storage) {
635
- if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
636
- return false;
637
- }
638
-
639
- if (length == 0) {
640
- // Only the last meta-block can be empty, so jump to next byte.
641
- JumpToByteBoundary(storage_ix, storage);
642
- return true;
643
- }
846
+ StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
644
847
 
645
- int num_distance_codes =
848
+ size_t num_distance_codes =
646
849
  kNumDistanceShortCodes + num_direct_distance_codes +
647
- (48 << distance_postfix_bits);
850
+ (48u << distance_postfix_bits);
648
851
 
852
+ HuffmanTree* tree = static_cast<HuffmanTree*>(
853
+ malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
649
854
  BlockEncoder literal_enc(256,
650
855
  mb.literal_split.num_types,
651
856
  mb.literal_split.types,
@@ -659,59 +864,57 @@ bool StoreMetaBlock(const uint8_t* input,
659
864
  mb.distance_split.types,
660
865
  mb.distance_split.lengths);
661
866
 
662
- literal_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
663
- command_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
664
- distance_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
867
+ literal_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
868
+ command_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
869
+ distance_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
665
870
 
666
871
  WriteBits(2, distance_postfix_bits, storage_ix, storage);
667
872
  WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
668
873
  storage_ix, storage);
669
- for (int i = 0; i < mb.literal_split.num_types; ++i) {
874
+ for (size_t i = 0; i < mb.literal_split.num_types; ++i) {
670
875
  WriteBits(2, literal_context_mode, storage_ix, storage);
671
876
  }
672
877
 
673
- int num_literal_histograms = static_cast<int>(mb.literal_histograms.size());
878
+ size_t num_literal_histograms = mb.literal_histograms.size();
674
879
  if (mb.literal_context_map.empty()) {
675
- StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits,
880
+ StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits, tree,
676
881
  storage_ix, storage);
677
882
  } else {
678
- EncodeContextMap(mb.literal_context_map, num_literal_histograms,
883
+ EncodeContextMap(mb.literal_context_map, num_literal_histograms, tree,
679
884
  storage_ix, storage);
680
885
  }
681
886
 
682
- int num_dist_histograms = static_cast<int>(mb.distance_histograms.size());
887
+ size_t num_dist_histograms = mb.distance_histograms.size();
683
888
  if (mb.distance_context_map.empty()) {
684
- StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits,
889
+ StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits, tree,
685
890
  storage_ix, storage);
686
891
  } else {
687
- EncodeContextMap(mb.distance_context_map, num_dist_histograms,
892
+ EncodeContextMap(mb.distance_context_map, num_dist_histograms, tree,
688
893
  storage_ix, storage);
689
894
  }
690
895
 
691
- literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms,
896
+ literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms, tree,
692
897
  storage_ix, storage);
693
- command_enc.BuildAndStoreEntropyCodes(mb.command_histograms,
898
+ command_enc.BuildAndStoreEntropyCodes(mb.command_histograms, tree,
694
899
  storage_ix, storage);
695
- distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms,
900
+ distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms, tree,
696
901
  storage_ix, storage);
902
+ free(tree);
697
903
 
698
904
  size_t pos = start_pos;
699
905
  for (size_t i = 0; i < n_commands; ++i) {
700
906
  const Command cmd = commands[i];
701
- int cmd_code = cmd.cmd_prefix_;
702
- int lennumextra = static_cast<int>(cmd.cmd_extra_ >> 48);
703
- uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
907
+ size_t cmd_code = cmd.cmd_prefix_;
704
908
  command_enc.StoreSymbol(cmd_code, storage_ix, storage);
705
- WriteBits(lennumextra, lenextra, storage_ix, storage);
909
+ StoreCommandExtra(cmd, storage_ix, storage);
706
910
  if (mb.literal_context_map.empty()) {
707
- for (int j = 0; j < cmd.insert_len_; j++) {
911
+ for (size_t j = cmd.insert_len_; j != 0; --j) {
708
912
  literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
709
913
  ++pos;
710
914
  }
711
915
  } else {
712
- for (int j = 0; j < cmd.insert_len_; ++j) {
713
- int context = Context(prev_byte, prev_byte2,
714
- literal_context_mode);
916
+ for (size_t j = cmd.insert_len_; j != 0; --j) {
917
+ size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
715
918
  uint8_t literal = input[pos & mask];
716
919
  literal_enc.StoreSymbolWithContext<kLiteralContextBits>(
717
920
  literal, context, mb.literal_context_map, storage_ix, storage);
@@ -720,18 +923,18 @@ bool StoreMetaBlock(const uint8_t* input,
720
923
  ++pos;
721
924
  }
722
925
  }
723
- pos += cmd.copy_len_;
724
- if (cmd.copy_len_ > 0) {
926
+ pos += cmd.copy_len();
927
+ if (cmd.copy_len()) {
725
928
  prev_byte2 = input[(pos - 2) & mask];
726
929
  prev_byte = input[(pos - 1) & mask];
727
930
  if (cmd.cmd_prefix_ >= 128) {
728
- int dist_code = cmd.dist_prefix_;
729
- int distnumextra = cmd.dist_extra_ >> 24;
730
- int distextra = cmd.dist_extra_ & 0xffffff;
931
+ size_t dist_code = cmd.dist_prefix_;
932
+ uint32_t distnumextra = cmd.dist_extra_ >> 24;
933
+ uint64_t distextra = cmd.dist_extra_ & 0xffffff;
731
934
  if (mb.distance_context_map.empty()) {
732
935
  distance_enc.StoreSymbol(dist_code, storage_ix, storage);
733
936
  } else {
734
- int context = cmd.DistanceContext();
937
+ size_t context = cmd.DistanceContext();
735
938
  distance_enc.StoreSymbolWithContext<kDistanceContextBits>(
736
939
  dist_code, context, mb.distance_context_map, storage_ix, storage);
737
940
  }
@@ -742,45 +945,84 @@ bool StoreMetaBlock(const uint8_t* input,
742
945
  if (is_last) {
743
946
  JumpToByteBoundary(storage_ix, storage);
744
947
  }
745
- return true;
746
948
  }
747
949
 
748
- bool StoreMetaBlockTrivial(const uint8_t* input,
950
+ static void BuildHistograms(const uint8_t* input,
951
+ size_t start_pos,
952
+ size_t mask,
953
+ const brotli::Command *commands,
954
+ size_t n_commands,
955
+ HistogramLiteral* lit_histo,
956
+ HistogramCommand* cmd_histo,
957
+ HistogramDistance* dist_histo) {
958
+ size_t pos = start_pos;
959
+ for (size_t i = 0; i < n_commands; ++i) {
960
+ const Command cmd = commands[i];
961
+ cmd_histo->Add(cmd.cmd_prefix_);
962
+ for (size_t j = cmd.insert_len_; j != 0; --j) {
963
+ lit_histo->Add(input[pos & mask]);
964
+ ++pos;
965
+ }
966
+ pos += cmd.copy_len();
967
+ if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
968
+ dist_histo->Add(cmd.dist_prefix_);
969
+ }
970
+ }
971
+ }
972
+
973
+ static void StoreDataWithHuffmanCodes(const uint8_t* input,
974
+ size_t start_pos,
975
+ size_t mask,
976
+ const brotli::Command *commands,
977
+ size_t n_commands,
978
+ const uint8_t* lit_depth,
979
+ const uint16_t* lit_bits,
980
+ const uint8_t* cmd_depth,
981
+ const uint16_t* cmd_bits,
982
+ const uint8_t* dist_depth,
983
+ const uint16_t* dist_bits,
984
+ size_t* storage_ix,
985
+ uint8_t* storage) {
986
+ size_t pos = start_pos;
987
+ for (size_t i = 0; i < n_commands; ++i) {
988
+ const Command cmd = commands[i];
989
+ const size_t cmd_code = cmd.cmd_prefix_;
990
+ WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
991
+ StoreCommandExtra(cmd, storage_ix, storage);
992
+ for (size_t j = cmd.insert_len_; j != 0; --j) {
993
+ const uint8_t literal = input[pos & mask];
994
+ WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
995
+ ++pos;
996
+ }
997
+ pos += cmd.copy_len();
998
+ if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
999
+ const size_t dist_code = cmd.dist_prefix_;
1000
+ const uint32_t distnumextra = cmd.dist_extra_ >> 24;
1001
+ const uint32_t distextra = cmd.dist_extra_ & 0xffffff;
1002
+ WriteBits(dist_depth[dist_code], dist_bits[dist_code],
1003
+ storage_ix, storage);
1004
+ WriteBits(distnumextra, distextra, storage_ix, storage);
1005
+ }
1006
+ }
1007
+ }
1008
+
1009
+ void StoreMetaBlockTrivial(const uint8_t* input,
749
1010
  size_t start_pos,
750
1011
  size_t length,
751
1012
  size_t mask,
752
1013
  bool is_last,
753
1014
  const brotli::Command *commands,
754
1015
  size_t n_commands,
755
- int *storage_ix,
1016
+ size_t *storage_ix,
756
1017
  uint8_t *storage) {
757
- if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
758
- return false;
759
- }
760
-
761
- if (length == 0) {
762
- // Only the last meta-block can be empty, so jump to next byte.
763
- JumpToByteBoundary(storage_ix, storage);
764
- return true;
765
- }
1018
+ StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
766
1019
 
767
1020
  HistogramLiteral lit_histo;
768
1021
  HistogramCommand cmd_histo;
769
1022
  HistogramDistance dist_histo;
770
1023
 
771
- size_t pos = start_pos;
772
- for (size_t i = 0; i < n_commands; ++i) {
773
- const Command cmd = commands[i];
774
- cmd_histo.Add(cmd.cmd_prefix_);
775
- for (int j = 0; j < cmd.insert_len_; ++j) {
776
- lit_histo.Add(input[pos & mask]);
777
- ++pos;
778
- }
779
- pos += cmd.copy_len_;
780
- if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
781
- dist_histo.Add(cmd.dist_prefix_);
782
- }
783
- }
1024
+ BuildHistograms(input, start_pos, mask, commands, n_commands,
1025
+ &lit_histo, &cmd_histo, &dist_histo);
784
1026
 
785
1027
  WriteBits(13, 0, storage_ix, storage);
786
1028
 
@@ -791,68 +1033,126 @@ bool StoreMetaBlockTrivial(const uint8_t* input,
791
1033
  std::vector<uint8_t> dist_depth(64);
792
1034
  std::vector<uint16_t> dist_bits(64);
793
1035
 
794
- BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256,
1036
+ HuffmanTree* tree = static_cast<HuffmanTree*>(
1037
+ malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
1038
+ BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256, tree,
795
1039
  &lit_depth[0], &lit_bits[0],
796
1040
  storage_ix, storage);
797
- BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes,
1041
+ BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes, tree,
798
1042
  &cmd_depth[0], &cmd_bits[0],
799
1043
  storage_ix, storage);
800
- BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64,
1044
+ BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64, tree,
801
1045
  &dist_depth[0], &dist_bits[0],
802
1046
  storage_ix, storage);
1047
+ free(tree);
1048
+ StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
1049
+ n_commands, &lit_depth[0], &lit_bits[0],
1050
+ &cmd_depth[0], &cmd_bits[0],
1051
+ &dist_depth[0], &dist_bits[0],
1052
+ storage_ix, storage);
1053
+ if (is_last) {
1054
+ JumpToByteBoundary(storage_ix, storage);
1055
+ }
1056
+ }
803
1057
 
804
- pos = start_pos;
805
- for (size_t i = 0; i < n_commands; ++i) {
806
- const Command cmd = commands[i];
807
- const int cmd_code = cmd.cmd_prefix_;
808
- const int lennumextra = static_cast<int>(cmd.cmd_extra_ >> 48);
809
- const uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
810
- WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
811
- WriteBits(lennumextra, lenextra, storage_ix, storage);
812
- for (int j = 0; j < cmd.insert_len_; j++) {
813
- const uint8_t literal = input[pos & mask];
814
- WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
815
- ++pos;
816
- }
817
- pos += cmd.copy_len_;
818
- if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
819
- const int dist_code = cmd.dist_prefix_;
820
- const int distnumextra = cmd.dist_extra_ >> 24;
821
- const int distextra = cmd.dist_extra_ & 0xffffff;
822
- WriteBits(dist_depth[dist_code], dist_bits[dist_code],
823
- storage_ix, storage);
824
- WriteBits(distnumextra, distextra, storage_ix, storage);
1058
+ void StoreMetaBlockFast(const uint8_t* input,
1059
+ size_t start_pos,
1060
+ size_t length,
1061
+ size_t mask,
1062
+ bool is_last,
1063
+ const brotli::Command *commands,
1064
+ size_t n_commands,
1065
+ size_t *storage_ix,
1066
+ uint8_t *storage) {
1067
+ StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
1068
+
1069
+ WriteBits(13, 0, storage_ix, storage);
1070
+
1071
+ if (n_commands <= 128) {
1072
+ uint32_t histogram[256] = { 0 };
1073
+ size_t pos = start_pos;
1074
+ size_t num_literals = 0;
1075
+ for (size_t i = 0; i < n_commands; ++i) {
1076
+ const Command cmd = commands[i];
1077
+ for (size_t j = cmd.insert_len_; j != 0; --j) {
1078
+ ++histogram[input[pos & mask]];
1079
+ ++pos;
1080
+ }
1081
+ num_literals += cmd.insert_len_;
1082
+ pos += cmd.copy_len();
825
1083
  }
1084
+ uint8_t lit_depth[256] = { 0 };
1085
+ uint16_t lit_bits[256] = { 0 };
1086
+ BuildAndStoreHuffmanTreeFast(histogram, num_literals,
1087
+ /* max_bits = */ 8,
1088
+ lit_depth, lit_bits,
1089
+ storage_ix, storage);
1090
+ StoreStaticCommandHuffmanTree(storage_ix, storage);
1091
+ StoreStaticDistanceHuffmanTree(storage_ix, storage);
1092
+ StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
1093
+ n_commands, &lit_depth[0], &lit_bits[0],
1094
+ kStaticCommandCodeDepth,
1095
+ kStaticCommandCodeBits,
1096
+ kStaticDistanceCodeDepth,
1097
+ kStaticDistanceCodeBits,
1098
+ storage_ix, storage);
1099
+ } else {
1100
+ HistogramLiteral lit_histo;
1101
+ HistogramCommand cmd_histo;
1102
+ HistogramDistance dist_histo;
1103
+ BuildHistograms(input, start_pos, mask, commands, n_commands,
1104
+ &lit_histo, &cmd_histo, &dist_histo);
1105
+ std::vector<uint8_t> lit_depth(256);
1106
+ std::vector<uint16_t> lit_bits(256);
1107
+ std::vector<uint8_t> cmd_depth(kNumCommandPrefixes);
1108
+ std::vector<uint16_t> cmd_bits(kNumCommandPrefixes);
1109
+ std::vector<uint8_t> dist_depth(64);
1110
+ std::vector<uint16_t> dist_bits(64);
1111
+ BuildAndStoreHuffmanTreeFast(&lit_histo.data_[0], lit_histo.total_count_,
1112
+ /* max_bits = */ 8,
1113
+ &lit_depth[0], &lit_bits[0],
1114
+ storage_ix, storage);
1115
+ BuildAndStoreHuffmanTreeFast(&cmd_histo.data_[0], cmd_histo.total_count_,
1116
+ /* max_bits = */ 10,
1117
+ &cmd_depth[0], &cmd_bits[0],
1118
+ storage_ix, storage);
1119
+ BuildAndStoreHuffmanTreeFast(&dist_histo.data_[0], dist_histo.total_count_,
1120
+ /* max_bits = */ 6,
1121
+ &dist_depth[0], &dist_bits[0],
1122
+ storage_ix, storage);
1123
+ StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
1124
+ n_commands, &lit_depth[0], &lit_bits[0],
1125
+ &cmd_depth[0], &cmd_bits[0],
1126
+ &dist_depth[0], &dist_bits[0],
1127
+ storage_ix, storage);
826
1128
  }
1129
+
827
1130
  if (is_last) {
828
1131
  JumpToByteBoundary(storage_ix, storage);
829
1132
  }
830
- return true;
831
1133
  }
832
1134
 
833
1135
  // This is for storing uncompressed blocks (simple raw storage of
834
1136
  // bytes-as-bytes).
835
- bool StoreUncompressedMetaBlock(bool final_block,
1137
+ void StoreUncompressedMetaBlock(bool final_block,
836
1138
  const uint8_t * __restrict input,
837
1139
  size_t position, size_t mask,
838
1140
  size_t len,
839
- int * __restrict storage_ix,
1141
+ size_t * __restrict storage_ix,
840
1142
  uint8_t * __restrict storage) {
841
- if (!brotli::StoreUncompressedMetaBlockHeader(len, storage_ix, storage)) {
842
- return false;
843
- }
1143
+ StoreUncompressedMetaBlockHeader(len, storage_ix, storage);
844
1144
  JumpToByteBoundary(storage_ix, storage);
845
1145
 
846
1146
  size_t masked_pos = position & mask;
847
1147
  if (masked_pos + len > mask + 1) {
848
1148
  size_t len1 = mask + 1 - masked_pos;
849
1149
  memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len1);
850
- *storage_ix += static_cast<int>(len1 << 3);
1150
+ *storage_ix += len1 << 3;
851
1151
  len -= len1;
852
1152
  masked_pos = 0;
853
1153
  }
854
1154
  memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len);
855
- *storage_ix += static_cast<int>(len << 3);
1155
+ *storage_ix += len << 3;
856
1156
 
857
1157
  // We need to clear the next 4 bytes to continue to be
858
1158
  // compatible with WriteBits.
@@ -865,10 +1165,9 @@ bool StoreUncompressedMetaBlock(bool final_block,
865
1165
  brotli::WriteBits(1, 1, storage_ix, storage); // isempty
866
1166
  JumpToByteBoundary(storage_ix, storage);
867
1167
  }
868
- return true;
869
1168
  }
870
1169
 
871
- void StoreSyncMetaBlock(int * __restrict storage_ix,
1170
+ void StoreSyncMetaBlock(size_t * __restrict storage_ix,
872
1171
  uint8_t * __restrict storage) {
873
1172
  // Empty metadata meta-block bit pattern:
874
1173
  // 1 bit: is_last (0)