extbrotli 0.0.1.PROTOTYPE

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +28 -0
  3. data/README.md +67 -0
  4. data/Rakefile +158 -0
  5. data/contrib/brotli/LICENSE +202 -0
  6. data/contrib/brotli/README.md +18 -0
  7. data/contrib/brotli/dec/bit_reader.c +55 -0
  8. data/contrib/brotli/dec/bit_reader.h +256 -0
  9. data/contrib/brotli/dec/context.h +260 -0
  10. data/contrib/brotli/dec/decode.c +1573 -0
  11. data/contrib/brotli/dec/decode.h +160 -0
  12. data/contrib/brotli/dec/dictionary.h +9494 -0
  13. data/contrib/brotli/dec/huffman.c +325 -0
  14. data/contrib/brotli/dec/huffman.h +77 -0
  15. data/contrib/brotli/dec/port.h +148 -0
  16. data/contrib/brotli/dec/prefix.h +756 -0
  17. data/contrib/brotli/dec/state.c +149 -0
  18. data/contrib/brotli/dec/state.h +185 -0
  19. data/contrib/brotli/dec/streams.c +99 -0
  20. data/contrib/brotli/dec/streams.h +100 -0
  21. data/contrib/brotli/dec/transform.h +315 -0
  22. data/contrib/brotli/dec/types.h +36 -0
  23. data/contrib/brotli/enc/backward_references.cc +769 -0
  24. data/contrib/brotli/enc/backward_references.h +50 -0
  25. data/contrib/brotli/enc/bit_cost.h +147 -0
  26. data/contrib/brotli/enc/block_splitter.cc +418 -0
  27. data/contrib/brotli/enc/block_splitter.h +78 -0
  28. data/contrib/brotli/enc/brotli_bit_stream.cc +884 -0
  29. data/contrib/brotli/enc/brotli_bit_stream.h +149 -0
  30. data/contrib/brotli/enc/cluster.h +290 -0
  31. data/contrib/brotli/enc/command.h +140 -0
  32. data/contrib/brotli/enc/context.h +185 -0
  33. data/contrib/brotli/enc/dictionary.h +9485 -0
  34. data/contrib/brotli/enc/dictionary_hash.h +4125 -0
  35. data/contrib/brotli/enc/encode.cc +715 -0
  36. data/contrib/brotli/enc/encode.h +196 -0
  37. data/contrib/brotli/enc/encode_parallel.cc +354 -0
  38. data/contrib/brotli/enc/encode_parallel.h +37 -0
  39. data/contrib/brotli/enc/entropy_encode.cc +492 -0
  40. data/contrib/brotli/enc/entropy_encode.h +88 -0
  41. data/contrib/brotli/enc/fast_log.h +179 -0
  42. data/contrib/brotli/enc/find_match_length.h +87 -0
  43. data/contrib/brotli/enc/hash.h +686 -0
  44. data/contrib/brotli/enc/histogram.cc +76 -0
  45. data/contrib/brotli/enc/histogram.h +100 -0
  46. data/contrib/brotli/enc/literal_cost.cc +172 -0
  47. data/contrib/brotli/enc/literal_cost.h +38 -0
  48. data/contrib/brotli/enc/metablock.cc +544 -0
  49. data/contrib/brotli/enc/metablock.h +88 -0
  50. data/contrib/brotli/enc/port.h +151 -0
  51. data/contrib/brotli/enc/prefix.h +85 -0
  52. data/contrib/brotli/enc/ringbuffer.h +108 -0
  53. data/contrib/brotli/enc/static_dict.cc +441 -0
  54. data/contrib/brotli/enc/static_dict.h +40 -0
  55. data/contrib/brotli/enc/static_dict_lut.h +12063 -0
  56. data/contrib/brotli/enc/streams.cc +127 -0
  57. data/contrib/brotli/enc/streams.h +129 -0
  58. data/contrib/brotli/enc/transform.h +250 -0
  59. data/contrib/brotli/enc/write_bits.h +91 -0
  60. data/ext/extbrotli.cc +24 -0
  61. data/ext/extbrotli.h +73 -0
  62. data/ext/extconf.rb +35 -0
  63. data/ext/lldecoder.c +220 -0
  64. data/ext/llencoder.cc +433 -0
  65. data/gemstub.rb +21 -0
  66. data/lib/extbrotli.rb +243 -0
  67. data/lib/extbrotli/version.rb +3 -0
  68. metadata +140 -0
@@ -0,0 +1,78 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Block split point selection utilities.
16
+
17
+ #ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
18
+ #define BROTLI_ENC_BLOCK_SPLITTER_H_
19
+
20
+ #include <stddef.h>
21
+ #include <stdint.h>
22
+ #include <string.h>
23
+ #include <vector>
24
+ #include <utility>
25
+
26
+ #include "./command.h"
27
+ #include "./metablock.h"
28
+
29
+ namespace brotli {
30
+
31
+ struct BlockSplitIterator {
32
+ explicit BlockSplitIterator(const BlockSplit& split)
33
+ : split_(split), idx_(0), type_(0), length_(0) {
34
+ if (!split.lengths.empty()) {
35
+ length_ = split.lengths[0];
36
+ }
37
+ }
38
+
39
+ void Next() {
40
+ if (length_ == 0) {
41
+ ++idx_;
42
+ type_ = split_.types[idx_];
43
+ length_ = split_.lengths[idx_];
44
+ }
45
+ --length_;
46
+ }
47
+
48
+ const BlockSplit& split_;
49
+ int idx_;
50
+ int type_;
51
+ int length_;
52
+ };
53
+
54
+ void CopyLiteralsToByteArray(const Command* cmds,
55
+ const size_t num_commands,
56
+ const uint8_t* data,
57
+ const size_t offset,
58
+ const size_t mask,
59
+ std::vector<uint8_t>* literals);
60
+
61
+ void SplitBlock(const Command* cmds,
62
+ const size_t num_commands,
63
+ const uint8_t* data,
64
+ const size_t offset,
65
+ const size_t mask,
66
+ BlockSplit* literal_split,
67
+ BlockSplit* insert_and_copy_split,
68
+ BlockSplit* dist_split);
69
+
70
+ void SplitBlockByTotalLength(const Command* all_commands,
71
+ const size_t num_commands,
72
+ int input_size,
73
+ int target_length,
74
+ std::vector<std::vector<Command> >* blocks);
75
+
76
+ } // namespace brotli
77
+
78
+ #endif // BROTLI_ENC_BLOCK_SPLITTER_H_
@@ -0,0 +1,884 @@
1
+ // Copyright 2014 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Brotli bit stream functions to support the low level format. There are no
16
+ // compression algorithms here, just the right ordering of bits to match the
17
+ // specs.
18
+
19
+ #include "./brotli_bit_stream.h"
20
+
21
+ #include <algorithm>
22
+ #include <limits>
23
+ #include <vector>
24
+
25
+ #include "./bit_cost.h"
26
+ #include "./context.h"
27
+ #include "./entropy_encode.h"
28
+ #include "./fast_log.h"
29
+ #include "./prefix.h"
30
+ #include "./write_bits.h"
31
+
32
+ namespace brotli {
33
+
34
// Computes the fields needed to store a meta-block length (MLEN).
//
// The value actually stored is MLEN - 1, written with 4, 5 or 6 nibbles, so
// lengths from 1 up to and including 1 << 24 can be represented.
//
//   length       meta-block length to encode
//   bits         receives length - 1, the value to write
//   numbits      receives the width of that value in bits (16, 20 or 24)
//   nibblesbits  receives the 2-bit MNIBBLES field, i.e. nibble count - 4
//
// Returns false, leaving the outputs untouched, when length is 0 or larger
// than 1 << 24.
bool EncodeMlen(size_t length, int* bits, int* numbits, int* nibblesbits) {
  const size_t kMaxLength = static_cast<size_t>(1) << 24;
  // Check the range explicitly instead of relying on length - 1 wrapping
  // around for length == 0.
  if (length == 0 || length > kMaxLength) return false;

  const size_t value = length - 1;  // MLEN - 1 is encoded
  int mnibbles = 4;                 // at least 4 nibbles are always used
  if (value >= (static_cast<size_t>(1) << 20)) {
    mnibbles = 6;
  } else if (value >= (static_cast<size_t>(1) << 16)) {
    mnibbles = 5;
  }
  *nibblesbits = mnibbles - 4;
  *numbits = mnibbles * 4;
  *bits = static_cast<int>(value);
  return true;
}
46
+
47
+ void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
48
+ if (n == 0) {
49
+ WriteBits(1, 0, storage_ix, storage);
50
+ } else {
51
+ WriteBits(1, 1, storage_ix, storage);
52
+ int nbits = Log2Floor(n);
53
+ WriteBits(3, nbits, storage_ix, storage);
54
+ WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
55
+ }
56
+ }
57
+
58
+ bool StoreCompressedMetaBlockHeader(bool final_block,
59
+ size_t length,
60
+ int* storage_ix,
61
+ uint8_t* storage) {
62
+ // Write ISLAST bit.
63
+ WriteBits(1, final_block, storage_ix, storage);
64
+ // Write ISEMPTY bit.
65
+ if (final_block) {
66
+ WriteBits(1, length == 0, storage_ix, storage);
67
+ if (length == 0) {
68
+ return true;
69
+ }
70
+ }
71
+
72
+ if (length == 0) {
73
+ // Only the last meta-block can be empty.
74
+ return false;
75
+ }
76
+
77
+ int lenbits;
78
+ int nlenbits;
79
+ int nibblesbits;
80
+ if (!EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits)) {
81
+ return false;
82
+ }
83
+
84
+ WriteBits(2, nibblesbits, storage_ix, storage);
85
+ WriteBits(nlenbits, lenbits, storage_ix, storage);
86
+
87
+ if (!final_block) {
88
+ // Write ISUNCOMPRESSED bit.
89
+ WriteBits(1, 0, storage_ix, storage);
90
+ }
91
+ return true;
92
+ }
93
+
94
+ bool StoreUncompressedMetaBlockHeader(size_t length,
95
+ int* storage_ix,
96
+ uint8_t* storage) {
97
+ // Write ISLAST bit. Uncompressed block cannot be the last one, so set to 0.
98
+ WriteBits(1, 0, storage_ix, storage);
99
+ int lenbits;
100
+ int nlenbits;
101
+ int nibblesbits;
102
+ if (!EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits)) {
103
+ return false;
104
+ }
105
+ WriteBits(2, nibblesbits, storage_ix, storage);
106
+ WriteBits(nlenbits, lenbits, storage_ix, storage);
107
+ // Write ISUNCOMPRESSED bit.
108
+ WriteBits(1, 1, storage_ix, storage);
109
+ return true;
110
+ }
111
+
112
+ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
113
+ const int num_codes,
114
+ const uint8_t *code_length_bitdepth,
115
+ int *storage_ix,
116
+ uint8_t *storage) {
117
+ static const uint8_t kStorageOrder[kCodeLengthCodes] = {
118
+ 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
119
+ };
120
+ // The bit lengths of the Huffman code over the code length alphabet
121
+ // are compressed with the following static Huffman code:
122
+ // Symbol Code
123
+ // ------ ----
124
+ // 0 00
125
+ // 1 1110
126
+ // 2 110
127
+ // 3 01
128
+ // 4 10
129
+ // 5 1111
130
+ static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {
131
+ 0, 7, 3, 2, 1, 15
132
+ };
133
+ static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {
134
+ 2, 4, 3, 2, 2, 4
135
+ };
136
+
137
+ // Throw away trailing zeros:
138
+ int codes_to_store = kCodeLengthCodes;
139
+ if (num_codes > 1) {
140
+ for (; codes_to_store > 0; --codes_to_store) {
141
+ if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
142
+ break;
143
+ }
144
+ }
145
+ }
146
+ int skip_some = 0; // skips none.
147
+ if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
148
+ code_length_bitdepth[kStorageOrder[1]] == 0) {
149
+ skip_some = 2; // skips two.
150
+ if (code_length_bitdepth[kStorageOrder[2]] == 0) {
151
+ skip_some = 3; // skips three.
152
+ }
153
+ }
154
+ WriteBits(2, skip_some, storage_ix, storage);
155
+ for (int i = skip_some; i < codes_to_store; ++i) {
156
+ uint8_t l = code_length_bitdepth[kStorageOrder[i]];
157
+ WriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
158
+ kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage);
159
+ }
160
+ }
161
+
162
+ void StoreHuffmanTreeToBitMask(
163
+ const std::vector<uint8_t> &huffman_tree,
164
+ const std::vector<uint8_t> &huffman_tree_extra_bits,
165
+ const uint8_t *code_length_bitdepth,
166
+ const std::vector<uint16_t> &code_length_bitdepth_symbols,
167
+ int * __restrict storage_ix,
168
+ uint8_t * __restrict storage) {
169
+ for (int i = 0; i < huffman_tree.size(); ++i) {
170
+ int ix = huffman_tree[i];
171
+ WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
172
+ storage_ix, storage);
173
+ // Extra bits
174
+ switch (ix) {
175
+ case 16:
176
+ WriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage);
177
+ break;
178
+ case 17:
179
+ WriteBits(3, huffman_tree_extra_bits[i], storage_ix, storage);
180
+ break;
181
+ }
182
+ }
183
+ }
184
+
185
+ void StoreSimpleHuffmanTree(const uint8_t* depths,
186
+ int symbols[4],
187
+ int num_symbols,
188
+ int max_bits,
189
+ int *storage_ix, uint8_t *storage) {
190
+ // value of 1 indicates a simple Huffman code
191
+ WriteBits(2, 1, storage_ix, storage);
192
+ WriteBits(2, num_symbols - 1, storage_ix, storage); // NSYM - 1
193
+
194
+ // Sort
195
+ for (int i = 0; i < num_symbols; i++) {
196
+ for (int j = i + 1; j < num_symbols; j++) {
197
+ if (depths[symbols[j]] < depths[symbols[i]]) {
198
+ std::swap(symbols[j], symbols[i]);
199
+ }
200
+ }
201
+ }
202
+
203
+ if (num_symbols == 2) {
204
+ WriteBits(max_bits, symbols[0], storage_ix, storage);
205
+ WriteBits(max_bits, symbols[1], storage_ix, storage);
206
+ } else if (num_symbols == 3) {
207
+ WriteBits(max_bits, symbols[0], storage_ix, storage);
208
+ WriteBits(max_bits, symbols[1], storage_ix, storage);
209
+ WriteBits(max_bits, symbols[2], storage_ix, storage);
210
+ } else {
211
+ WriteBits(max_bits, symbols[0], storage_ix, storage);
212
+ WriteBits(max_bits, symbols[1], storage_ix, storage);
213
+ WriteBits(max_bits, symbols[2], storage_ix, storage);
214
+ WriteBits(max_bits, symbols[3], storage_ix, storage);
215
+ // tree-select
216
+ WriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
217
+ }
218
+ }
219
+
220
+ // num = alphabet size
221
+ // depths = symbol depths
222
+ void StoreHuffmanTree(const uint8_t* depths, size_t num,
223
+ int *storage_ix, uint8_t *storage) {
224
+ // Write the Huffman tree into the brotli-representation.
225
+ std::vector<uint8_t> huffman_tree;
226
+ std::vector<uint8_t> huffman_tree_extra_bits;
227
+ // TODO: Consider allocating these from stack.
228
+ huffman_tree.reserve(256);
229
+ huffman_tree_extra_bits.reserve(256);
230
+ WriteHuffmanTree(depths, num, &huffman_tree, &huffman_tree_extra_bits);
231
+
232
+ // Calculate the statistics of the Huffman tree in brotli-representation.
233
+ int huffman_tree_histogram[kCodeLengthCodes] = { 0 };
234
+ for (int i = 0; i < huffman_tree.size(); ++i) {
235
+ ++huffman_tree_histogram[huffman_tree[i]];
236
+ }
237
+
238
+ int num_codes = 0;
239
+ int code = 0;
240
+ for (int i = 0; i < kCodeLengthCodes; ++i) {
241
+ if (huffman_tree_histogram[i]) {
242
+ if (num_codes == 0) {
243
+ code = i;
244
+ num_codes = 1;
245
+ } else if (num_codes == 1) {
246
+ num_codes = 2;
247
+ break;
248
+ }
249
+ }
250
+ }
251
+
252
+ // Calculate another Huffman tree to use for compressing both the
253
+ // earlier Huffman tree with.
254
+ // TODO: Consider allocating these from stack.
255
+ uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
256
+ std::vector<uint16_t> code_length_bitdepth_symbols(kCodeLengthCodes);
257
+ CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
258
+ 5, &code_length_bitdepth[0]);
259
+ ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
260
+ code_length_bitdepth_symbols.data());
261
+
262
+ // Now, we have all the data, let's start storing it
263
+ StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
264
+ storage_ix, storage);
265
+
266
+ if (num_codes == 1) {
267
+ code_length_bitdepth[code] = 0;
268
+ }
269
+
270
+ // Store the real huffman tree now.
271
+ StoreHuffmanTreeToBitMask(huffman_tree,
272
+ huffman_tree_extra_bits,
273
+ &code_length_bitdepth[0],
274
+ code_length_bitdepth_symbols,
275
+ storage_ix, storage);
276
+ }
277
+
278
+ void BuildAndStoreHuffmanTree(const int *histogram,
279
+ const int length,
280
+ uint8_t* depth,
281
+ uint16_t* bits,
282
+ int* storage_ix,
283
+ uint8_t* storage) {
284
+ int count = 0;
285
+ int s4[4] = { 0 };
286
+ for (size_t i = 0; i < length; i++) {
287
+ if (histogram[i]) {
288
+ if (count < 4) {
289
+ s4[count] = i;
290
+ } else if (count > 4) {
291
+ break;
292
+ }
293
+ count++;
294
+ }
295
+ }
296
+
297
+ int max_bits_counter = length - 1;
298
+ int max_bits = 0;
299
+ while (max_bits_counter) {
300
+ max_bits_counter >>= 1;
301
+ ++max_bits;
302
+ }
303
+
304
+ if (count <= 1) {
305
+ WriteBits(4, 1, storage_ix, storage);
306
+ WriteBits(max_bits, s4[0], storage_ix, storage);
307
+ return;
308
+ }
309
+
310
+ CreateHuffmanTree(histogram, length, 15, depth);
311
+ ConvertBitDepthsToSymbols(depth, length, bits);
312
+
313
+ if (count <= 4) {
314
+ StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
315
+ } else {
316
+ StoreHuffmanTree(depth, length, storage_ix, storage);
317
+ }
318
+ }
319
+
320
// Returns the position of the first element of v equal to value, or -1 when
// v does not contain it.
int IndexOf(const std::vector<int>& v, int value) {
  const std::vector<int>::const_iterator it =
      std::find(v.begin(), v.end(), value);
  return it == v.end() ? -1 : static_cast<int>(it - v.begin());
}

// Moves the element at position index to the front of *v, shifting the
// elements that were before it one place towards the back.  index must be a
// valid position in *v.
void MoveToFront(std::vector<int>* v, int index) {
  std::rotate(v->begin(), v->begin() + index, v->begin() + index + 1);
}

// Applies the move-to-front transform to v.
//
// The working list starts as 0, 1, ..., max(v).  Each input value is replaced
// by its current position in that list and is then moved to the front of it.
// An empty input yields an empty result.  Values are expected to be
// non-negative so that every value can be found in the list.
std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
  if (v.empty()) return v;

  const int max_value = *std::max_element(v.begin(), v.end());
  std::vector<int> mtf(max_value + 1);
  for (size_t i = 0; i < mtf.size(); ++i) {
    mtf[i] = static_cast<int>(i);
  }

  std::vector<int> result(v.size());
  for (size_t i = 0; i < v.size(); ++i) {
    const int index = IndexOf(mtf, v[i]);
    result[i] = index;
    MoveToFront(&mtf, index);
  }
  return result;
}
347
+
348
+ // Finds runs of zeros in v_in and replaces them with a prefix code of the run
349
+ // length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
350
+ // shifted by *max_length_prefix. Will not create prefix codes bigger than the
351
+ // initial value of *max_run_length_prefix. The prefix code of run length L is
352
+ // simply Log2Floor(L) and the number of extra bits is the same as the prefix
353
+ // code.
354
+ void RunLengthCodeZeros(const std::vector<int>& v_in,
355
+ int* max_run_length_prefix,
356
+ std::vector<int>* v_out,
357
+ std::vector<int>* extra_bits) {
358
+ int max_reps = 0;
359
+ for (int i = 0; i < v_in.size();) {
360
+ for (; i < v_in.size() && v_in[i] != 0; ++i) ;
361
+ int reps = 0;
362
+ for (; i < v_in.size() && v_in[i] == 0; ++i) {
363
+ ++reps;
364
+ }
365
+ max_reps = std::max(reps, max_reps);
366
+ }
367
+ int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
368
+ *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
369
+ for (int i = 0; i < v_in.size();) {
370
+ if (v_in[i] != 0) {
371
+ v_out->push_back(v_in[i] + *max_run_length_prefix);
372
+ extra_bits->push_back(0);
373
+ ++i;
374
+ } else {
375
+ int reps = 1;
376
+ for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
377
+ ++reps;
378
+ }
379
+ i += reps;
380
+ while (reps) {
381
+ if (reps < (2 << *max_run_length_prefix)) {
382
+ int run_length_prefix = Log2Floor(reps);
383
+ v_out->push_back(run_length_prefix);
384
+ extra_bits->push_back(reps - (1 << run_length_prefix));
385
+ break;
386
+ } else {
387
+ v_out->push_back(*max_run_length_prefix);
388
+ extra_bits->push_back((1 << *max_run_length_prefix) - 1);
389
+ reps -= (2 << *max_run_length_prefix) - 1;
390
+ }
391
+ }
392
+ }
393
+ }
394
+ }
395
+
396
+ void EncodeContextMap(const std::vector<int>& context_map,
397
+ int num_clusters,
398
+ int* storage_ix, uint8_t* storage) {
399
+ StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
400
+
401
+ if (num_clusters == 1) {
402
+ return;
403
+ }
404
+
405
+ std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
406
+ std::vector<int> rle_symbols;
407
+ std::vector<int> extra_bits;
408
+ int max_run_length_prefix = 6;
409
+ RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
410
+ &rle_symbols, &extra_bits);
411
+ HistogramContextMap symbol_histogram;
412
+ for (int i = 0; i < rle_symbols.size(); ++i) {
413
+ symbol_histogram.Add(rle_symbols[i]);
414
+ }
415
+ bool use_rle = max_run_length_prefix > 0;
416
+ WriteBits(1, use_rle, storage_ix, storage);
417
+ if (use_rle) {
418
+ WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
419
+ }
420
+ EntropyCodeContextMap symbol_code;
421
+ memset(symbol_code.depth_, 0, sizeof(symbol_code.depth_));
422
+ memset(symbol_code.bits_, 0, sizeof(symbol_code.bits_));
423
+ BuildAndStoreHuffmanTree(symbol_histogram.data_,
424
+ num_clusters + max_run_length_prefix,
425
+ symbol_code.depth_, symbol_code.bits_,
426
+ storage_ix, storage);
427
+ for (int i = 0; i < rle_symbols.size(); ++i) {
428
+ WriteBits(symbol_code.depth_[rle_symbols[i]],
429
+ symbol_code.bits_[rle_symbols[i]],
430
+ storage_ix, storage);
431
+ if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
432
+ WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
433
+ }
434
+ }
435
+ WriteBits(1, 1, storage_ix, storage); // use move-to-front
436
+ }
437
+
438
+ void StoreBlockSwitch(const BlockSplitCode& code,
439
+ const int block_ix,
440
+ int* storage_ix,
441
+ uint8_t* storage) {
442
+ if (block_ix > 0) {
443
+ int typecode = code.type_code[block_ix];
444
+ WriteBits(code.type_depths[typecode], code.type_bits[typecode],
445
+ storage_ix, storage);
446
+ }
447
+ int lencode = code.length_prefix[block_ix];
448
+ WriteBits(code.length_depths[lencode], code.length_bits[lencode],
449
+ storage_ix, storage);
450
+ WriteBits(code.length_nextra[block_ix], code.length_extra[block_ix],
451
+ storage_ix, storage);
452
+ }
453
+
454
+ void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
455
+ const std::vector<int>& lengths,
456
+ const int num_types,
457
+ BlockSplitCode* code,
458
+ int* storage_ix,
459
+ uint8_t* storage) {
460
+ const int num_blocks = types.size();
461
+ std::vector<int> type_histo(num_types + 2);
462
+ std::vector<int> length_histo(26);
463
+ int last_type = 1;
464
+ int second_last_type = 0;
465
+ code->type_code.resize(num_blocks);
466
+ code->length_prefix.resize(num_blocks);
467
+ code->length_nextra.resize(num_blocks);
468
+ code->length_extra.resize(num_blocks);
469
+ code->type_depths.resize(num_types + 2);
470
+ code->type_bits.resize(num_types + 2);
471
+ code->length_depths.resize(26);
472
+ code->length_bits.resize(26);
473
+ for (int i = 0; i < num_blocks; ++i) {
474
+ int type = types[i];
475
+ int type_code = (type == last_type + 1 ? 1 :
476
+ type == second_last_type ? 0 :
477
+ type + 2);
478
+ second_last_type = last_type;
479
+ last_type = type;
480
+ code->type_code[i] = type_code;
481
+ if (i > 0) ++type_histo[type_code];
482
+ GetBlockLengthPrefixCode(lengths[i],
483
+ &code->length_prefix[i],
484
+ &code->length_nextra[i],
485
+ &code->length_extra[i]);
486
+ ++length_histo[code->length_prefix[i]];
487
+ }
488
+ StoreVarLenUint8(num_types - 1, storage_ix, storage);
489
+ if (num_types > 1) {
490
+ BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2,
491
+ &code->type_depths[0], &code->type_bits[0],
492
+ storage_ix, storage);
493
+ BuildAndStoreHuffmanTree(&length_histo[0], 26,
494
+ &code->length_depths[0], &code->length_bits[0],
495
+ storage_ix, storage);
496
+ StoreBlockSwitch(*code, 0, storage_ix, storage);
497
+ }
498
+ }
499
+
500
+ void StoreTrivialContextMap(int num_types,
501
+ int context_bits,
502
+ int* storage_ix,
503
+ uint8_t* storage) {
504
+ StoreVarLenUint8(num_types - 1, storage_ix, storage);
505
+ if (num_types > 1) {
506
+ int repeat_code = context_bits - 1;
507
+ int repeat_bits = (1 << repeat_code) - 1;
508
+ int alphabet_size = num_types + repeat_code;
509
+ std::vector<int> histogram(alphabet_size);
510
+ std::vector<uint8_t> depths(alphabet_size);
511
+ std::vector<uint16_t> bits(alphabet_size);
512
+ // Write RLEMAX.
513
+ WriteBits(1, 1, storage_ix, storage);
514
+ WriteBits(4, repeat_code - 1, storage_ix, storage);
515
+ histogram[repeat_code] = num_types;
516
+ histogram[0] = 1;
517
+ for (int i = context_bits; i < alphabet_size; ++i) {
518
+ histogram[i] = 1;
519
+ }
520
+ BuildAndStoreHuffmanTree(&histogram[0], alphabet_size,
521
+ &depths[0], &bits[0],
522
+ storage_ix, storage);
523
+ for (int i = 0; i < num_types; ++i) {
524
+ int code = (i == 0 ? 0 : i + context_bits - 1);
525
+ WriteBits(depths[code], bits[code], storage_ix, storage);
526
+ WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage);
527
+ WriteBits(repeat_code, repeat_bits, storage_ix, storage);
528
+ }
529
+ // Write IMTF (inverse-move-to-front) bit.
530
+ WriteBits(1, 1, storage_ix, storage);
531
+ }
532
+ }
533
+
534
+ // Manages the encoding of one block category (literal, command or distance).
535
+ class BlockEncoder {
536
+ public:
537
+ BlockEncoder(int alphabet_size,
538
+ int num_block_types,
539
+ const std::vector<int>& block_types,
540
+ const std::vector<int>& block_lengths)
541
+ : alphabet_size_(alphabet_size),
542
+ num_block_types_(num_block_types),
543
+ block_types_(block_types),
544
+ block_lengths_(block_lengths),
545
+ block_ix_(0),
546
+ block_len_(block_lengths.empty() ? 0 : block_lengths[0]),
547
+ entropy_ix_(0) {}
548
+
549
+ // Creates entropy codes of block lengths and block types and stores them
550
+ // to the bit stream.
551
+ void BuildAndStoreBlockSwitchEntropyCodes(int* storage_ix, uint8_t* storage) {
552
+ BuildAndStoreBlockSplitCode(
553
+ block_types_, block_lengths_, num_block_types_,
554
+ &block_split_code_, storage_ix, storage);
555
+ }
556
+
557
+ // Creates entropy codes for all block types and stores them to the bit
558
+ // stream.
559
+ template<int kSize>
560
+ void BuildAndStoreEntropyCodes(
561
+ const std::vector<Histogram<kSize> >& histograms,
562
+ int* storage_ix, uint8_t* storage) {
563
+ depths_.resize(histograms.size() * alphabet_size_);
564
+ bits_.resize(histograms.size() * alphabet_size_);
565
+ for (int i = 0; i < histograms.size(); ++i) {
566
+ int ix = i * alphabet_size_;
567
+ BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size_,
568
+ &depths_[ix], &bits_[ix],
569
+ storage_ix, storage);
570
+ }
571
+ }
572
+
573
+ // Stores the next symbol with the entropy code of the current block type.
574
+ // Updates the block type and block length at block boundaries.
575
+ void StoreSymbol(int symbol, int* storage_ix, uint8_t* storage) {
576
+ if (block_len_ == 0) {
577
+ ++block_ix_;
578
+ block_len_ = block_lengths_[block_ix_];
579
+ entropy_ix_ = block_types_[block_ix_] * alphabet_size_;
580
+ StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
581
+ }
582
+ --block_len_;
583
+ int ix = entropy_ix_ + symbol;
584
+ WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
585
+ }
586
+
587
+ // Stores the next symbol with the entropy code of the current block type and
588
+ // context value.
589
+ // Updates the block type and block length at block boundaries.
590
+ template<int kContextBits>
591
+ void StoreSymbolWithContext(int symbol, int context,
592
+ const std::vector<int>& context_map,
593
+ int* storage_ix, uint8_t* storage) {
594
+ if (block_len_ == 0) {
595
+ ++block_ix_;
596
+ block_len_ = block_lengths_[block_ix_];
597
+ entropy_ix_ = block_types_[block_ix_] << kContextBits;
598
+ StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
599
+ }
600
+ --block_len_;
601
+ int histo_ix = context_map[entropy_ix_ + context];
602
+ int ix = histo_ix * alphabet_size_ + symbol;
603
+ WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
604
+ }
605
+
606
+ private:
607
+ const int alphabet_size_;
608
+ const int num_block_types_;
609
+ const std::vector<int>& block_types_;
610
+ const std::vector<int>& block_lengths_;
611
+ BlockSplitCode block_split_code_;
612
+ int block_ix_;
613
+ int block_len_;
614
+ int entropy_ix_;
615
+ std::vector<uint8_t> depths_;
616
+ std::vector<uint16_t> bits_;
617
+ };
618
+
619
// Rounds the bit position *storage_ix up to the next multiple of 8 (leaving
// it unchanged when already aligned) and zeroes the byte at the new position.
void JumpToByteBoundary(int* storage_ix, uint8_t* storage) {
  const int aligned_ix = (*storage_ix + 7) & ~7;
  *storage_ix = aligned_ix;
  storage[aligned_ix >> 3] = 0;
}
623
+
624
+ bool StoreMetaBlock(const uint8_t* input,
625
+ size_t start_pos,
626
+ size_t length,
627
+ size_t mask,
628
+ uint8_t prev_byte,
629
+ uint8_t prev_byte2,
630
+ bool is_last,
631
+ int num_direct_distance_codes,
632
+ int distance_postfix_bits,
633
+ int literal_context_mode,
634
+ const brotli::Command *commands,
635
+ size_t n_commands,
636
+ const MetaBlockSplit& mb,
637
+ int *storage_ix,
638
+ uint8_t *storage) {
639
+ if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
640
+ return false;
641
+ }
642
+
643
+ if (length == 0) {
644
+ // Only the last meta-block can be empty, so jump to next byte.
645
+ JumpToByteBoundary(storage_ix, storage);
646
+ return true;
647
+ }
648
+
649
+ int num_distance_codes =
650
+ kNumDistanceShortCodes + num_direct_distance_codes +
651
+ (48 << distance_postfix_bits);
652
+
653
+ BlockEncoder literal_enc(256,
654
+ mb.literal_split.num_types,
655
+ mb.literal_split.types,
656
+ mb.literal_split.lengths);
657
+ BlockEncoder command_enc(kNumCommandPrefixes,
658
+ mb.command_split.num_types,
659
+ mb.command_split.types,
660
+ mb.command_split.lengths);
661
+ BlockEncoder distance_enc(num_distance_codes,
662
+ mb.distance_split.num_types,
663
+ mb.distance_split.types,
664
+ mb.distance_split.lengths);
665
+
666
+ literal_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
667
+ command_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
668
+ distance_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
669
+
670
+ WriteBits(2, distance_postfix_bits, storage_ix, storage);
671
+ WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
672
+ storage_ix, storage);
673
+ for (int i = 0; i < mb.literal_split.num_types; ++i) {
674
+ WriteBits(2, literal_context_mode, storage_ix, storage);
675
+ }
676
+
677
+ if (mb.literal_context_map.empty()) {
678
+ StoreTrivialContextMap(mb.literal_histograms.size(), kLiteralContextBits,
679
+ storage_ix, storage);
680
+ } else {
681
+ EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
682
+ storage_ix, storage);
683
+ }
684
+
685
+ if (mb.distance_context_map.empty()) {
686
+ StoreTrivialContextMap(mb.distance_histograms.size(), kDistanceContextBits,
687
+ storage_ix, storage);
688
+ } else {
689
+ EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
690
+ storage_ix, storage);
691
+ }
692
+
693
+ literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms,
694
+ storage_ix, storage);
695
+ command_enc.BuildAndStoreEntropyCodes(mb.command_histograms,
696
+ storage_ix, storage);
697
+ distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms,
698
+ storage_ix, storage);
699
+
700
+ size_t pos = start_pos;
701
+ for (int i = 0; i < n_commands; ++i) {
702
+ const Command cmd = commands[i];
703
+ int cmd_code = cmd.cmd_prefix_;
704
+ int lennumextra = cmd.cmd_extra_ >> 48;
705
+ uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffULL;
706
+ command_enc.StoreSymbol(cmd_code, storage_ix, storage);
707
+ WriteBits(lennumextra, lenextra, storage_ix, storage);
708
+ if (mb.literal_context_map.empty()) {
709
+ for (int j = 0; j < cmd.insert_len_; j++) {
710
+ literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
711
+ ++pos;
712
+ }
713
+ } else {
714
+ for (int j = 0; j < cmd.insert_len_; ++j) {
715
+ int context = Context(prev_byte, prev_byte2,
716
+ literal_context_mode);
717
+ int literal = input[pos & mask];
718
+ literal_enc.StoreSymbolWithContext<kLiteralContextBits>(
719
+ literal, context, mb.literal_context_map, storage_ix, storage);
720
+ prev_byte2 = prev_byte;
721
+ prev_byte = literal;
722
+ ++pos;
723
+ }
724
+ }
725
+ pos += cmd.copy_len_;
726
+ if (cmd.copy_len_ > 0) {
727
+ prev_byte2 = input[(pos - 2) & mask];
728
+ prev_byte = input[(pos - 1) & mask];
729
+ if (cmd.cmd_prefix_ >= 128) {
730
+ int dist_code = cmd.dist_prefix_;
731
+ int distnumextra = cmd.dist_extra_ >> 24;
732
+ int distextra = cmd.dist_extra_ & 0xffffff;
733
+ if (mb.distance_context_map.empty()) {
734
+ distance_enc.StoreSymbol(dist_code, storage_ix, storage);
735
+ } else {
736
+ int context = cmd.DistanceContext();
737
+ distance_enc.StoreSymbolWithContext<kDistanceContextBits>(
738
+ dist_code, context, mb.distance_context_map, storage_ix, storage);
739
+ }
740
+ brotli::WriteBits(distnumextra, distextra, storage_ix, storage);
741
+ }
742
+ }
743
+ }
744
+ if (is_last) {
745
+ JumpToByteBoundary(storage_ix, storage);
746
+ }
747
+ return true;
748
+ }
749
+
750
+ bool StoreMetaBlockTrivial(const uint8_t* input,
751
+ size_t start_pos,
752
+ size_t length,
753
+ size_t mask,
754
+ bool is_last,
755
+ const brotli::Command *commands,
756
+ size_t n_commands,
757
+ int *storage_ix,
758
+ uint8_t *storage) {
759
+ if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
760
+ return false;
761
+ }
762
+
763
+ if (length == 0) {
764
+ // Only the last meta-block can be empty, so jump to next byte.
765
+ JumpToByteBoundary(storage_ix, storage);
766
+ return true;
767
+ }
768
+
769
+ HistogramLiteral lit_histo;
770
+ HistogramCommand cmd_histo;
771
+ HistogramDistance dist_histo;
772
+
773
+ size_t pos = start_pos;
774
+ for (int i = 0; i < n_commands; ++i) {
775
+ const Command cmd = commands[i];
776
+ cmd_histo.Add(cmd.cmd_prefix_);
777
+ for (int j = 0; j < cmd.insert_len_; ++j) {
778
+ lit_histo.Add(input[pos & mask]);
779
+ ++pos;
780
+ }
781
+ pos += cmd.copy_len_;
782
+ if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
783
+ dist_histo.Add(cmd.dist_prefix_);
784
+ }
785
+ }
786
+
787
+ WriteBits(13, 0, storage_ix, storage);
788
+
789
+ std::vector<uint8_t> lit_depth(256);
790
+ std::vector<uint16_t> lit_bits(256);
791
+ std::vector<uint8_t> cmd_depth(kNumCommandPrefixes);
792
+ std::vector<uint16_t> cmd_bits(kNumCommandPrefixes);
793
+ std::vector<uint8_t> dist_depth(64);
794
+ std::vector<uint16_t> dist_bits(64);
795
+
796
+ BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256,
797
+ &lit_depth[0], &lit_bits[0],
798
+ storage_ix, storage);
799
+ BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes,
800
+ &cmd_depth[0], &cmd_bits[0],
801
+ storage_ix, storage);
802
+ BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64,
803
+ &dist_depth[0], &dist_bits[0],
804
+ storage_ix, storage);
805
+
806
+ pos = start_pos;
807
+ for (int i = 0; i < n_commands; ++i) {
808
+ const Command cmd = commands[i];
809
+ const int cmd_code = cmd.cmd_prefix_;
810
+ const int lennumextra = cmd.cmd_extra_ >> 48;
811
+ const uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffULL;
812
+ WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
813
+ WriteBits(lennumextra, lenextra, storage_ix, storage);
814
+ for (int j = 0; j < cmd.insert_len_; j++) {
815
+ const uint8_t literal = input[pos & mask];
816
+ WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
817
+ ++pos;
818
+ }
819
+ pos += cmd.copy_len_;
820
+ if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
821
+ const int dist_code = cmd.dist_prefix_;
822
+ const int distnumextra = cmd.dist_extra_ >> 24;
823
+ const int distextra = cmd.dist_extra_ & 0xffffff;
824
+ WriteBits(dist_depth[dist_code], dist_bits[dist_code],
825
+ storage_ix, storage);
826
+ WriteBits(distnumextra, distextra, storage_ix, storage);
827
+ }
828
+ }
829
+ if (is_last) {
830
+ JumpToByteBoundary(storage_ix, storage);
831
+ }
832
+ return true;
833
+ }
834
+
835
+ // This is for storing uncompressed blocks (simple raw storage of
836
+ // bytes-as-bytes).
837
+ bool StoreUncompressedMetaBlock(bool final_block,
838
+ const uint8_t * __restrict input,
839
+ size_t position, size_t mask,
840
+ size_t len,
841
+ int * __restrict storage_ix,
842
+ uint8_t * __restrict storage) {
843
+ if (!brotli::StoreUncompressedMetaBlockHeader(len, storage_ix, storage)) {
844
+ return false;
845
+ }
846
+ JumpToByteBoundary(storage_ix, storage);
847
+
848
+ size_t masked_pos = position & mask;
849
+ if (masked_pos + len > mask + 1) {
850
+ size_t len1 = mask + 1 - masked_pos;
851
+ memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len1);
852
+ *storage_ix += len1 << 3;
853
+ len -= len1;
854
+ masked_pos = 0;
855
+ }
856
+ memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len);
857
+ *storage_ix += len << 3;
858
+
859
+ // We need to clear the next 4 bytes to continue to be
860
+ // compatible with WriteBits.
861
+ brotli::WriteBitsPrepareStorage(*storage_ix, storage);
862
+
863
+ // Since the uncomressed block itself may not be the final block, add an empty
864
+ // one after this.
865
+ if (final_block) {
866
+ brotli::WriteBits(1, 1, storage_ix, storage); // islast
867
+ brotli::WriteBits(1, 1, storage_ix, storage); // isempty
868
+ JumpToByteBoundary(storage_ix, storage);
869
+ }
870
+ return true;
871
+ }
872
+
873
+ void StoreSyncMetaBlock(int * __restrict storage_ix,
874
+ uint8_t * __restrict storage) {
875
+ // Empty metadata meta-block bit pattern:
876
+ // 1 bit: is_last (0)
877
+ // 2 bits: num nibbles (3)
878
+ // 1 bit: reserved (0)
879
+ // 2 bits: metadata length bytes (0)
880
+ WriteBits(6, 6, storage_ix, storage);
881
+ JumpToByteBoundary(storage_ix, storage);
882
+ }
883
+
884
+ } // namespace brotli