brotli 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +4 -0
  6. data/Gemfile +4 -0
  7. data/README.md +36 -0
  8. data/Rakefile +13 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +7 -0
  11. data/brotli.gemspec +28 -0
  12. data/ext/brotli/brotli.cc +67 -0
  13. data/ext/brotli/brotli.h +9 -0
  14. data/ext/brotli/extconf.rb +34 -0
  15. data/lib/brotli.rb +2 -0
  16. data/lib/brotli/version.rb +3 -0
  17. data/vendor/brotli/LICENSE +202 -0
  18. data/vendor/brotli/dec/Makefile +12 -0
  19. data/vendor/brotli/dec/bit_reader.c +55 -0
  20. data/vendor/brotli/dec/bit_reader.h +256 -0
  21. data/vendor/brotli/dec/context.h +260 -0
  22. data/vendor/brotli/dec/decode.c +1573 -0
  23. data/vendor/brotli/dec/decode.h +160 -0
  24. data/vendor/brotli/dec/dictionary.h +9494 -0
  25. data/vendor/brotli/dec/huffman.c +325 -0
  26. data/vendor/brotli/dec/huffman.h +77 -0
  27. data/vendor/brotli/dec/port.h +148 -0
  28. data/vendor/brotli/dec/prefix.h +756 -0
  29. data/vendor/brotli/dec/state.c +149 -0
  30. data/vendor/brotli/dec/state.h +185 -0
  31. data/vendor/brotli/dec/streams.c +99 -0
  32. data/vendor/brotli/dec/streams.h +100 -0
  33. data/vendor/brotli/dec/transform.h +315 -0
  34. data/vendor/brotli/dec/types.h +36 -0
  35. data/vendor/brotli/enc/Makefile +11 -0
  36. data/vendor/brotli/enc/backward_references.cc +769 -0
  37. data/vendor/brotli/enc/backward_references.h +50 -0
  38. data/vendor/brotli/enc/bit_cost.h +147 -0
  39. data/vendor/brotli/enc/block_splitter.cc +418 -0
  40. data/vendor/brotli/enc/block_splitter.h +78 -0
  41. data/vendor/brotli/enc/brotli_bit_stream.cc +884 -0
  42. data/vendor/brotli/enc/brotli_bit_stream.h +149 -0
  43. data/vendor/brotli/enc/cluster.h +290 -0
  44. data/vendor/brotli/enc/command.h +140 -0
  45. data/vendor/brotli/enc/context.h +185 -0
  46. data/vendor/brotli/enc/dictionary.h +9485 -0
  47. data/vendor/brotli/enc/dictionary_hash.h +4125 -0
  48. data/vendor/brotli/enc/encode.cc +715 -0
  49. data/vendor/brotli/enc/encode.h +196 -0
  50. data/vendor/brotli/enc/encode_parallel.cc +354 -0
  51. data/vendor/brotli/enc/encode_parallel.h +37 -0
  52. data/vendor/brotli/enc/entropy_encode.cc +492 -0
  53. data/vendor/brotli/enc/entropy_encode.h +88 -0
  54. data/vendor/brotli/enc/fast_log.h +179 -0
  55. data/vendor/brotli/enc/find_match_length.h +87 -0
  56. data/vendor/brotli/enc/hash.h +686 -0
  57. data/vendor/brotli/enc/histogram.cc +76 -0
  58. data/vendor/brotli/enc/histogram.h +100 -0
  59. data/vendor/brotli/enc/literal_cost.cc +172 -0
  60. data/vendor/brotli/enc/literal_cost.h +38 -0
  61. data/vendor/brotli/enc/metablock.cc +544 -0
  62. data/vendor/brotli/enc/metablock.h +88 -0
  63. data/vendor/brotli/enc/port.h +151 -0
  64. data/vendor/brotli/enc/prefix.h +85 -0
  65. data/vendor/brotli/enc/ringbuffer.h +108 -0
  66. data/vendor/brotli/enc/static_dict.cc +441 -0
  67. data/vendor/brotli/enc/static_dict.h +40 -0
  68. data/vendor/brotli/enc/static_dict_lut.h +12063 -0
  69. data/vendor/brotli/enc/streams.cc +127 -0
  70. data/vendor/brotli/enc/streams.h +129 -0
  71. data/vendor/brotli/enc/transform.h +250 -0
  72. data/vendor/brotli/enc/write_bits.h +91 -0
  73. metadata +171 -0
@@ -0,0 +1,37 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // API for parallel Brotli compression
16
+ // Note that this is only a proof of concept currently and not part of the
17
+ // final API yet.
18
+
19
+ #ifndef BROTLI_ENC_ENCODE_PARALLEL_H_
20
+ #define BROTLI_ENC_ENCODE_PARALLEL_H_
21
+
22
+ #include <stddef.h>
23
+ #include <stdint.h>
24
+
25
+ #include "./encode.h"
26
+
27
+ namespace brotli {
28
+
29
+ int BrotliCompressBufferParallel(BrotliParams params,
30
+ size_t input_size,
31
+ const uint8_t* input_buffer,
32
+ size_t* encoded_size,
33
+ uint8_t* encoded_buffer);
34
+
35
+ } // namespace brotli
36
+
37
+ #endif // BROTLI_ENC_ENCODE_PARALLEL_H_
@@ -0,0 +1,492 @@
1
+ // Copyright 2010 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Entropy encoding (Huffman) utilities.
16
+
17
+ #include "./entropy_encode.h"
18
+
19
+ #include <stdint.h>
20
+ #include <algorithm>
21
+ #include <limits>
22
+ #include <vector>
23
+ #include <cstdlib>
24
+
25
+ #include "./histogram.h"
26
+
27
+ namespace brotli {
28
+
29
+ namespace {
30
+
31
// One node of a Huffman tree that is stored in a flat pool and linked by
// pool indices instead of pointers.
//
// SetDepth() treats a node with index_left_ >= 0 as an internal node whose
// two children live at pool indices index_left_ and index_right_or_value_.
// Any other node is a leaf, and index_right_or_value_ is the symbol it
// stands for.
struct HuffmanTree {
  // Intentionally leaves all members uninitialized.
  HuffmanTree() {}

  // Builds a node with the given population count and link/value fields.
  HuffmanTree(int count, int16_t left, int16_t right) {
    total_count_ = count;
    index_left_ = left;
    index_right_or_value_ = right;
  }

  int total_count_;               // Population count of the whole subtree.
  int16_t index_left_;            // Left child index, or negative for a leaf.
  int16_t index_right_or_value_;  // Right child index, or the leaf's symbol.
};
44
+
45
+ // Sort the root nodes, least popular first.
46
+ bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
47
+ return v0.total_count_ < v1.total_count_;
48
+ }
49
+
50
+ void SetDepth(const HuffmanTree &p,
51
+ HuffmanTree *pool,
52
+ uint8_t *depth,
53
+ int level) {
54
+ if (p.index_left_ >= 0) {
55
+ ++level;
56
+ SetDepth(pool[p.index_left_], pool, depth, level);
57
+ SetDepth(pool[p.index_right_or_value_], pool, depth, level);
58
+ } else {
59
+ depth[p.index_right_or_value_] = level;
60
+ }
61
+ }
62
+
63
+ } // namespace
64
+
65
+ // This function will create a Huffman tree.
66
+ //
67
+ // The catch here is that the tree cannot be arbitrarily deep.
68
+ // Brotli specifies a maximum depth of 15 bits for "code trees"
69
+ // and 7 bits for "code length code trees."
70
+ //
71
+ // count_limit is the value that is to be faked as the minimum value
72
+ // and this minimum value is raised until the tree matches the
73
+ // maximum length requirement.
74
+ //
75
+ // This algorithm is not of excellent performance for very long data blocks,
76
+ // especially when population counts are larger than 2**tree_limit, but
77
+ // we are not planning to use this with extremely long blocks.
78
+ //
79
+ // See http://en.wikipedia.org/wiki/Huffman_coding
80
+ void CreateHuffmanTree(const int *data,
81
+ const int length,
82
+ const int tree_limit,
83
+ uint8_t *depth) {
84
+ // For block sizes below 64 kB, we never need to do a second iteration
85
+ // of this loop. Probably all of our block sizes will be smaller than
86
+ // that, so this loop is mostly of academic interest. If we actually
87
+ // would need this, we would be better off with the Katajainen algorithm.
88
+ for (int count_limit = 1; ; count_limit *= 2) {
89
+ std::vector<HuffmanTree> tree;
90
+ tree.reserve(2 * length + 1);
91
+
92
+ for (int i = length - 1; i >= 0; --i) {
93
+ if (data[i]) {
94
+ const int count = std::max(data[i], count_limit);
95
+ tree.push_back(HuffmanTree(count, -1, i));
96
+ }
97
+ }
98
+
99
+ const int n = tree.size();
100
+ if (n == 1) {
101
+ depth[tree[0].index_right_or_value_] = 1; // Only one element.
102
+ break;
103
+ }
104
+
105
+ std::stable_sort(tree.begin(), tree.end(), SortHuffmanTree);
106
+
107
+ // The nodes are:
108
+ // [0, n): the sorted leaf nodes that we start with.
109
+ // [n]: we add a sentinel here.
110
+ // [n + 1, 2n): new parent nodes are added here, starting from
111
+ // (n+1). These are naturally in ascending order.
112
+ // [2n]: we add a sentinel at the end as well.
113
+ // There will be (2n+1) elements at the end.
114
+ const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
115
+ tree.push_back(sentinel);
116
+ tree.push_back(sentinel);
117
+
118
+ int i = 0; // Points to the next leaf node.
119
+ int j = n + 1; // Points to the next non-leaf node.
120
+ for (int k = n - 1; k > 0; --k) {
121
+ int left, right;
122
+ if (tree[i].total_count_ <= tree[j].total_count_) {
123
+ left = i;
124
+ ++i;
125
+ } else {
126
+ left = j;
127
+ ++j;
128
+ }
129
+ if (tree[i].total_count_ <= tree[j].total_count_) {
130
+ right = i;
131
+ ++i;
132
+ } else {
133
+ right = j;
134
+ ++j;
135
+ }
136
+
137
+ // The sentinel node becomes the parent node.
138
+ int j_end = tree.size() - 1;
139
+ tree[j_end].total_count_ =
140
+ tree[left].total_count_ + tree[right].total_count_;
141
+ tree[j_end].index_left_ = left;
142
+ tree[j_end].index_right_or_value_ = right;
143
+
144
+ // Add back the last sentinel node.
145
+ tree.push_back(sentinel);
146
+ }
147
+ SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
148
+
149
+ // We need to pack the Huffman tree in tree_limit bits.
150
+ // If this was not successful, add fake entities to the lowest values
151
+ // and retry.
152
+ if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
153
+ break;
154
+ }
155
+ }
156
+ }
157
+
158
// Reverses, in place, the elements of *v whose indices lie in the half-open
// range [start, end). Ranges with fewer than two elements are left as is.
void Reverse(std::vector<uint8_t>* v, int start, int end) {
  std::vector<uint8_t>& bytes = *v;
  for (int lo = start, hi = end - 1; lo < hi; ++lo, --hi) {
    const uint8_t held = bytes[lo];
    bytes[lo] = bytes[hi];
    bytes[hi] = held;
  }
}
168
+
169
+ void WriteHuffmanTreeRepetitions(
170
+ const int previous_value,
171
+ const int value,
172
+ int repetitions,
173
+ std::vector<uint8_t> *tree,
174
+ std::vector<uint8_t> *extra_bits_data) {
175
+ if (previous_value != value) {
176
+ tree->push_back(value);
177
+ extra_bits_data->push_back(0);
178
+ --repetitions;
179
+ }
180
+ if (repetitions == 7) {
181
+ tree->push_back(value);
182
+ extra_bits_data->push_back(0);
183
+ --repetitions;
184
+ }
185
+ if (repetitions < 3) {
186
+ for (int i = 0; i < repetitions; ++i) {
187
+ tree->push_back(value);
188
+ extra_bits_data->push_back(0);
189
+ }
190
+ } else {
191
+ repetitions -= 3;
192
+ int start = tree->size();
193
+ while (repetitions >= 0) {
194
+ tree->push_back(16);
195
+ extra_bits_data->push_back(repetitions & 0x3);
196
+ repetitions >>= 2;
197
+ --repetitions;
198
+ }
199
+ Reverse(tree, start, tree->size());
200
+ Reverse(extra_bits_data, start, tree->size());
201
+ }
202
+ }
203
+
204
+ void WriteHuffmanTreeRepetitionsZeros(
205
+ int repetitions,
206
+ std::vector<uint8_t> *tree,
207
+ std::vector<uint8_t> *extra_bits_data) {
208
+ if (repetitions == 11) {
209
+ tree->push_back(0);
210
+ extra_bits_data->push_back(0);
211
+ --repetitions;
212
+ }
213
+ if (repetitions < 3) {
214
+ for (int i = 0; i < repetitions; ++i) {
215
+ tree->push_back(0);
216
+ extra_bits_data->push_back(0);
217
+ }
218
+ } else {
219
+ repetitions -= 3;
220
+ int start = tree->size();
221
+ while (repetitions >= 0) {
222
+ tree->push_back(17);
223
+ extra_bits_data->push_back(repetitions & 0x7);
224
+ repetitions >>= 3;
225
+ --repetitions;
226
+ }
227
+ Reverse(tree, start, tree->size());
228
+ Reverse(extra_bits_data, start, tree->size());
229
+ }
230
+ }
231
+
232
// Adjusts the population counts in place so that the Huffman code built
// from them is more compatible with rle encoding.
//
//   length  number of entries in counts
//   counts  population counts; modified in place
//
// Returns 1 on success, including the cases where the histogram is left
// unchanged, and 0 only if the temporary buffer cannot be allocated.
int OptimizeHuffmanCountsForRle(int length, int* counts) {
  // Histograms with fewer than 16 used symbols are not touched at all.
  int used_symbols = 0;
  for (int pos = 0; pos < length; ++pos) {
    if (counts[pos] != 0) {
      ++used_symbols;
    }
  }
  if (used_symbols < 16) {
    return 1;
  }

  // Ignore trailing zeros: afterwards counts[length - 1] is non-zero.
  while (length > 0 && counts[length - 1] == 0) {
    --length;
  }
  if (length == 0) {
    return 1;  // All zeros.
  }

  int nonzeros = 0;
  int smallest_nonzero = 1 << 30;
  for (int pos = 0; pos < length; ++pos) {
    const int current = counts[pos];
    if (current == 0) {
      continue;
    }
    ++nonzeros;
    if (current < smallest_nonzero) {
      smallest_nonzero = current;
    }
  }
  if (nonzeros < 5) {
    // Small histogram will model it well.
    return 1;
  }
  const int zeros = length - nonzeros;
  if (smallest_nonzero < 4 && zeros < 6) {
    // Fill isolated single zeros that sit between two non-zero counts.
    // This runs front to back on purpose: a filled entry counts as
    // non-zero for the next position.
    for (int pos = 1; pos < length - 1; ++pos) {
      if (counts[pos] == 0 && counts[pos - 1] != 0 && counts[pos + 1] != 0) {
        counts[pos] = 1;
      }
    }
  }
  if (nonzeros < 28) {
    return 1;
  }

  // 2) Let's mark all population counts that already can be encoded
  // with an rle code.
  uint8_t* good_for_rle = static_cast<uint8_t*>(calloc(length, 1));
  if (good_for_rle == NULL) {
    return 0;
  }
  {
    // Let's not spoil any of the existing good rle codes.
    // Mark any run of at least 5 equal zero counts as good_for_rle.
    // Mark any run of at least 7 equal non-zero counts as good_for_rle.
    int run_symbol = counts[0];
    int run_length = 0;
    for (int pos = 0; pos <= length; ++pos) {
      if (pos != length && counts[pos] == run_symbol) {
        ++run_length;
        continue;
      }
      // The run [pos - run_length, pos) has just ended.
      const int needed = (run_symbol == 0) ? 5 : 7;
      if (run_length >= needed) {
        for (int k = pos - run_length; k < pos; ++k) {
          good_for_rle[k] = 1;
        }
      }
      run_length = 1;
      if (pos != length) {
        run_symbol = counts[pos];
      }
    }
  }

  // 3) Let's replace those population counts that lead to more rle codes.
  // Math here is in 24.8 fixed point representation.
  const int streak_limit = 1240;
  int stride = 0;
  int sum = 0;
  int limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
  for (int pos = 0; pos <= length; ++pos) {
    // A stride ends at the end of the data, next to anything marked
    // good_for_rle, or when the count strays too far from the limit.
    bool stride_ends = (pos == length);
    if (!stride_ends) {
      stride_ends = good_for_rle[pos] ||
                    (pos != 0 && good_for_rle[pos - 1]) ||
                    abs(256 * counts[pos] - limit) >= streak_limit;
    }
    if (stride_ends) {
      if (stride >= 4 || (stride >= 3 && sum == 0)) {
        // The stride must end, collapse what we have, if we have enough (4).
        int count;
        if (sum == 0) {
          // Don't make an all zeros stride to be upgraded to ones.
          count = 0;
        } else {
          count = (sum + stride / 2) / stride;
          if (count < 1) {
            count = 1;
          }
        }
        // counts[pos] already belongs to the next stride, so only the
        // stride entries before it are rewritten.
        for (int k = pos - stride; k < pos; ++k) {
          counts[k] = count;
        }
      }
      stride = 0;
      sum = 0;
      if (pos < length - 2) {
        // All interesting strides have a count of at least 4,
        // at least when non-zeros.
        limit =
            256 * (counts[pos] + counts[pos + 1] + counts[pos + 2]) / 3 + 420;
      } else if (pos < length) {
        limit = 256 * counts[pos];
      } else {
        limit = 0;
      }
    }
    ++stride;
    if (pos != length) {
      sum += counts[pos];
      if (stride >= 4) {
        limit = (256 * sum + stride / 2) / stride;
      }
      if (stride == 4) {
        limit += 120;
      }
    }
  }
  free(good_for_rle);
  return 1;
}
369
+
370
// Scans the code lengths in depth[0..length) and decides, separately for
// zero and non-zero values, whether run-length coding is worth using.
//
// Only runs of at least 3 zeros and runs of at least 4 equal non-zero
// values are counted. Each counted run contributes its length minus 2, and
// rle is enabled for a class when the contributions add up to more than 2.
//
//   depth                 code lengths to inspect
//   length                number of entries in depth
//   use_rle_for_non_zero  output flag for non-zero code lengths
//   use_rle_for_zero      output flag for zero code lengths
static void DecideOverRleUse(const uint8_t* depth, const int length,
                             bool *use_rle_for_non_zero,
                             bool *use_rle_for_zero) {
  int zero_run_total = 0;
  int zero_run_count = 0;
  int non_zero_run_total = 0;
  int non_zero_run_count = 0;

  uint32_t pos = 0;
  while (pos < length) {
    const int value = depth[pos];
    // Find the first index past the run of `value` that starts at pos.
    uint32_t run_end = pos + 1;
    while (run_end < length && depth[run_end] == value) {
      ++run_end;
    }
    const int reps = run_end - pos;
    if (value == 0) {
      if (reps >= 3) {
        zero_run_total += reps;
        ++zero_run_count;
      }
    } else if (reps >= 4) {
      non_zero_run_total += reps;
      ++non_zero_run_count;
    }
    pos = run_end;
  }

  *use_rle_for_non_zero = (non_zero_run_total - 2 * non_zero_run_count) > 2;
  *use_rle_for_zero = (zero_run_total - 2 * zero_run_count) > 2;
}
398
+
399
+ void WriteHuffmanTree(const uint8_t* depth,
400
+ uint32_t length,
401
+ std::vector<uint8_t> *tree,
402
+ std::vector<uint8_t> *extra_bits_data) {
403
+ int previous_value = 8;
404
+
405
+ // Throw away trailing zeros.
406
+ int new_length = length;
407
+ for (int i = 0; i < length; ++i) {
408
+ if (depth[length - i - 1] == 0) {
409
+ --new_length;
410
+ } else {
411
+ break;
412
+ }
413
+ }
414
+
415
+ // First gather statistics on if it is a good idea to do rle.
416
+ bool use_rle_for_non_zero = false;
417
+ bool use_rle_for_zero = false;
418
+ if (length > 50) {
419
+ // Find rle coding for longer codes.
420
+ // Shorter codes seem not to benefit from rle.
421
+ DecideOverRleUse(depth, new_length,
422
+ &use_rle_for_non_zero, &use_rle_for_zero);
423
+ }
424
+
425
+ // Actual rle coding.
426
+ for (uint32_t i = 0; i < new_length;) {
427
+ const int value = depth[i];
428
+ int reps = 1;
429
+ if ((value != 0 && use_rle_for_non_zero) ||
430
+ (value == 0 && use_rle_for_zero)) {
431
+ for (uint32_t k = i + 1; k < new_length && depth[k] == value; ++k) {
432
+ ++reps;
433
+ }
434
+ }
435
+ if (value == 0) {
436
+ WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data);
437
+ } else {
438
+ WriteHuffmanTreeRepetitions(previous_value,
439
+ value, reps, tree, extra_bits_data);
440
+ previous_value = value;
441
+ }
442
+ i += reps;
443
+ }
444
+ }
445
+
446
+ namespace {
447
+
448
// Returns the lowest num_bits bits of `bits` in reversed bit order.
//
// The value is reversed four bits at a time through a lookup table, which
// reverses a whole number of nibbles; the surplus low bits this produces
// when num_bits is not a multiple of four are shifted out at the end.
uint16_t ReverseBits(int num_bits, uint16_t bits) {
  static const size_t kLut[16] = {  // Pre-reversed 4-bit values.
    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
  };
  size_t reversed = kLut[bits & 0xf];
  int consumed = 4;
  while (consumed < num_bits) {
    bits >>= 4;
    reversed = (reversed << 4) | kLut[bits & 0xf];
    consumed += 4;
  }
  // Number of bits by which the nibble count overshoots num_bits (0..3).
  const int surplus = -num_bits & 0x3;
  return static_cast<uint16_t>(reversed >> surplus);
}
462
+
463
+ } // namespace
464
+
465
+ void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
466
+ // In Brotli, all bit depths are [1..15]
467
+ // 0 bit depth means that the symbol does not exist.
468
+ const int kMaxBits = 16; // 0..15 are values for bits
469
+ uint16_t bl_count[kMaxBits] = { 0 };
470
+ {
471
+ for (int i = 0; i < len; ++i) {
472
+ ++bl_count[depth[i]];
473
+ }
474
+ bl_count[0] = 0;
475
+ }
476
+ uint16_t next_code[kMaxBits];
477
+ next_code[0] = 0;
478
+ {
479
+ int code = 0;
480
+ for (int bits = 1; bits < kMaxBits; ++bits) {
481
+ code = (code + bl_count[bits - 1]) << 1;
482
+ next_code[bits] = code;
483
+ }
484
+ }
485
+ for (int i = 0; i < len; ++i) {
486
+ if (depth[i]) {
487
+ bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
488
+ }
489
+ }
490
+ }
491
+
492
+ } // namespace brotli